TSQL Challenge 17 - Solution by Bret Warnick



-- File Name: bret_warnick_tsqlchallenge_17.sql
--bret_warnick_tsqlchallenge_17.sql
/*
This solution only marks up keywords that are:
 surrounded by spaces
 at the beginning of the data followed by a space
 at the end of the data preceded by a space
*/

--create a cte for primarily the data with a space added to each end so we don't 
--have to look for words at the beginning or end of the string differently (the
--spaces will be stripped at final output)
;with DataOriginalKeywordOnly as (
select id, keyword, ' '+data+' ' data from @t)
--This cte will be the basis for all the keyword replacements and will 
--hold the keyword with a space for before and after delimeters.
--It will also hold the string replacement with the a href markup.  The
--replacement string itself will have all spaces replaced with a token 
--(assumed to not appear in the original keywords, original data, or delimiters)
--so smaller keywords that may be contained within larger keywords 
--won't nest themselves later.
,KeywordReplaceString as (
select *, ROW_NUMBER() over (order by len(keyword), id) KeywordID from 
(select id
	, ' '+keyword+' ' keyword
	, ' '+replace('<a href="tsql.com?id='+convert(varchar,id)+'">'+keyword+'</a>',' ','~|~')+' ' as sAHREF 
	from @t) sub
)
--With each data, iterate through each keyword starting with the longest keyword
--down to the shortest and replace with the markup that has tokens in place of spaces
--to prevent subsequent replaces from nesting markup if a shorter keyword is found 
--within a longer keyword
,ReplaceWithMarkup as (
select t.id, t.keyword, replace(t.data,k.keyword,k.sAHREF) data, k.keywordid
	from DataOriginalKeywordOnly t inner join KeywordReplaceString k on k.KeywordID=(select MAX(KeywordID) from KeywordReplaceString)
union all
select t.id, t.keyword, replace(t.data,k.keyword,k.sAHREF) data, k.keywordid
	from ReplaceWithMarkup t inner join KeywordReplaceString k on k.KeywordID=t.KeywordID-1)
--Now select only the last pass on replacing keywords in data, remove the tokens,
--and remove the spaces added to the beginning and end of each data
select id, keyword, replace(substring(data,2,LEN(data)-1),'~|~',' ') data 
	from ReplaceWithMarkup where KeywordID=1 /*because we worked recursivley backwards, only the first has the data after applying all replaces*/ 
	order by 1
	option (maxrecursion 32767)

Did you find something incorrect/wrong with this solution? Take a few seconds to Report It.

Did you understand how this solution work? If you find it difficult to understand, you can Request an Explanation or you can Write an explanation to help others better understand this solution.