TSQL Challenge 14 - Solution by Stefan Oude Vrielink (1)



-- Stefan_Oude_Vrielink_tsqlchallenge_14 (1).sql
DECLARE @t TABLE (Data VARCHAR(40));

INSERT @t (Data)
SELECT '9992EDC6-D117-4DEE-B410-4E5FAE46AE97'
UNION ALL SELECT '0BFC936B-BD9A-4C6A-AFB2-CF3F1752F8B1'
UNION ALL SELECT '4A73E7EB-7777-4A04-9258-F1E75097977C'
UNION ALL SELECT '5AAF477C-274D-400D-9067-035968F33B19'
UNION ALL SELECT '725DA718-30D0-44A9-B36A-89F27CDFEEDE'
UNION ALL SELECT '8083ED5A-D3B9-4694-BB04-F0B09C588888';

------------------------------------------------------------------------------------------
-- Author:		Stefan Oude Vrielink
------------------------------------------------------------------------------------------
;WITH T( DataId, Data ) AS
( -- Add an id value to the data
  SELECT Row_Number() OVER ( ORDER BY Data ), Data FROM @t
)
, CharPos( Pos ) AS
( -- Select enough rows numbered from 1 to ..
  SELECT Row_Number() OVER ( ORDER BY object_id ) FROM sys.columns
)
, Transposed( DataId, Pos, Chr ) AS
( -- Combine both sets (as a result we get one character per row, identified by the data id)
  SELECT T.DataId, CharPos.Pos, SubString(T.Data,CharPos.Pos,1) AS Chr
  FROM T
    LEFT OUTER JOIN
    CharPos ON
      CharPos.Pos <= Len( t.Data )
)
, SequenceCount( DataId, Pos, Chr, StartPos, ChrLen ) AS
( -- Count same characters (by recursing the characters per data id in the transposed set)
  SELECT DataId, Pos, Chr, Cast(1 AS INT), 1
  FROM Transposed
  WHERE Pos = 1
  UNION ALL
  SELECT SequenceCount.DataId, Transposed.Pos, Transposed.Chr, Cast(CASE WHEN Transposed.Chr = SequenceCount.Chr THEN SequenceCount.StartPos ELSE Transposed.Pos END AS INT), CASE WHEN Transposed.Chr = SequenceCount.Chr THEN SequenceCount.ChrLen + 1 ELSE 1 END
  FROM SequenceCount
    INNER JOIN
    Transposed ON
      Transposed.DataId = SequenceCount.DataId
      AND Transposed.Pos = SequenceCount.Pos + 1
)
, Sequence( DataId, Chr, StartPos, ChrLen ) AS
( -- Select the sequences
  SELECT DataId, Chr, StartPos, MAX(ChrLen)
  FROM SequenceCount
  WHERE ChrLen > 1
  GROUP BY DataId, Chr, StartPos
)
, DataMaxLen( DataId, [Len] ) AS
( -- Determine the longest sequence of characters per data id
  SELECT DataId, MAX(ChrLen) FROM Sequence GROUP BY DataId
)
-- Finally... output the result
SELECT
  Data		= T.Data
, [Char]	= Sequence.Chr
, Pos		= Sequence.StartPos
, [Len]		= Sequence.ChrLen
FROM Sequence
  INNER JOIN
  T ON T.DataId = Sequence.DataId
  INNER JOIN
  DataMaxLen ON
    DataMaxLen.DataId = Sequence.DataId
ORDER BY
  DataMaxLen.[Len] DESC --> Data with longest sequence should come on top followed by the next longest sequence
, T.Data, Sequence.StartPos --> Within each data, the rows should be ordered by the position at which the sequence starts