TSQL Challenge 13 - Solution by Brad Schulz




DECLARE @t TABLE (
    InvID INT IDENTITY,
    BatchNumber INT,
    InvoiceNumber INT, 
    VisitDate DATETIME,
    Amount MONEY )
INSERT INTO @t(BatchNumber, InvoiceNumber, VisitDate, Amount)
SELECT 10000001,20001,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-04',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20003,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20003,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-04',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-05',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-06',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-13',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-14',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-15',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-06',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-07',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-08',50.00 UNION ALL
SELECT 10000001,20006,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-02',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-03',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-02',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-03',50.00


;with BISmry as
(
  select BatchNumber
        ,InvoiceNumber
        ,NumLines=count(*)
        ,RowNum=row_number() over (partition by BatchNumber 
                                   order by InvoiceNumber)
  from @t
  group by BatchNumber
          ,InvoiceNumber
)
--
-- The above BISmry CTE creates a summary of the BatchNumber/InvoiceNumber 
--   combinations, calculating the number of lines taken up by each 
--   invoice and calculating a sequential RowNumber for each Invoice 
--   within the Batch:
--
-- BatchNumber InvoiceNumber NumLines RowNum
-- ----------- ------------- -------- ------
--    10000001         20001        4      1
--    10000001         20002        3      2
--    10000001         20003        2      3
--    10000001         20004        6      4
--    10000001         20005        8      5
--    10000001         20006        1      6
--    10000002         20007        3      1
--    10000002         20008        3      2
--
,BISets as
( 
  select BatchNumber
        ,InvoiceNumber
        ,NumLines
        ,RowNum
        ,[Set]=cast(1 as integer)
        ,AccumLines=NumLines
  from BISmry
  where RowNum=1
  -- 
  -- The anchor of the recursive BISets CTE (above) takes all BISmry
  --   rows that have a RowNum of 1, and it establishes the desired 
  --   column named Set, and a column called AccumLines, which will
  --   be the number accumulated lines so far for the Set column value.
  --
  -- BatchNumber InvoiceNumber NumLines RowNum Set AccumLines
  -- ----------- ------------- -------- ------ --- ----------
  --    10000001         20001        4      1   1          4
  --    10000002         20007        3      1   1          3
  --
  union all
  select CurrRow.BatchNumber
        ,CurrRow.InvoiceNumber
        ,CurrRow.NumLines
        ,CurrRow.RowNum
        ,[Set]=PriorRow.[Set] + case 
                                  when NewAccumLines>10
                                  then 1
                                  else 0 
                                end
        ,AccumLines=case 
                      when NewAccumLines>10
                      then CurrRow.NumLines
                      else NewAccumLines
                    end           
  from BISets PriorRow
  join BISmry CurrRow on PriorRow.BatchNumber=CurrRow.BatchNumber 
                         and CurrRow.RowNum=PriorRow.RowNum+1
  cross apply (select NewAccumLines=PriorRow.AccumLines+CurrRow.NumLines) F
  --
  -- The recursive part of the query brings in the next RowNum 
  --   in the Batch, and, as it does so, it will calculate new 
  --   values for the Set and AccumLines columns.  
  -- Note how I alias the recursive FROM BISets as "PriorRow" 
  --   and I alias the new current data being brought in from
  --   BISmry as "CurrRow"... this helps the query to look
  --   a little clearer as I calculate new Set and AccumLines
  --   values.
  -- Also note how I use CROSS APPLY to create a "work" column
  --   called "NewAccumLines".  This takes on the value of the
  --   prior row's AccumLines value plus the new current row's
  --   NumRows value.  Again, this helps to make it clearer
  --   as to what is going on below:
  -- For the Set column, it looks to see if the NewAccumLines
  --   value exceeds 10.
  --   If it does, then it's going to increase the prior row's 
  --     Set value by 1, establishing a new set.
  --   If it does not exceed 10, then it just carries forward 
  --     the prior row's Set value.
  -- Similarly, for the AccumLines column, it looks to see if 
  --   the NewAccumLines value exceeds 10.
  --   If it does, then it's going to "reset" the current row's 
  --     AccumLines value to its NumLines value, because we've 
  --     established a new set, and we are starting a new
  --     accumulation from scratch.
  --   If it does not exceed 10, then it just sets the AccumLines
  --     value to the NewAccumLines value.
  -- 
  -- FIRST ITERATION BRINGS IN RowNum=2:
  -- Looking at BatchNumber 10000001, the NewAccumLines value
  --   is set to the prior row's AccumLines value (4) plus the 
  --   new row's NumLines value (3), which equals 7.  This does
  --   NOT exceed 10, so we carry forward the same Set Value and we
  --   set AccumLines to the NewAccumLines value.
  -- The same kind of situation occurred for 
  --   BatchNumber 10000002.
  -- BatchNumber InvoiceNumber NumLines RowNum Set AccumLines
  -- ----------- ------------- -------- ------ --- ----------
  --    10000001         20001        4      1   1          4
  --    10000001         20002        3      2   1          7 <==
  --    10000002         20007        3      1   1          3
  --    10000002         20008        3      2   1          6 <==
  --
  -- SECOND ITERATION BRINGS IN RowNum=3:
  -- Looking at BatchNumber 10000001, the NewAccumLines value
  --   is set to the prior row's AccumLines value (7) plus the 
  --   new row's NumLines value (2), which equals 9.  This does
  --   NOT exceed 10, so we carry forward the same Set Value and we
  --   set AccumLines to the NewAccumLines value.
  -- Note that BatchNumber 10000002 had no more rows
  --   to bring in... it is finished.
  -- BatchNumber InvoiceNumber NumLines RowNum Set AccumLines
  -- ----------- ------------- -------- ------ --- ----------
  --    10000001         20001        4      1   1          4
  --    10000001         20002        3      2   1          7
  --    10000001         20003        2      3   1          9 <==
  --    10000002         20007        3      1   1          3
  --    10000002         20008        3      2   1          6
  --
  -- THIRD ITERATION BRINGS IN RowNum=4:
  -- Here, the NewAccumLines value is set to the prior row's
  --   AccumLines value (9) plus the new row's NumLines 
  --   value (6), which equals 15.  This DOES exceed 10,
  --   so we increment the prior row's Set value, and we
  --   reset the new row's AccumLines value to its NumLines 
  --   value, since we are starting a brand new set.
  -- BatchNumber InvoiceNumber NumLines RowNum Set AccumLines
  -- ----------- ------------- -------- ------ --- ----------
  --    10000001         20001        4      1   1          4
  --    10000001         20002        3      2   1          7
  --    10000001         20003        2      3   1          9
  --    10000001         20004        6      4   2          6 <==
  --    10000002         20007        3      1   1          3
  --    10000002         20008        3      2   1          6
  --
  -- FOURTH ITERATION BRINGS IN RowNum=5:
  -- Again, the NewAccumLines value is set to the prior row's
  --   AccumLines value (6) plus the new row's NumLines 
  --   value (8), which equals 14.  This DOES exceed 10,
  --   so we increment the prior row's Set value, and we
  --   reset the new row's AccumLines value to its NumLines 
  --   value, since we are starting a brand new set.
  -- BatchNumber InvoiceNumber NumLines RowNum Set AccumLines
  -- ----------- ------------- -------- ------ --- ----------
  --    10000001         20001        4      1   1          4
  --    10000001         20002        3      2   1          7
  --    10000001         20003        2      3   1          9
  --    10000001         20004        6      4   2          6
  --    10000001         20005        8      5   3          8 <==
  --    10000002         20007        3      1   1          3
  --    10000002         20008        3      2   1          6
  --
  -- FIFTH ITERATION BRINGS IN RowNum=6:
  -- This should be self-explanatory by now.  Note that
  --   this will be the final iteration, because there will
  --   be no RowNum of 7.
  -- BatchNumber InvoiceNumber NumLines RowNum Set AccumLines
  -- ----------- ------------- -------- ------ --- ----------
  --    10000001         20001        4      1   1          4
  --    10000001         20002        3      2   1          7
  --    10000001         20003        2      3   1          9
  --    10000001         20004        6      4   2          6
  --    10000001         20005        8      5   3          8 
  --    10000001         20006        1      6   3          9 <==
  --    10000002         20007        3      1   1          3
  --    10000002         20008        3      2   1          6
)
select t.InvID
      ,t.BatchNumber
      ,t.InvoiceNumber
      ,t.VisitDate
      ,t.Amount
      ,s.[Set]
from @t t
join BISets s on t.BatchNumber=s.BatchNumber 
                 and t.InvoiceNumber=s.InvoiceNumber
order by t.InvID
-- 
-- Now that the BISets CTE has established the Set value for
-- each BatchNumber/InvoiceNumber combination, we just JOIN it
-- back to the original table.
-- We also ORDER BY InvID so we can see the output in the 
-- same order as the original table as it was populated.