--===== This is the original data setup script posted at...
-- http://beyondrelational.com/blogs/tc/archive/2009/08/31/tsql-challenge-1...
-- The only thing that has been changed is some minor formatting for readablity
DECLARE @t TABLE
(
InvID INT IDENTITY,
BatchNumber INT,
InvoiceNumber INT,
VisitDate DATETIME,
Amount MONEY
)
INSERT INTO @t(BatchNumber, InvoiceNumber, VisitDate, Amount)
SELECT 10000001,20001,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20001,'2009-01-04',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20002,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20003,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20003,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-03',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-04',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-05',50.00 UNION ALL
SELECT 10000001,20004,'2009-01-06',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-01',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-02',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-13',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-14',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-15',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-06',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-07',50.00 UNION ALL
SELECT 10000001,20005,'2009-01-08',50.00 UNION ALL
SELECT 10000001,20006,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-02',50.00 UNION ALL
SELECT 10000002,20007,'2009-01-03',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-01',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-02',50.00 UNION ALL
SELECT 10000002,20008,'2009-01-03',50.00
;
--====================================================================
-- Prepare the work table
--====================================================================
--===== Conditionally drop the work table.
-- You don't need to do this in a stored procedure because Temp tables only have a narrow scope
-- and they will drop automatically at the end the proc that created them. I'm doing the drop
-- here just to make reruns of this code a total "no brainer".
IF OBJECT_ID('TempDB..#Work','U') IS NOT NULL
DROP TABLE #Work
;
--===== Create the work table on the fly while populating it with data from the original table.
-- Note the creation of two new columns. MyCount uses ROW_NUMBER() to determine how many
-- invoices there are for each invoice. The "first" entry for each invoice will contain the
-- number of entries for that invoice so we don't need to have a bunch of correlated sub
-- queries to effectively count how many items there are for each invoice before we pass
-- through all the invoices. The other new column is the SET column as stipulated in the
-- requirements for this challenge. Note that I wouldn't have picked a reserved word for a
-- column name, but it is what it is. Also note that SELECT/INTO is super fast even if
-- used in the FULL RECOVERY MODE. Depending on the machine and the data copied, it can
-- copy a million rows AND create the target table in just a second or two.
--
-- Another little trick. I need for BatchNumber and InvoiceNumber to be NOT NULL just
-- like InvID will be because it's an IDENTITY column in the original table. Notice how
-- I use ISNULL to accomplish that while copying the data.
--
-- Also, since ANSI compliance on this code is out the window anyway, I decided to make
-- the code VERY readable with the columnalias = expression method. And, NO, that method
-- has NOT been deprecated. 'string' = expression is the method that has been deprecated.
-- Please refer to the 2008 deprecation list in Books Online for proof.
SELECT InvID,
BatchNumber = ISNULL(BatchNumber,0),
InvoiceNumber = ISNULL(InvoiceNumber,0),
VisitDate,
Amount,
MyCount = ROW_NUMBER() OVER (PARTITION BY BatchNumber, InvoiceNumber
ORDER BY BatchNumber, InvoiceNumber, InvID DESC),
[Set] = CAST(0 AS INT)
INTO #Work
FROM @t
;
--===== This is the "magic" dust we need to make this bit of code fly. This is the required
-- Clustered index I spoke of earlier. It must be in the order we expect the "Pseudo
-- Cursor" update to work in because it will always follow that order when a "Quirky
-- Update" is used. Despite a million warnings from various other MVP's, this method
-- has worked correctly since the days of Sybase and, if properly setup and other "rules"
-- are followed, NO ONE has ever been able to make this method fail either by accident
-- or intentionally. It works in all versions of SQL Server.
ALTER TABLE #Work
ADD PRIMARY KEY CLUSTERED (BatchNumber, InvoiceNumber, InvID) WITH FILLFACTOR = 100
;
--===== Just doing a sanity check here so that everyone can see what the MyCount column
-- contains. Uncomment this section of code if you want to see it.
-- SELECT *
-- FROM #Work
-- ORDER BY InvID
--;
--====================================================================
-- Solve the problem as defined in the requirements of Challenge #13 using the "Quirky Update".
-- It's a true "set-based" solution that uses a handful of local working variables and a single
-- UPDATE that uses the "3 part" Quirky Update method whose syntax is documented but not
-- explained in Books Online.
--
-- I've also made it so that batch size is adjustable by changing the value of a single
-- variable. This variable could be populated by the content of a table according to vendor
-- and batch type. For the purposes of this challenge, I've hardcoded the value assignment
-- to "10". No error checking has been done for this value because the value SHOULD be checked
-- by a CHECK constraint on the recommended "Configuration" table I just mentioned.
--
-- This code will do the job on a million rows in just several seconds. It's one of the
-- fastest methods known to do calculated "data smears" like those found in the requirements
-- for this challenge.
--
-- Just to say it again, NO ONE has ever been able to make a properly formed "Quirky Update"
-- that follows the rules fail.
--====================================================================
--===== Declare some obviously named local variables.
-- Note that these are in alphabetical order to make finding a variable definition easier to do
-- when troubleshooting. This is especially important if there are a lot of variables.
DECLARE @BatchSize INT,
@CurrentSet INT,
@PrevBatchNumber INT,
@PrevInvoiceNumber INT,
@RunningCount INT
;
--===== Preset the variables including the programmable batch size.
-- Since the "Quirky Update" isn't exactly ANSI compliant, I threw ANSI compliance out the
-- bloody door here. We're going for speed and readability here just like we did for the
-- DECLARE!
SELECT @BatchSize = 10,
@CurrentSet = 0,
@PrevBatchNumber = 0,
@PrevInvoiceNumber = 0,
@RunningCount = 0
;
--===== All set... do the "Quirky Update" just like we would if we were doing this in VB or C#...
-- Basically, read a row, set some variables according to some decisions, update the SET
-- column based on those decisions, and set some more variables to remember the current
-- row's data for when we process the next row. The details are in the comments in the
-- code below.
--
-- Notice that all of the CASE statements have been "short ciruited" to take the most
-- likely condition first to maximize performance and reduce CPU usage.
UPDATE #Work
SET @RunningCount = CASE
--==== If same batch/invoice then no change in running count
WHEN BatchNumber = @PrevBatchNumber
AND InvoiceNumber = @PrevInvoiceNumber
THEN @RunningCount
--==== If different batch/invoice then add new count to "running count"
ELSE @RunningCount + MyCount
END,
@CurrentSet = [Set] -- 3 part "quirky" update (@var = ColName = expression)
= CASE -- This 3 part update changes the value of "SET" in the table.
--==== If same batch & running count <= batch size then no change in "SET"
WHEN BatchNumber = @PrevBatchNumber
AND @RunningCount <= @BatchSize
THEN @CurrentSet
--==== If same batch & running count > batch size then add 1 to "SET".
WHEN BatchNumber = @PrevBatchNumber
THEN @CurrentSet + 1
--==== Otherwise, batch has changed and we need to start at 1
ELSE 1
END,
@RunningCount = CASE
--==== If the running count hasn't exceeded the batch size,
-- we keep the same running count. If the running count
-- HAS exceeded the batch size, we've already updated rows
-- with a new SET value and we have to reset the running
-- count to remember the rows wejust processed. Notice
-- that this is the SECOND time we're updating the running
-- count variable in this update. SQL Server allows it
-- even when it's not a Quirky Update.
-- Heh... like I said, throwing ANSI out the door because
-- we're in this for performance. Besides, the requirements
-- said that we need this to run in 2005/2008.
WHEN @RunningCount <= @BatchSize
THEN @RunningCount
ELSE MyCount
END,
@PrevBatchNumber = BatchNumber, --Remember current row for next row to be processed
@PrevInvoiceNumber = InvoiceNumber --Remember current row for next row to be processed
FROM #Work WITH (TABLOCKX) --TABLOCKX makes just one lock instead of
OPTION (MAXDOP 1) --You MUST prevent parallelism for a "Quirky Update" just like in VB or C#
;
--===== Do the "final" SELECT to produce the desired result set according to the requirements
-- of Challenge 13.
SELECT InvID,
BatchNumber,
InvoiceNumber,
VisitDate,
Amount,
[Set]
FROM #Work
ORDER BY InvID --NEVER EVER trust the order of a SELECT to a Clustered Index even
--though this sort will NOT show up in the Execution Plan! That type
--of stuff ONLY works with the "Quirky Update". TRUST ME ON THIS!!!
;
--===================================================================
-- Verification Code
-- Heh... to quote Sergiy from SQLServerCentral.com, "A Developer must NOT guess...
-- A Developer must KNOW." In the name of all that is "holy" in the world of IT, here's a
-- bit of formatted test code to make it easy on the judges to visually verify that all is
-- well in "Quirky Update" land. This method could easily be adapted as an automated checker.
-- Even with this test, the "Quirky Update" will still beat any cursor and most other "Set
-- Based" methods (except for one possibility that I haven't figured out, yet) that are capable
-- of doing "previous row" calculations like those required to meet this challenge.
--
-- Yep... I know... reporting is normally the wrong thing to do in SQL especially with the
-- "right justification" I did. But, if you don't have anything else, now you know how
-- to do it (take a look at the output in the TEXT mode instead of the GRID mode. Besides,
-- it was fun and I have to get my "Brownie Points" in with the judges by making life easier
-- for them. ;-)
--
-- As a sidebar, this type of "report" could easily be inserted into a table (without the
-- fancy justification and skipped lines) so that an automated post run analysis could be
-- done to give doubters of the "Quirky Update" the nice warm fuzzies that it works every
-- time. Even with said analysis, the "Quirky Update" method will still beat most all other
-- methods including all cursors and 99% of all other set based methods.
--
-- Since GROUPING() in a GROUP BY is well beyond this challenge, I'll leave it up to the reader
-- to look it up in Books Online. Look under GROUP BY, CUBE... it's not under ROLLUP as you
-- think it might be.
--
-- Before anyone get's started on me about it, COUNT(*) is just as effective as COUNT(1).
-- Unless you have code to prove otherwise, please don't bring it up because I have code to
-- prove they're the same. I also have a bucket of pork chops at the ready. ;-)
--===================================================================
SELECT 'Validation Table Follows...' AS Notice
;
WITH cteConvert AS
(
SELECT BatchNumber = STR(BatchNumber,11),
InvoiceNumber = STR(InvoiceNumber,13),
[Set] = STR([Set],7),
Amount
FROM #Work
)
SELECT BatchNumber = CASE
WHEN GROUPING(BatchNumber) = 0
THEN BatchNumber
ELSE ''
END,
InvoiceNumber = CASE
WHEN GROUPING(InvoiceNumber) = 0
THEN InvoiceNumber
ELSE ''
END,
[ Set] = CASE
WHEN GROUPING([Set]) = 0
THEN [Set]
ELSE ''
END,
LineType = CASE
WHEN GROUPING(BatchNumber) = 0 AND GROUPING(InvoiceNumber) = 0 AND GROUPING([Set]) = 0
THEN 'SubTotal for Invoice/Set'
WHEN GROUPING(BatchNumber) = 0 AND GROUPING(InvoiceNumber) = 0 AND GROUPING([Set]) = 1
THEN 'SubTotal for Invoice'
WHEN GROUPING(BatchNumber) = 0 AND GROUPING(InvoiceNumber) = 1 AND GROUPING([Set]) = 1
THEN 'SubTotal for BatchNumber'
WHEN GROUPING(BatchNumber) = 1 AND GROUPING(InvoiceNumber) = 1 AND GROUPING([Set]) = 1
THEN 'Grand Total'
END,
[ Qty] = STR(COUNT(*),5),
TotalAmount = STR(SUM(Amount),11,2),
[ ] = CASE --Heh... Guess what this does... hope you know something about ASCII characters. ;-)
WHEN GROUPING([Set]) = 0
THEN ''
ELSE CHAR(10) --CHAR(10) is the LineFeed or NewLine character.
END
FROM cteConvert
GROUP BY
BatchNumber,
InvoiceNumber,
[Set]
WITH ROLLUP