patternsqlMinor
Query to normalize table/combine row text
Viewed 0 times
normalizecombinequerytextrowtable
Problem
I have a table (call it oldTable) with columns like so:
ID (int),Rank (int),TextLineNumber (int),SomeText (varchar)
The primarykey is multi-part: ID+Rank+TextLineNumber.
I'm trying to transform/join it into another table (call it newTable) with columns like so:
ID (int), Rank (int), CombinedText (varchar)
and the primary key would be ID+Rank.
ID and Rank on the new table are already populated, but I need a query that would update the CombinedText column of the newTable with the following considerations:
Here's some example data:
old-
http://i54.tinypic.com/jq0vmx.png
new-
http://i53.tinypic.com/dhfyn8.png
I'm using MSSql 2005 if that matters.
I currently do this using T-SQL and while loops, but it's become a serious performance bottle neck (taking about 1 minute for 10000 rows).
Edit: Expanded example data in CSV:
Old:
New:
edit2:
Here's an example query that I found that does work but isn't fast enough (relying on multiple sub-queries):
```
update newtable set combinedtext =
coalesce ((select top 1 sometext from OldTable where OldTable.id=newtable.id and oldtable.rank=(select top 1 rank from oldtable where oldtable.id=newtable.id and oldtable.rank<=newtable.rank order by rank desc) and
ID (int),Rank (int),TextLineNumber (int),SomeText (varchar)
The primarykey is multi-part: ID+Rank+TextLineNumber.
I'm trying to transform/join it into another table (call it newTable) with columns like so:
ID (int), Rank (int), CombinedText (varchar)
and the primary key would be ID+Rank.
ID and Rank on the new table are already populated, but I need a query that would update the CombinedText column of the newTable with the following considerations:
- The Rank given on the new table may not exist on the old table, in which case it needs to pick the highest available rank from the old table that is not greater than the rank on the new table.
- The CombinedText column is a string concatenation of the "SomeText" column from the old table, concatenated in order of "TextLineNumber" using the Rank found from the first consideration.
Here's some example data:
old-
http://i54.tinypic.com/jq0vmx.png
new-
http://i53.tinypic.com/dhfyn8.png
I'm using MSSql 2005 if that matters.
I currently do this using T-SQL and while loops, but it's become a serious performance bottle neck (taking about 1 minute for 10000 rows).
Edit: Expanded example data in CSV:
Old:
ID,Rank,LineNumber,SomeText
1,1,1,the qu
1,1,2,ick br
1,1,3,own
1,2,1,some te
1,2,2,xt
1,3,1,sample
2,7,1,jumped ov
2,7,2,er the
2,7,3,lazy
2,13,1,samp
2,13,2,le text
3,1,1,ABC
3,1,2,DEF
3,1,3,GHI
3,1,4,JKL
3,50,1,XYZNew:
ID,Rank,CombinedText
1,2,some text
2,13,sample text
2,14,sample text
3,4,ABCDEFGHIJKL
3,5,ABCDEFGHIJKL
3,50,XYZ
3,55,XYZedit2:
Here's an example query that I found that does work but isn't fast enough (relying on multiple sub-queries):
```
update newtable set combinedtext =
coalesce ((select top 1 sometext from OldTable where OldTable.id=newtable.id and oldtable.rank=(select top 1 rank from oldtable where oldtable.id=newtable.id and oldtable.rank<=newtable.rank order by rank desc) and
Solution
This should work, I will clean it up later so its more efficient.
DECLARE @Old TABLE (
id INT,
rank INT,
linenumber INT,
sometext VARCHAR(1000))
DECLARE @New TABLE (
id INT,
rank INT,
combinedtext VARCHAR(1000))
;WITH combinedresults(ctid, id, rank, linenumber, combinedtext)
AS (SELECT 0,
id,
rank,
linenumber,
CAST (sometext AS VARCHAR(8000))
FROM @Old o
WHERE NOT EXISTS (SELECT TOP 1 1
FROM @Old
WHERE id = o.id
AND rank = o.rank
AND linenumber ct.linenumber)
UPDATE n
SET combinedtext = ct.combinedtext
FROM @New n
INNER JOIN (SELECT n.id,
n.rank,
MAX(o.rank) orank
FROM @new n
INNER JOIN @Old o
ON n.id = o.id
AND o.rank <= n.rank
GROUP BY n.id,
n.rank) r
ON n.id = r.id
AND n.rank = r.rank
INNER JOIN (SELECT id,
ct.rank,
MAX(ctid) ctid
FROM combinedresults ct
GROUP BY ct.id,
ct.rank) r2
ON r2.id = r.id
AND r2.rank = r.orank
INNER JOIN combinedresults ct
ON r.id = ct.id
AND ct.rank = r.orank
AND ct.ctid = r2.ctid
SELECT *
FROM @NewCode Snippets
DECLARE @Old TABLE (
id INT,
rank INT,
linenumber INT,
sometext VARCHAR(1000))
DECLARE @New TABLE (
id INT,
rank INT,
combinedtext VARCHAR(1000))
;WITH combinedresults(ctid, id, rank, linenumber, combinedtext)
AS (SELECT 0,
id,
rank,
linenumber,
CAST (sometext AS VARCHAR(8000))
FROM @Old o
WHERE NOT EXISTS (SELECT TOP 1 1
FROM @Old
WHERE id = o.id
AND rank = o.rank
AND linenumber < o.linenumber)
UNION ALL
SELECT ctid + 1,
o.id,
o.rank,
o.linenumber,
ct.combinedtext + o.sometext
FROM @Old o
INNER JOIN combinedresults ct
ON ct.id = o.id
AND ct.rank = o.rank
WHERE o.linenumber > ct.linenumber)
UPDATE n
SET combinedtext = ct.combinedtext
FROM @New n
INNER JOIN (SELECT n.id,
n.rank,
MAX(o.rank) orank
FROM @new n
INNER JOIN @Old o
ON n.id = o.id
AND o.rank <= n.rank
GROUP BY n.id,
n.rank) r
ON n.id = r.id
AND n.rank = r.rank
INNER JOIN (SELECT id,
ct.rank,
MAX(ctid) ctid
FROM combinedresults ct
GROUP BY ct.id,
ct.rank) r2
ON r2.id = r.id
AND r2.rank = r.orank
INNER JOIN combinedresults ct
ON r.id = ct.id
AND ct.rank = r.orank
AND ct.ctid = r2.ctid
SELECT *
FROM @NewContext
StackExchange Database Administrators Q#2193, answer score: 2
Revisions (0)
No revisions yet.