patternsqlMinor
Theil-Sen estimator function in T-SQL
Viewed 0 times
estimatorsqlfunctionsentheil
Problem
Does anyone have a Theil-Sen regression function written in T-SQL?
I found one written in Perl, but I'm not able to recode it into SQL.
I found one written in Perl, but I'm not able to recode it into SQL.
Solution
I was lying when I said, I'm not able to recode it into SQL. I was just too lazy. Here is the code with an example of usage.
The Code is based on a TheiSen perl library, using QuickMedian.
Let's define a new table type to easily pass our data to the procedure.
Please note the ID column, which is important here as our solution uses CROSS APPLY statement in order to achieve correct interpretation of the inner loop found in TheilSen.pm.
We will also need a new data type for storing an array of real type values.
Here is the f_QuickMedian function, returning median for given array. Credit for this one goes to Itzik Ben-Gan.
And the p_TheilSen Estimator:
Example:
Returns:
Just for a comparision, perl version returns following values for the same data set:
I use TheilSen estimator to calculate DaysToFill metric for filesystems.
Enjoy!
The Code is based on a TheiSen perl library, using QuickMedian.
Let's define a new table type to easily pass our data to the procedure.
CREATE TYPE dbo.TheilSenInputDataTableType AS TABLE
(
ID INT IDENTITY(1,1),
x REAL,
y REAL
)Please note the ID column, which is important here as our solution uses CROSS APPLY statement in order to achieve correct interpretation of the inner loop found in TheilSen.pm.
my ($x1,$x2,$y1,$y2);
foreach my $i(0 .. $n-2){
$y1 = $y->[$i];
$x1 = $x->[$i];
foreach my $j($i+1 .. $n-1){
$y2 = $y->[$j];
$x2 = $x->[$j];We will also need a new data type for storing an array of real type values.
CREATE TYPE [dbo].[RealArray] AS TABLE(
[val] [real] NULL
)Here is the f_QuickMedian function, returning median for given array. Credit for this one goes to Itzik Ben-Gan.
CREATE FUNCTION [dbo].[f_QuickMedian](@RealArray RealArray READONLY)
RETURNS REAL
AS
BEGIN
DECLARE @Median REAL;
DECLARE @QMedian REAL;
SELECT @Median = AVG(1.0 * val)
FROM
(
SELECT o.val, rn = ROW_NUMBER() OVER (ORDER BY o.val), c.c
FROM @RealArray AS o
CROSS JOIN (SELECT c = COUNT(*) FROM @RealArray) AS c
) AS x
WHERE rn IN ((c + 1)/2, (c + 2)/2);
SELECT TOP 1 @QMedian = val FROM @RealArray
ORDER BY ABS(val - @Median) ASC, val DESC
RETURN @QMedian
ENDAnd the p_TheilSen Estimator:
CREATE PROCEDURE [dbo].[p_TheilSen](
@TheilSenInput TheilSenInputDataTableType READONLY
, @m Real OUTPUT
, @c Real OUTPUT
)
AS
BEGIN
DECLARE
@m_arr RealArray
, @c_arr RealArray;
INSERT INTO @m_arr
SELECT m
FROM
(
SELECT
t1.x as x1
, t1.y as y1
, t2o.x as x2
, t2o.y as y2
, t2o.y-t1.y as [y2 - y1]
, t2o.x-t1.x as [x2 - x1]
, CASE WHEN (t2o.x <> t1.x) THEN CAST((t2o.y-t1.y) AS Real)/(t2o.x-t1.x) ELSE NULL END AS [($y2-$y1)/($x2-$x1)]
, CASE WHEN t1.y = t2o.y THEN 0
ELSE
CASE WHEN t1.x = t2o.x THEN NULL
ELSE
-- push @M, ($y2-$y1)/($x2-$x1);
CAST((t2o.y-t1.y) AS Real)/(t2o.x-t1.x)
END
END as m
FROM @TheilSenInput t1
CROSS APPLY
(
SELECT t2.x, t2.y
FROM @TheilSenInput t2
WHERE t2.ID > t1.ID
) t2o
) t
WHERE m IS NOT NULL
SELECT @m = dbo.f_QuickMedian(@m_arr)
INSERT INTO @c_arr
SELECT y - (@m * x)
FROM @TheilSenInput
SELECT @c = dbo.f_QuickMedian(@c_arr)
ENDExample:
DECLARE
@in TheilSenInputDataTableType
, @m Real
, @c Real
INSERT INTO @in(x,y) VALUES (10.79,118.99)
INSERT INTO @in(x,y) VALUES (10.8,120.76)
INSERT INTO @in(x,y) VALUES (10.86,122.71)
INSERT INTO @in(x,y) VALUES (10.93,125.48)
INSERT INTO @in(x,y) VALUES (10.99,127.31)
INSERT INTO @in(x,y) VALUES (10.96,130.06)
INSERT INTO @in(x,y) VALUES (10.98,132.41)
INSERT INTO @in(x,y) VALUES (11.03,135.89)
INSERT INTO @in(x,y) VALUES (11.08,139.02)
INSERT INTO @in(x,y) VALUES (11.1,140.25)
INSERT INTO @in(x,y) VALUES (11.19,145.61)
INSERT INTO @in(x,y) VALUES (11.25,153.45)
INSERT INTO @in(x,y) VALUES (11.4,158.03)
INSERT INTO @in(x,y) VALUES (11.61,162.72)
INSERT INTO @in(x,y) VALUES (11.69,167.67)
INSERT INTO @in(x,y) VALUES (11.91,172.86)
INSERT INTO @in(x,y) VALUES (12.07,177.52)
INSERT INTO @in(x,y) VALUES (12.32,182.09)
EXEC p_TheilSen @in, @m = @m OUTPUT, @c = @c OUTPUT
SELECT @m
SELECT @cReturns:
m = 52.7079
c = -448.4853Just for a comparision, perl version returns following values for the same data set:
m = 52.7078651685394
c = -448.484943820225I use TheilSen estimator to calculate DaysToFill metric for filesystems.
Enjoy!
Code Snippets
CREATE TYPE dbo.TheilSenInputDataTableType AS TABLE
(
ID INT IDENTITY(1,1),
x REAL,
y REAL
)my ($x1,$x2,$y1,$y2);
foreach my $i(0 .. $n-2){
$y1 = $y->[$i];
$x1 = $x->[$i];
foreach my $j($i+1 .. $n-1){
$y2 = $y->[$j];
$x2 = $x->[$j];CREATE TYPE [dbo].[RealArray] AS TABLE(
[val] [real] NULL
)CREATE FUNCTION [dbo].[f_QuickMedian](@RealArray RealArray READONLY)
RETURNS REAL
AS
BEGIN
DECLARE @Median REAL;
DECLARE @QMedian REAL;
SELECT @Median = AVG(1.0 * val)
FROM
(
SELECT o.val, rn = ROW_NUMBER() OVER (ORDER BY o.val), c.c
FROM @RealArray AS o
CROSS JOIN (SELECT c = COUNT(*) FROM @RealArray) AS c
) AS x
WHERE rn IN ((c + 1)/2, (c + 2)/2);
SELECT TOP 1 @QMedian = val FROM @RealArray
ORDER BY ABS(val - @Median) ASC, val DESC
RETURN @QMedian
ENDCREATE PROCEDURE [dbo].[p_TheilSen](
@TheilSenInput TheilSenInputDataTableType READONLY
, @m Real OUTPUT
, @c Real OUTPUT
)
AS
BEGIN
DECLARE
@m_arr RealArray
, @c_arr RealArray;
INSERT INTO @m_arr
SELECT m
FROM
(
SELECT
t1.x as x1
, t1.y as y1
, t2o.x as x2
, t2o.y as y2
, t2o.y-t1.y as [y2 - y1]
, t2o.x-t1.x as [x2 - x1]
, CASE WHEN (t2o.x <> t1.x) THEN CAST((t2o.y-t1.y) AS Real)/(t2o.x-t1.x) ELSE NULL END AS [($y2-$y1)/($x2-$x1)]
, CASE WHEN t1.y = t2o.y THEN 0
ELSE
CASE WHEN t1.x = t2o.x THEN NULL
ELSE
-- push @M, ($y2-$y1)/($x2-$x1);
CAST((t2o.y-t1.y) AS Real)/(t2o.x-t1.x)
END
END as m
FROM @TheilSenInput t1
CROSS APPLY
(
SELECT t2.x, t2.y
FROM @TheilSenInput t2
WHERE t2.ID > t1.ID
) t2o
) t
WHERE m IS NOT NULL
SELECT @m = dbo.f_QuickMedian(@m_arr)
INSERT INTO @c_arr
SELECT y - (@m * x)
FROM @TheilSenInput
SELECT @c = dbo.f_QuickMedian(@c_arr)
ENDContext
StackExchange Database Administrators Q#133138, answer score: 9
Revisions (0)
No revisions yet.