patternsqlMinor
Aggregate sales of the past 12 months for the current row date
Viewed 0 times
thedatepastmonthsforsalescurrentrowaggregate
Problem
I need to compute the trailing 12 months sum of sales for a given client_id, based on each row given month.
Here is the initial table in a month by month aggregated sales by client (filtered here on a specific client
Please note that some months do not have any sales (no row), so I guess I cannot use a
Using this great answer for a similar problem (Rolling sum / count / average over date interval) I have done this query:
Results are OK:
```
idclient | month_transac | sales | sales_ttm
-----------+---------------+--------+---------
511656A7
Here is the initial table in a month by month aggregated sales by client (filtered here on a specific client
511656A75):CREATE TEMP TABLE foo AS
SELECT idclient, month_transac, sales
FROM ( VALUES
( '511656A75', '2010-06-01', 68.57 ),
( '511656A75', '2010-07-01', 88.63 ),
( '511656A75', '2010-08-01', 94.91 ),
( '511656A75', '2010-09-01', 70.66 ),
( '511656A75', '2010-10-01', 28.84 ),
( '511656A75', '2015-10-01', 85.00 ),
( '511656A75', '2015-12-01', 114.42 ),
( '511656A75', '2016-01-01', 137.08 ),
( '511656A75', '2016-03-01', 172.92 ),
( '511656A75', '2016-04-01', 125.00 ),
( '511656A75', '2016-05-01', 127.08 ),
( '511656A75', '2016-06-01', 104.17 ),
( '511656A75', '2016-07-01', 98.22 ),
( '511656A75', '2016-08-01', 37.08 ),
( '511656A75', '2016-10-01', 108.33 ),
( '511656A75', '2016-11-01', 104.17 ),
( '511656A75', '2017-01-01', 201.67 )
) AS t(idclient, month_transac, sales);Please note that some months do not have any sales (no row), so I guess I cannot use a
WINDOW function (with the preceeding 12 rows for example).Using this great answer for a similar problem (Rolling sum / count / average over date interval) I have done this query:
SELECT t1.idclient
, t1.month_transac
, t1.sales
, SUM(t2.sales) as sales_ttm
FROM temp_sales_sample_month_aggr t1
LEFT JOIN temp_sales_sample_month_aggr t2 USING (idclient)
WHERE
t1.idclient = '511656A75' -- for example only
AND t2.month_transac >= (t1.month_transac - interval '12 months')
AND t2.month_transac < t1.month_transac
GROUP BY 1, 2, 3
ORDER BY 2
;Results are OK:
sales_ttm is the trailing 12 months sum of sales, without the sales of the row month (i.e. last row Jan 2017 sums all 2016 sales). ```
idclient | month_transac | sales | sales_ttm
-----------+---------------+--------+---------
511656A7
Solution
Something like this should work..
Here we
-
Calculate the date-range for the idclient in a CTE.
-
-
Use a window function that uses windows over
-
Select just the rows where
Output,
-- IN A CTE
-- Grab the idclient, and the monthly range needed
-- We need the range because you can't sum over NULL (yet, afaik).
WITH idclient_month AS (
SELECT idclient, month_transac
FROM (
SELECT idclient, min(month_transac), max(month_transac)
FROM foo
GROUP BY idclient
) AS t
CROSS JOIN LATERAL generate_series(min::date, max::date, '1 month')
AS gs(month_transac)
)
-- If we move this where clause down the rows get filtered /before/ the window function
SELECT *
FROM (
SELECT
idclient,
month_transac,
monthly_sales,
sum(monthly_sales) OVER (
PARTITION BY idclient
ORDER BY month_transac
ROWS 12 PRECEDING
)
- monthly_sales
AS sales_ttm
-- Here, we sum up the sales by idclient, and month
-- We coalesce to 0 so we can use this in a window function
FROM (
SELECT idclient, month_transac, coalesce(sum(sales), 0) AS monthly_sales
FROM foo
RIGHT OUTER JOIN idclient_month
USING (idclient,month_transac)
GROUP BY idclient, month_transac
ORDER BY idclient, month_transac
) AS t
) AS g
WHERE g.monthly_sales > 0;Here we
-
Calculate the date-range for the idclient in a CTE.
SELECT idclient, month_transac
FROM (
SELECT idclient, min(month_transac), max(month_transac)
FROM foo
GROUP BY idclient
) AS t
CROSS JOIN LATERAL generate_series(min::date, max::date, '1 month')
AS gs(month_transac)
idclient | month_transac
-----------+------------------------
511656A75 | 2010-06-01 00:00:00-05
511656A75 | 2010-07-01 00:00:00-05
511656A75 | 2010-08-01 00:00:00-05
511656A75 | 2010-09-01 00:00:00-05
511656A75 | 2010-10-01 00:00:00-05
511656A75 | 2010-11-01 00:00:00-05
511656A75 | 2010-12-01 00:00:00-06
511656A75 | 2011-01-01 00:00:00-06
[....]-
RIGHT OUTER that CTE to a our sample dataset. We do this so we grow our sample dataset and we have entries with monthly_sales = 0 where needed.-
Use a window function that uses windows over
ROWS 12 PRECEDING. That's the key. That's the past 12 months. The window function can't operate on rows that are null, so we set them to 0 before we get to this step.-
Select just the rows where
monthly_sales > 0. We have to do this after the window function so as not to much with what is available for calculation (the window).Output,
idclient | month_transac | monthly_sales | sales_ttm
-----------+------------------------+---------------+-----------
511656A75 | 2010-06-01 00:00:00-05 | 68.57 | 0.00
511656A75 | 2010-07-01 00:00:00-05 | 88.63 | 68.57
511656A75 | 2010-08-01 00:00:00-05 | 94.91 | 157.20
511656A75 | 2010-09-01 00:00:00-05 | 70.66 | 252.11
511656A75 | 2010-10-01 00:00:00-05 | 28.84 | 322.77
511656A75 | 2015-10-01 00:00:00-05 | 85.00 | 0.00
511656A75 | 2015-12-01 00:00:00-06 | 114.42 | 85.00
511656A75 | 2016-01-01 00:00:00-06 | 137.08 | 199.42
511656A75 | 2016-03-01 00:00:00-06 | 172.92 | 336.50
511656A75 | 2016-04-01 00:00:00-05 | 125.00 | 509.42
511656A75 | 2016-05-01 00:00:00-05 | 127.08 | 634.42
511656A75 | 2016-06-01 00:00:00-05 | 104.17 | 761.50
511656A75 | 2016-07-01 00:00:00-05 | 98.22 | 865.67
511656A75 | 2016-08-01 00:00:00-05 | 37.08 | 963.89
511656A75 | 2016-10-01 00:00:00-05 | 108.33 | 1000.97
511656A75 | 2016-11-01 00:00:00-05 | 104.17 | 1024.30
511656A75 | 2017-01-01 00:00:00-06 | 201.67 | 1014.05
(17 rows)Code Snippets
-- IN A CTE
-- Grab the idclient, and the monthly range needed
-- We need the range because you can't sum over NULL (yet, afaik).
WITH idclient_month AS (
SELECT idclient, month_transac
FROM (
SELECT idclient, min(month_transac), max(month_transac)
FROM foo
GROUP BY idclient
) AS t
CROSS JOIN LATERAL generate_series(min::date, max::date, '1 month')
AS gs(month_transac)
)
-- If we move this where clause down the rows get filtered /before/ the window function
SELECT *
FROM (
SELECT
idclient,
month_transac,
monthly_sales,
sum(monthly_sales) OVER (
PARTITION BY idclient
ORDER BY month_transac
ROWS 12 PRECEDING
)
- monthly_sales
AS sales_ttm
-- Here, we sum up the sales by idclient, and month
-- We coalesce to 0 so we can use this in a window function
FROM (
SELECT idclient, month_transac, coalesce(sum(sales), 0) AS monthly_sales
FROM foo
RIGHT OUTER JOIN idclient_month
USING (idclient,month_transac)
GROUP BY idclient, month_transac
ORDER BY idclient, month_transac
) AS t
) AS g
WHERE g.monthly_sales > 0;SELECT idclient, month_transac
FROM (
SELECT idclient, min(month_transac), max(month_transac)
FROM foo
GROUP BY idclient
) AS t
CROSS JOIN LATERAL generate_series(min::date, max::date, '1 month')
AS gs(month_transac)
idclient | month_transac
-----------+------------------------
511656A75 | 2010-06-01 00:00:00-05
511656A75 | 2010-07-01 00:00:00-05
511656A75 | 2010-08-01 00:00:00-05
511656A75 | 2010-09-01 00:00:00-05
511656A75 | 2010-10-01 00:00:00-05
511656A75 | 2010-11-01 00:00:00-05
511656A75 | 2010-12-01 00:00:00-06
511656A75 | 2011-01-01 00:00:00-06
[....]idclient | month_transac | monthly_sales | sales_ttm
-----------+------------------------+---------------+-----------
511656A75 | 2010-06-01 00:00:00-05 | 68.57 | 0.00
511656A75 | 2010-07-01 00:00:00-05 | 88.63 | 68.57
511656A75 | 2010-08-01 00:00:00-05 | 94.91 | 157.20
511656A75 | 2010-09-01 00:00:00-05 | 70.66 | 252.11
511656A75 | 2010-10-01 00:00:00-05 | 28.84 | 322.77
511656A75 | 2015-10-01 00:00:00-05 | 85.00 | 0.00
511656A75 | 2015-12-01 00:00:00-06 | 114.42 | 85.00
511656A75 | 2016-01-01 00:00:00-06 | 137.08 | 199.42
511656A75 | 2016-03-01 00:00:00-06 | 172.92 | 336.50
511656A75 | 2016-04-01 00:00:00-05 | 125.00 | 509.42
511656A75 | 2016-05-01 00:00:00-05 | 127.08 | 634.42
511656A75 | 2016-06-01 00:00:00-05 | 104.17 | 761.50
511656A75 | 2016-07-01 00:00:00-05 | 98.22 | 865.67
511656A75 | 2016-08-01 00:00:00-05 | 37.08 | 963.89
511656A75 | 2016-10-01 00:00:00-05 | 108.33 | 1000.97
511656A75 | 2016-11-01 00:00:00-05 | 104.17 | 1024.30
511656A75 | 2017-01-01 00:00:00-06 | 201.67 | 1014.05
(17 rows)Context
StackExchange Database Administrators Q#164838, answer score: 3
Revisions (0)
No revisions yet.