patternsqlMinor
Speed up query calculation. Where can I add indexes or optimize the query or server?
Viewed 0 times
cancalculationthequerywhereindexesoptimizeserverspeedadd
Problem
I'm looking to speed up some calculations on a single table.
Here is the table, which I believe has over 93 million rows and it grows every day:
Here is what I actually/ult
Here is the table, which I believe has over 93 million rows and it grows every day:
CREATE TABLE daily_data
(
id serial NOT NULL,
company_id integer NOT NULL,
trade_date date NOT NULL,
daily_val numeric NOT NULL,
bbg_pulls_id integer,
gen_qtr_end_dt_id integer,
ern_release_date_id integer,
wh_calc_id integer,
CONSTRAINT daily_data_pkey PRIMARY KEY (id),
CONSTRAINT daily_data_bbg_pulls_id_fkey FOREIGN KEY (bbg_pulls_id)
REFERENCES bbg_pulls (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT daily_data_company_id_fkey FOREIGN KEY (company_id)
REFERENCES company (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT daily_data_ern_release_date_id_fkey FOREIGN KEY (ern_release_date_id)
REFERENCES ern_dt (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION,
CONSTRAINT daily_data_wh_calc_id_fkey FOREIGN KEY (wh_calc_id)
REFERENCES wh_calc (id) MATCH SIMPLE
ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY IMMEDIATE,
CONSTRAINT daily_data_company_id_trade_date_bbg_pulls_id_key UNIQUE (company_id, trade_date, bbg_pulls_id),
CONSTRAINT daily_data_company_id_trade_date_wh_calc_id_key UNIQUE (company_id, trade_date, wh_calc_id),
CONSTRAINT daily_data_check CHECK ((wh_calc_id IS NULL) <> (bbg_pulls_id IS NULL))
)
CREATE INDEX daily_data_bbg_pulls_id_idx
ON daily_data
USING btree
(bbg_pulls_id)
WHERE bbg_pulls_id IS NOT NULL;
CREATE INDEX daily_data_company_id_idx
ON daily_data
USING btree
(company_id);
CREATE INDEX daily_data_gen_qtr_end_dt_id_idx
ON daily_data
USING btree
(gen_qtr_end_dt_id)
WHERE gen_qtr_end_dt_id IS NOT NULL;
CREATE INDEX daily_data_trade_date_idx
ON daily_data
USING btree
(trade_date);
CREATE INDEX daily_data_wh_calc_id_idx
ON daily_data
USING btree
(wh_calc_id)
WHERE wh_calc_id IS NOT NULL;Here is what I actually/ult
Solution
To testing I created small sample table:
Original query:
Subquery version:
Partial window + partial subquery version:#
Pure window version (but some tricky):
Original query:
Subquery version:
```
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=9949.69..9950.25 rows=226 width=80) (actual time=10558.505..10559.017 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 782kB
CTE dd2
-> Bitmap Heap Scan on daily_data d_1 (cost=21.55..9934.07 rows=226 widt
INSERT INTO daily_data (
company_id
,trade_date
,daily_val
,wh_calc_id
)
SELECT
(random() * 10)::int,
'2010-01-01'::timestamp + (i::text || ' day')::interval,
random() * 100,
(random() * 20) + 230
FROM
generate_series(1, 200000) AS i
;Original query:
EXPLAIN ANALYZE
with dd2 as (select * from daily_data where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, ct, ct_lt
, round(CASE WHEN x.ct = 0 THEN numeric '1'
ELSE x.ct_lt / x.ct END, 6) AS pctl_calc
FROM dd2 d, LATERAL (
SELECT count(daily_val) AS ct
, count(daily_val < d.daily_val OR NULL)::numeric As ct_lt
FROM dd2
WHERE company_id = d.company_id
-- and company_id < 8
AND trade_date < d.trade_date
and wh_calc_id = d.wh_calc_id
-- and wh_calc_id = 241
) x
ORDER BY company_id, trade_date;Subquery version:
with dd2 as (
select company_id, trade_date
, (SELECT count(daily_val) FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct
, (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
from daily_data AS d where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, round(CASE WHEN d.ct = 0 THEN numeric '1'
ELSE d.ct_lt / d.ct END, 6) AS pctl_calc
FROM dd2 d
ORDER BY company_id, trade_date;Partial window + partial subquery version:#
EXPLAIN ANALYZE
with dd2 as (select *,
count(daily_val) OVER (PARTITION BY company_id, wh_calc_id ORDER BY trade_date) - 1 AS ct,
(SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
from daily_data AS d where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd2 d
ORDER BY company_id, trade_date;Pure window version (but some tricky):
EXPLAIN ANALYZE
with dd2 as (select d.company_id, d.trade_date, d.daily_val,
count(daily_val) OVER w - 1 AS ct,
array_agg(daily_val) OVER w AS ct_lt2
from daily_data AS d
where wh_calc_id = 241 AND company_id < 8
WINDOW w AS (PARTITION BY company_id, wh_calc_id ORDER BY trade_date)
) , dd1 AS (
SELECT d.*
, (SELECT count(*) FILTER (WHERE u < d.daily_val) FROM unnest(ct_lt2) AS u) AS ct_lt
FROM dd2 d
) SELECT *
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd1 AS d
ORDER BY company_id, trade_date;Original query:
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=2653.28..2653.84 rows=226 width=132) (actual time=8399.345..8399.886 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 1253kB
CTE dd2
-> Bitmap Heap Scan on daily_data (cost=21.55..1224.03 rows=226 width=60) (actual time=1.026..4.794 rows=7540 loops=1)
Recheck Cond: (wh_calc_id = 241)
Filter: (company_id Bitmap Index Scan on daily_data_wh_calc_id_idx (cost=0.00..21.50 rows=677 width=0) (actual time=0.848..0.848 rows=10066 loops=1)
Index Cond: (wh_calc_id = 241)
-> Nested Loop (cost=6.22..1420.41 rows=226 width=132) (actual time=6.196..8391.961 rows=7540 loops=1)
-> CTE Scan on dd2 d (cost=0.00..4.52 rows=226 width=60) (actual time=1.029..1.933 rows=7540 loops=1)
-> Aggregate (cost=6.22..6.24 rows=1 width=40) (actual time=1.111..1.111 rows=1 loops=7540)
-> CTE Scan on dd2 (cost=0.00..6.22 rows=1 width=32) (actual time=0.003..0.995 rows=484 loops=7540)
Filter: ((trade_date < d.trade_date) AND (company_id = d.company_id) AND (wh_calc_id = d.wh_calc_id))
Rows Removed by Filter: 7056
Planning time: 0.345 ms
Execution time: 8400.233 ms
(19 rows)Subquery version:
```
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=9949.69..9950.25 rows=226 width=80) (actual time=10558.505..10559.017 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 782kB
CTE dd2
-> Bitmap Heap Scan on daily_data d_1 (cost=21.55..9934.07 rows=226 widt
Code Snippets
INSERT INTO daily_data (
company_id
,trade_date
,daily_val
,wh_calc_id
)
SELECT
(random() * 10)::int,
'2010-01-01'::timestamp + (i::text || ' day')::interval,
random() * 100,
(random() * 20) + 230
FROM
generate_series(1, 200000) AS i
;EXPLAIN ANALYZE
with dd2 as (select * from daily_data where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, ct, ct_lt
, round(CASE WHEN x.ct = 0 THEN numeric '1'
ELSE x.ct_lt / x.ct END, 6) AS pctl_calc
FROM dd2 d, LATERAL (
SELECT count(daily_val) AS ct
, count(daily_val < d.daily_val OR NULL)::numeric As ct_lt
FROM dd2
WHERE company_id = d.company_id
-- and company_id < 8
AND trade_date < d.trade_date
and wh_calc_id = d.wh_calc_id
-- and wh_calc_id = 241
) x
ORDER BY company_id, trade_date;with dd2 as (
select company_id, trade_date
, (SELECT count(daily_val) FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct
, (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
from daily_data AS d where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, round(CASE WHEN d.ct = 0 THEN numeric '1'
ELSE d.ct_lt / d.ct END, 6) AS pctl_calc
FROM dd2 d
ORDER BY company_id, trade_date;EXPLAIN ANALYZE
with dd2 as (select *,
count(daily_val) OVER (PARTITION BY company_id, wh_calc_id ORDER BY trade_date) - 1 AS ct,
(SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
from daily_data AS d where wh_calc_id = 241 AND company_id < 8
)
SELECT d.*
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd2 d
ORDER BY company_id, trade_date;EXPLAIN ANALYZE
with dd2 as (select d.company_id, d.trade_date, d.daily_val,
count(daily_val) OVER w - 1 AS ct,
array_agg(daily_val) OVER w AS ct_lt2
from daily_data AS d
where wh_calc_id = 241 AND company_id < 8
WINDOW w AS (PARTITION BY company_id, wh_calc_id ORDER BY trade_date)
) , dd1 AS (
SELECT d.*
, (SELECT count(*) FILTER (WHERE u < d.daily_val) FROM unnest(ct_lt2) AS u) AS ct_lt
FROM dd2 d
) SELECT *
, round(CASE WHEN ct = 0 THEN numeric '1'
ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd1 AS d
ORDER BY company_id, trade_date;Context
StackExchange Database Administrators Q#165322, answer score: 2
Revisions (0)
No revisions yet.