HiveBrain v1.2.0
Get Started
← Back to all entries
patternsqlMinor

Speed up query calculation. Where can I add indexes or optimize the query or server?

Submitted by: @import:stackexchange-dba··
0
Viewed 0 times
cancalculationthequerywhereindexesoptimizeserverspeedadd

Problem

I'm looking to speed up some calculations on a single table.

Here is the table, which I believe has over 93 million rows and it grows every day:

CREATE TABLE daily_data
(
  id serial NOT NULL,
  company_id integer NOT NULL,
  trade_date date NOT NULL,
  daily_val numeric NOT NULL,
  bbg_pulls_id integer,
  gen_qtr_end_dt_id integer,
  ern_release_date_id integer,
  wh_calc_id integer,
  CONSTRAINT daily_data_pkey PRIMARY KEY (id),
  CONSTRAINT daily_data_bbg_pulls_id_fkey FOREIGN KEY (bbg_pulls_id)
      REFERENCES bbg_pulls (id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT daily_data_company_id_fkey FOREIGN KEY (company_id)
      REFERENCES company (id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT daily_data_ern_release_date_id_fkey FOREIGN KEY (ern_release_date_id)
      REFERENCES ern_dt (id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION,
  CONSTRAINT daily_data_wh_calc_id_fkey FOREIGN KEY (wh_calc_id)
      REFERENCES wh_calc (id) MATCH SIMPLE
      ON UPDATE NO ACTION ON DELETE NO ACTION DEFERRABLE INITIALLY IMMEDIATE,
  CONSTRAINT daily_data_company_id_trade_date_bbg_pulls_id_key UNIQUE (company_id, trade_date, bbg_pulls_id),
  CONSTRAINT daily_data_company_id_trade_date_wh_calc_id_key UNIQUE (company_id, trade_date, wh_calc_id),
  CONSTRAINT daily_data_check CHECK ((wh_calc_id IS NULL) <> (bbg_pulls_id IS NULL))
)

CREATE INDEX daily_data_bbg_pulls_id_idx
  ON daily_data
  USING btree
  (bbg_pulls_id)
  WHERE bbg_pulls_id IS NOT NULL;

CREATE INDEX daily_data_company_id_idx
  ON daily_data
  USING btree
  (company_id);

CREATE INDEX daily_data_gen_qtr_end_dt_id_idx
  ON daily_data
  USING btree
  (gen_qtr_end_dt_id)
  WHERE gen_qtr_end_dt_id IS NOT NULL;

CREATE INDEX daily_data_trade_date_idx
  ON daily_data
  USING btree
  (trade_date);

CREATE INDEX daily_data_wh_calc_id_idx
  ON daily_data
  USING btree
  (wh_calc_id)
  WHERE wh_calc_id IS NOT NULL;


Here is what I actually/ult

Solution

To testing I created small sample table:

INSERT INTO daily_data (
        company_id
        ,trade_date
        ,daily_val
        ,wh_calc_id
        )
SELECT
        (random() * 10)::int,
        '2010-01-01'::timestamp + (i::text || ' day')::interval,
        random() * 100,
        (random() * 20) + 230
FROM
        generate_series(1, 200000) AS i
;


Original query:

EXPLAIN ANALYZE
with dd2 as (select * from daily_data where wh_calc_id = 241 AND company_id < 8 
) 
SELECT d.*
, ct, ct_lt
     , round(CASE WHEN x.ct = 0 THEN numeric '1'  
                  ELSE x.ct_lt / x.ct END, 6) AS pctl_calc
FROM   dd2 d, LATERAL (
   SELECT count(daily_val) AS ct
        , count(daily_val < d.daily_val OR NULL)::numeric As ct_lt
   FROM   dd2
   WHERE  company_id = d.company_id
   --   and       company_id < 8
   AND    trade_date < d.trade_date
   and    wh_calc_id = d.wh_calc_id
   -- and     wh_calc_id = 241

   ) x
ORDER  BY company_id, trade_date;


Subquery version:

with dd2 as (
        select company_id, trade_date
   , (SELECT count(daily_val) FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct
   , (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
        from daily_data AS d where wh_calc_id = 241 AND company_id < 8 
)
SELECT d.*
     , round(CASE WHEN d.ct = 0 THEN numeric '1'  
                  ELSE d.ct_lt / d.ct END, 6) AS pctl_calc
FROM   dd2 d
ORDER  BY company_id, trade_date;


Partial window + partial subquery version:#

EXPLAIN ANALYZE
with dd2 as (select *,
        count(daily_val) OVER (PARTITION BY company_id, wh_calc_id ORDER BY trade_date) - 1 AS ct,
        (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
        from daily_data AS d where wh_calc_id = 241 AND company_id < 8 
) 
SELECT d.*
     , round(CASE WHEN ct = 0 THEN numeric '1'  
                  ELSE ct_lt / ct END, 6) AS pctl_calc
FROM   dd2 d
ORDER  BY company_id, trade_date;


Pure window version (but some tricky):

EXPLAIN ANALYZE
with dd2 as (select d.company_id, d.trade_date, d.daily_val,
        count(daily_val) OVER w - 1 AS ct,
        array_agg(daily_val) OVER w AS ct_lt2
        from daily_data AS d
        where wh_calc_id = 241 AND company_id < 8 
        WINDOW w AS (PARTITION BY company_id, wh_calc_id ORDER BY trade_date)
) , dd1 AS (  
SELECT d.*
, (SELECT count(*) FILTER (WHERE u < d.daily_val) FROM unnest(ct_lt2) AS u) AS ct_lt

FROM   dd2 d
) SELECT *
     , round(CASE WHEN ct = 0 THEN numeric '1'  
                  ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd1 AS d
ORDER  BY company_id, trade_date;


Original query:

QUERY PLAN                                                                    
-------------------------------------------------------------------------------------------------------------------------------------------------
 Sort  (cost=2653.28..2653.84 rows=226 width=132) (actual time=8399.345..8399.886 rows=7540 loops=1)
   Sort Key: d.company_id, d.trade_date
   Sort Method: quicksort  Memory: 1253kB
   CTE dd2
     ->  Bitmap Heap Scan on daily_data  (cost=21.55..1224.03 rows=226 width=60) (actual time=1.026..4.794 rows=7540 loops=1)
           Recheck Cond: (wh_calc_id = 241)
           Filter: (company_id   Bitmap Index Scan on daily_data_wh_calc_id_idx  (cost=0.00..21.50 rows=677 width=0) (actual time=0.848..0.848 rows=10066 loops=1)
                 Index Cond: (wh_calc_id = 241)
   ->  Nested Loop  (cost=6.22..1420.41 rows=226 width=132) (actual time=6.196..8391.961 rows=7540 loops=1)
         ->  CTE Scan on dd2 d  (cost=0.00..4.52 rows=226 width=60) (actual time=1.029..1.933 rows=7540 loops=1)
         ->  Aggregate  (cost=6.22..6.24 rows=1 width=40) (actual time=1.111..1.111 rows=1 loops=7540)
               ->  CTE Scan on dd2  (cost=0.00..6.22 rows=1 width=32) (actual time=0.003..0.995 rows=484 loops=7540)
                     Filter: ((trade_date < d.trade_date) AND (company_id = d.company_id) AND (wh_calc_id = d.wh_calc_id))
                     Rows Removed by Filter: 7056
 Planning time: 0.345 ms
 Execution time: 8400.233 ms
(19 rows)


Subquery version:

```
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=9949.69..9950.25 rows=226 width=80) (actual time=10558.505..10559.017 rows=7540 loops=1)
Sort Key: d.company_id, d.trade_date
Sort Method: quicksort Memory: 782kB
CTE dd2
-> Bitmap Heap Scan on daily_data d_1 (cost=21.55..9934.07 rows=226 widt

Code Snippets

INSERT INTO daily_data (
        company_id
        ,trade_date
        ,daily_val
        ,wh_calc_id
        )
SELECT
        (random() * 10)::int,
        '2010-01-01'::timestamp + (i::text || ' day')::interval,
        random() * 100,
        (random() * 20) + 230
FROM
        generate_series(1, 200000) AS i
;
EXPLAIN ANALYZE
with dd2 as (select * from daily_data where wh_calc_id = 241 AND company_id < 8 
) 
SELECT d.*
, ct, ct_lt
     , round(CASE WHEN x.ct = 0 THEN numeric '1'  
                  ELSE x.ct_lt / x.ct END, 6) AS pctl_calc
FROM   dd2 d, LATERAL (
   SELECT count(daily_val) AS ct
        , count(daily_val < d.daily_val OR NULL)::numeric As ct_lt
   FROM   dd2
   WHERE  company_id = d.company_id
   --   and       company_id < 8
   AND    trade_date < d.trade_date
   and    wh_calc_id = d.wh_calc_id
   -- and     wh_calc_id = 241

   ) x
ORDER  BY company_id, trade_date;
with dd2 as (
        select company_id, trade_date
   , (SELECT count(daily_val) FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct
   , (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
        from daily_data AS d where wh_calc_id = 241 AND company_id < 8 
)
SELECT d.*
     , round(CASE WHEN d.ct = 0 THEN numeric '1'  
                  ELSE d.ct_lt / d.ct END, 6) AS pctl_calc
FROM   dd2 d
ORDER  BY company_id, trade_date;
EXPLAIN ANALYZE
with dd2 as (select *,
        count(daily_val) OVER (PARTITION BY company_id, wh_calc_id ORDER BY trade_date) - 1 AS ct,
        (SELECT count(daily_val < d.daily_val OR NULL)::numeric FROM daily_data WHERE company_id = d.company_id AND trade_date < d.trade_date and wh_calc_id = d.wh_calc_id) AS ct_lt
        from daily_data AS d where wh_calc_id = 241 AND company_id < 8 
) 
SELECT d.*
     , round(CASE WHEN ct = 0 THEN numeric '1'  
                  ELSE ct_lt / ct END, 6) AS pctl_calc
FROM   dd2 d
ORDER  BY company_id, trade_date;
EXPLAIN ANALYZE
with dd2 as (select d.company_id, d.trade_date, d.daily_val,
        count(daily_val) OVER w - 1 AS ct,
        array_agg(daily_val) OVER w AS ct_lt2
        from daily_data AS d
        where wh_calc_id = 241 AND company_id < 8 
        WINDOW w AS (PARTITION BY company_id, wh_calc_id ORDER BY trade_date)
) , dd1 AS (  
SELECT d.*
, (SELECT count(*) FILTER (WHERE u < d.daily_val) FROM unnest(ct_lt2) AS u) AS ct_lt

FROM   dd2 d
) SELECT *
     , round(CASE WHEN ct = 0 THEN numeric '1'  
                  ELSE ct_lt / ct END, 6) AS pctl_calc
FROM dd1 AS d
ORDER  BY company_id, trade_date;

Context

StackExchange Database Administrators Q#165322, answer score: 2

Revisions (0)

No revisions yet.