gotchasqlMinor
Why does PostgreSQL scan a lot of rows?
Viewed 0 times
postgresqlrowswhyscanlotdoes
Problem
I have a small table of 135,000 rows.
Here is the schema:
This is my SQL:
```
SELECT
"hs"."customer_id",
"hs"."customer_email" AS "account",
CASE WHEN max("monthlyAvg"."monthlyAvgValue") is not null THEN
max("monthlyAvg"."monthlyAvgValue") ELSE 0::int END AS "monthlyAvg",
CASE WHEN max("weeklyAvg"."weeklyAvgValue") is not null THEN
max("weeklyAvg"."weeklyAvgValue") ELSE 0::int END AS "weeklyAvg"
FROM
"history_sum" AS "hs"
LEFT JOIN (
SELECT
"hs"."customer_id" AS "customer_id",
round((SUM("hs"."statistics_real_hits")::numeric / 30::numeric), 2) AS "monthlyAvgValue"
FROM
"history_sum" AS "hs"
WHERE
"hs"."date" BETWEEN '2016-04-29' AND '2016-05-29'
GROUP BY
"hs"."customer_id",
"hs"."date"
ORDER BY
"hs"."date" DESC
) AS "monthlyAvg" ON "monthlyAvg"."customer_id" = "hs"."customer_id"
LEFT JOIN (
SELECT
"hs"."customer_id" AS "customer_id",
round((SUM("hs"."statistics_real_hits")::numeric / 7::numeric), 2) AS "weeklyAvgValue"
Here is the schema:
id integer NOT NULL nextval('history_sum_id_seq'::regclass)
zone_id integer NOT NULL
spot_id character varying(24) NULL::character varying
customer_id integer
broker_id integer
date date NOT NULL
created_date timestamp(0) without time zone NOT NULL
statistics_hits integer 0
statistics_real_hits integer 0
statistics_paid_hits integer 0
statistics_clicks integer 0
earnings_eur double precision '0'::double precision
earnings_usd double precision '0'::double precision
earnings_rub double precision '0'::double precision
broker_name character varying(100) NULL::character varying
customer_email character varying(128) NULL::character varying
customer_commission integerThis is my SQL:
```
SELECT
"hs"."customer_id",
"hs"."customer_email" AS "account",
CASE WHEN max("monthlyAvg"."monthlyAvgValue") is not null THEN
max("monthlyAvg"."monthlyAvgValue") ELSE 0::int END AS "monthlyAvg",
CASE WHEN max("weeklyAvg"."weeklyAvgValue") is not null THEN
max("weeklyAvg"."weeklyAvgValue") ELSE 0::int END AS "weeklyAvg"
FROM
"history_sum" AS "hs"
LEFT JOIN (
SELECT
"hs"."customer_id" AS "customer_id",
round((SUM("hs"."statistics_real_hits")::numeric / 30::numeric), 2) AS "monthlyAvgValue"
FROM
"history_sum" AS "hs"
WHERE
"hs"."date" BETWEEN '2016-04-29' AND '2016-05-29'
GROUP BY
"hs"."customer_id",
"hs"."date"
ORDER BY
"hs"."date" DESC
) AS "monthlyAvg" ON "monthlyAvg"."customer_id" = "hs"."customer_id"
LEFT JOIN (
SELECT
"hs"."customer_id" AS "customer_id",
round((SUM("hs"."statistics_real_hits")::numeric / 7::numeric), 2) AS "weeklyAvgValue"
Solution
As far as I can tell, you only need a single derived table together with a conditional aggregation:
You can add more aggregations without the need to add new derived tables.
Using an
SELECT
hs.customer_id,
hs.customer_email AS account,
coalesce(max(x.monthlyAvgValue),0) AS monthlyAvg,
coalesce(max(x.prevMonthlyAvg),0) AS prevMonthlyAvg,
coalesce(max(x.weeklyAvgValue),0) AS weeklyAvg
FROM history_sum AS hs
LEFT JOIN (
SELECT
hs2.customer_id AS customer_id,
round((SUM(hs2.statistics_real_hits)::numeric / 30::numeric), 2) filter (where hs2.date BETWEEN '2016-04-29' AND '2016-05-29') AS monthlyAvgValue,
round((SUM(hs2.statistics_real_hits)::numeric / 30::numeric), 2) filter (where hs2.date BETWEEN :minus60days AND :minus31days) AS prevMonthlyAvg,
round((SUM(hs2.statistics_real_hits)::numeric / 7::numeric), 2) filter (where hs2.date BETWEEN '2016-05-22' AND '2016-05-29') AS weeklyAvgValue,
FROM history_sum AS hs2
GROUP BY hs2.customer_id, hs2.date
) x ON x.customer_id = hs.customer_id
WHERE
hs.customer_email is not null
GROUP BY
hs.customer_id,
hs.customer_email,
hs.customer_commission
ORDER BY
account ASC;You can add more aggregations without the need to add new derived tables.
Using an
order by in a sub-select or derived table is useless and can be removed (not sure if Postgres optimizes that away).Code Snippets
SELECT
hs.customer_id,
hs.customer_email AS account,
coalesce(max(x.monthlyAvgValue),0) AS monthlyAvg,
coalesce(max(x.prevMonthlyAvg),0) AS prevMonthlyAvg,
coalesce(max(x.weeklyAvgValue),0) AS weeklyAvg
FROM history_sum AS hs
LEFT JOIN (
SELECT
hs2.customer_id AS customer_id,
round((SUM(hs2.statistics_real_hits)::numeric / 30::numeric), 2) filter (where hs2.date BETWEEN '2016-04-29' AND '2016-05-29') AS monthlyAvgValue,
round((SUM(hs2.statistics_real_hits)::numeric / 30::numeric), 2) filter (where hs2.date BETWEEN :minus60days AND :minus31days) AS prevMonthlyAvg,
round((SUM(hs2.statistics_real_hits)::numeric / 7::numeric), 2) filter (where hs2.date BETWEEN '2016-05-22' AND '2016-05-29') AS weeklyAvgValue,
FROM history_sum AS hs2
GROUP BY hs2.customer_id, hs2.date
) x ON x.customer_id = hs.customer_id
WHERE
hs.customer_email is not null
GROUP BY
hs.customer_id,
hs.customer_email,
hs.customer_commission
ORDER BY
account ASC;Context
StackExchange Database Administrators Q#139992, answer score: 3
Revisions (0)
No revisions yet.