patternsqlMinor
Postgresql partitioned table timestamptz constraint problem
Viewed 0 times
postgresqlproblemtimestamptzconstraintpartitionedtable
Problem
The table
Constraint SQL is defined as follow
Lets consider two type of query
Above query runs within sub-seconds and everything is fine.
On contrary, above query runs at least 15 seconds.
returns
When I examine why the latter runs long (using explain analyze), I finished the result that it visits and scans every table (~500), but the former visits only
I want to query for today, without using prepared statements as the latter query does. Why
reports is partitioned table by day like reports_20170414,reports_20170415Constraint SQL is defined as follow
CHECK (
rpt_datetime >= '2017-04-14 00:00:00+00'::timestamp with time zone
AND
rpt_datetime < '2017-04-15 00:00:00+00'::timestamp with time zone
)Lets consider two type of query
SELECT SUM(rpt_unique_clicks)
FROM reports WHERE rpt_datetime >= '2017-04-14 00:00:00';Above query runs within sub-seconds and everything is fine.
SELECT SUM(rpt_unique_clicks)
FROM reports WHERE rpt_datetime >=
date_trunc('day', current_timestamp);On contrary, above query runs at least 15 seconds.
SELECT date_trunc('day', CURRENT_TIMESTAMP), '2017-04-14 00:00:00';returns
2017-04-14 00:00:00 +00:00 | 2017-04-14 00:00:00When I examine why the latter runs long (using explain analyze), I finished the result that it visits and scans every table (~500), but the former visits only
reports_20170414 so there is problem with constraint checks.I want to query for today, without using prepared statements as the latter query does. Why
date_trunc('day', CURRENT_TIMESTAMP) is not equivalent to 2017-04-14 00:00:00 ?Solution
I cannot fully respond as to why
But I think we can make an experimental educated guess: Apparently, the PostgreSQL planner does not evaluate functions. As such, it doesn't have any good way to know which partitions to check, and makes a plan that checks them all.
Experimental check
We create a base (parent) table:
We create an auto-partition insert trigger:
Fill the (partitioned) table with some data; and check the partitions the trigger made:
| table_name |
| :--------------- |
| reports_20170409 |
| reports_20170410 |
| reports_20170411 |
| reports_20170412 |
| reports_20170413 |
| reports_20170414 |
| reports_20170415 |
| reports_20170416 |
At this point, we check two different queries. The first one does use a
Using a constant timestamp, only 'reports' and the appropriate partition are checked:
| QUERY PLAN |
| :---------------------------------------------------------------------------------------------------------------------- |
| Aggregate (cost=25.07..25.08 rows=1 width=8) (actual time=0.015..0.015 rows=1 loops=1) |
| -> Append (cost=0.00..24.12 rows=378 width=4) (actual time=0.009..0.010 rows=1 loops=1) |
| -> Seq Scan on reports (cost=0.00..0.00 rows=1 width=4) (actual time=0.003..0.003 rows=0 loops=1) |
| Filter: (rpt_datetime >= '2017-04-16 00:00:00'::timestamp without time zone) |
| -> Seq Scan on reports_20170416 (cost=0.00..24.12 rows=377 width=4) (actual time=0.006..0.007 rows=1 loops=1) |
| Filter: (rpt_datetime >= '2017-04-16 00:00:00'::timestamp without time zone) |
| Planning time: 0.713 ms |
| Execution time: 0.040 ms |
If we use the equivalent
| QUERY PLAN |
| :-----------------------------------------------------------------------------------------------------------------
date_trunc('day', CURRENT_TIMESTAMP) is not equivalent to a constant... even if both CURRENT_TIMESTAMP and date_trunc are defined as IMMUTABLE. But I think we can make an experimental educated guess: Apparently, the PostgreSQL planner does not evaluate functions. As such, it doesn't have any good way to know which partitions to check, and makes a plan that checks them all.
Experimental check
We create a base (parent) table:
-- Base table
CREATE TABLE reports
(
rpt_datetime timestamp without time zone DEFAULT now() PRIMARY KEY,
rpt_unique_clicks integer NOT NULL DEFAULT 1,
something_else text
) ;We create an auto-partition insert trigger:
-- Auto-partition using trigger
-- Adapted from http://blog.l1x.me/post/2016/02/16/creating-partitions-automatically-in-postgresql.html
CREATE OR REPLACE FUNCTION create_partition_and_insert ()
RETURNS TRIGGER AS
$
DECLARE
_partition_date text ;
_partition_date_p1 text ;
_partition text ;
BEGIN
_partition_date := to_char(new.rpt_datetime, 'YYYYMMDD');
_partition := 'reports_' || _partition_date ;
-- Check if table exists...
-- (oversimplistic: doesn't take schemas into account... doesn't check for possible race conditions)
if not exists (SELECT relname FROM pg_class WHERE relname=_partition) THEN
_partition_date_p1 := to_char(new.rpt_datetime + interval '1 day', 'YYYYMMDD');
RAISE NOTICE 'Creating %', _partition ;
EXECUTE 'CREATE TABLE ' || _partition ||
' (CHECK (rpt_datetime >= timestamp ''' || _partition_date || ''' AND rpt_datetime < timestamp ''' || _partition_date_p1 || '''))' ||
' INHERITS (reports)' ;
end if ;
EXECUTE 'INSERT INTO ' || _partition || ' SELECT(reports ' || quote_literal(NEW) || ').* ;' ;
-- We won't insert anything on parent table
RETURN NULL ;
END
$
LANGUAGE plpgsql VOLATILE
COST 1000;
-- Attach trigger to parent table
CREATE TRIGGER reports_insert_trigger
BEFORE INSERT ON reports
FOR EACH ROW EXECUTE PROCEDURE create_partition_and_insert();Fill the (partitioned) table with some data; and check the partitions the trigger made:
INSERT INTO
reports (rpt_datetime, rpt_unique_clicks, something_else)
SELECT
d, 1, 'Hello'
FROM
generate_series(timestamp '20170416' - interval '7 days', timestamp '20170416', interval '10 minutes') x(d) ;
-- Check how many partitions we made
SELECT
table_name
FROM
information_schema.tables
WHERE
table_name like 'reports_%'
ORDER BY
table_name;| table_name |
| :--------------- |
| reports_20170409 |
| reports_20170410 |
| reports_20170411 |
| reports_20170412 |
| reports_20170413 |
| reports_20170414 |
| reports_20170415 |
| reports_20170416 |
At this point, we check two different queries. The first one does use a
constant compared to rpt_datetime:EXPLAIN (ANALYZE)
SELECT
SUM(rpt_unique_clicks)
FROM
reports
WHERE
rpt_datetime >= timestamp '20170416' ;Using a constant timestamp, only 'reports' and the appropriate partition are checked:
| QUERY PLAN |
| :---------------------------------------------------------------------------------------------------------------------- |
| Aggregate (cost=25.07..25.08 rows=1 width=8) (actual time=0.015..0.015 rows=1 loops=1) |
| -> Append (cost=0.00..24.12 rows=378 width=4) (actual time=0.009..0.010 rows=1 loops=1) |
| -> Seq Scan on reports (cost=0.00..0.00 rows=1 width=4) (actual time=0.003..0.003 rows=0 loops=1) |
| Filter: (rpt_datetime >= '2017-04-16 00:00:00'::timestamp without time zone) |
| -> Seq Scan on reports_20170416 (cost=0.00..24.12 rows=377 width=4) (actual time=0.006..0.007 rows=1 loops=1) |
| Filter: (rpt_datetime >= '2017-04-16 00:00:00'::timestamp without time zone) |
| Planning time: 0.713 ms |
| Execution time: 0.040 ms |
If we use the equivalent
SELECT using a function-call (even if the result of this function call is a constant), the plan is completely different:EXPLAIN (ANALYZE)
SELECT
SUM(rpt_unique_clicks)
FROM
reports
WHERE
rpt_datetime >= date_trunc('day', now()) ;| QUERY PLAN |
| :-----------------------------------------------------------------------------------------------------------------
Code Snippets
-- Base table
CREATE TABLE reports
(
rpt_datetime timestamp without time zone DEFAULT now() PRIMARY KEY,
rpt_unique_clicks integer NOT NULL DEFAULT 1,
something_else text
) ;-- Auto-partition using trigger
-- Adapted from http://blog.l1x.me/post/2016/02/16/creating-partitions-automatically-in-postgresql.html
CREATE OR REPLACE FUNCTION create_partition_and_insert ()
RETURNS TRIGGER AS
$$
DECLARE
_partition_date text ;
_partition_date_p1 text ;
_partition text ;
BEGIN
_partition_date := to_char(new.rpt_datetime, 'YYYYMMDD');
_partition := 'reports_' || _partition_date ;
-- Check if table exists...
-- (oversimplistic: doesn't take schemas into account... doesn't check for possible race conditions)
if not exists (SELECT relname FROM pg_class WHERE relname=_partition) THEN
_partition_date_p1 := to_char(new.rpt_datetime + interval '1 day', 'YYYYMMDD');
RAISE NOTICE 'Creating %', _partition ;
EXECUTE 'CREATE TABLE ' || _partition ||
' (CHECK (rpt_datetime >= timestamp ''' || _partition_date || ''' AND rpt_datetime < timestamp ''' || _partition_date_p1 || '''))' ||
' INHERITS (reports)' ;
end if ;
EXECUTE 'INSERT INTO ' || _partition || ' SELECT(reports ' || quote_literal(NEW) || ').* ;' ;
-- We won't insert anything on parent table
RETURN NULL ;
END
$$
LANGUAGE plpgsql VOLATILE
COST 1000;
-- Attach trigger to parent table
CREATE TRIGGER reports_insert_trigger
BEFORE INSERT ON reports
FOR EACH ROW EXECUTE PROCEDURE create_partition_and_insert();INSERT INTO
reports (rpt_datetime, rpt_unique_clicks, something_else)
SELECT
d, 1, 'Hello'
FROM
generate_series(timestamp '20170416' - interval '7 days', timestamp '20170416', interval '10 minutes') x(d) ;
-- Check how many partitions we made
SELECT
table_name
FROM
information_schema.tables
WHERE
table_name like 'reports_%'
ORDER BY
table_name;EXPLAIN (ANALYZE)
SELECT
SUM(rpt_unique_clicks)
FROM
reports
WHERE
rpt_datetime >= timestamp '20170416' ;EXPLAIN (ANALYZE)
SELECT
SUM(rpt_unique_clicks)
FROM
reports
WHERE
rpt_datetime >= date_trunc('day', now()) ;Context
StackExchange Database Administrators Q#171057, answer score: 5
Revisions (0)
No revisions yet.