patternsqlMinor
PostgreSQL partition hierarchy design for centralized logging
Viewed 0 times
postgresqlloggingpartitionhierarchydesigncentralizedfor
Problem
I am looking into setting up logging from all of my servers into a centralized postgresql database. It makes sense to be able to drop logs by date or by host, so I'd like to set it up with partitioning, but a multi-layer partition:
I've worked out an example scheme for a specific service (php errors in this case) and am looking for some PostgreSQL experts to critique it for obvious performance bottlenecks.
I'll also be adding services such as apache access/errors.
I think I have to trigger to insert into the host_service (to utilize the host check constraint), and then each host_service must trigger to insert into the host_service_yyyymm table.
What can I expect performance-wise of such a trigger/partition scheme?
Some extra info discussed in chat:
host-service inherits service, host-service-yyyymm inherits host-service.I've worked out an example scheme for a specific service (php errors in this case) and am looking for some PostgreSQL experts to critique it for obvious performance bottlenecks.
// SET UP MASTER PHP LOG TABLE //
CREATE TABLE php (
log_id int not null,
host char(5),
logdate date not null,
message text
);
// SET UP HOST-SPECIFIC 'PARTITIONS' //
CREATE TABLE host1_php (
CHECK ( host = 'host1' )
) INHERITS (php);
CREATE TABLE host2_php (
CHECK ( host = 'host2' )
) INHERITS (php);
// SET UP HOST-SPECIFIC TIME 'PARTITIONS' //
CREATE TABLE host1_php_2011m12 (
CHECK ( logdate >= DATE '2011-12-01' AND logdate = DATE '2012-01-01' AND logdate = DATE '2011-12-01' AND logdate = DATE '2012-01-01' AND logdate < DATE '2012-02-01' )
) INHERITS (host2_php);
CREATE INDEX host1_php_2011m12_logdate ON host1_php_2011m12 (logdate);
CREATE INDEX host1_php_2012m01_logdate ON host1_php_2012m01 (logdate);
CREATE INDEX host2_php_2011m12_logdate ON host2_php_2011m12 (logdate);
CREATE INDEX host2_php_2012m01_logdate ON host2_php_2012m01 (logdate);I'll also be adding services such as apache access/errors.
I think I have to trigger to insert into the host_service (to utilize the host check constraint), and then each host_service must trigger to insert into the host_service_yyyymm table.
What can I expect performance-wise of such a trigger/partition scheme?
Some extra info discussed in chat:
- PostgreSQL version 9.1.2
- Queries on data won't be very often, and mostly on the current month of data for multiple hosts.
- PHP is only about 1 per minute insert, but total apache is probably going to be around 300-500 per second across all hosts.
Solution
What can I expect performance-wise of such a trigger/partition scheme?
Context switches mean using triggers is always going to use a lot more CPU than a simple
Please note I haven't included any indexing, or any consideration of
Context switches mean using triggers is always going to use a lot more CPU than a simple
insert. The script below can be used to quantify how much impact that will have - and also demonstrates auto-creating partitions using triggers and compares the performance either way.Please note I haven't included any indexing, or any consideration of
update statements.begin;
set role dba;
create role stack;
grant stack to dba;
create schema authorization stack;
set role stack;
--
--**** the above creates a nice clean schema as a test area
--
set client_min_messages to warning; --**** or you get a lot of "NOTICE: merging column "xyz" with inherited definition" notices
--
create table phpheap(log_id serial not null, host text not null, logdate date not null, message text not null); --**** This table is used to compare 'insert' performance with that on the partitioned version
create table php(log_id serial not null, host text not null, logdate date not null, message text not null);
--
create function php_host_insert() returns trigger language plpgsql security definer as $$
begin
set search_path to 'stack';
execute 'insert into php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
return null;
exception when undefined_table then
execute 'create table php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id int not null, host text not null check(host='''||new.host||'''), logdate date not null check(to_char(logdate, ''YYYYmMM'')='''||to_char(new.logdate, 'YYYYmMM')||'''), message text not null) inherits (php_'||new.host||')';
execute 'insert into php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
return null;
end;$$;
--
create function php_insert() returns trigger language plpgsql security definer as $$
begin
set search_path to 'stack';
execute 'insert into php_'||new.host||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
return null;
exception when undefined_table then
execute 'create table php_'||new.host||'(log_id int not null, host text not null check(host='''||new.host||'''), logdate date not null, message text not null) inherits(php)';
execute 'create trigger trig_insert_php_'||new.host||' before insert on php_'||new.host||' for each row execute procedure php_host_insert()';
execute 'insert into php_'||new.host||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
return null;
end;$$;
--
create trigger trig_insert_php before insert on php for each row execute procedure php_insert();
--
\timing on
insert into phpheap(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--**** output
--INSERT 0 100000
--Time: 1102.140 ms
insert into php(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--**** output
--INSERT 0 0
--Time: 35615.498 ms
insert into php(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--**** output
--INSERT 0 0
--Time: 34074.579 ms
\timing off
--
--**** Now we replace the trigger functions with the 'normal' kind that don't auto-create partitions
--
create or replace function php_host_insert() returns trigger language plpgsql security definer as $$
begin
set search_path to 'stack';
execute 'insert into php_'||new.host||'_'||to_char(new.logdate, 'YYYYmMM')||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
return null;
end;$$;
--
create or replace function php_insert() returns trigger language plpgsql security definer as $$
begin
set search_path to 'stack';
execute 'insert into php_'||new.host||'(log_id, host, logdate, message) values($1, $2, $3, $4)' using new.log_id, new.host, new.logdate, new.message;
return null;
end;$$;
--
\timing on
insert into php(host, logdate, message) select 'host1', current_date-(generate_series(-99999, 0, 1)/1000)::integer, repeat('hello',20);
--**** output
--INSERT 0 0
--Time: 28457.146 ms
\timing off
--
rollback;
Context
StackExchange Database Administrators Q#9227, answer score: 6
Revisions (0)
No revisions yet.