snippetsqlMinor
How to improve this union to ensure consistent results?
Viewed 0 times
thisconsistentunionensureimprovehowresults
Problem
I've been trying to find an reliable and economic way of counting rows over a 7 day period and so far, this is the best I've been able to come up with:
Schema
Data to test with
```
INSERT INTO "campaign" ("id", "userId", "userCompanyId", "type", "title", "description", "sources", "configuration", "active", "excludedUserNames", "limit", "startAt", "endAt", "removedAt", "createdAt", "updatedAt") VALUES
(1, 1, 1, E'test', E'Test', E'Test', E'{a}', E'{"query":{"accounts":[],"hashtags":["GavinTest1234","XYZ"]}}', E'true', NULL, 0, E'2016-01-25 15:06:00+00', E'2016-01-27 23:59:59+00', NULL, E'2016-01-25 15:06:27.474+00', E'2016-01-26 16:48:19.693+00');
INSERT INTO "data" ("id", "hash", "source", "isFiltered",
Schema
CREATE TABLE IF NOT EXISTS "data" (
"id" SERIAL,
"hash" CHARACTER VARYING(255) NOT NULL,
"source" CHARACTER VARYING(255) NOT NULL,
"isFiltered" BOOLEAN NOT NULL,
"campaignId" INTEGER NOT NULL,
"data" JSON NOT NULL,
"meta" JSON NOT NULL,
"modifiedReason" CHARACTER VARYING(255) NULL DEFAULT NULL,
"createdAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"updatedAt" TIMESTAMP WITH TIME ZONE NOT NULL,
PRIMARY KEY ("id")
);
CREATE TABLE IF NOT EXISTS "campaign" (
"id" SERIAL,
"userId" INTEGER NOT NULL,
"userCompanyId" INTEGER NOT NULL,
"type" CHARACTER VARYING(255) NULL DEFAULT NULL,
"title" CHARACTER VARYING(255) NULL DEFAULT NULL,
"description" CHARACTER VARYING(255) NULL DEFAULT NULL,
"sources" CHARACTER VARYING(255)[] NULL DEFAULT NULL,
"configuration" JSON NOT NULL,
"active" BOOLEAN NULL DEFAULT true,
"excludedUserNames" CHARACTER VARYING(255)[] NULL DEFAULT NULL,
"limit" INTEGER NULL DEFAULT 0,
"startAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"endAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"removedAt" TIMESTAMP WITH TIME ZONE NULL DEFAULT NULL,
"createdAt" TIMESTAMP WITH TIME ZONE NOT NULL,
"updatedAt" TIMESTAMP WITH TIME ZONE NOT NULL,
PRIMARY KEY ("id")
);Data to test with
```
INSERT INTO "campaign" ("id", "userId", "userCompanyId", "type", "title", "description", "sources", "configuration", "active", "excludedUserNames", "limit", "startAt", "endAt", "removedAt", "createdAt", "updatedAt") VALUES
(1, 1, 1, E'test', E'Test', E'Test', E'{a}', E'{"query":{"accounts":[],"hashtags":["GavinTest1234","XYZ"]}}', E'true', NULL, 0, E'2016-01-25 15:06:00+00', E'2016-01-27 23:59:59+00', NULL, E'2016-01-25 15:06:27.474+00', E'2016-01-26 16:48:19.693+00');
INSERT INTO "data" ("id", "hash", "source", "isFiltered",
Solution
Your query can be simplified in various respects:
This should be faster for multiple reasons. Not least it can use a multi-column index on
One of the very rare cases where a
And rather use
SELECT to_char(day, 'DD/MM/YYYY') AS date
, hashtag
, count(d.*)::int AS count
FROM (
campaign c
CROSS JOIN json_array_elements_text(c.configuration#>'{query,hashtags}') ch(hashtag)
CROSS JOIN (SELECT g::date AS day
FROM generate_series(timestamp '2016-01-20', '2016-01-26', interval '1 day') g) day
)
NATURAL LEFT JOIN (
SELECT "createdAt"::date AS day, dh.hashtag
FROM data, json_array_elements_text(meta#>'{matchedOn,hashtags}') dh(hashtag)
WHERE "campaignId" = 1
AND "createdAt" >= '2016-01-20'
AND "createdAt" < '2016-01-27'
) d
WHERE c.id = 1
GROUP BY day, hashtag
ORDER BY day, hashtag, count;This should be faster for multiple reasons. Not least it can use a multi-column index on
data("campaignId", "createdAt") - which you should create unless you have it.LEFT JOIN is the core feature you need. Then count the column, only non-null values count ...One of the very rare cases where a
NATURAL JOIN is useful, btw. You don't need it, though. It's just a minor syntax shortcut.And rather use
timestamp input for generate_series():- Generating time series between two dates in PostgreSQL
Code Snippets
SELECT to_char(day, 'DD/MM/YYYY') AS date
, hashtag
, count(d.*)::int AS count
FROM (
campaign c
CROSS JOIN json_array_elements_text(c.configuration#>'{query,hashtags}') ch(hashtag)
CROSS JOIN (SELECT g::date AS day
FROM generate_series(timestamp '2016-01-20', '2016-01-26', interval '1 day') g) day
)
NATURAL LEFT JOIN (
SELECT "createdAt"::date AS day, dh.hashtag
FROM data, json_array_elements_text(meta#>'{matchedOn,hashtags}') dh(hashtag)
WHERE "campaignId" = 1
AND "createdAt" >= '2016-01-20'
AND "createdAt" < '2016-01-27'
) d
WHERE c.id = 1
GROUP BY day, hashtag
ORDER BY day, hashtag, count;Context
StackExchange Database Administrators Q#127404, answer score: 4
Revisions (0)
No revisions yet.