By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
select version();
version |
---|
PostgreSQL 12.7 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.3.1 20191121 (Red Hat 8.3.1-5), 64-bit |
CREATE TABLE test
(
the_date TIMESTAMPTZ(0) NOT NULL
);
INSERT INTO test VALUES
('2021-06-23 05:47:05'::TIMESTAMPTZ), -- NO Gaps > 3 hours on this date!
('2021-06-23 07:47:05'::TIMESTAMPTZ),
('2021-06-23 09:47:05'::TIMESTAMPTZ),
('2021-06-23 11:47:05'::TIMESTAMPTZ),
('2021-06-23 13:47:05'::TIMESTAMPTZ),
('2021-06-23 14:47:05'::TIMESTAMPTZ),
('2021-06-23 16:47:05'::TIMESTAMPTZ),
('2021-06-23 17:47:05'::TIMESTAMPTZ),
('2021-06-24 05:47:05'::TIMESTAMPTZ), -- TWO gaps > 3 hours on this date
-- 1st gap > 3 hours
('2021-06-24 09:47:05'::TIMESTAMPTZ),
-- 2nd gap > 3 hours
('2021-06-24 13:47:05'::TIMESTAMPTZ),
('2021-06-24 14:47:05'::TIMESTAMPTZ), -- added for testing
('2021-06-24 16:47:05'::TIMESTAMPTZ), -- added for testing
('2021-06-24 17:47:05'::TIMESTAMPTZ);
14 rows affected
SELECT * FROM test;
the_date |
---|
2021-06-23 05:47:05+01 |
2021-06-23 07:47:05+01 |
2021-06-23 09:47:05+01 |
2021-06-23 11:47:05+01 |
2021-06-23 13:47:05+01 |
2021-06-23 14:47:05+01 |
2021-06-23 16:47:05+01 |
2021-06-23 17:47:05+01 |
2021-06-24 05:47:05+01 |
2021-06-24 09:47:05+01 |
2021-06-24 13:47:05+01 |
2021-06-24 14:47:05+01 |
2021-06-24 16:47:05+01 |
2021-06-24 17:47:05+01 |
SELECT
the_date::DATE AS dat,
the_date AS td,
LEAD(the_date)
OVER (PARTITION BY the_date::DATE
ORDER BY the_date ASC) AS l_td,
LEAD(the_date)
OVER (PARTITION BY the_date::DATE
ORDER BY the_date ASC) - the_date AS diff -- for demonstration
FROM -- purposes - see diffs
test -- > 3 HOUR - 2 on 24/06
ORDER BY dat, td;
dat | td | l_td | diff |
---|---|---|---|
2021-06-23 | 2021-06-23 05:47:05+01 | 2021-06-23 07:47:05+01 | 02:00:00 |
2021-06-23 | 2021-06-23 07:47:05+01 | 2021-06-23 09:47:05+01 | 02:00:00 |
2021-06-23 | 2021-06-23 09:47:05+01 | 2021-06-23 11:47:05+01 | 02:00:00 |
2021-06-23 | 2021-06-23 11:47:05+01 | 2021-06-23 13:47:05+01 | 02:00:00 |
2021-06-23 | 2021-06-23 13:47:05+01 | 2021-06-23 14:47:05+01 | 01:00:00 |
2021-06-23 | 2021-06-23 14:47:05+01 | 2021-06-23 16:47:05+01 | 02:00:00 |
2021-06-23 | 2021-06-23 16:47:05+01 | 2021-06-23 17:47:05+01 | 01:00:00 |
2021-06-23 | 2021-06-23 17:47:05+01 | null | null |
2021-06-24 | 2021-06-24 05:47:05+01 | 2021-06-24 09:47:05+01 | 04:00:00 |
2021-06-24 | 2021-06-24 09:47:05+01 | 2021-06-24 13:47:05+01 | 04:00:00 |
2021-06-24 | 2021-06-24 13:47:05+01 | 2021-06-24 14:47:05+01 | 01:00:00 |
2021-06-24 | 2021-06-24 14:47:05+01 | 2021-06-24 16:47:05+01 | 02:00:00 |
2021-06-24 | 2021-06-24 16:47:05+01 | 2021-06-24 17:47:05+01 | 01:00:00 |
2021-06-24 | 2021-06-24 17:47:05+01 | null | null |
WITH leads AS
(
SELECT
the_date::DATE AS dat, the_date AS td, LEAD(the_date)
OVER (PARTITION BY the_date::DATE) AS l_td
FROM
test
)
SELECT DISTINCT ON(dat)
dat AS "The date", td AS "Gap start or last ts"
FROM leads
WHERE l_td - td > INTERVAL '3 HOUR'
OR l_td IS NULL
ORDER BY dat, td;
The date | Gap start or last ts |
---|---|
2021-06-23 | 2021-06-23 17:47:05+01 |
2021-06-24 | 2021-06-24 05:47:05+01 |
--
-- Note: 2 records for 2021-06-24
--
WITH leads AS
(
SELECT
the_date::DATE AS dat, the_date AS td,
LEAD(the_date)
OVER (PARTITION BY the_date::DATE) AS l_td
FROM
test
),
gaps AS
(
SELECT
dat, td,
ROW_NUMBER()
OVER (PARTITION BY dat ORDER BY td) AS rn
FROM leads
WHERE (l_td - td > INTERVAL '3 HOUR')
OR (l_td IS NULL)
)
SELECT
dat, td
FROM gaps
WHERE rn <= 2 -- NOTE 2!
ORDER BY dat, td;
dat | td |
---|---|
2021-06-23 | 2021-06-23 17:47:05+01 |
2021-06-24 | 2021-06-24 05:47:05+01 |
2021-06-24 | 2021-06-24 09:47:05+01 |
WITH long_gaps AS
(
SELECT dat, MIN(td) AS gap
FROM
(
SELECT
the_date::DATE AS dat,
the_date AS td,
LEAD(the_date)
OVER (PARTITION BY the_date::DATE) AS l_td
FROM
test
) AS t1
WHERE l_td - td > INTERVAL '3 HOUR'
GROUP BY dat
),
short_gaps AS
(
SELECT the_date::DATE AS dat2, MAX(the_date)
FROM test
WHERE the_date::DATE NOT IN (SELECT dat FROM long_gaps)
GROUP BY dat2
)
SELECT dat AS "The date", gap AS "Gap start or last ts" FROM long_gaps
UNION
SELECT * FROM short_gaps
ORDER BY 1; -- parameter 1 which ORDERs BY the first field in the query
-- ORDER BY "The date"; -- you can do this if you find it more legible
The date | Gap start or last ts |
---|---|
2021-06-23 | 2021-06-23 17:47:05+01 |
2021-06-24 | 2021-06-24 05:47:05+01 |
EXPLAIN (ANALYZE, VERBOSE, BUFFERS, COSTS)
WITH long_gaps AS
(
SELECT
the_date::DATE AS dat, the_date AS td, LEAD(the_date)
OVER (PARTITION BY the_date::DATE) AS l_td
FROM
test
)
SELECT
DISTINCT ON (dat) dat AS "The date",
td AS "Gap start or last ts"
FROM long_gaps
WHERE l_td - td > INTERVAL '3 HOUR'
OR l_td IS NULL
ORDER BY dat, td ;
QUERY PLAN |
---|
Unique (cost=279.68..283.48 rows=198 width=12) (actual time=0.050..0.052 rows=2 loops=1) |
Output: long_gaps.dat, long_gaps.td |
Buffers: shared hit=1 |
-> Sort (cost=279.68..281.58 rows=761 width=12) (actual time=0.050..0.051 rows=4 loops=1) |
Output: long_gaps.dat, long_gaps.td |
Sort Key: long_gaps.dat, long_gaps.td |
Sort Method: quicksort Memory: 25kB |
Buffers: shared hit=1 |
-> Subquery Scan on long_gaps (cost=164.16..243.26 rows=761 width=12) (actual time=0.040..0.047 rows=4 loops=1) |
Output: long_gaps.dat, long_gaps.td |
Filter: (((long_gaps.l_td - long_gaps.td) > '03:00:00'::interval) OR (long_gaps.l_td IS NULL)) |
Rows Removed by Filter: 10 |
Buffers: shared hit=1 |
-> WindowAgg (cost=164.16..209.36 rows=2260 width=20) (actual time=0.032..0.042 rows=14 loops=1) |
Output: ((test.the_date)::date), test.the_date, lead(test.the_date) OVER (?) |
Buffers: shared hit=1 |
-> Sort (cost=164.16..169.81 rows=2260 width=12) (actual time=0.023..0.025 rows=14 loops=1) |
Output: ((test.the_date)::date), test.the_date |
Sort Key: ((test.the_date)::date) |
Sort Method: quicksort Memory: 25kB |
Buffers: shared hit=1 |
-> Seq Scan on public.test (cost=0.00..38.25 rows=2260 width=12) (actual time=0.010..0.014 rows=14 loops=1) |
Output: (test.the_date)::date, test.the_date |
Buffers: shared hit=1 |
Planning Time: 0.103 ms |
Execution Time: 0.090 ms |
EXPLAIN (ANALYZE, VERBOSE, BUFFERS, COSTS)
WITH gaps AS
(
SELECT
the_date::DATE AS dat, the_date AS td,
LEAD(the_date)
OVER (PARTITION BY the_date::DATE) AS l_td
FROM
test
),
long_gaps AS
(
SELECT
dat, td,
ROW_NUMBER()
OVER (PARTITION BY dat ORDER BY td) AS rn
FROM gaps
WHERE l_td - td > INTERVAL '3 HOUR'
OR l_td IS NULL
)
SELECT
dat, td
FROM long_gaps
WHERE rn = 2
ORDER BY dat, td;
QUERY PLAN |
---|
Subquery Scan on long_gaps (cost=279.68..304.41 rows=4 width=12) (actual time=0.054..0.056 rows=1 loops=1) |
Output: long_gaps.dat, long_gaps.td |
Filter: (long_gaps.rn = 2) |
Rows Removed by Filter: 3 |
Buffers: shared hit=1 |
-> WindowAgg (cost=279.68..294.90 rows=761 width=20) (actual time=0.050..0.054 rows=4 loops=1) |
Output: gaps.dat, gaps.td, row_number() OVER (?) |
Buffers: shared hit=1 |
-> Sort (cost=279.68..281.58 rows=761 width=12) (actual time=0.048..0.049 rows=4 loops=1) |
Output: gaps.dat, gaps.td |
Sort Key: gaps.dat, gaps.td |
Sort Method: quicksort Memory: 25kB |
Buffers: shared hit=1 |
-> Subquery Scan on gaps (cost=164.16..243.26 rows=761 width=12) (actual time=0.030..0.037 rows=4 loops=1) |
Output: gaps.dat, gaps.td |
Filter: (((gaps.l_td - gaps.td) > '03:00:00'::interval) OR (gaps.l_td IS NULL)) |
Rows Removed by Filter: 10 |
Buffers: shared hit=1 |
-> WindowAgg (cost=164.16..209.36 rows=2260 width=20) (actual time=0.022..0.032 rows=14 loops=1) |
Output: ((test.the_date)::date), test.the_date, lead(test.the_date) OVER (?) |
Buffers: shared hit=1 |
-> Sort (cost=164.16..169.81 rows=2260 width=12) (actual time=0.018..0.020 rows=14 loops=1) |
Output: ((test.the_date)::date), test.the_date |
Sort Key: ((test.the_date)::date) |
Sort Method: quicksort Memory: 25kB |
Buffers: shared hit=1 |
-> Seq Scan on public.test (cost=0.00..38.25 rows=2260 width=12) (actual time=0.009..0.013 rows=14 loops=1) |
Output: (test.the_date)::date, test.the_date |
Buffers: shared hit=1 |
Planning Time: 0.137 ms |
Execution Time: 0.083 ms |
EXPLAIN (ANALYZE, VERBOSE, BUFFERS, COSTS)
WITH long_gaps AS
(
SELECT dat, MIN(td) AS gap
FROM
(
SELECT
the_date::DATE AS dat, the_date AS td, LEAD(the_date)
OVER (PARTITION BY the_date::DATE) AS l_td
FROM
test
) AS t1
WHERE l_td - td > INTERVAL '3 HOUR'
GROUP BY dat
),
short_gaps AS
(
SELECT the_date::DATE AS dat2, MAX(the_date)
FROM test
WHERE the_date::DATE NOT IN (SELECT dat FROM long_gaps)
GROUP BY dat2
)
SELECT dat AS "The date", gap AS "Gap start or last ts" FROM long_gaps
UNION
SELECT * FROM short_gaps
ORDER BY 1; -- parameter 1 which ORDERs BY the first field in the query
QUERY PLAN |
---|
Sort (cost=343.42..344.41 rows=398 width=12) (actual time=0.096..0.098 rows=2 loops=1) |
Output: long_gaps.dat, long_gaps.gap |
Sort Key: long_gaps.dat |
Sort Method: quicksort Memory: 25kB |
Buffers: shared hit=2 |
CTE long_gaps |
-> GroupAggregate (cost=164.16..249.00 rows=198 width=12) (actual time=0.047..0.048 rows=1 loops=1) |
Output: t1.dat, min(t1.td) |
Group Key: t1.dat |
Buffers: shared hit=1 |
-> Subquery Scan on t1 (cost=164.16..243.26 rows=753 width=12) (actual time=0.039..0.044 rows=2 loops=1) |
Output: t1.dat, t1.td, t1.l_td |
Filter: ((t1.l_td - t1.td) > '03:00:00'::interval) |
Rows Removed by Filter: 12 |
Buffers: shared hit=1 |
-> WindowAgg (cost=164.16..209.36 rows=2260 width=20) (actual time=0.030..0.040 rows=14 loops=1) |
Output: ((test_1.the_date)::date), test_1.the_date, lead(test_1.the_date) OVER (?) |
Buffers: shared hit=1 |
-> Sort (cost=164.16..169.81 rows=2260 width=12) (actual time=0.021..0.023 rows=14 loops=1) |
Output: ((test_1.the_date)::date), test_1.the_date |
Sort Key: ((test_1.the_date)::date) |
Sort Method: quicksort Memory: 25kB |
Buffers: shared hit=1 |
-> Seq Scan on public.test test_1 (cost=0.00..38.25 rows=2260 width=12) (actual time=0.009..0.013 rows=14 loops=1) |
Output: (test_1.the_date)::date, test_1.the_date |
Buffers: shared hit=1 |
-> HashAggregate (cost=73.25..77.23 rows=398 width=12) (actual time=0.089..0.090 rows=2 loops=1) |
Output: long_gaps.dat, long_gaps.gap |
Group Key: long_gaps.dat, long_gaps.gap |
Buffers: shared hit=2 |
-> Append (cost=0.00..71.26 rows=398 width=12) (actual time=0.048..0.085 rows=2 loops=1) |
Buffers: shared hit=2 |
-> CTE Scan on long_gaps (cost=0.00..3.96 rows=198 width=12) (actual time=0.048..0.048 rows=1 loops=1) |
Output: long_gaps.dat, long_gaps.gap |
Buffers: shared hit=1 |
-> HashAggregate (cost=56.83..59.33 rows=200 width=12) (actual time=0.035..0.035 rows=1 loops=1) |
Output: ((test.the_date)::date), max(test.the_date) |
Group Key: (test.the_date)::date |
Buffers: shared hit=1 |
-> Seq Scan on public.test (cost=4.46..51.18 rows=1130 width=12) (actual time=0.023..0.030 rows=8 loops=1) |
Output: (test.the_date)::date, test.the_date |
Filter: (NOT (hashed SubPlan 2)) |
Rows Removed by Filter: 6 |
Buffers: shared hit=1 |
SubPlan 2 |
-> CTE Scan on long_gaps long_gaps_1 (cost=0.00..3.96 rows=198 width=4) (actual time=0.001..0.001 rows=1 loops=1) |
Output: long_gaps_1.dat |
Planning Time: 0.176 ms |
Execution Time: 0.180 ms |