By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE tbl (
id int PRIMARY KEY
, part int NOT NULL
, ballast text
);
INSERT INTO tbl
SELECT g, g/2 -- !
, '0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789' -- 80 char
FROM generate_series(1, 200000) g
ORDER BY random();
CREATE UNIQUE INDEX tbl_part_id_idx ON tbl (part, id);
DELETE FROM tbl WHERE random() > .9; -- some dead tuples
CREATE TABLE
INSERT 0 200000
CREATE INDEX
DELETE 20000
VACUUM ANALYZE tbl;
VACUUM
set work_mem = '384MB';
SET
-- org asc (idx only)
EXPLAIN ANALYZE
SELECT id, part
FROM (
SELECT *, row_number() OVER (PARTITION BY part ORDER BY id) AS rn
FROM tbl
) sub
WHERE rn = 1;
QUERY PLAN |
---|
Subquery Scan on sub (cost=0.42..10304.42 rows=900 width=8) (actual time=0.064..152.521 rows=99020 loops=1) |
Filter: (sub.rn = 1) |
Rows Removed by Filter: 80980 |
-> WindowAgg (cost=0.42..8054.42 rows=180000 width=48) (actual time=0.063..135.080 rows=180000 loops=1) |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.050..33.194 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.221 ms |
Execution Time: 155.998 ms |
EXPLAIN
-- org desc (Incremental Sort in pg 14+; additional sort step in pg 11)
EXPLAIN ANALYZE
SELECT id, part
FROM (
SELECT *, row_number() OVER (PARTITION BY part ORDER BY id DESC) AS rn
FROM tbl
) sub
WHERE rn = 1;
QUERY PLAN |
---|
Subquery Scan on sub (cost=20616.29..26466.29 rows=900 width=8) (actual time=76.705..197.456 rows=99020 loops=1) |
Filter: (sub.rn = 1) |
Rows Removed by Filter: 80980 |
-> WindowAgg (cost=20616.29..24216.29 rows=180000 width=48) (actual time=76.704..182.210 rows=180000 loops=1) |
-> Sort (cost=20616.29..21066.29 rows=180000 width=8) (actual time=76.688..88.969 rows=180000 loops=1) |
Sort Key: tbl.part, tbl.id DESC |
Sort Method: quicksort Memory: 14582kB |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.013..22.226 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.127 ms |
Execution Time: 201.031 ms |
EXPLAIN
-- org desc, optmized with ROWS (Incremental Sort in pg 14+; additional sort step in pg 11)
EXPLAIN ANALYZE
SELECT id, part
FROM (
SELECT *, row_number() OVER (PARTITION BY part ORDER BY id DESC
ROWS UNBOUNDED PRECEDING) AS rn
FROM tbl
) sub
WHERE rn = 1;
QUERY PLAN |
---|
Subquery Scan on sub (cost=20616.29..26466.29 rows=900 width=8) (actual time=69.897..174.614 rows=99020 loops=1) |
Filter: (sub.rn = 1) |
Rows Removed by Filter: 80980 |
-> WindowAgg (cost=20616.29..24216.29 rows=180000 width=48) (actual time=69.895..159.605 rows=180000 loops=1) |
-> Sort (cost=20616.29..21066.29 rows=180000 width=8) (actual time=69.881..82.250 rows=180000 loops=1) |
Sort Key: tbl.part, tbl.id DESC |
Sort Method: quicksort Memory: 14582kB |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.019..20.699 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.112 ms |
Execution Time: 179.184 ms |
EXPLAIN
-- Paul's query, adapted to get greatest id per group
EXPLAIN ANALYZE
SELECT id, part
FROM (
SELECT *
, CASE WHEN part = lag(part) OVER (ORDER BY part DESC, id DESC)
THEN 0
ELSE 1
END AS f
FROM tbl
) sub
WHERE sub.f = 1;
QUERY PLAN |
---|
Subquery Scan on sub (cost=0.42..10754.42 rows=900 width=8) (actual time=0.031..137.655 rows=99020 loops=1) |
Filter: (sub.f = 1) |
Rows Removed by Filter: 80980 |
-> WindowAgg (cost=0.42..8504.42 rows=180000 width=44) (actual time=0.030..122.083 rows=180000 loops=1) |
-> Index Only Scan Backward using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.024..28.768 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.113 ms |
Execution Time: 141.244 ms |
EXPLAIN
-- Paul's query, optimized (without index scan backward)
EXPLAIN ANALYZE
SELECT id, part
FROM (
SELECT *
, CASE WHEN part = lead(part) OVER (ORDER BY part, id ROWS UNBOUNDED PRECEDING)
THEN false
ELSE true END AS qualified
FROM tbl
) sub
WHERE qualified;
QUERY PLAN |
---|
Subquery Scan on sub (cost=0.42..10304.42 rows=90000 width=8) (actual time=0.018..111.960 rows=99020 loops=1) |
Filter: sub.qualified |
Rows Removed by Filter: 80980 |
-> WindowAgg (cost=0.42..8504.42 rows=180000 width=41) (actual time=0.018..98.070 rows=180000 loops=1) |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.013..27.726 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.102 ms |
Execution Time: 115.276 ms |
EXPLAIN
-- DISTINCT ON is faster for few rows per group (seq scan)
EXPLAIN ANALYZE
SELECT DISTINCT ON (part) id, part
FROM tbl
ORDER BY part, id DESC
QUERY PLAN |
---|
Unique (cost=20616.29..21516.29 rows=95078 width=8) (actual time=81.047..110.954 rows=99020 loops=1) |
-> Sort (cost=20616.29..21066.29 rows=180000 width=8) (actual time=81.043..91.037 rows=180000 loops=1) |
Sort Key: part, id DESC |
Sort Method: quicksort Memory: 14582kB |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.015..23.289 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.111 ms |
Execution Time: 116.476 ms |
EXPLAIN
-- DISTINCT ON is faster for few rows per group (seq scan)
EXPLAIN ANALYZE
SELECT DISTINCT ON (part) id, part
FROM tbl
ORDER BY part, id DESC;
QUERY PLAN |
---|
Unique (cost=20616.29..21516.29 rows=95078 width=8) (actual time=81.408..110.971 rows=99020 loops=1) |
-> Sort (cost=20616.29..21066.29 rows=180000 width=8) (actual time=81.405..91.239 rows=180000 loops=1) |
Sort Key: part, id DESC |
Sort Method: quicksort Memory: 14582kB |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.020..23.255 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.117 ms |
Execution Time: 115.432 ms |
EXPLAIN
-- DISTINCT ON with matching index (your first query to get min id per group)
EXPLAIN ANALYZE
SELECT DISTINCT ON (part) id, part
FROM tbl
ORDER BY part, id;
QUERY PLAN |
---|
Result (cost=0.42..5354.42 rows=95078 width=8) (actual time=0.023..49.411 rows=99020 loops=1) |
-> Unique (cost=0.42..5354.42 rows=95078 width=8) (actual time=0.021..39.302 rows=99020 loops=1) |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.020..18.973 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.108 ms |
Execution Time: 52.644 ms |
EXPLAIN
-- rCTE is faster for more than a few rows per group (idx scan)
EXPLAIN ANALYZE
WITH RECURSIVE cte AS (
( -- parentheses required
SELECT part, id
FROM tbl
ORDER BY part DESC, id DESC
LIMIT 1
)
UNION ALL
SELECT l.*
FROM cte c
CROSS JOIN LATERAL (
SELECT t.part, t.id
FROM tbl t
WHERE t.part < c.part -- lateral reference
ORDER BY t.part DESC, t.id DESC
LIMIT 1
) l
)
TABLE cte;
QUERY PLAN |
---|
CTE Scan on cte (cost=51.44..53.46 rows=101 width=8) (actual time=0.012..753.396 rows=99020 loops=1) |
CTE cte |
-> Recursive Union (cost=0.42..51.44 rows=101 width=8) (actual time=0.011..726.633 rows=99020 loops=1) |
-> Limit (cost=0.42..0.45 rows=1 width=8) (actual time=0.010..0.011 rows=1 loops=1) |
-> Index Only Scan Backward using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.009..0.010 rows=1 loops=1) |
Heap Fetches: 0 |
-> Nested Loop (cost=0.42..4.90 rows=10 width=8) (actual time=0.007..0.007 rows=1 loops=99020) |
-> WorkTable Scan on cte c (cost=0.00..0.20 rows=10 width=4) (actual time=0.000..0.000 rows=1 loops=99020) |
-> Limit (cost=0.42..0.45 rows=1 width=8) (actual time=0.006..0.006 rows=1 loops=99020) |
-> Index Only Scan Backward using tbl_part_id_idx on tbl t (cost=0.42..1786.42 rows=60000 width=8) (actual time=0.006..0.006 rows=1 loops=99020) |
Index Cond: (part < c.part) |
Heap Fetches: 0 |
Planning Time: 0.199 ms |
Execution Time: 759.135 ms |
EXPLAIN
-- original woraround, inferior to the new query, but now with more work_mem.
EXPLAIN ANALYZE
SELECT id, part
FROM (
SELECT *, count(*) OVER w AS ct
, row_number() OVER (w ORDER BY id ROWS UNBOUNDED PRECEDING) AS rn
FROM tbl
WINDOW w AS (PARTITION BY part)
) sub
WHERE rn = ct;
QUERY PLAN |
---|
Subquery Scan on sub (cost=23316.29..29166.29 rows=900 width=8) (actual time=152.619..252.193 rows=99020 loops=1) |
Filter: (sub.rn = sub.ct) |
Rows Removed by Filter: 80980 |
-> WindowAgg (cost=23316.29..26916.29 rows=180000 width=56) (actual time=152.617..236.750 rows=180000 loops=1) |
-> Sort (cost=23316.29..23766.29 rows=180000 width=16) (actual time=152.604..164.067 rows=180000 loops=1) |
Sort Key: tbl.part, tbl.id |
Sort Method: quicksort Memory: 14582kB |
-> WindowAgg (cost=0.42..7604.42 rows=180000 width=16) (actual time=0.018..121.918 rows=180000 loops=1) |
-> Index Only Scan using tbl_part_id_idx on tbl (cost=0.42..4904.42 rows=180000 width=8) (actual time=0.012..25.923 rows=180000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.138 ms |
Execution Time: 256.587 ms |
EXPLAIN