By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE purchases (
id serial
, customer_id int -- REFERENCES customer
, total int -- could be amount of money in Cent
, some_column text -- to make the row bigger, more realistic
);
CREATE TABLE
INSERT INTO purchases (customer_id, total, some_column) -- insert 200k rows
SELECT (random() * 10000)::int AS customer_id -- 10k customers
, (random() * random() * 100000)::int AS total
, 'note: ' || repeat('x', (random()^2 * random() * random() * 500)::int)
FROM generate_series(1,200000) g;
ALTER TABLE purchases ADD CONSTRAINT purchases_id_pkey PRIMARY KEY (id);
CREATE INDEX purchases_3c_idx ON purchases (customer_id, total DESC, id);
INSERT 0 200000
ALTER TABLE
CREATE INDEX
VACUUM ANALYZE purchases; -- makes a BIG difference
VACUUM
-- metrics (and prewarm cache)
SELECT l.metric, l.nr AS bytes
, CASE WHEN is_size THEN pg_size_pretty(nr) END AS bytes_pretty
, CASE WHEN is_size THEN nr / NULLIF(x.ct, 0) END AS bytes_per_row
FROM (
SELECT min(tableoid) AS tbl -- = 'public.tbl'::regclass::oid
, count(*) AS ct
, sum(length(t::text)) AS txt_len -- length in characters
FROM purchases t
) x
CROSS JOIN LATERAL (
VALUES
(true , 'core_relation_size' , pg_relation_size(tbl))
, (true , 'visibility_map' , pg_relation_size(tbl, 'vm'))
, (true , 'free_space_map' , pg_relation_size(tbl, 'fsm'))
, (true , 'table_size_incl_toast' , pg_table_size(tbl))
, (true , 'indexes_size' , pg_indexes_size(tbl))
, (true , 'total_size_incl_toast_and_indexes', pg_total_relation_size(tbl))
, (true , 'live_rows_in_text_representation' , txt_len)
, (false, '------------------------------' , NULL)
, (false, 'live_tuples' , pg_stat_get_live_tuples(tbl))
, (false, 'dead_tuples' , pg_stat_get_dead_tuples(tbl))
) l(is_size, metric, nr);
metric | bytes | bytes_pretty | bytes_per_row |
---|---|---|---|
core_relation_size | 18628608 | 18 MB | 93 |
visibility_map | 8192 | 8192 bytes | 0 |
free_space_map | 24576 | 24 kB | 0 |
table_size_incl_toast | 18669568 | 18 MB | 93 |
indexes_size | 10838016 | 10 MB | 54 |
total_size_incl_toast_and_indexes | 29507584 | 28 MB | 147 |
live_rows_in_text_representation | 13713279 | 13 MB | 68 |
------------------------------ | null | null | null |
live_tuples | 200000 | null | null |
dead_tuples | 0 | null | null |
SELECT 10
-- customer table for query 5. (and prewarm cache)
CREATE TABLE customer AS
WITH RECURSIVE cte AS (
SELECT min(customer_id) AS customer_id FROM purchases
UNION ALL
SELECT (SELECT p.customer_id
FROM purchases p
WHERE p.customer_id > c.customer_id
ORDER BY p.customer_id
LIMIT 1)
FROM cte c
WHERE c.customer_id IS NOT NULL
)
SELECT * FROM cte
WHERE customer_id IS NOT NULL;
SELECT 10001
VACUUM ANALYZE customer;
VACUUM
-- avg rows per customer
SELECT count(*) AS customer, round(200000.0 / count(*)) AS avg_rows FROM customer;
customer | avg_rows |
---|---|
10001 | 20 |
SELECT 1
-- 1.
EXPLAIN (ANALYZE, TIMING OFF)
WITH cte AS (
SELECT id, customer_id, total
, row_number() OVER(PARTITION BY customer_id ORDER BY total DESC) AS rn
FROM purchases
)
SELECT id, customer_id, total
FROM cte
WHERE rn = 1;
QUERY PLAN |
---|
Subquery Scan on cte (cost=0.42..12088.42 rows=1000 width=12) (actual rows=10001 loops=1) |
Filter: (cte.rn = 1) |
Rows Removed by Filter: 189999 |
-> WindowAgg (cost=0.42..9588.42 rows=200000 width=20) (actual rows=200000 loops=1) |
-> Index Only Scan using purchases_3c_idx on purchases (cost=0.42..6088.42 rows=200000 width=12) (actual rows=200000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.366 ms |
Execution Time: 300.918 ms |
EXPLAIN
-- 2.
EXPLAIN (ANALYZE, TIMING OFF)
SELECT id, customer_id, total
FROM (
SELECT id, customer_id, total
, row_number() OVER(PARTITION BY customer_id ORDER BY total DESC) AS rn
FROM purchases
) sub
WHERE rn = 1;
QUERY PLAN |
---|
Subquery Scan on sub (cost=0.42..12088.42 rows=1000 width=12) (actual rows=10001 loops=1) |
Filter: (sub.rn = 1) |
Rows Removed by Filter: 189999 |
-> WindowAgg (cost=0.42..9588.42 rows=200000 width=20) (actual rows=200000 loops=1) |
-> Index Only Scan using purchases_3c_idx on purchases (cost=0.42..6088.42 rows=200000 width=12) (actual rows=200000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.390 ms |
Execution Time: 170.325 ms |
EXPLAIN
-- 3.
EXPLAIN (ANALYZE, TIMING OFF)
SELECT DISTINCT ON (customer_id)
id, customer_id, total
FROM purchases
ORDER BY customer_id, total DESC, id;
QUERY PLAN |
---|
Result (cost=0.42..6588.42 rows=9956 width=12) (actual rows=10001 loops=1) |
-> Unique (cost=0.42..6588.42 rows=9956 width=12) (actual rows=10001 loops=1) |
-> Index Only Scan using purchases_3c_idx on purchases (cost=0.42..6088.42 rows=200000 width=12) (actual rows=200000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.366 ms |
Execution Time: 41.460 ms |
EXPLAIN
-- 4.
EXPLAIN (ANALYZE, TIMING OFF)
WITH RECURSIVE cte AS (
( -- parentheses required
SELECT id, customer_id, total
FROM purchases
ORDER BY customer_id, total DESC
LIMIT 1
)
UNION ALL
SELECT u.*
FROM cte c
, LATERAL (
SELECT id, customer_id, total
FROM purchases
WHERE customer_id > c.customer_id -- lateral reference
ORDER BY customer_id, total DESC
LIMIT 1
) u
)
SELECT id, customer_id, total
FROM cte
ORDER BY customer_id;
QUERY PLAN |
---|
Sort (cost=57.15..57.40 rows=101 width=12) (actual rows=10001 loops=1) |
Sort Key: cte.customer_id |
Sort Method: quicksort Memory: 853kB |
CTE cte |
-> Recursive Union (cost=0.42..51.77 rows=101 width=12) (actual rows=10001 loops=1) |
-> Limit (cost=0.42..0.45 rows=1 width=12) (actual rows=1 loops=1) |
-> Index Only Scan using purchases_3c_idx on purchases (cost=0.42..6088.42 rows=200000 width=12) (actual rows=1 loops=1) |
Heap Fetches: 0 |
-> Nested Loop (cost=0.42..4.93 rows=10 width=12) (actual rows=1 loops=10001) |
-> WorkTable Scan on cte c (cost=0.00..0.20 rows=10 width=4) (actual rows=1 loops=10001) |
-> Limit (cost=0.42..0.45 rows=1 width=12) (actual rows=1 loops=10001) |
-> Index Only Scan using purchases_3c_idx on purchases purchases_1 (cost=0.42..2199.09 rows=66667 width=12) (actual rows=1 loops=10001) |
Index Cond: (customer_id > c.customer_id) |
Heap Fetches: 0 |
-> CTE Scan on cte (cost=0.00..2.02 rows=101 width=12) (actual rows=10001 loops=1) |
Planning Time: 0.218 ms |
Execution Time: 119.139 ms |
EXPLAIN
-- 5.
EXPLAIN (ANALYZE, TIMING OFF)
SELECT l.*
FROM customer c
, LATERAL (
SELECT id, customer_id, total
FROM purchases
WHERE customer_id = c.customer_id -- lateral reference
ORDER BY total DESC
LIMIT 1
) l;
QUERY PLAN |
---|
Nested Loop (cost=0.42..6720.67 rows=10001 width=12) (actual rows=10001 loops=1) |
-> Seq Scan on customer c (cost=0.00..145.01 rows=10001 width=4) (actual rows=10001 loops=1) |
-> Limit (cost=0.42..0.64 rows=1 width=12) (actual rows=1 loops=10001) |
-> Index Only Scan using purchases_3c_idx on purchases (cost=0.42..4.77 rows=20 width=12) (actual rows=1 loops=10001) |
Index Cond: (customer_id = c.customer_id) |
Heap Fetches: 0 |
Planning Time: 0.176 ms |
Execution Time: 42.183 ms |
EXPLAIN
-- 6.
EXPLAIN (ANALYZE, TIMING OFF)
SELECT (array_agg(id ORDER BY total DESC))[1] AS id
, customer_id
, max(total) AS total
FROM purchases
GROUP BY customer_id;
QUERY PLAN |
---|
GroupAggregate (cost=0.42..7712.87 rows=9956 width=12) (actual rows=10001 loops=1) |
Group Key: customer_id |
-> Index Only Scan using purchases_3c_idx on purchases (cost=0.42..6088.42 rows=200000 width=12) (actual rows=200000 loops=1) |
Heap Fetches: 0 |
Planning Time: 0.206 ms |
Execution Time: 137.648 ms |
EXPLAIN