By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
-- building on Abelisto's setup
CREATE TABLE station_logs (
station_log_id bigserial PRIMARY KEY
, station_id int
, submitted_at timestamp
, level_sensor double precision);
INSERT INTO station_logs (station_id, submitted_at, level_sensor)
SELECT (random() * 400)::int
, now() - make_interval(secs := random()*100000000)
, random()
FROM generate_series(1,1000000);
CREATE INDEX station_id__submitted_at ON station_logs(station_id, submitted_at DESC NULLS LAST);
CREATE INDEX idx_station_logs__station_id ON station_logs(station_id);
1000000 rows affected
VACUUM ANALYZE station_logs;
-- adding station table (very fast)
CREATE TABLE station AS
WITH RECURSIVE cte AS (
(
SELECT station_id
FROM station_logs
ORDER BY station_id
LIMIT 1
)
UNION ALL
SELECT l.station_id
FROM cte c
, LATERAL (
SELECT station_id
FROM station_logs
WHERE station_id > c.station_id
ORDER BY station_id
LIMIT 1
) l
)
TABLE cte;
401 rows affected
VACUUM ANALYZE station;
EXPLAIN (ANALYZE, BUFFERS)
SELECT s.station_id, l.submitted_at, l.level_sensor
FROM station s
CROSS JOIN LATERAL (
SELECT submitted_at, level_sensor
FROM station_logs
WHERE station_id = s.station_id
ORDER BY submitted_at DESC NULLS LAST
LIMIT 1
) l;
QUERY PLAN |
---|
Nested Loop (cost=0.42..1569.72 rows=401 width=20) (actual time=0.117..15.433 rows=401 loops=1) |
Buffers: shared hit=810 read=796 written=269 |
-> Seq Scan on station s (cost=0.00..6.01 rows=401 width=4) (actual time=0.011..0.247 rows=401 loops=1) |
Buffers: shared hit=1 read=1 |
-> Limit (cost=0.42..3.88 rows=1 width=16) (actual time=0.036..0.037 rows=1 loops=401) |
Buffers: shared hit=809 read=795 written=269 |
-> Index Scan using station_id__submitted_at on station_logs (cost=0.42..8616.01 rows=2494 width=16) (actual time=0.036..0.036 rows=1 loops=401) |
Index Cond: (station_id = s.station_id) |
Buffers: shared hit=809 read=795 written=269 |
Planning time: 0.270 ms |
Execution time: 15.576 ms |
-- Abelisto's query
EXPLAIN (ANALYZE, BUFFERS)
with t as (select station_id, max(submitted_at) submitted_at from station_logs group by station_id)
select * from t join station_logs l on (l.station_id = t.station_id and l.submitted_at = t.submitted_at);
QUERY PLAN |
---|
Nested Loop (cost=14736.35..18018.40 rows=1 width=40) (actual time=452.380..463.350 rows=401 loops=1) |
Buffers: shared hit=1276 read=7685 written=102 |
CTE t |
-> Finalize GroupAggregate (cost=14725.90..14735.92 rows=401 width=12) (actual time=452.324..453.133 rows=401 loops=1) |
Group Key: station_logs.station_id |
Buffers: shared hit=374 read=6979 written=96 |
-> Sort (cost=14725.90..14727.90 rows=802 width=12) (actual time=452.317..452.591 rows=1203 loops=1) |
Sort Key: station_logs.station_id |
Sort Method: quicksort Memory: 105kB |
Buffers: shared hit=374 read=6979 written=96 |
-> Gather (cost=14603.00..14687.21 rows=802 width=12) (actual time=446.706..452.100 rows=1203 loops=1) |
Workers Planned: 2 |
Workers Launched: 2 |
Buffers: shared hit=374 read=6979 written=96 |
-> Partial HashAggregate (cost=13603.00..13607.01 rows=401 width=12) (actual time=425.202..425.281 rows=401 loops=3) |
Group Key: station_logs.station_id |
Buffers: shared hit=374 read=6979 written=96 |
-> Parallel Seq Scan on station_logs (cost=0.00..11519.67 rows=416667 width=12) (actual time=0.042..200.695 rows=333333 loops=3) |
Buffers: shared hit=374 read=6979 written=96 |
-> CTE Scan on t (cost=0.00..8.02 rows=401 width=12) (actual time=452.327..453.382 rows=401 loops=1) |
Buffers: shared hit=374 read=6979 written=96 |
-> Index Scan using station_id__submitted_at on station_logs l (cost=0.42..8.16 rows=1 width=28) (actual time=0.023..0.023 rows=1 loops=401) |
Index Cond: ((station_id = t.station_id) AND (submitted_at = t.submitted_at)) |
Buffers: shared hit=902 read=706 written=6 |
Planning time: 0.404 ms |
Execution time: 463.795 ms |
-- your org. query
EXPLAIN (ANALYZE, BUFFERS)
SELECT DISTINCT ON(station_id) station_id, submitted_at, level_sensor
FROM station_logs ORDER BY station_id, submitted_at DESC NULLS LAST;
QUERY PLAN |
---|
Unique (cost=0.42..62324.27 rows=401 width=20) (actual time=0.013..9128.843 rows=401 loops=1) |
Buffers: shared hit=137475 read=866230 written=16 |
-> Index Scan using station_id__submitted_at on station_logs (cost=0.42..59824.27 rows=1000000 width=20) (actual time=0.012..8816.765 rows=1000000 loops=1) |
Buffers: shared hit=137475 read=866230 written=16 |
Planning time: 0.115 ms |
Execution time: 9129.139 ms |