By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE EXTENSION btree_gist;
CREATE EXTENSION
CREATE TABLE stock (
stock_id serial PRIMARY KEY
, store_id integer NOT NULL
, product_id integer NOT NULL
, day date NOT NULL
, value integer NOT NULL
, UNIQUE (store_id, product_id, day)
);
INSERT INTO stock(store_id, product_id, day, value)
SELECT * FROM (
SELECT DISTINCT ON (s,p,d)
(random() * 25)::int AS s -- 25 stores
, (random() * 500)::int AS p -- 500 products
, '1987-3-1'::date + g/20 AS d -- 28 years of history
, (random() * 100)::int AS v
FROM generate_series(1, 100000) g -- ~ 100k rows
) sub
ORDER BY d, random();
CREATE TABLE
INSERT 0 99917
VACUUM ANALYZE stock;
VACUUM
CREATE MATERIALIZED VIEW mv_stock AS
SELECT store_id, product_id, value
, daterange (day, lead(day, 1, now()::date)
OVER (PARTITION BY store_id, product_id ORDER BY day)) AS day_range
FROM stock;
CREATE INDEX mv_stock_range_idx ON mv_stock USING gist (day_range);
SELECT 99917
CREATE INDEX
VACUUM ANALYZE mv_stock;
VACUUM
-- Erwin
WITH input(a,b) AS (SELECT '2013-01-01'::date -- your time frame here
, '2013-01-15'::date) -- inclusive borders
SELECT store_id, product_id
, sum(upper(days) - lower(days)) AS days_in_range
, round(sum(value * (upper(days) - lower(days)))::numeric
/ sum(upper(days) - lower(days)), 2) AS my_result
FROM (
SELECT store_id, product_id, value, s.day_range * x.day_range AS days
FROM (
SELECT store_id, product_id, value
, daterange (day, lead(day, 1, CURRENT_DATE)
OVER (PARTITION BY store_id, product_id ORDER BY day)) AS day_range
FROM stock
) s
JOIN (
SELECT daterange(a, b+1) FROM input
) x(day_range) ON s.day_range && x.day_range
) sub
GROUP BY 1, 2
ORDER BY 1, 2
LIMIT 10;
store_id | product_id | days_in_range | my_result |
---|---|---|---|
0 | 0 | 15 | 68.00 |
0 | 1 | 15 | 44.00 |
0 | 2 | 15 | 41.00 |
0 | 3 | 15 | 61.00 |
0 | 4 | 15 | 57.00 |
0 | 5 | 15 | 49.00 |
0 | 6 | 15 | 18.00 |
0 | 7 | 15 | 92.00 |
0 | 8 | 15 | 1.00 |
0 | 9 | 15 | 16.00 |
SELECT 10
-- Erwin, building on MV
WITH input(a,b) AS (SELECT '2013-01-01'::date -- your time frame here
, '2013-01-15'::date) -- inlcusive borders
SELECT store_id, product_id
, sum(upper(days) - lower(days)) AS days_in_range
, round(sum(value * (upper(days) - lower(days)))::numeric
/ sum(upper(days) - lower(days)), 2) AS my_result
FROM (
SELECT s.store_id, s.product_id, s.value, s.day_range * x.day_range AS days
FROM mv_stock s
JOIN (
SELECT daterange(a, b+1) FROM input
) x(day_range) ON s.day_range && x.day_range
) sub
GROUP BY 1, 2
ORDER BY 1, 2
LIMIT 10;
store_id | product_id | days_in_range | my_result |
---|---|---|---|
0 | 0 | 15 | 68.00 |
0 | 1 | 15 | 44.00 |
0 | 2 | 15 | 41.00 |
0 | 3 | 15 | 61.00 |
0 | 4 | 15 | 57.00 |
0 | 5 | 15 | 49.00 |
0 | 6 | 15 | 18.00 |
0 | 7 | 15 | 92.00 |
0 | 8 | 15 | 1.00 |
0 | 9 | 15 | 16.00 |
SELECT 10
-- joop
WITH calendar(day) AS ( SELECT generate_series('2013-01-01'::date, '2013-01-15'::date, '1 d'::interval)::date )
SELECT st.store_id,st.product_id
, (SUM(st.value)::decimal(8,2) / COUNT(*) )::decimal(8,2) AS wval
FROM calendar
JOIN stock st ON calendar.day >= st.day
AND NOT EXISTS ( -- this calendar entry belongs to the next stock entry
SELECT * FROM stock nx
WHERE nx.store_id = st.store_id AND nx.product_id = st.product_id
AND nx.day > st.day AND nx.day <= calendar.day
)
GROUP BY st.store_id,st.product_id
ORDER BY st.store_id,st.product_id
LIMIT 10;
store_id | product_id | wval |
---|---|---|
0 | 0 | 68.00 |
0 | 1 | 44.00 |
0 | 2 | 41.00 |
0 | 3 | 61.00 |
0 | 4 | 57.00 |
0 | 5 | 49.00 |
0 | 6 | 18.00 |
0 | 7 | 92.00 |
0 | 8 | 1.00 |
0 | 9 | 16.00 |
SELECT 10
-- Jaaz
select store_id, product_id, CASE WHEN sum(nextday-day) > 0 THEN sum(Value*(nextday-day)) / sum(nextday-day) END as Avg_Value
from (
select *
, (
select value
from stock b
where a.store_id = b.store_id
and a.product_id = b.product_id
and a.day >= b.day
order by b.day
limit 1
)*1.0 "value"
, coalesce((
select day
from stock b
where a.store_id = b.store_id
and a.product_id = b.product_id
and a.day < b.day
order by b.day
limit 1
),case when current_date > '2013-01-01' then '2013-01-15' else current_date end) nextday
from (
select store_id, product_id, min(case when day < '2013-01-01' then '2013-01-15' else day end) AS day
from stock z
where day < '2013-01-15'
group by store_id, product_id
) a
union all
select store_id, product_id, day, value*1.0 "value"
, coalesce((
select day
from stock b
where a.store_id = b.store_id
and a.product_id = b.product_id
and a.day < b.day
store_id | product_id | avg_value |
---|---|---|
0 | 0 | null |
0 | 1 | null |
0 | 2 | null |
0 | 3 | null |
0 | 4 | null |
0 | 5 | null |
0 | 6 | null |
0 | 7 | null |
0 | 8 | null |
0 | 9 | null |
SELECT 10
-- Erwin query plan (wihtout LIMIT!)
EXPLAIN ANALYZE
WITH input(a,b) AS (SELECT '2013-01-01'::date -- your time frame here
, '2013-01-15'::date) -- inclusive borders
SELECT store_id, product_id
, sum(upper(days) - lower(days)) AS days_in_range
, round(sum(value * (upper(days) - lower(days)))::numeric
/ sum(upper(days) - lower(days)), 2) AS my_result
FROM (
SELECT store_id, product_id, value, s.day_range * x.day_range AS days
FROM (
SELECT store_id, product_id, value
, daterange (day, lead(day, 1, CURRENT_DATE)
OVER (PARTITION BY store_id, product_id ORDER BY day)) AS day_range
FROM stock
) s
JOIN (
SELECT daterange(a, b+1) FROM input
) x(day_range) ON s.day_range && x.day_range
) sub
GROUP BY 1, 2
ORDER BY 1, 2;
QUERY PLAN |
---|
GroupAggregate (cost=0.42..9992.55 rows=955 width=48) (actual time=0.037..210.467 rows=12998 loops=1) |
Group Key: s.store_id, s.product_id |
-> Subquery Scan on s (cost=0.42..9935.98 rows=999 width=44) (actual time=0.021..175.447 rows=12998 loops=1) |
Filter: (s.day_range && '[2013-01-01,2013-01-16)'::daterange) |
Rows Removed by Filter: 86919 |
-> WindowAgg (cost=0.42..8687.02 rows=99917 width=48) (actual time=0.020..158.387 rows=99917 loops=1) |
-> Index Scan using stock_store_id_product_id_day_key on stock (cost=0.42..6189.10 rows=99917 width=16) (actual time=0.009..57.255 rows=99917 loops=1) |
Planning Time: 0.175 ms |
Execution Time: 211.266 ms |
EXPLAIN
-- Erwin MV query plan (wihtout LIMIT!)
EXPLAIN ANALYZE
WITH input(a,b) AS (SELECT '2013-01-01'::date -- your time frame here
, '2013-01-15'::date) -- inlcusive borders
SELECT store_id, product_id
, sum(upper(days) - lower(days)) AS days_in_range
, round(sum(value * (upper(days) - lower(days)))::numeric
/ sum(upper(days) - lower(days)), 2) AS my_result
FROM (
SELECT s.store_id, s.product_id, s.value, s.day_range * x.day_range AS days
FROM mv_stock s
JOIN (
SELECT daterange(a, b+1) FROM input
) x(day_range) ON s.day_range && x.day_range
) sub
GROUP BY 1, 2
ORDER BY 1, 2;
QUERY PLAN |
---|
Sort (cost=2607.30..2626.40 rows=7641 width=48) (actual time=38.900..39.616 rows=12998 loops=1) |
Sort Key: s.store_id, s.product_id |
Sort Method: quicksort Memory: 994kB |
-> HashAggregate (cost=1961.65..2114.47 rows=7641 width=48) (actual time=27.178..33.434 rows=12998 loops=1) |
Group Key: s.store_id, s.product_id |
Batches: 1 Memory Usage: 1425kB |
-> Bitmap Heap Scan on mv_stock s (cost=520.60..1456.87 rows=13461 width=26) (actual time=4.843..7.776 rows=12998 loops=1) |
Recheck Cond: (day_range && '[2013-01-01,2013-01-16)'::daterange) |
Heap Blocks: exact=735 |
-> Bitmap Index Scan on mv_stock_range_idx (cost=0.00..517.24 rows=13461 width=0) (actual time=4.753..4.754 rows=12998 loops=1) |
Index Cond: (day_range && '[2013-01-01,2013-01-16)'::daterange) |
Planning Time: 0.182 ms |
Execution Time: 40.152 ms |
EXPLAIN
CREATE INDEX mv_stock_range_plus_idx ON mv_stock USING gist (day_range) INCLUDE (store_id, product_id, value);
CREATE INDEX
-- Erwin MV query plan (wihtout LIMIT!)
EXPLAIN ANALYZE
WITH input(a,b) AS (SELECT '2013-01-01'::date -- your time frame here
, '2013-01-15'::date) -- inlcusive borders
SELECT store_id, product_id
, sum(upper(days) - lower(days)) AS days_in_range
, round(sum(value * (upper(days) - lower(days)))::numeric
/ sum(upper(days) - lower(days)), 2) AS my_result
FROM (
SELECT s.store_id, s.product_id, s.value, s.day_range * x.day_range AS days
FROM mv_stock s
JOIN (
SELECT daterange(a, b+1) FROM input
) x(day_range) ON s.day_range && x.day_range
) sub
GROUP BY 1, 2
ORDER BY 1, 2;
QUERY PLAN |
---|
Sort (cost=2174.28..2193.38 rows=7641 width=48) (actual time=40.515..41.230 rows=12998 loops=1) |
Sort Key: s.store_id, s.product_id |
Sort Method: quicksort Memory: 994kB |
-> HashAggregate (cost=1528.63..1681.46 rows=7641 width=48) (actual time=28.874..35.080 rows=12998 loops=1) |
Group Key: s.store_id, s.product_id |
Batches: 1 Memory Usage: 1425kB |
-> Index Only Scan using mv_stock_range_plus_idx on mv_stock s (cost=0.28..1023.85 rows=13461 width=26) (actual time=0.147..9.272 rows=12998 loops=1) |
Index Cond: (day_range && '[2013-01-01,2013-01-16)'::daterange) |
Heap Fetches: 0 |
Planning Time: 0.262 ms |
Execution Time: 41.763 ms |
EXPLAIN
-- Erwin MV query plan (wihtout LIMIT!)
EXPLAIN ANALYZE
WITH input(a,b) AS (SELECT '2013-01-01'::date -- your time frame here
, '2013-01-15'::date) -- inlcusive borders
SELECT store_id, product_id
, sum(upper(days) - lower(days)) AS days_in_range
, round(sum(value * (upper(days) - lower(days)))::numeric
/ sum(upper(days) - lower(days)), 2) AS my_result
FROM (
SELECT s.store_id, s.product_id, s.value, s.day_range * x.day_range AS days
FROM mv_stock s
JOIN (
SELECT daterange(a, b+1) FROM input
) x(day_range) ON s.day_range && x.day_range
) sub
GROUP BY 1, 2
QUERY PLAN |
---|
HashAggregate (cost=1528.63..1681.46 rows=7641 width=48) (actual time=28.980..35.163 rows=12998 loops=1) |
Group Key: s.store_id, s.product_id |
Batches: 1 Memory Usage: 1425kB |
-> Index Only Scan using mv_stock_range_plus_idx on mv_stock s (cost=0.28..1023.85 rows=13461 width=26) (actual time=0.132..9.379 rows=12998 loops=1) |
Index Cond: (day_range && '[2013-01-01,2013-01-16)'::daterange) |
Heap Fetches: 0 |
Planning Time: 0.181 ms |
Execution Time: 35.676 ms |
EXPLAIN
-- joop query plan (wihtout LIMIT!)
EXPLAIN ANALYZE
WITH calendar(day) AS ( SELECT generate_series('2013-01-01'::date, '2013-01-15'::date, '1 d'::interval)::date )
SELECT st.store_id,st.product_id
, (SUM(st.value)::decimal(8,2) / COUNT(*) )::decimal(8,2) AS wval
FROM calendar
JOIN stock st ON calendar.day >= st.day
AND NOT EXISTS ( -- this calendar entry belongs to the next stock entry
SELECT * FROM stock nx
WHERE nx.store_id = st.store_id AND nx.product_id = st.product_id
AND nx.day > st.day AND nx.day <= calendar.day
)
GROUP BY st.store_id,st.product_id
ORDER BY st.store_id,st.product_id;
QUERY PLAN |
---|
Sort (cost=2930811.88..2930836.86 rows=9992 width=22) (actual time=1712.592..1713.323 rows=12998 loops=1) |
Sort Key: st.store_id, st.product_id |
Sort Method: quicksort Memory: 892kB |
-> HashAggregate (cost=2929923.27..2930148.09 rows=9992 width=22) (actual time=1700.089..1706.724 rows=12998 loops=1) |
Group Key: st.store_id, st.product_id |
Batches: 1 Memory Usage: 1425kB |
-> Hash Anti Join (cost=3135.34..2633872.90 rows=29605037 width=12) (actual time=376.388..1643.413 rows=194970 loops=1) |
Hash Cond: ((st.store_id = nx.store_id) AND (st.product_id = nx.product_id)) |
Join Filter: ((nx.day > st.day) AND (nx.day <= (((generate_series(('2013-01-01'::date)::timestamp with time zone, ('2013-01-15'::date)::timestamp with time zone, '1 day'::interval)))::date))) |
Rows Removed by Join Filter: 1498755 |
-> Nested Loop (cost=0.42..1420632.02 rows=33305667 width=20) (actual time=340.663..1196.145 rows=1498755 loops=1) |
-> Result (cost=0.00..25.02 rows=1000 width=4) (actual time=340.618..340.718 rows=15 loops=1) |
-> ProjectSet (cost=0.00..5.02 rows=1000 width=8) (actual time=340.614..340.698 rows=15 loops=1) |
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=340.581..340.582 rows=1 loops=1) |
-> Index Scan using stock_store_id_product_id_day_key on stock st (cost=0.42..1087.55 rows=33306 width=16) (actual time=0.011..45.852 rows=99917 loops=15) |
Index Cond: (day <= (((generate_series(('2013-01-01'::date)::timestamp with time zone, ('2013-01-15'::date)::timestamp with time zone, '1 day'::interval)))::date)) |
-> Hash (cost=1636.17..1636.17 rows=99917 width=12) (actual time=34.445..34.446 rows=99917 loops=1) |
Buckets: 131072 Batches: 1 Memory Usage: 5318kB |
-> Seq Scan on stock nx (cost=0.00..1636.17 rows=99917 width=12) (actual time=0.019..10.117 rows=99917 loops=1) |
Planning Time: 0.298 ms |
JIT: |
Functions: 26 |
Options: Inlining true, Optimization true, Expressions true, Deforming true |
Timing: Generation 1.948 ms, Inlining 106.791 ms, Optimization 127.281 ms, Emission 106.635 ms, Total 342.654 ms |
Execution Time: 1932.971 ms |
EXPLAIN