Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?
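A minimal sketch of the scenario the title asks about (illustration only, not part of the fiddle below; the table names events and events_2023 are hypothetical): a range-partitioned table whose partition carries its own index, queried once directly and once through the parent.
create table events(created timestamptz) partition by range(created);
create table events_2023 partition of events
    for values from ('2023-01-01') to ('2024-01-01');
create index on events_2023(created);
insert into events
select '2023-01-01'::timestamptz+random()*'364 days'::interval
from generate_series(1,1e5);
vacuum analyze events;
explain select created from events_2023 where created>='2023-06-01';--partition directly
explain select created from events where created>='2023-06-01';--same predicate via the parent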
select setseed(.42);
create table large_table as
select now()-random()*'5 years'::interval as column_date
from generate_series(1,7e5);
alter table large_table alter column column_date set default now()-random()*'5 years'::interval;
create index on large_table(column_date);
setseed |
---|
 |
SELECT 1
SELECT 700000
ALTER TABLE
CREATE INDEX
vacuum analyze large_table;
VACUUM
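One way to confirm the vacuum left large_table's pages all-visible, which is what later lets Index Only Scans skip the heap (assuming the pg_visibility extension is available on the instance):
create extension if not exists pg_visibility;
select count(*) filter(where all_visible) as visible_pages
      ,count(*) as total_pages
from pg_visibility_map('large_table');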
create table large_table_tally_daily as
select date_trunc('day',column_date) as column_date
,count(*)
from large_table
group by 1;
--statement-level trigger function shared by the insert and delete triggers:
--both expose their transition table under the same name, difftab, and TG_OP
--picks the sign of the adjustment
create function trgf_large_table_tally_daily_delete_insert()returns trigger as $f$
begin
with cte as(
select date_trunc('day',column_date) as column_date
,(case when TG_OP='DELETE' then -1 else 1 end) * count(*) as diff
from difftab
group by 1)
update large_table_tally_daily as a
set count=a.count+cte.diff
from cte
where a.column_date=cte.column_date;--assumes the affected day already has a tally row
return null;
end $f$ language plpgsql;
create trigger trg_large_table_tally_daily_delete
after delete on large_table
referencing old table as difftab
for each statement
execute function trgf_large_table_tally_daily_delete_insert();
create trigger trg_large_table_tally_daily_insert
after insert on large_table
referencing new table as difftab
for each statement
execute function trgf_large_table_tally_daily_delete_insert();
--truncate offers no transition table, but it empties the whole table, so the
--tally simply resets to zero
create function trgf_large_table_tally_daily_truncate()returns trigger as $f$
begin update large_table_tally_daily set count=0;
return null;
end $f$ language plpgsql;
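create trigger trg_large_table_tally_daily_truncate
after truncate on large_table
for each statement
execute function trgf_large_table_tally_daily_truncate();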
SELECT 1827
CREATE FUNCTION
CREATE TRIGGER
CREATE TRIGGER
CREATE FUNCTION
CREATE TRIGGER
create view large_table_tally_yearly as
select date_trunc('year',column_date) as column_date
, sum(count)
from large_table_tally_daily
group by 1
order by 1;
table large_table_tally_yearly;
CREATE VIEW
column_date | sum |
---|---|
2019-01-01 00:00:00+00 | 7554 |
2020-01-01 00:00:00+00 | 140586 |
2021-01-01 00:00:00+00 | 139782 |
2022-01-01 00:00:00+00 | 140437 |
2023-01-01 00:00:00+00 | 139971 |
2024-01-01 00:00:00+00 | 131670 |
SELECT 6
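--exercise the insert trigger: 100 rows taking the random default date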
insert into large_table select from generate_series(1,100);
table large_table_tally_yearly;
INSERT 0 100
column_date | sum |
---|---|
2019-01-01 00:00:00+00 | 7555 |
2020-01-01 00:00:00+00 | 140613 |
2021-01-01 00:00:00+00 | 139803 |
2022-01-01 00:00:00+00 | 140451 |
2023-01-01 00:00:00+00 | 139988 |
2024-01-01 00:00:00+00 | 131690 |
SELECT 6
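--exercise the delete trigger: remove a random sample of roughly 200 rows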
delete from large_table
where ctid in(select ctid
from large_table tablesample bernoulli(0.046)repeatable(.42)
limit 200);
table large_table_tally_yearly;
DELETE 200
column_date | sum |
---|---|
2019-01-01 00:00:00+00 | 7552 |
2020-01-01 00:00:00+00 | 140564 |
2021-01-01 00:00:00+00 | 139765 |
2022-01-01 00:00:00+00 | 140418 |
2023-01-01 00:00:00+00 | 139949 |
2024-01-01 00:00:00+00 | 131652 |
SELECT 6
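--baseline: count a timestamp range straight off the base table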
with cte as(select '2020-06-07 12:34:56.123456'::timestamptz as lower_bound
,'2023-01-02 01:23:45.678901'::timestamptz as upper_bound)
select count(*) from large_table cross join cte
where column_date>=lower_bound
and column_date<upper_bound;
explain analyze verbose
with cte as(select '2020-06-07 12:34:56.123456'::timestamptz as lower_bound
,'2023-01-02 01:23:45.678901'::timestamptz as upper_bound)
select count(*) from large_table cross join cte
where column_date>=lower_bound
and column_date<upper_bound;
count |
---|
359997 |
SELECT 1
QUERY PLAN |
---|
Finalize Aggregate (cost=8848.53..8848.54 rows=1 width=8) (actual time=98.801..98.862 rows=1 loops=1) |
Output: count(*) |
-> Gather (cost=8848.31..8848.52 rows=2 width=8) (actual time=98.792..98.856 rows=3 loops=1) |
Output: (PARTIAL count(*)) |
Workers Planned: 2 |
Workers Launched: 2 |
-> Partial Aggregate (cost=7848.31..7848.32 rows=1 width=8) (actual time=76.341..76.342 rows=1 loops=3) |
Output: PARTIAL count(*) |
Worker 0: actual time=61.021..61.022 rows=1 loops=1 |
Worker 1: actual time=69.430..69.432 rows=1 loops=1 |
-> Parallel Seq Scan on public.large_table (cost=0.00..7473.00 rows=150126 width=0) (actual time=0.010..62.947 rows=119999 loops=3) |
Output: large_table.column_date |
Filter: ((large_table.column_date >= '2020-06-07 12:34:56.123456+01'::timestamp with time zone) AND (large_table.column_date < '2023-01-02 01:23:45.678901+00'::timestamp with time zone)) |
Rows Removed by Filter: 113301 |
Worker 0: actual time=0.010..57.335 rows=73443 loops=1 |
Worker 1: actual time=0.014..42.117 rows=105877 loops=1 |
Planning Time: 0.122 ms |
Execution Time: 98.892 ms |
EXPLAIN
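--split the count: whole days come from the tally, only the two partial edge
--days touch the base table via the index; note the edge handling assumes
--bounds that do not fall exactly on midnight (a midnight bound would be
--counted on both sides or on neither)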
with cte as(select '2020-06-07 12:34:56.123456'::timestamptz as lower_bound
,'2023-01-02 01:23:45.678901'::timestamptz as upper_bound)
select (select sum(count)
from large_table_tally_daily cross join cte
where column_date>=lower_bound
and column_date+'1 day'::interval<upper_bound)
+(select count(*) from large_table cross join cte
where column_date>=greatest( date_trunc('day',upper_bound)
,lower_bound)
and column_date<upper_bound )
+(select count(*) from large_table cross join cte
where column_date>=lower_bound
and column_date<least( date_trunc('day',lower_bound)+'1 day'::interval
,upper_bound) );
explain analyze verbose
with cte as(select '2020-06-07 12:34:56.123456'::timestamptz as lower_bound
,'2023-01-02 01:23:45.678901'::timestamptz as upper_bound)
select (select sum(count)
from large_table_tally_daily cross join cte
where column_date>=lower_bound
and column_date+'1 day'::interval<upper_bound)
+(select count(*) from large_table cross join cte
where column_date>=greatest( date_trunc('day',upper_bound)
,lower_bound)
and column_date<upper_bound )
+(select count(*) from large_table cross join cte
where column_date>=lower_bound
and column_date<least( date_trunc('day',lower_bound)+'1 day'::interval
,upper_bound) );
?column? |
---|
359997 |
SELECT 1
QUERY PLAN |
---|
Result (cost=6836.06..6836.08 rows=1 width=32) (actual time=0.862..0.864 rows=1 loops=1) |
Output: (($1 + ($4)::numeric) + ($7)::numeric) |
CTE cte |
-> Result (cost=0.00..0.01 rows=1 width=16) (actual time=0.000..0.001 rows=1 loops=1) |
Output: '2020-06-07 12:34:56.123456+01'::timestamp with time zone, '2023-01-02 01:23:45.678901+00'::timestamp with time zone |
InitPlan 2 (returns $1) |
-> Aggregate (cost=67.55..67.56 rows=1 width=32) (actual time=0.779..0.780 rows=1 loops=1) |
Output: sum(large_table_tally_daily.count) |
-> Nested Loop (cost=0.00..66.98 rows=226 width=8) (actual time=0.014..0.724 rows=938 loops=1) |
Output: large_table_tally_daily.count |
Join Filter: ((large_table_tally_daily.column_date >= cte.lower_bound) AND ((large_table_tally_daily.column_date + '1 day'::interval) < cte.upper_bound)) |
Rows Removed by Join Filter: 889 |
-> CTE Scan on cte (cost=0.00..0.02 rows=1 width=16) (actual time=0.001..0.002 rows=1 loops=1) |
Output: cte.lower_bound, cte.upper_bound |
-> Seq Scan on public.large_table_tally_daily (cost=0.00..31.35 rows=2035 width=16) (actual time=0.010..0.176 rows=1827 loops=1) |
Output: large_table_tally_daily.column_date, large_table_tally_daily.count |
InitPlan 3 (returns $4) |
-> Aggregate (cost=3384.23..3384.24 rows=1 width=8) (actual time=0.020..0.021 rows=1 loops=1) |
Output: count(*) |
-> Nested Loop (cost=0.43..3189.79 rows=77778 width=0) (actual time=0.011..0.018 rows=27 loops=1) |
-> CTE Scan on cte cte_1 (cost=0.00..0.02 rows=1 width=16) (actual time=0.000..0.001 rows=1 loops=1) |
Output: cte_1.lower_bound, cte_1.upper_bound |
-> Index Only Scan using large_table_column_date_idx on public.large_table (cost=0.43..2411.99 rows=77778 width=8) (actual time=0.008..0.012 rows=27 loops=1) |
Output: large_table.column_date |
Index Cond: ((large_table.column_date >= GREATEST(date_trunc('day'::text, cte_1.upper_bound), cte_1.lower_bound)) AND (large_table.column_date < cte_1.upper_bound)) |
Heap Fetches: 2 |
InitPlan 4 (returns $7) |
-> Aggregate (cost=3384.24..3384.25 rows=1 width=8) (actual time=0.058..0.058 rows=1 loops=1) |
Output: count(*) |
-> Nested Loop (cost=0.43..3189.79 rows=77778 width=0) (actual time=0.008..0.049 rows=172 loops=1) |
-> CTE Scan on cte cte_2 (cost=0.00..0.02 rows=1 width=16) (actual time=0.000..0.000 rows=1 loops=1) |
Output: cte_2.lower_bound, cte_2.upper_bound |
-> Index Only Scan using large_table_column_date_idx on public.large_table large_table_1 (cost=0.43..2411.99 rows=77778 width=8) (actual time=0.006..0.032 rows=172 loops=1) |
Output: large_table_1.column_date |
Index Cond: ((large_table_1.column_date >= cte_2.lower_bound) AND (large_table_1.column_date < LEAST((date_trunc('day'::text, cte_2.lower_bound) + '1 day'::interval), cte_2.upper_bound))) |
Heap Fetches: 11 |
Planning Time: 0.190 ms |
Execution Time: 0.905 ms |
EXPLAIN
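--read the planner's row estimate programmatically: EXECUTE the json-format
--EXPLAIN in a DO block and pick the estimate out of the document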
EXPLAIN(ANALYZE,format json)
SELECT*FROM large_table where column_date <= '2023-01-01 00:00:00';
do $d$
declare r json;
count integer;
begin
--capture the json document EXPLAIN produces
EXECUTE 'EXPLAIN(format json)SELECT*FROM large_table where column_date <=
''2023-01-01 00:00:00'';' into r;
--#>> digs out the estimate as text; plpgsql casts it to integer on assignment
count := r#>>'{0,Plan,Plan Rows}';
create table debug_ as select count;--can't `raise notice` on db<>fiddle
end $d$;
table debug_;
QUERY PLAN |
---|
[ { "Plan": { "Node Type": "Seq Scan", "Parallel Aware": false, "Async Capable": false, "Relation Name": "large_table", "Alias": "large_table", "Startup Cost": 0.00, "Total Cost": 11848.00, "Plan Rows": 427309, "Plan Width": 8, "Actual Startup Time": 0.009, "Actual Total Time": 62.757, "Actual Rows": 428299, "Actual Loops": 1, "Filter": "(column_date <= '2023-01-01 00:00:00+00'::timestamp with time zone)", "Rows Removed by Filter": 271601 }, "Planning Time": 0.053, "Triggers": [ ], "Execution Time": 77.211 } ] |
EXPLAIN
DO
count |
---|
427309 |
SELECT 1