By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
select version();
version |
---|
PostgreSQL 15.0 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-10), 64-bit |
SELECT 1
CREATE TABLE status_table(
name text NOT NULL PRIMARY KEY
,processed BOOLEAN NOT NULL
,processing BOOLEAN NOT NULL
,updated INTEGER NOT NULL
,ref_time INTEGER NOT NULL
);
INSERT INTO status_table(name,processed,processing,updated,ref_time) VALUES
('abc','t','f',27794395,27794160),
('def','f','f',27794395,27793440),
('ghi','t','f',27794395,27793440),
('jkl','t','f',27794395,27794160),
('mno','t','f',27794395,27793440),
('pqr','t','t',27794395,27794160),
('stu','t','t',27794395,27794160),--2nd cnt_total, 2nd completed ref_time
('vwx','t','t',27794395,27794160),--should be kept
('cba','t','f',27794395,27794170),
('fed','f','f',27794395,27793450),
('ihg','t','f',27794395,27793450),
('lkj','t','f',27794395,27794170),
('onm','t','f',27794395,27793450),
('rqp','t','t',27794395,27794170),
('uts','t','t',27794395,27793430),--4th cnt_total, 3rd completed_ref_time
('xwv','t','t',27794395,27793430),--should be deleted
('acb','t','t',27794395,27793420),--1st cnt_total, 4th completed ref_time
('dfe','t','t',27794395,27793420),--should be deleted
('gih','t','t',27794395,27793420),
('jlk','t','t',27794395,27793420),
('mon','t','t',27794395,27793420),
('prq','t','t',27794395,27793420);
CREATE TABLE
INSERT 0 22
select * from status_table order by ref_time desc;
name | processed | processing | updated | ref_time |
---|---|---|---|---|
lkj | t | f | 27794395 | 27794170 |
rqp | t | t | 27794395 | 27794170 |
cba | t | f | 27794395 | 27794170 |
stu | t | t | 27794395 | 27794160 |
vwx | t | t | 27794395 | 27794160 |
abc | t | f | 27794395 | 27794160 |
jkl | t | f | 27794395 | 27794160 |
pqr | t | t | 27794395 | 27794160 |
fed | f | f | 27794395 | 27793450 |
ihg | t | f | 27794395 | 27793450 |
onm | t | f | 27794395 | 27793450 |
mno | t | f | 27794395 | 27793440 |
ghi | t | f | 27794395 | 27793440 |
def | f | f | 27794395 | 27793440 |
xwv | t | t | 27794395 | 27793430 |
uts | t | t | 27794395 | 27793430 |
acb | t | t | 27794395 | 27793420 |
dfe | t | t | 27794395 | 27793420 |
gih | t | t | 27794395 | 27793420 |
jlk | t | t | 27794395 | 27793420 |
mon | t | t | 27794395 | 27793420 |
prq | t | t | 27794395 | 27793420 |
SELECT 22
select ref_time,
count(*) cnt_total,
count(*) filter(where processed) cnt_processed,
round(avg(processed::int),2) ratio_processed
from status_table
group by ref_time order by ratio_processed desc, ref_time desc;
ref_time | cnt_total | cnt_processed | ratio_processed |
---|---|---|---|
27794170 | 3 | 3 | 1.00 |
27794160 | 5 | 5 | 1.00 |
27793430 | 2 | 2 | 1.00 |
27793420 | 6 | 6 | 1.00 |
27793450 | 3 | 2 | 0.67 |
27793440 | 3 | 2 | 0.67 |
SELECT 6
--this is what what the later `delete` sees
with
count_per_completed_ref_time as (
select
ref_time,
count(*) cnt_total
from status_table
group by ref_time
having bool_and(processed)
order by ref_time desc)
select
ref_time,
cnt_total,
row_number() over w1 as ref_time_num,
lag(cnt_total) over w1 as preceding_cnt_total
from count_per_completed_ref_time
window w1 as (order by ref_time desc);
ref_time | cnt_total | ref_time_num | preceding_cnt_total |
---|---|---|---|
27794170 | 3 | 1 | null |
27794160 | 5 | 2 | 3 |
27793430 | 2 | 3 | 5 |
27793420 | 6 | 4 | 2 |
SELECT 4
--suggested solution
with
count_per_completed_ref_time as (
select
ref_time,
count(*) cnt_total
from status_table
group by ref_time
having bool_and(processed)
order by ref_time desc)
,windowed_counts as (
select
ref_time,
cnt_total,
row_number() over w1 as ref_time_num,
lag(cnt_total) over w1 as preceding_cnt_total
from count_per_completed_ref_time
window w1 as (order by ref_time desc) )
delete from status_table s
where ref_time in (select ref_time from count_per_completed_ref_time)
and ref_time not in (
select ref_time
from windowed_counts
where ref_time_num=1 --latest completed ref_time
or ( ref_time_num=2 --second latest
and cnt_total>preceding_cnt_total)--has higher total than the latest
)
returning *;
name | processed | processing | updated | ref_time |
---|---|---|---|---|
uts | t | t | 27794395 | 27793430 |
xwv | t | t | 27794395 | 27793430 |
acb | t | t | 27794395 | 27793420 |
dfe | t | t | 27794395 | 27793420 |
gih | t | t | 27794395 | 27793420 |
jlk | t | t | 27794395 | 27793420 |
mon | t | t | 27794395 | 27793420 |
prq | t | t | 27794395 | 27793420 |
DELETE 8
select ref_time,
count(*) cnt_total,
count(*) filter(where processed) cnt_processed,
round(avg(processed::int),2) ratio_processed
from status_table
group by ref_time order by ratio_processed desc, ref_time desc;
ref_time | cnt_total | cnt_processed | ratio_processed |
---|---|---|---|
27794170 | 3 | 3 | 1.00 |
27794160 | 5 | 5 | 1.00 |
27793450 | 3 | 2 | 0.67 |
27793440 | 3 | 2 | 0.67 |
SELECT 4
--Here's what happens when the second most recent,
-- fully completed ref_time has less cnt_total
truncate status_table;
INSERT INTO status_table(name,processed,processing,updated,ref_time) VALUES
('abc','t','f',27794395,27794160),
('def','f','f',27794395,27793440),
('ghi','t','f',27794395,27793440),
('jkl','t','f',27794395,27794160),
('mno','t','f',27794395,27793440),
('pqr','t','t',27794395,27794160),
('cba','t','f',27794395,27794170),
('fed','f','f',27794395,27793450),
('ihg','t','f',27794395,27793450),
('lkj','t','f',27794395,27794170),
('onm','t','f',27794395,27793450),
('rqp','t','t',27794395,27794170),
('uts','t','t',27794395,27793430),
('xwv','t','t',27794395,27793430),
('acb','t','t',27794395,27793420),
('dfe','t','t',27794395,27793420),
('gih','t','t',27794395,27793420),
('jlk','t','t',27794395,27793420),
('mon','t','t',27794395,27793420),
('prq','t','t',27794395,27793420);
select * from status_table order by ref_time desc;
TRUNCATE TABLE
INSERT 0 20
name | processed | processing | updated | ref_time |
---|---|---|---|---|
cba | t | f | 27794395 | 27794170 |
rqp | t | t | 27794395 | 27794170 |
lkj | t | f | 27794395 | 27794170 |
abc | t | f | 27794395 | 27794160 |
jkl | t | f | 27794395 | 27794160 |
pqr | t | t | 27794395 | 27794160 |
onm | t | f | 27794395 | 27793450 |
fed | f | f | 27794395 | 27793450 |
ihg | t | f | 27794395 | 27793450 |
mno | t | f | 27794395 | 27793440 |
ghi | t | f | 27794395 | 27793440 |
def | f | f | 27794395 | 27793440 |
uts | t | t | 27794395 | 27793430 |
xwv | t | t | 27794395 | 27793430 |
acb | t | t | 27794395 | 27793420 |
dfe | t | t | 27794395 | 27793420 |
gih | t | t | 27794395 | 27793420 |
jlk | t | t | 27794395 | 27793420 |
mon | t | t | 27794395 | 27793420 |
prq | t | t | 27794395 | 27793420 |
SELECT 20
select ref_time,
count(*) cnt_total,
count(*) filter(where processed) cnt_processed,
round(avg(processed::int),2) ratio_processed
from status_table
group by ref_time order by ratio_processed desc, ref_time desc;
ref_time | cnt_total | cnt_processed | ratio_processed |
---|---|---|---|
27794170 | 3 | 3 | 1.00 |
27794160 | 3 | 3 | 1.00 |
27793430 | 2 | 2 | 1.00 |
27793420 | 6 | 6 | 1.00 |
27793450 | 3 | 2 | 0.67 |
27793440 | 3 | 2 | 0.67 |
SELECT 6
--this is what what the later `delete` sees
--2nd most recent ref_time has the same cnt_total, so it will be deleted with the rest
--leaving only the top one
with
count_per_completed_ref_time as (
select
ref_time,
count(*) cnt_total
from status_table
group by ref_time
having bool_and(processed)
order by ref_time desc)
select
ref_time,
cnt_total,
row_number() over w1 as ref_time_num,
lag(cnt_total) over w1 as preceding_cnt_total
from count_per_completed_ref_time
window w1 as (order by ref_time desc);
ref_time | cnt_total | ref_time_num | preceding_cnt_total |
---|---|---|---|
27794170 | 3 | 1 | null |
27794160 | 3 | 2 | 3 |
27793430 | 2 | 3 | 3 |
27793420 | 6 | 4 | 2 |
SELECT 4
--suggested solution
with
count_per_completed_ref_time as (
select
ref_time,
count(*) cnt_total
from status_table
group by ref_time
having bool_and(processed)
order by ref_time desc)
,windowed_counts as (
select
ref_time,
cnt_total,
row_number() over w1 as ref_time_num,
lag(cnt_total) over w1 as preceding_cnt_total
from count_per_completed_ref_time
window w1 as (order by ref_time desc) )
delete from status_table s
where ref_time in (select ref_time from count_per_completed_ref_time)
and ref_time not in (
select ref_time
from windowed_counts
where ref_time_num=1 --latest completed ref_time
or ( ref_time_num=2 --second latest
and cnt_total>preceding_cnt_total)--has higher total than the latest
)
returning *;
name | processed | processing | updated | ref_time |
---|---|---|---|---|
abc | t | f | 27794395 | 27794160 |
jkl | t | f | 27794395 | 27794160 |
pqr | t | t | 27794395 | 27794160 |
uts | t | t | 27794395 | 27793430 |
xwv | t | t | 27794395 | 27793430 |
acb | t | t | 27794395 | 27793420 |
dfe | t | t | 27794395 | 27793420 |
gih | t | t | 27794395 | 27793420 |
jlk | t | t | 27794395 | 27793420 |
mon | t | t | 27794395 | 27793420 |
prq | t | t | 27794395 | 27793420 |
DELETE 11
select ref_time,
count(*) cnt_total,
count(*) filter(where processed) cnt_processed,
round(avg(processed::int),2) ratio_processed
from status_table
group by ref_time order by ratio_processed desc, ref_time desc;
ref_time | cnt_total | cnt_processed | ratio_processed |
---|---|---|---|
27794170 | 3 | 3 | 1.00 |
27793450 | 3 | 2 | 0.67 |
27793440 | 3 | 2 | 0.67 |
SELECT 3