By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
create table the_table (
id integer, root_id integer, parent_id integer, status text, ts timestamp, comment text);
insert into the_table values
(1, null, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, standalone'),
(2, null, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, root of 3,4'),
(3, 2, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, child of 2, parent of 4'),
(4, 2, 3, 'OPEN', now()-'92d'::interval, '>90 days old, open, child of 2,3'),
(5, null, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, root of 6,7'),
(6, 5, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, child of 5, parent of 4'),
(7, 5, 6, 'COMPLETE', now()-'10d'::interval, '<=90 days old, complete, child of 5,6' )
returning *;
CREATE TABLE
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
2 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, root of 3,4 |
3 | 2 | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, child of 2, parent of 4 |
4 | 2 | 3 | OPEN | 2023-08-13 12:23:19.254852 | >90 days old, open, child of 2,3 |
5 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, root of 6,7 |
6 | 5 | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, child of 5, parent of 4 |
7 | 5 | 6 | COMPLETE | 2023-11-03 12:23:19.254852 | <=90 days old, complete, child of 5,6 |
INSERT 0 7
/*
--https://stackoverflow.com/q/77451429/5298879
Delete all the records from the table which are
- older than 90 days
- in `COMPLETE` status.
If a row has a `root_id` or a `parent_id` and is `OPEN`,
then the `root_id` and the `parent_id` rows should not be deleted.
In the above table, the row with `id=1` should be deleted.
`id=2` and `id=3` are `COMPLETE` and older than 90 days but since `id=4` is `OPEN` and has `root_id=2` and `parent_id=3`, these three rows (`id in (2,3,4)`) should not be deleted.
`id=5`,`id=6` and `id=7` are all `COMPLETE` but `id=7` is not older than 90 days and has `root_id=5` and `parent_id=6`, so these three rows (`id=5`,`id=6` and `id=7`) should not be deleted.
I tried multiple ways with inner queries
but I am not able to write a query for this scenario.
*/
--https://stackoverflow.com/a/77452714/5298879
begin work;
DELETE FROM the_table t1
WHERE 'COMPLETE'=status AND now()-ts > '90d'::interval
AND NOT EXISTS (SELECT FROM the_table AS t2
WHERE ( t1.id IN(t2.root_id,t2.parent_id)
OR t2.id IN(t1.root_id,t1.parent_id) )
AND ( 'OPEN'=t2.status
OR now()-t2.ts <= '90d'::interval ) )
RETURNING *;
rollback;
BEGIN
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
DELETE 1
ROLLBACK
insert into the_table values
(8, null, null, 'COMPLETE', now()-'10d'::interval, '<=90 days old, complete, standalone'),
(9, null, null, 'COMPLETE', now()-'10d'::interval, '<=90 days old, complete, root of 10'),
(10,9, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, child of 9' ),
(11,11, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, parent/child of self'),
(12,null, 12, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, parent/child of self'),
(13,14, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, cross-parent/child of 14'),
(14,13, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, cross-parent/child of 13'),
(15,null, null, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, parent of 16,17'),
(16,null, 15, 'COMPLETE', now()-'92d'::interval, '>90 days old, complete, child of 15'),
(17,null, 15, 'OPEN', now()-'10d'::interval, '<=90 days old, open, child of 15');
table the_table;
INSERT 0 10
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
2 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, root of 3,4 |
3 | 2 | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, child of 2, parent of 4 |
4 | 2 | 3 | OPEN | 2023-08-13 12:23:19.254852 | >90 days old, open, child of 2,3 |
5 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, root of 6,7 |
6 | 5 | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, child of 5, parent of 4 |
7 | 5 | 6 | COMPLETE | 2023-11-03 12:23:19.254852 | <=90 days old, complete, child of 5,6 |
8 | null | null | COMPLETE | 2023-11-03 12:23:19.270277 | <=90 days old, complete, standalone |
9 | null | null | COMPLETE | 2023-11-03 12:23:19.270277 | <=90 days old, complete, root of 10 |
10 | 9 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, child of 9 |
11 | 11 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
12 | null | 12 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
13 | 14 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 14 |
14 | 13 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 13 |
15 | null | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent of 16,17 |
16 | null | 15 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, child of 15 |
17 | null | 15 | OPEN | 2023-11-03 12:23:19.270277 | <=90 days old, open, child of 15 |
SELECT 17
begin work;
DELETE FROM the_table t1
WHERE 'COMPLETE'=status
AND now()-ts > '90d'::interval
AND NOT EXISTS (SELECT FROM the_table AS t2
WHERE ( 'OPEN'=t2.status
OR now()-t2.ts <= '90d'::interval )
AND ( t1.id IN(t2.root_id,t2.parent_id)
OR t2.id IN(t1.root_id,t1.parent_id) ) )
RETURNING *;
rollback;
BEGIN
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
11 | 11 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
12 | null | 12 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
13 | 14 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 14 |
14 | 13 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 13 |
16 | null | 15 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, child of 15 |
DELETE 6
ROLLBACK
begin work;
WITH recursive RecursiveHierarchy AS (
SELECT id, root_id, parent_id, status, ts, comment
FROM the_table
WHERE status = 'OPEN'
UNION ALL
SELECT t.id, t.root_id, t.parent_id, t.status, t.ts, t.comment
FROM the_table t
JOIN RecursiveHierarchy r ON ( ( 'OPEN'=t.status
OR now()-t.ts <= '90d'::interval )
AND ( t.id IN(r.root_id,r.parent_id)
OR r.id IN(t.root_id,t.parent_id) ) )
)
DELETE FROM the_table
WHERE ts < NOW() - '90 days'::interval
AND status = 'COMPLETE'
AND id NOT IN (SELECT id FROM RecursiveHierarchy)
RETURNING *;
rollback;
BEGIN
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
2 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, root of 3,4 |
3 | 2 | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, child of 2, parent of 4 |
5 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, root of 6,7 |
6 | 5 | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, child of 5, parent of 4 |
10 | 9 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, child of 9 |
11 | 11 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
12 | null | 12 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
13 | 14 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 14 |
14 | 13 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 13 |
15 | null | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent of 16,17 |
16 | null | 15 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, child of 15 |
DELETE 12
ROLLBACK
begin work;
WITH recursive RecursiveHierarchy AS (
SELECT id, root_id, parent_id
FROM the_table
WHERE status = 'OPEN' or now()-ts <= '90d'::interval
UNION ALL
SELECT t.id, t.root_id, t.parent_id
FROM the_table t
JOIN RecursiveHierarchy r ON r.id in(t.parent_id,t.root_id)
OR t.id in(r.parent_id,r.root_id)
WHERE NOT is_cycle
) CYCLE id SET is_cycle USING path
DELETE FROM the_table
WHERE id NOT IN (SELECT id FROM RecursiveHierarchy)
RETURNING *;
rollback;
BEGIN
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
11 | 11 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
12 | null | 12 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
13 | 14 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 14 |
14 | 13 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 13 |
DELETE 5
ROLLBACK
begin work;
WITH recursive RecursiveHierarchy AS (
SELECT id as candidate_id, id, root_id, parent_id, true AS is_removable
FROM the_table
WHERE status = 'COMPLETE' AND now()-ts > '90d'::interval
UNION ALL
SELECT r.candidate_id, t.id, t.root_id, t.parent_id, t.status = 'COMPLETE' AND now()-t.ts > '90d'::interval
FROM the_table t
JOIN RecursiveHierarchy r ON ( r.id in(t.parent_id,t.root_id)
OR t.id in(r.parent_id,r.root_id) )
AND t.id NOT IN (r.candidate_id,r.id)
WHERE NOT is_cycle
) CYCLE id SET is_cycle USING path
,removables AS (
SELECT candidate_id AS id FROM RecursiveHierarchy
GROUP BY candidate_id HAVING bool_and(is_removable)
)
DELETE FROM the_table
WHERE id IN (SELECT id FROM removables)
RETURNING *;
rollback;
BEGIN
id | root_id | parent_id | status | ts | comment |
---|---|---|---|---|---|
1 | null | null | COMPLETE | 2023-08-13 12:23:19.254852 | >90 days old, complete, standalone |
11 | 11 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
12 | null | 12 | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, parent/child of self |
13 | 14 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 14 |
14 | 13 | null | COMPLETE | 2023-08-13 12:23:19.270277 | >90 days old, complete, cross-parent/child of 13 |
DELETE 5
ROLLBACK