By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
create extension if not exists postgis;
select setseed(.42);
create table "potential_missed_areas"(
"ID" bigint generated by default as identity primary key
,"areaID" int default random()*1e2
,"campaignid" int default -1
,"status" text default 'approved'
,"metaData" jsonb
,"createdAt" timestamptz default now()-'1 week'::interval-random()*'1 year'::interval
,"updatedAt" timestamptz default now()-random()*'1 week'::interval
,"centerPoint" geometry(Point,4326) default st_makepoint( 60.86+random()*(77.83-60.86)
,23.64+random()*(37.07-23.64) )
);
--WGS84 bounds of 24313: x60.86y23.64 - 77.83x37.07y
insert into "potential_missed_areas"
select from generate_series(1,5)
returning "ID","areaID","campaignid","status","metaData","createdAt","updatedAt",st_astext("centerPoint");
insert into "potential_missed_areas"
select from generate_series(1,1e3);
CREATE EXTENSION
setseed |
---|
SELECT 1
CREATE TABLE
ID | areaID | campaignid | status | metaData | createdAt | updatedAt | st_astext |
---|---|---|---|---|---|---|---|
1 | 74 | -1 | approved | null | 2024-05-08 01:43:54.887265+01 | 2024-09-19 00:07:43.775577+01 | POINT(61.95151097080002 24.905870949999734) |
2 | 21 | -1 | approved | null | 2024-03-22 20:18:46.544865+00 | 2024-09-22 20:05:05.842104+01 | POINT(70.62622858478846 33.765635343528274) |
3 | 57 | -1 | approved | null | 2023-09-29 13:25:12.464865+01 | 2024-09-19 16:37:55.137696+01 | POINT(74.90235221723744 25.39695524996175) |
4 | 19 | -1 | approved | null | 2024-05-27 06:46:16.813665+01 | 2024-09-19 10:19:28.802982+01 | POINT(62.25244253106473 34.527890788297434) |
5 | 41 | -1 | approved | null | 2024-05-25 09:31:57.220065+01 | 2024-09-22 14:23:47.670068+01 | POINT(68.75692038849616 36.76016979377685) |
INSERT 0 5
INSERT 0 1000
WITH RECURSIVE clustered_data AS (-- Step 1: Perform initial clustering
SELECT pma.*
, ST_ClusterKMeans(ST_Transform("centerPoint", 24313), 1, 20000) OVER (PARTITION BY "areaID" ORDER BY "ID") AS "nta_cluster_id" -- Initial clustering
, 1 AS "recursion_depth" -- Initialize recursion depth
FROM "potential_missed_areas" pma
WHERE pma."campaignid" = -1
AND pma."status" = 'approved'
),cluster_sizes AS (-- Step 2: Calculate cluster sizes
SELECT "nta_cluster_id"
, COUNT(*) AS "cluster_size"
FROM clustered_data
GROUP BY "nta_cluster_id"
),recursive_clusters AS (-- Base case: Start with clusters from previous CTE
SELECT cd."ID"
, cd."centerPoint"
, cd."areaID"
, cd."metaData"
, cd."createdAt"
, cd."updatedAt"
, cd."campaignid"
, cd."nta_cluster_id" -- Include nta_cluster_id
, cs."cluster_size" -- Include cluster size from the previous part
, cd."recursion_depth" -- Include recursion_depth from base case
FROM clustered_data cd
JOIN cluster_sizes cs ON cd."nta_cluster_id" = cs."nta_cluster_id"
UNION ALL-- Recursive case: Re-cluster if any cluster has more than 20 items
SELECT rc."ID"
, rc."centerPoint"
, rc."areaID"
, rc."metaData"
, rc."createdAt"
, rc."updatedAt"
, rc."campaignid"
, rc."nta_cluster_id" -- Include nta_cluster_id
, cs."cluster_size" -- Include cluster size from the previous part
, rc."recursion_depth" + 1 AS "recursion_depth" -- Increment recursion depth
count |
---|
95847 |
SELECT 1