-- Multiply the seed rows: for every existing row in tmp, insert 8000 copies
-- shifted forward by 1..8000 days. Explicit CROSS JOIN (the Cartesian product
-- is intentional) instead of the error-prone implicit comma join.
INSERT INTO tmp (date, id_type)
SELECT t.date + interval '1 day' * g, t.id_type
FROM tmp AS t
CROSS JOIN generate_series(1, 8000) AS g;
128000 rows affected
hidden batch(es)
VACUUM ANALYZE tmp; -- refresh planner statistics after the bulk insert so the EXPLAINs below see realistic estimates
✓
hidden batch(es)
-- Returns one row per consecutive run ("island") of equal id_type in tmp,
-- with the first and last date of each run. Single ordered pass: O(n).
-- OUT columns (id_type, grp_begin, grp_end) double as state between rows.
CREATE OR REPLACE FUNCTION f_tmp_groups()
RETURNS TABLE (id_type int, grp_begin timestamp, grp_end timestamp) AS
$func$
DECLARE
_row tmp; -- use table type for row variable
BEGIN
FOR _row IN
TABLE tmp ORDER BY date -- add more columns to make order deterministic
LOOP
CASE _row.id_type = id_type
WHEN TRUE THEN -- same group continues
grp_end := _row.date; -- remember last date so far
WHEN FALSE THEN -- next group starts
RETURN NEXT; -- return result for finished group
id_type := _row.id_type;
grp_begin := _row.date;
grp_end := _row.date;
ELSE -- comparison IS NULL: 1st row (or a NULL id_type; each NULL starts a new group)
id_type := _row.id_type; -- remember row data for starters
grp_begin := _row.date;
grp_end := _row.date;
END CASE;
END LOOP;

-- Guard the trailing row: on an empty tmp the loop never runs and an
-- unconditional RETURN NEXT would emit one spurious all-NULL row.
IF grp_begin IS NOT NULL THEN
RETURN NEXT; -- return last group
END IF;
END
$func$ LANGUAGE plpgsql;
✓
hidden batch(es)
-- 1. Window-function approach: flag each change of id_type, then derive a
--    group number as the running count of those flags; aggregate per group.
EXPLAIN (ANALYZE, TIMING OFF)
WITH flagged AS (
   -- TRUE where id_type differs from the previous row (NULL on the 1st row)
   SELECT date, id_type
        , lag(id_type) OVER (ORDER BY date) <> id_type AS step
   FROM   tmp
),
grouped AS (
   -- "step OR NULL" keeps only TRUE non-null, so the running count = group no.
   SELECT date, id_type
        , count(step OR NULL) OVER (ORDER BY date) AS grp
   FROM   flagged
)
SELECT id_type
     , min(date) AS begin
     , max(date) AS end
FROM   grouped
GROUP  BY id_type, grp
ORDER  BY min(date);
-> Index Scan using tmp_date_key on tmp (cost=0.29..4023.53 rows=128016 width=12) (actual rows=128016 loops=1)
Planning Time: 0.286 ms
Execution Time: 276.741 ms
…
hidden batch(es)
-- 2. row_number-difference approach: within one island the gap between the
--    overall row number and the per-id_type row number is constant, so the
--    difference identifies the group.
EXPLAIN (ANALYZE, TIMING OFF)
WITH numbered AS (
   SELECT date, id_type
        , row_number() OVER (ORDER BY date)
        - row_number() OVER (PARTITION BY id_type ORDER BY date) AS grp
   FROM   tmp
)
SELECT min(date) AS begin
     , max(date) AS end
     , id_type
FROM   numbered
GROUP  BY id_type, grp
ORDER  BY min(date);