Postgres 15

By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE Measures(
    expId SERIAL,
    iteration INT NOT NULL,
    value float4 NOT NULL,
    PRIMARY KEY(expId, iteration)
);

--So, a table of various measurements, repeated for n iterations. 
--Though, because we have more data than originally expected, 
--I want to move to a new table layout that instead uses an array column, 
--which overall gives better performance (already tested and benchmarked):

CREATE TABLE TmpMeasures(
    expId SERIAL PRIMARY KEY,
    values float4[] NOT NULL
);
--My problem now is how to get the old data into the new format. 
--The data may look something like this. 
--Not that iterations don't always produce all data, 
--so there may be NULL values in the final array:

select setseed(.42);--stabilise random() between tests

INSERT INTO Measures (expId, iteration, value)
select expId, iteration, round(random()::numeric*10,1)
from generate_series(1,60)expId
join generate_series(1,800)iteration on .6>random();
--each expId randomly tries to get a value for an iteration
--40% of the time it will not get an iteration, therefore no value, no entry
--now below 30k, to let the looped update finish
--array_agg took 370ms on 120k Measures total, 
--plpgsql looped update failed without completing

select * from Measures tablesample bernoulli(.42)repeatable(.42)limit 15;

CREATE TABLE



CREATE TABLE




setseed






SELECT 1



INSERT 0 28716




expid
iteration
value


1
187
0.5

2
242
4.6

2
464
8.7

2
710
6.8

3
635
9.2

5
450
5

5
464
5.9

6
699
0

8
27
7.2

8
467
1.4

8
497
4.9

12
14
6.9

12
112
0.3

12
256
2

12
328
1.4



SELECT 15


drop table if exists TmpMeasures;

explain analyze verbose
create table if not exists TmpMeasures as
with min_max_iteration(min,max) as (
  select min(iteration),max(iteration) from Measures)
,distinct_expIds(dExpId) as (
  select distinct expId from Measures)
select dExpId as expId,
       array_agg(value order by i) as "values"
from min_max_iteration
cross join distinct_expIds d
cross join generate_series(min,max)g(i)
left join Measures m on m.iteration=g.i
                    and m.expId=d.dExpId
group by dExpId;
/*

--needed to set the identity column to the right position
--subselect in alter table won't be allowed
select max(expId)+1 from TmpMeasures;
--add column constraints
alter table TmpMeasures 
   add primary key (expId)
  ,alter column expId add generated by default as identity(start with 4)
  ,alter column values set not null;

--test to see if the generated column works fine, starting from the latest
insert into TmpMeasures(values) select '{1,2,3,4}' returning *;
*/

DROP TABLE




QUERY PLAN


GroupAggregate  (cost=26742.48..28244.98 rows=200 width=36) (actual time=157.406..204.991 rows=60 loops=1)

  Output: measures_1.expid, array_agg(m.value ORDER BY g.i)

  Group Key: measures_1.expid

  ->  Sort  (cost=26742.48..27242.48 rows=200000 width=12) (actual time=156.936..185.732 rows=48000 loops=1)

        Output: measures_1.expid, m.value, g.i

        Sort Key: measures_1.expid

        Sort Method: quicksort  Memory: 3755kB

        ->  Hash Left Join  (cost=2138.76..5713.34 rows=200000 width=12) (actual time=55.250..124.868 rows=48000 loops=1)

              Output: measures_1.expid, m.value, g.i

              Inner Unique: true

              Hash Cond: ((g.i = m.iteration) AND (measures_1.expid = m.expid))

              ->  Nested Loop  (cost=1187.16..3711.68 rows=200000 width=8) (actual time=23.129..32.282 rows=48000 loops=1)

                    Output: g.i, measures_1.expid

                    ->  Nested Loop  (cost=633.36..653.38 rows=1000 width=4) (actual time=5.549..5.821 rows=800 loops=1)

                          Output: g.i

                          ->  Aggregate  (cost=633.36..633.37 rows=1 width=8) (actual time=5.422..5.424 rows=1 loops=1)

                                Output: min(measures.iteration), max(measures.iteration)

                                ->  Seq Scan on public.measures  (cost=0.00..474.24 rows=31824 width=4) (actual time=0.019..3.067 rows=28716 loops=1)

                                      Output: measures.expid, measures.iteration, measures.value

                          ->  Function Scan on pg_catalog.generate_series g  (cost=0.00..10.00 rows=1000 width=4) (actual time=0.120..0.253 rows=800 loops=1)

                                Output: g.i

                                Function Call: generate_series((min(measures.iteration)), (max(measures.iteration)))

                    ->  Materialize  (cost=553.80..558.80 rows=200 width=4) (actual time=0.022..0.026 rows=60 loops=800)

                          Output: measures_1.expid

                          ->  HashAggregate  (cost=553.80..555.80 rows=200 width=4) (actual time=17.570..17.585 rows=60 loops=1)

                                Output: measures_1.expid

                                Group Key: measures_1.expid

                                Batches: 1  Memory Usage: 40kB

                                ->  Seq Scan on public.measures measures_1  (cost=0.00..474.24 rows=31824 width=4) (actual time=0.018..2.560 rows=28716 loops=1)

                                      Output: measures_1.expid, measures_1.iteration, measures_1.value

              ->  Hash  (cost=474.24..474.24 rows=31824 width=12) (actual time=31.951..31.952 rows=28716 loops=1)

                    Output: m.value, m.iteration, m.expid

                    Buckets: 32768  Batches: 1  Memory Usage: 1490kB

                    ->  Seq Scan on public.measures m  (cost=0.00..474.24 rows=31824 width=12) (actual time=0.014..3.870 rows=28716 loops=1)

                          Output: m.value, m.iteration, m.expid

Planning Time: 0.590 ms

Execution Time: 254.830 ms



EXPLAIN


create function plpgsql_looped_update() returns void as $do$
BEGIN 
   FOR i IN 1..(SELECT max(iteration) FROM Measures m) LOOP
      UPDATE TmpMeasures tm
      SET values[i] = m.value
      FROM Measures m
      WHERE
        tm.expId = m.expId AND
        m.iteration=i;
   END LOOP;
END $do$ language plpgsql;

explain analyze verbose select plpgsql_looped_update();

CREATE FUNCTION




QUERY PLAN


Result  (cost=0.00..0.26 rows=1 width=4) (actual time=15809.635..15809.635 rows=1 loops=1)

  Output: plpgsql_looped_update()

Planning Time: 0.023 ms

Execution Time: 15809.671 ms



EXPLAIN
expid	iteration	value
1	187	0.5
2	242	4.6
2	464	8.7
2	710	6.8
3	635	9.2
5	450	5
5	464	5.9
6	699	0
8	27	7.2
8	467	1.4
8	497	4.9
12	14	6.9
12	112	0.3
12	256	2
12	328	1.4
expid	iteration	value
1	187	0.5
2	242	4.6
2	464	8.7
2	710	6.8
3	635	9.2
5	450	5
5	464	5.9
6	699	0
8	27	7.2
8	467	1.4
8	497	4.9
12	14	6.9
12	112	0.3
12	256	2
12	328	1.4