Postgres 10

By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE actor (
    actor_id serial,  -- PRIMARY KEY -- see below
    first_name character varying(45) NOT NULL,
    last_name character varying(45) NOT NULL,
    last_update timestamptz DEFAULT now() NOT NULL
);

CREATE TABLE film (
    film_id serial,   -- PRIMARY KEY -- see below
    title character varying(255) NOT NULL,
    last_update timestamptz DEFAULT now() NOT NULL
);

CREATE TABLE film_actor (
    actor_id int NOT NULL,
    film_id  int NOT NULL,
    last_update timestamp without time zone DEFAULT now() NOT NULL
    -- PRIMARY KEY (film_id, actor_id)   -- see below
);

-- PK, indexes and FK are added *after* the bulk INSERT

INSERT INTO actor (actor_id, first_name, last_name, last_update)
VALUES
 (1,'PENELOPE','GUINESS','2006-02-15 09:34:33'),
 (2,'NICK','WAHLBERG','2006-02-15 09:34:33'),
 (3,'ED','CHASE','2006-02-15 09:34:33'),
 (4,'JENNIFER','DAVIS','2006-02-15 09:34:33'),
 (5,'JOHNNY','LOLLOBRIGIDA','2006-02-15 09:34:33'),
 (6,'BETTE','NICHOLSON','2006-02-15 09:34:33'),
 (7,'GRACE','MOSTEL','2006-02-15 09:34:33'),
 (8,'MATTHEW','JOHANSSON','2006-02-15 09:34:33'),
 (9,'JOE','SWANK','2006-02-15 09:34:33'),
 (10,'CHRISTIAN','GABLE','2006-02-15 09:34:33'),
 (11,'ZERO','CAGE','2006-02-15 09:34:33'),
 (12,'KARL','BERRY','2006-02-15 09:34:33'),
200 rows affected

1000 rows affected

5462 rows affected

-- add indexes after bulk INSERT (vaster, cheaper, less bloat)
ALTER TABLE actor ADD CONSTRAINT actor_pkey PRIMARY KEY (actor_id);
ALTER TABLE film  ADD CONSTRAINT film_pkey  PRIMARY KEY (film_id);
ALTER TABLE film_actor
   ADD CONSTRAINT film_actor_pkey          PRIMARY KEY (film_id, actor_id)                  -- ! NOTE reversed column order
 , ADD CONSTRAINT film_actor_actor_id_fkey FOREIGN KEY (actor_id) REFERENCES actor(actor_id)
 , ADD CONSTRAINT film_actor_film_id_fkey  FOREIGN KEY (film_id)  REFERENCES film(film_id);

CREATE INDEX idx_film_title_film_id        ON film  (title, film_id);  -- helps some more if we can get index-only scans
CREATE INDEX idx_actor_first_name_actor_id ON actor (actor_id, first_name);  -- helps some more if we can get index-only scans
VACUUM FREEZE ANALYZE;
explain analyze
select film.film_id, film.title, a.actors
from   film
left join
  (         
       select     film_actor.film_id, array_agg(first_name) as actors
       from       actor
       inner join film_actor using(actor_id)
       group by   film_actor.film_id
  ) as a
on       a.film_id = film.film_id
where    film.title = 'ACADEMY DINOSAUR';
-- order by film.title;


QUERY PLAN


Merge Left Join  (cost=445.09..515.01 rows=1 width=51) (actual time=2.798..2.800 rows=1 loops=1)

  Merge Cond: (film.film_id = film_actor.film_id)

  ->  Index Only Scan using idx_film_title_film_id on film  (cost=0.28..4.29 rows=1 width=19) (actual time=0.018..0.019 rows=1 loops=1)

        Index Cond: (title = 'ACADEMY DINOSAUR'::text)

        Heap Fetches: 0

  ->  GroupAggregate  (cost=444.82..498.25 rows=997 width=36) (actual time=2.777..2.777 rows=1 loops=1)

        Group Key: film_actor.film_id

        ->  Sort  (cost=444.82..458.47 rows=5462 width=10) (actual time=2.766..2.767 rows=11 loops=1)

              Sort Key: film_actor.film_id

              Sort Method: quicksort  Memory: 449kB

              ->  Hash Join  (cost=6.50..105.76 rows=5462 width=10) (actual time=0.093..1.698 rows=5462 loops=1)

                    Hash Cond: (film_actor.actor_id = actor.actor_id)

                    ->  Seq Scan on film_actor  (cost=0.00..84.62 rows=5462 width=8) (actual time=0.007..0.390 rows=5462 loops=1)

                    ->  Hash  (cost=4.00..4.00 rows=200 width=10) (actual time=0.070..0.070 rows=200 loops=1)

                          Buckets: 1024  Batches: 1  Memory Usage: 17kB

                          ->  Seq Scan on actor  (cost=0.00..4.00 rows=200 width=10) (actual time=0.005..0.041 rows=200 loops=1)

Planning time: 0.443 ms

Execution time: 2.849 ms


explain analyze
select film.film_id, film.title, a.actors
from   film
left join lateral
  (
       select     array_agg(first_name) as actors
       from       actor
       inner join film_actor using(actor_id)
       where      film_actor.film_id = film.film_id
  ) as a
on       true
where    film.title = 'ACADEMY DINOSAUR';
-- order by film.title;


QUERY PLAN


Nested Loop Left Join  (cost=9.26..13.31 rows=1 width=51) (actual time=0.083..0.084 rows=1 loops=1)

  ->  Index Only Scan using idx_film_title_film_id on film  (cost=0.28..4.29 rows=1 width=19) (actual time=0.009..0.010 rows=1 loops=1)

        Index Cond: (title = 'ACADEMY DINOSAUR'::text)

        Heap Fetches: 0

  ->  Aggregate  (cost=8.98..8.99 rows=1 width=32) (actual time=0.072..0.072 rows=1 loops=1)

        ->  Hash Join  (cost=4.43..8.97 rows=5 width=6) (actual time=0.035..0.065 rows=10 loops=1)

              Hash Cond: (actor.actor_id = film_actor.actor_id)

              ->  Seq Scan on actor  (cost=0.00..4.00 rows=200 width=10) (actual time=0.008..0.020 rows=200 loops=1)

              ->  Hash  (cost=4.37..4.37 rows=5 width=4) (actual time=0.014..0.014 rows=10 loops=1)

                    Buckets: 1024  Batches: 1  Memory Usage: 9kB

                    ->  Index Only Scan using film_actor_pkey on film_actor  (cost=0.28..4.37 rows=5 width=4) (actual time=0.009..0.011 rows=10 loops=1)

                          Index Cond: (film_id = film.film_id)

                          Heap Fetches: 0

Planning time: 0.219 ms

Execution time: 0.114 ms


explain analyze
select film.film_id, film.title, array_agg(a.first_name) as actors
from   film
left join
  (         
       select     film_actor.film_id, actor.first_name
       from       actor
       inner join film_actor using(actor_id)
  ) as a
on       a.film_id = film.film_id
where    film.title = 'ACADEMY DINOSAUR'
group by film.film_id, film.title;
-- order by film.title;


QUERY PLAN


GroupAggregate  (cost=0.70..11.52 rows=1 width=51) (actual time=0.055..0.055 rows=1 loops=1)

  Group Key: film.film_id

  ->  Nested Loop Left Join  (cost=0.70..11.48 rows=5 width=25) (actual time=0.013..0.047 rows=10 loops=1)

        ->  Index Only Scan using idx_film_title_film_id on film  (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.007 rows=1 loops=1)

              Index Cond: (title = 'ACADEMY DINOSAUR'::text)

              Heap Fetches: 0

        ->  Nested Loop  (cost=0.43..5.19 rows=200 width=10) (actual time=0.005..0.038 rows=10 loops=1)

              ->  Index Only Scan using film_actor_pkey on film_actor  (cost=0.28..4.37 rows=5 width=8) (actual time=0.002..0.023 rows=10 loops=1)

                    Index Cond: (film_id = film.film_id)

                    Heap Fetches: 0

              ->  Index Only Scan using idx_actor_first_name_actor_id on actor  (cost=0.14..0.16 rows=1 width=10) (actual time=0.001..0.001 rows=1 loops=10)

                    Index Cond: (actor_id = film_actor.actor_id)

                    Heap Fetches: 0

Planning time: 0.269 ms

Execution time: 0.081 ms


explain analyze
select film.film_id, film.title, array_agg(a.first_name) as actors
from   film
left join lateral
  (
       select     actor.first_name
       from       actor
       inner join film_actor using(actor_id)
       where      film_actor.film_id = film.film_id
  ) as a
on       true
where    film.title = 'ACADEMY DINOSAUR'
group by film.film_id, film.title;
-- order by film.title;


QUERY PLAN


GroupAggregate  (cost=0.70..11.52 rows=1 width=51) (actual time=0.030..0.031 rows=1 loops=1)

  Group Key: film.film_id

  ->  Nested Loop Left Join  (cost=0.70..11.48 rows=5 width=25) (actual time=0.012..0.024 rows=10 loops=1)

        ->  Index Only Scan using idx_film_title_film_id on film  (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.007 rows=1 loops=1)

              Index Cond: (title = 'ACADEMY DINOSAUR'::text)

              Heap Fetches: 0

        ->  Nested Loop  (cost=0.43..5.19 rows=200 width=10) (actual time=0.005..0.015 rows=10 loops=1)

              ->  Index Only Scan using film_actor_pkey on film_actor  (cost=0.28..4.37 rows=5 width=8) (actual time=0.002..0.004 rows=10 loops=1)

                    Index Cond: (film_id = film.film_id)

                    Heap Fetches: 0

              ->  Index Only Scan using idx_actor_first_name_actor_id on actor  (cost=0.14..0.16 rows=1 width=10) (actual time=0.001..0.001 rows=1 loops=10)

                    Index Cond: (actor_id = film_actor.actor_id)

                    Heap Fetches: 0

Planning time: 0.256 ms

Execution time: 0.055 ms


-- Erwin: for a small selection 1:
EXPLAIN ANALYZE
SELECT f.film_id, f.title, a.actors
FROM   film f
LEFT   JOIN LATERAL (
   SELECT ARRAY (
      SELECT a.first_name
      FROM   film_actor fa
      JOIN   actor a USING (actor_id)
      WHERE  fa.film_id = f.film_id
      ) AS actors
   ) a ON true
WHERE  f.title = 'ACADEMY DINOSAUR';


QUERY PLAN


Nested Loop Left Join  (cost=9.24..13.29 rows=1 width=51) (actual time=0.073..0.074 rows=1 loops=1)

  ->  Index Only Scan using idx_film_title_film_id on film f  (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.008 rows=1 loops=1)

        Index Cond: (title = 'ACADEMY DINOSAUR'::text)

        Heap Fetches: 0

  ->  Result  (cost=8.97..8.98 rows=1 width=32) (actual time=0.065..0.065 rows=1 loops=1)

        InitPlan 1 (returns $1)

          ->  Hash Join  (cost=4.43..8.97 rows=5 width=6) (actual time=0.020..0.049 rows=10 loops=1)

                Hash Cond: (a.actor_id = fa.actor_id)

                ->  Seq Scan on actor a  (cost=0.00..4.00 rows=200 width=10) (actual time=0.007..0.018 rows=200 loops=1)

                ->  Hash  (cost=4.37..4.37 rows=5 width=4) (actual time=0.008..0.008 rows=10 loops=1)

                      Buckets: 1024  Batches: 1  Memory Usage: 9kB

                      ->  Index Only Scan using film_actor_pkey on film_actor fa  (cost=0.28..4.37 rows=5 width=4) (actual time=0.004..0.005 rows=10 loops=1)

                            Index Cond: (film_id = $0)

                            Heap Fetches: 0

Planning time: 0.207 ms

Execution time: 0.103 ms


-- Erwin: for a small selection 2:
EXPLAIN ANALYZE
SELECT f.film_id, f.title
     , ARRAY (SELECT a.first_name
              FROM   film_actor fa
              JOIN   actor a USING (actor_id)
              WHERE  fa.film_id = f.film_id) AS actors
FROM   film f
WHERE  f.title = 'ACADEMY DINOSAUR';


QUERY PLAN


Index Only Scan using idx_film_title_film_id on film f  (cost=0.28..13.26 rows=1 width=51) (actual time=0.075..0.075 rows=1 loops=1)

  Index Cond: (title = 'ACADEMY DINOSAUR'::text)

  Heap Fetches: 0

  SubPlan 1

    ->  Hash Join  (cost=4.43..8.97 rows=5 width=6) (actual time=0.027..0.055 rows=10 loops=1)

          Hash Cond: (a.actor_id = fa.actor_id)

          ->  Seq Scan on actor a  (cost=0.00..4.00 rows=200 width=10) (actual time=0.006..0.018 rows=200 loops=1)

          ->  Hash  (cost=4.37..4.37 rows=5 width=4) (actual time=0.014..0.014 rows=10 loops=1)

                Buckets: 1024  Batches: 1  Memory Usage: 9kB

                ->  Index Only Scan using film_actor_pkey on film_actor fa  (cost=0.28..4.37 rows=5 width=4) (actual time=0.010..0.012 rows=10 loops=1)

                      Index Cond: (film_id = f.film_id)

                      Heap Fetches: 0

Planning time: 0.169 ms

Execution time: 0.096 ms


-- Erwin: for a small selection 3:
EXPLAIN ANALYZE
SELECT f.film_id, f.title, array_agg(a.first_name) AS actors
FROM   film f
LEFT   JOIN film_actor fa USING (film_id)
LEFT   JOIN actor a USING (actor_id)
WHERE  f.title = 'ACADEMY DINOSAUR'
GROUP  BY f.film_id;


QUERY PLAN


GroupAggregate  (cost=0.70..9.57 rows=1 width=51) (actual time=0.031..0.031 rows=1 loops=1)

  Group Key: f.film_id

  ->  Nested Loop Left Join  (cost=0.70..9.53 rows=5 width=25) (actual time=0.013..0.024 rows=10 loops=1)

        ->  Nested Loop Left Join  (cost=0.56..8.71 rows=5 width=23) (actual time=0.011..0.013 rows=10 loops=1)

              ->  Index Only Scan using idx_film_title_film_id on film f  (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.007 rows=1 loops=1)

                    Index Cond: (title = 'ACADEMY DINOSAUR'::text)

                    Heap Fetches: 0

              ->  Index Only Scan using film_actor_pkey on film_actor fa  (cost=0.28..4.37 rows=5 width=8) (actual time=0.003..0.004 rows=10 loops=1)

                    Index Cond: (film_id = f.film_id)

                    Heap Fetches: 0

        ->  Index Only Scan using idx_actor_first_name_actor_id on actor a  (cost=0.14..0.16 rows=1 width=10) (actual time=0.001..0.001 rows=1 loops=10)

              Index Cond: (actor_id = fa.actor_id)

              Heap Fetches: 0

Planning time: 0.245 ms

Execution time: 0.058 ms


-- Erwin: for retrieving all or most rows (almost the same as your 1st query)
EXPLAIN ANALYZE
SELECT f.film_id, f.title, a.actors
FROM   film f
LEFT   JOIN (         
   SELECT fa.film_id, array_agg(first_name) AS actors
   FROM   actor
   JOIN   film_actor fa USING(actor_id)
   GROUP  by fa.film_id
   ) a USING (film_id)
WHERE  f.title = 'ACADEMY DINOSAUR';  -- not good for a single (or few) films!


QUERY PLAN


Merge Left Join  (cost=445.09..515.01 rows=1 width=51) (actual time=2.668..2.670 rows=1 loops=1)

  Merge Cond: (f.film_id = fa.film_id)

  ->  Index Only Scan using idx_film_title_film_id on film f  (cost=0.28..4.29 rows=1 width=19) (actual time=0.006..0.008 rows=1 loops=1)

        Index Cond: (title = 'ACADEMY DINOSAUR'::text)

        Heap Fetches: 0

  ->  GroupAggregate  (cost=444.82..498.25 rows=997 width=36) (actual time=2.660..2.660 rows=1 loops=1)

        Group Key: fa.film_id

        ->  Sort  (cost=444.82..458.47 rows=5462 width=10) (actual time=2.650..2.651 rows=11 loops=1)

              Sort Key: fa.film_id

              Sort Method: quicksort  Memory: 449kB

              ->  Hash Join  (cost=6.50..105.76 rows=5462 width=10) (actual time=0.060..1.629 rows=5462 loops=1)

                    Hash Cond: (fa.actor_id = actor.actor_id)

                    ->  Seq Scan on film_actor fa  (cost=0.00..84.62 rows=5462 width=8) (actual time=0.008..0.381 rows=5462 loops=1)

                    ->  Hash  (cost=4.00..4.00 rows=200 width=10) (actual time=0.047..0.047 rows=200 loops=1)

                          Buckets: 1024  Batches: 1  Memory Usage: 17kB

                          ->  Seq Scan on actor  (cost=0.00..4.00 rows=200 width=10) (actual time=0.005..0.023 rows=200 loops=1)

Planning time: 0.191 ms

Execution time: 2.699 ms