add batch remove batch split batch comment selection show hidden batches hide batch highlight batch
db<>fiddle
donate feedback about
By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE actor (
actor_id serial, -- PRIMARY KEY -- see below
first_name character varying(45) NOT NULL,
last_name character varying(45) NOT NULL,
last_update timestamptz DEFAULT now() NOT NULL
);

CREATE TABLE film (
film_id serial, -- PRIMARY KEY -- see below
title character varying(255) NOT NULL,
last_update timestamptz DEFAULT now() NOT NULL
);

CREATE TABLE film_actor (
actor_id int NOT NULL,
film_id int NOT NULL,
last_update timestamp without time zone DEFAULT now() NOT NULL
-- PRIMARY KEY (film_id, actor_id) -- see below
);

-- PK, indexes and FK are added *after* the bulk INSERT

INSERT INTO actor (actor_id, first_name, last_name, last_update)
VALUES
(1,'PENELOPE','GUINESS','2006-02-15 09:34:33'),
(2,'NICK','WAHLBERG','2006-02-15 09:34:33'),
(3,'ED','CHASE','2006-02-15 09:34:33'),
(4,'JENNIFER','DAVIS','2006-02-15 09:34:33'),
(5,'JOHNNY','LOLLOBRIGIDA','2006-02-15 09:34:33'),
(6,'BETTE','NICHOLSON','2006-02-15 09:34:33'),
(7,'GRACE','MOSTEL','2006-02-15 09:34:33'),
(8,'MATTHEW','JOHANSSON','2006-02-15 09:34:33'),
(9,'JOE','SWANK','2006-02-15 09:34:33'),
(10,'CHRISTIAN','GABLE','2006-02-15 09:34:33'),
(11,'ZERO','CAGE','2006-02-15 09:34:33'),
(12,'KARL','BERRY','2006-02-15 09:34:33'),
200 rows affected
1000 rows affected
5462 rows affected
-- add indexes after bulk INSERT (vaster, cheaper, less bloat)
ALTER TABLE actor ADD CONSTRAINT actor_pkey PRIMARY KEY (actor_id);
ALTER TABLE film ADD CONSTRAINT film_pkey PRIMARY KEY (film_id);
ALTER TABLE film_actor
ADD CONSTRAINT film_actor_pkey PRIMARY KEY (film_id, actor_id) -- ! NOTE reversed column order
, ADD CONSTRAINT film_actor_actor_id_fkey FOREIGN KEY (actor_id) REFERENCES actor(actor_id)
, ADD CONSTRAINT film_actor_film_id_fkey FOREIGN KEY (film_id) REFERENCES film(film_id);

CREATE INDEX idx_film_title_film_id ON film (title, film_id); -- helps some more if we can get index-only scans
CREATE INDEX idx_actor_first_name_actor_id ON actor (actor_id, first_name); -- helps some more if we can get index-only scans
VACUUM FREEZE ANALYZE;
explain analyze
select film.film_id, film.title, a.actors
from film
left join
(
select film_actor.film_id, array_agg(first_name) as actors
from actor
inner join film_actor using(actor_id)
group by film_actor.film_id
) as a
on a.film_id = film.film_id
where film.title = 'ACADEMY DINOSAUR';
-- order by film.title;
QUERY PLAN
Merge Left Join (cost=445.09..515.01 rows=1 width=51) (actual time=2.798..2.800 rows=1 loops=1)
  Merge Cond: (film.film_id = film_actor.film_id)
  -> Index Only Scan using idx_film_title_film_id on film (cost=0.28..4.29 rows=1 width=19) (actual time=0.018..0.019 rows=1 loops=1)
        Index Cond: (title = 'ACADEMY DINOSAUR'::text)
        Heap Fetches: 0
  -> GroupAggregate (cost=444.82..498.25 rows=997 width=36) (actual time=2.777..2.777 rows=1 loops=1)
        Group Key: film_actor.film_id
        -> Sort (cost=444.82..458.47 rows=5462 width=10) (actual time=2.766..2.767 rows=11 loops=1)
              Sort Key: film_actor.film_id
              Sort Method: quicksort Memory: 449kB
              -> Hash Join (cost=6.50..105.76 rows=5462 width=10) (actual time=0.093..1.698 rows=5462 loops=1)
                    Hash Cond: (film_actor.actor_id = actor.actor_id)
                    -> Seq Scan on film_actor (cost=0.00..84.62 rows=5462 width=8) (actual time=0.007..0.390 rows=5462 loops=1)
                    -> Hash (cost=4.00..4.00 rows=200 width=10) (actual time=0.070..0.070 rows=200 loops=1)
                          Buckets: 1024 Batches: 1 Memory Usage: 17kB
                          -> Seq Scan on actor (cost=0.00..4.00 rows=200 width=10) (actual time=0.005..0.041 rows=200 loops=1)
Planning time: 0.443 ms
Execution time: 2.849 ms
explain analyze
select film.film_id, film.title, a.actors
from film
left join lateral
(
select array_agg(first_name) as actors
from actor
inner join film_actor using(actor_id)
where film_actor.film_id = film.film_id
) as a
on true
where film.title = 'ACADEMY DINOSAUR';
-- order by film.title;
QUERY PLAN
Nested Loop Left Join (cost=9.26..13.31 rows=1 width=51) (actual time=0.083..0.084 rows=1 loops=1)
  -> Index Only Scan using idx_film_title_film_id on film (cost=0.28..4.29 rows=1 width=19) (actual time=0.009..0.010 rows=1 loops=1)
        Index Cond: (title = 'ACADEMY DINOSAUR'::text)
        Heap Fetches: 0
  -> Aggregate (cost=8.98..8.99 rows=1 width=32) (actual time=0.072..0.072 rows=1 loops=1)
        -> Hash Join (cost=4.43..8.97 rows=5 width=6) (actual time=0.035..0.065 rows=10 loops=1)
              Hash Cond: (actor.actor_id = film_actor.actor_id)
              -> Seq Scan on actor (cost=0.00..4.00 rows=200 width=10) (actual time=0.008..0.020 rows=200 loops=1)
              -> Hash (cost=4.37..4.37 rows=5 width=4) (actual time=0.014..0.014 rows=10 loops=1)
                    Buckets: 1024 Batches: 1 Memory Usage: 9kB
                    -> Index Only Scan using film_actor_pkey on film_actor (cost=0.28..4.37 rows=5 width=4) (actual time=0.009..0.011 rows=10 loops=1)
                          Index Cond: (film_id = film.film_id)
                          Heap Fetches: 0
Planning time: 0.219 ms
Execution time: 0.114 ms
explain analyze
select film.film_id, film.title, array_agg(a.first_name) as actors
from film
left join
(
select film_actor.film_id, actor.first_name
from actor
inner join film_actor using(actor_id)
) as a
on a.film_id = film.film_id
where film.title = 'ACADEMY DINOSAUR'
group by film.film_id, film.title;
-- order by film.title;
QUERY PLAN
GroupAggregate (cost=0.70..11.52 rows=1 width=51) (actual time=0.055..0.055 rows=1 loops=1)
  Group Key: film.film_id
  -> Nested Loop Left Join (cost=0.70..11.48 rows=5 width=25) (actual time=0.013..0.047 rows=10 loops=1)
        -> Index Only Scan using idx_film_title_film_id on film (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.007 rows=1 loops=1)
              Index Cond: (title = 'ACADEMY DINOSAUR'::text)
              Heap Fetches: 0
        -> Nested Loop (cost=0.43..5.19 rows=200 width=10) (actual time=0.005..0.038 rows=10 loops=1)
              -> Index Only Scan using film_actor_pkey on film_actor (cost=0.28..4.37 rows=5 width=8) (actual time=0.002..0.023 rows=10 loops=1)
                    Index Cond: (film_id = film.film_id)
                    Heap Fetches: 0
              -> Index Only Scan using idx_actor_first_name_actor_id on actor (cost=0.14..0.16 rows=1 width=10) (actual time=0.001..0.001 rows=1 loops=10)
                    Index Cond: (actor_id = film_actor.actor_id)
                    Heap Fetches: 0
Planning time: 0.269 ms
Execution time: 0.081 ms
explain analyze
select film.film_id, film.title, array_agg(a.first_name) as actors
from film
left join lateral
(
select actor.first_name
from actor
inner join film_actor using(actor_id)
where film_actor.film_id = film.film_id
) as a
on true
where film.title = 'ACADEMY DINOSAUR'
group by film.film_id, film.title;
-- order by film.title;
QUERY PLAN
GroupAggregate (cost=0.70..11.52 rows=1 width=51) (actual time=0.030..0.031 rows=1 loops=1)
  Group Key: film.film_id
  -> Nested Loop Left Join (cost=0.70..11.48 rows=5 width=25) (actual time=0.012..0.024 rows=10 loops=1)
        -> Index Only Scan using idx_film_title_film_id on film (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.007 rows=1 loops=1)
              Index Cond: (title = 'ACADEMY DINOSAUR'::text)
              Heap Fetches: 0
        -> Nested Loop (cost=0.43..5.19 rows=200 width=10) (actual time=0.005..0.015 rows=10 loops=1)
              -> Index Only Scan using film_actor_pkey on film_actor (cost=0.28..4.37 rows=5 width=8) (actual time=0.002..0.004 rows=10 loops=1)
                    Index Cond: (film_id = film.film_id)
                    Heap Fetches: 0
              -> Index Only Scan using idx_actor_first_name_actor_id on actor (cost=0.14..0.16 rows=1 width=10) (actual time=0.001..0.001 rows=1 loops=10)
                    Index Cond: (actor_id = film_actor.actor_id)
                    Heap Fetches: 0
Planning time: 0.256 ms
Execution time: 0.055 ms
-- Erwin: for a small selection 1:
EXPLAIN ANALYZE
SELECT f.film_id, f.title, a.actors
FROM film f
LEFT JOIN LATERAL (
SELECT ARRAY (
SELECT a.first_name
FROM film_actor fa
JOIN actor a USING (actor_id)
WHERE fa.film_id = f.film_id
) AS actors
) a ON true
WHERE f.title = 'ACADEMY DINOSAUR';
QUERY PLAN
Nested Loop Left Join (cost=9.24..13.29 rows=1 width=51) (actual time=0.073..0.074 rows=1 loops=1)
  -> Index Only Scan using idx_film_title_film_id on film f (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.008 rows=1 loops=1)
        Index Cond: (title = 'ACADEMY DINOSAUR'::text)
        Heap Fetches: 0
  -> Result (cost=8.97..8.98 rows=1 width=32) (actual time=0.065..0.065 rows=1 loops=1)
        InitPlan 1 (returns $1)
          -> Hash Join (cost=4.43..8.97 rows=5 width=6) (actual time=0.020..0.049 rows=10 loops=1)
                Hash Cond: (a.actor_id = fa.actor_id)
                -> Seq Scan on actor a (cost=0.00..4.00 rows=200 width=10) (actual time=0.007..0.018 rows=200 loops=1)
                -> Hash (cost=4.37..4.37 rows=5 width=4) (actual time=0.008..0.008 rows=10 loops=1)
                      Buckets: 1024 Batches: 1 Memory Usage: 9kB
                      -> Index Only Scan using film_actor_pkey on film_actor fa (cost=0.28..4.37 rows=5 width=4) (actual time=0.004..0.005 rows=10 loops=1)
                            Index Cond: (film_id = $0)
                            Heap Fetches: 0
Planning time: 0.207 ms
Execution time: 0.103 ms
-- Erwin: for a small selection 2:
EXPLAIN ANALYZE
SELECT f.film_id, f.title
, ARRAY (SELECT a.first_name
FROM film_actor fa
JOIN actor a USING (actor_id)
WHERE fa.film_id = f.film_id) AS actors
FROM film f
WHERE f.title = 'ACADEMY DINOSAUR';
QUERY PLAN
Index Only Scan using idx_film_title_film_id on film f (cost=0.28..13.26 rows=1 width=51) (actual time=0.075..0.075 rows=1 loops=1)
  Index Cond: (title = 'ACADEMY DINOSAUR'::text)
  Heap Fetches: 0
  SubPlan 1
    -> Hash Join (cost=4.43..8.97 rows=5 width=6) (actual time=0.027..0.055 rows=10 loops=1)
          Hash Cond: (a.actor_id = fa.actor_id)
          -> Seq Scan on actor a (cost=0.00..4.00 rows=200 width=10) (actual time=0.006..0.018 rows=200 loops=1)
          -> Hash (cost=4.37..4.37 rows=5 width=4) (actual time=0.014..0.014 rows=10 loops=1)
                Buckets: 1024 Batches: 1 Memory Usage: 9kB
                -> Index Only Scan using film_actor_pkey on film_actor fa (cost=0.28..4.37 rows=5 width=4) (actual time=0.010..0.012 rows=10 loops=1)
                      Index Cond: (film_id = f.film_id)
                      Heap Fetches: 0
Planning time: 0.169 ms
Execution time: 0.096 ms
-- Erwin: for a small selection 3:
EXPLAIN ANALYZE
SELECT f.film_id, f.title, array_agg(a.first_name) AS actors
FROM film f
LEFT JOIN film_actor fa USING (film_id)
LEFT JOIN actor a USING (actor_id)
WHERE f.title = 'ACADEMY DINOSAUR'
GROUP BY f.film_id;
QUERY PLAN
GroupAggregate (cost=0.70..9.57 rows=1 width=51) (actual time=0.031..0.031 rows=1 loops=1)
  Group Key: f.film_id
  -> Nested Loop Left Join (cost=0.70..9.53 rows=5 width=25) (actual time=0.013..0.024 rows=10 loops=1)
        -> Nested Loop Left Join (cost=0.56..8.71 rows=5 width=23) (actual time=0.011..0.013 rows=10 loops=1)
              -> Index Only Scan using idx_film_title_film_id on film f (cost=0.28..4.29 rows=1 width=19) (actual time=0.007..0.007 rows=1 loops=1)
                    Index Cond: (title = 'ACADEMY DINOSAUR'::text)
                    Heap Fetches: 0
              -> Index Only Scan using film_actor_pkey on film_actor fa (cost=0.28..4.37 rows=5 width=8) (actual time=0.003..0.004 rows=10 loops=1)
                    Index Cond: (film_id = f.film_id)
                    Heap Fetches: 0
        -> Index Only Scan using idx_actor_first_name_actor_id on actor a (cost=0.14..0.16 rows=1 width=10) (actual time=0.001..0.001 rows=1 loops=10)
              Index Cond: (actor_id = fa.actor_id)
              Heap Fetches: 0
Planning time: 0.245 ms
Execution time: 0.058 ms
-- Erwin: for retrieving all or most rows (almost the same as your 1st query)
EXPLAIN ANALYZE
SELECT f.film_id, f.title, a.actors
FROM film f
LEFT JOIN (
SELECT fa.film_id, array_agg(first_name) AS actors
FROM actor
JOIN film_actor fa USING(actor_id)
GROUP by fa.film_id
) a USING (film_id)
WHERE f.title = 'ACADEMY DINOSAUR'; -- not good for a single (or few) films!
QUERY PLAN
Merge Left Join (cost=445.09..515.01 rows=1 width=51) (actual time=2.668..2.670 rows=1 loops=1)
  Merge Cond: (f.film_id = fa.film_id)
  -> Index Only Scan using idx_film_title_film_id on film f (cost=0.28..4.29 rows=1 width=19) (actual time=0.006..0.008 rows=1 loops=1)
        Index Cond: (title = 'ACADEMY DINOSAUR'::text)
        Heap Fetches: 0
  -> GroupAggregate (cost=444.82..498.25 rows=997 width=36) (actual time=2.660..2.660 rows=1 loops=1)
        Group Key: fa.film_id
        -> Sort (cost=444.82..458.47 rows=5462 width=10) (actual time=2.650..2.651 rows=11 loops=1)
              Sort Key: fa.film_id
              Sort Method: quicksort Memory: 449kB
              -> Hash Join (cost=6.50..105.76 rows=5462 width=10) (actual time=0.060..1.629 rows=5462 loops=1)
                    Hash Cond: (fa.actor_id = actor.actor_id)
                    -> Seq Scan on film_actor fa (cost=0.00..84.62 rows=5462 width=8) (actual time=0.008..0.381 rows=5462 loops=1)
                    -> Hash (cost=4.00..4.00 rows=200 width=10) (actual time=0.047..0.047 rows=200 loops=1)
                          Buckets: 1024 Batches: 1 Memory Usage: 17kB
                          -> Seq Scan on actor (cost=0.00..4.00 rows=200 width=10) (actual time=0.005..0.023 rows=200 loops=1)
Planning time: 0.191 ms
Execution time: 2.699 ms