Postgres 13

By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE users (
  id    serial
, email varchar
);

INSERT INTO users (email)
SELECT CASE WHEN random() > .1 THEN 'user' || g || '@foo.com' END -- 10 % NULL
FROM   generate_series(1, 10000) g;

ALTER TABLE users ADD CONSTRAINT users_pkey PRIMARY KEY (id);
CREATE UNIQUE INDEX index_users_on_email ON users (email);
10000 rows affected

VACUUM ANALYZE users;
-- your first query
EXPLAIN (ANALYZE, BUFFERS)
select id
from  (
   select distinct on (email) id
   from   users
   ) t
where id = 123;


QUERY PLAN


Subquery Scan on t  (cost=0.29..601.72 rows=45 width=4) (actual time=0.198..5.724 rows=1 loops=1)

  Filter: (t.id = 123)

  Rows Removed by Filter: 8983

  Buffers: shared hit=1857 read=14

  ->  Unique  (cost=0.29..489.43 rows=8983 width=20) (actual time=0.041..4.998 rows=8984 loops=1)

        Buffers: shared hit=1857 read=14

        ->  Index Scan using index_users_on_email on users  (cost=0.29..464.43 rows=10000 width=20) (actual time=0.040..2.874 rows=10000 loops=1)

              Buffers: shared hit=1857 read=14

Planning Time: 0.237 ms

  Buffers: shared hit=26 read=1

Execution Time: 5.770 ms


-- your second query
EXPLAIN (ANALYZE, BUFFERS)
select id
from  (
   select distinct on (id) id
   from   users
   ) t
where id = 123;


QUERY PLAN


Unique  (cost=0.29..4.30 rows=1 width=4) (actual time=0.056..0.058 rows=1 loops=1)

  Buffers: shared hit=1 read=2

  ->  Index Only Scan using users_pkey on users  (cost=0.29..4.30 rows=1 width=4) (actual time=0.055..0.056 rows=1 loops=1)

        Index Cond: (id = 123)

        Heap Fetches: 0

        Buffers: shared hit=1 read=2

Planning Time: 0.079 ms

Execution Time: 0.075 ms


-- compare to first query: switching roles of PK and UNIQUE column
EXPLAIN (ANALYZE, BUFFERS)
select email
from  (
   select distinct on (id) email
   from   users
   ) t
where email = 'user123@foo.com';


QUERY PLAN


Subquery Scan on t  (cost=0.29..485.58 rows=50 width=16) (actual time=0.081..6.000 rows=1 loops=1)

  Filter: ((t.email)::text = 'user123@foo.com'::text)

  Rows Removed by Filter: 9999

  Buffers: shared hit=69 read=27

  ->  Unique  (cost=0.29..360.58 rows=10000 width=20) (actual time=0.011..5.056 rows=10000 loops=1)

        Buffers: shared hit=69 read=27

        ->  Index Scan using users_pkey on users  (cost=0.29..335.58 rows=10000 width=20) (actual time=0.010..2.861 rows=10000 loops=1)

              Buffers: shared hit=69 read=27

Planning Time: 0.054 ms

Execution Time: 6.017 ms


-- compare to second query: trying the same with the UNIQUE column instead of the PK column
EXPLAIN (ANALYZE, BUFFERS)
select email
from  (
   select distinct on (email) email
   from users
   ) t
where email = 'user123@foo.com';


QUERY PLAN


Unique  (cost=0.29..4.30 rows=1 width=16) (actual time=0.014..0.014 rows=1 loops=1)

  Buffers: shared hit=3

  ->  Index Only Scan using index_users_on_email on users  (cost=0.29..4.30 rows=1 width=16) (actual time=0.013..0.013 rows=1 loops=1)

        Index Cond: (email = 'user123@foo.com'::text)

        Heap Fetches: 0

        Buffers: shared hit=3

Planning Time: 0.076 ms

Execution Time: 0.031 ms


-- With GROUP BY, the PK actually matters
EXPLAIN (ANALYZE, BUFFERS)
select email
from  (
   select email -- no aggregate required, because id = PK
   from   users
   GROUP  BY id  -- !
   ) t
where email = 'user123@foo.com';


QUERY PLAN


Subquery Scan on t  (cost=8.31..8.33 rows=1 width=16) (actual time=0.020..0.020 rows=1 loops=1)

  Buffers: shared hit=3

  ->  Group  (cost=8.31..8.32 rows=1 width=20) (actual time=0.019..0.020 rows=1 loops=1)

        Group Key: users.id

        Buffers: shared hit=3

        ->  Sort  (cost=8.31..8.32 rows=1 width=20) (actual time=0.018..0.018 rows=1 loops=1)

              Sort Key: users.id

              Sort Method: quicksort  Memory: 25kB

              Buffers: shared hit=3

              ->  Index Scan using index_users_on_email on users  (cost=0.29..8.30 rows=1 width=20) (actual time=0.011..0.012 rows=1 loops=1)

                    Index Cond: ((email)::text = 'user123@foo.com'::text)

                    Buffers: shared hit=3

Planning Time: 0.078 ms

Execution Time: 0.044 ms


-- With GROUP BY, the PK actuall matters: UNIQUE constraint gets no special treatment
-- So this fails:
EXPLAIN (ANALYZE, BUFFERS)
select id
from  (
   select id -- aggregate required, because email <> PK
   from   users
   GROUP  BY email  -- !
   ) t
where id = 123;
ERROR:  column "users.id" must appear in the GROUP BY clause or be used in an aggregate function
LINE 6:    select id -- aggregate required, because email <> PK
                  ^


-- This works:
EXPLAIN (ANALYZE, BUFFERS)
select id
from  (
   select min(id) AS id -- aggregate required, because email <> PK
   from   users
   GROUP  BY email  -- !
   ) t
where id = 123;


QUERY PLAN


Subquery Scan on t  (cost=237.00..349.74 rows=45 width=4) (actual time=4.565..4.834 rows=1 loops=1)

  Buffers: shared hit=62

  ->  HashAggregate  (cost=237.00..349.29 rows=45 width=20) (actual time=4.564..4.834 rows=1 loops=1)

        Group Key: users.email

        Filter: (min(users.id) = 123)

        Peak Memory Usage: 1681 kB

        Rows Removed by Filter: 8983

        Buffers: shared hit=62

        ->  Seq Scan on users  (cost=0.00..162.00 rows=10000 width=20) (actual time=0.009..1.355 rows=10000 loops=1)

              Buffers: shared hit=62

Planning Time: 0.108 ms

  Buffers: shared hit=2 read=1

Execution Time: 5.036 ms


-- If email is UNIQUE NOT NULL, we can instead:
EXPLAIN (ANALYZE, BUFFERS)
select id
from  (
   select id
   from   users
   GROUP  BY email, id
   ) t
where id = 123;
-- better query plan. the query is still nonsense.


QUERY PLAN


Subquery Scan on t  (cost=0.29..8.31 rows=1 width=4) (actual time=0.010..0.011 rows=1 loops=1)

  Buffers: shared hit=3

  ->  Group  (cost=0.29..8.30 rows=1 width=20) (actual time=0.009..0.010 rows=1 loops=1)

        Group Key: users.id

        Buffers: shared hit=3

        ->  Index Scan using users_pkey on users  (cost=0.29..8.30 rows=1 width=20) (actual time=0.008..0.009 rows=1 loops=1)

              Index Cond: (id = 123)

              Buffers: shared hit=3

Planning Time: 0.080 ms

  Buffers: shared hit=2

Execution Time: 0.028 ms