add batch remove batch split batch comment selection show hidden batches hide batch highlight batch
db<>fiddle
donate feedback about
By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE users (
id serial
, email varchar
);

INSERT INTO users (email)
SELECT CASE WHEN random() > .1 THEN 'user' || g || '@foo.com' END -- 10 % NULL
FROM generate_series(1, 10000) g;

ALTER TABLE users ADD CONSTRAINT users_pkey PRIMARY KEY (id);
CREATE UNIQUE INDEX index_users_on_email ON users (email);
10000 rows affected
VACUUM ANALYZE users;
-- your first query
EXPLAIN (ANALYZE, BUFFERS)
select id
from (
select distinct on (email) id
from users
) t
where id = 123;
QUERY PLAN
Subquery Scan on t (cost=0.29..601.72 rows=45 width=4) (actual time=0.198..5.724 rows=1 loops=1)
  Filter: (t.id = 123)
  Rows Removed by Filter: 8983
  Buffers: shared hit=1857 read=14
  -> Unique (cost=0.29..489.43 rows=8983 width=20) (actual time=0.041..4.998 rows=8984 loops=1)
        Buffers: shared hit=1857 read=14
        -> Index Scan using index_users_on_email on users (cost=0.29..464.43 rows=10000 width=20) (actual time=0.040..2.874 rows=10000 loops=1)
              Buffers: shared hit=1857 read=14
Planning Time: 0.237 ms
  Buffers: shared hit=26 read=1
Execution Time: 5.770 ms
-- your second query
EXPLAIN (ANALYZE, BUFFERS)
select id
from (
select distinct on (id) id
from users
) t
where id = 123;
QUERY PLAN
Unique (cost=0.29..4.30 rows=1 width=4) (actual time=0.056..0.058 rows=1 loops=1)
  Buffers: shared hit=1 read=2
  -> Index Only Scan using users_pkey on users (cost=0.29..4.30 rows=1 width=4) (actual time=0.055..0.056 rows=1 loops=1)
        Index Cond: (id = 123)
        Heap Fetches: 0
        Buffers: shared hit=1 read=2
Planning Time: 0.079 ms
Execution Time: 0.075 ms
-- compare to first query: switching roles of PK and UNIQUE column
EXPLAIN (ANALYZE, BUFFERS)
select email
from (
select distinct on (id) email
from users
) t
where email = 'user123@foo.com';
QUERY PLAN
Subquery Scan on t (cost=0.29..485.58 rows=50 width=16) (actual time=0.081..6.000 rows=1 loops=1)
  Filter: ((t.email)::text = 'user123@foo.com'::text)
  Rows Removed by Filter: 9999
  Buffers: shared hit=69 read=27
  -> Unique (cost=0.29..360.58 rows=10000 width=20) (actual time=0.011..5.056 rows=10000 loops=1)
        Buffers: shared hit=69 read=27
        -> Index Scan using users_pkey on users (cost=0.29..335.58 rows=10000 width=20) (actual time=0.010..2.861 rows=10000 loops=1)
              Buffers: shared hit=69 read=27
Planning Time: 0.054 ms
Execution Time: 6.017 ms
-- compare to second query: trying the same with the UNIQUE column instead of the PK column
EXPLAIN (ANALYZE, BUFFERS)
select email
from (
select distinct on (email) email
from users
) t
where email = 'user123@foo.com';
QUERY PLAN
Unique (cost=0.29..4.30 rows=1 width=16) (actual time=0.014..0.014 rows=1 loops=1)
  Buffers: shared hit=3
  -> Index Only Scan using index_users_on_email on users (cost=0.29..4.30 rows=1 width=16) (actual time=0.013..0.013 rows=1 loops=1)
        Index Cond: (email = 'user123@foo.com'::text)
        Heap Fetches: 0
        Buffers: shared hit=3
Planning Time: 0.076 ms
Execution Time: 0.031 ms
-- With GROUP BY, the PK actually matters
EXPLAIN (ANALYZE, BUFFERS)
select email
from (
select email -- no aggregate required, because id = PK
from users
GROUP BY id -- !
) t
where email = 'user123@foo.com';
QUERY PLAN
Subquery Scan on t (cost=8.31..8.33 rows=1 width=16) (actual time=0.020..0.020 rows=1 loops=1)
  Buffers: shared hit=3
  -> Group (cost=8.31..8.32 rows=1 width=20) (actual time=0.019..0.020 rows=1 loops=1)
        Group Key: users.id
        Buffers: shared hit=3
        -> Sort (cost=8.31..8.32 rows=1 width=20) (actual time=0.018..0.018 rows=1 loops=1)
              Sort Key: users.id
              Sort Method: quicksort Memory: 25kB
              Buffers: shared hit=3
              -> Index Scan using index_users_on_email on users (cost=0.29..8.30 rows=1 width=20) (actual time=0.011..0.012 rows=1 loops=1)
                    Index Cond: ((email)::text = 'user123@foo.com'::text)
                    Buffers: shared hit=3
Planning Time: 0.078 ms
Execution Time: 0.044 ms
-- With GROUP BY, the PK actuall matters: UNIQUE constraint gets no special treatment
-- So this fails:
EXPLAIN (ANALYZE, BUFFERS)
select id
from (
select id -- aggregate required, because email <> PK
from users
GROUP BY email -- !
) t
where id = 123;
ERROR:  column "users.id" must appear in the GROUP BY clause or be used in an aggregate function
LINE 6:    select id -- aggregate required, because email <> PK
                  ^

-- This works:
EXPLAIN (ANALYZE, BUFFERS)
select id
from (
select min(id) AS id -- aggregate required, because email <> PK
from users
GROUP BY email -- !
) t
where id = 123;
QUERY PLAN
Subquery Scan on t (cost=237.00..349.74 rows=45 width=4) (actual time=4.565..4.834 rows=1 loops=1)
  Buffers: shared hit=62
  -> HashAggregate (cost=237.00..349.29 rows=45 width=20) (actual time=4.564..4.834 rows=1 loops=1)
        Group Key: users.email
        Filter: (min(users.id) = 123)
        Peak Memory Usage: 1681 kB
        Rows Removed by Filter: 8983
        Buffers: shared hit=62
        -> Seq Scan on users (cost=0.00..162.00 rows=10000 width=20) (actual time=0.009..1.355 rows=10000 loops=1)
              Buffers: shared hit=62
Planning Time: 0.108 ms
  Buffers: shared hit=2 read=1
Execution Time: 5.036 ms
-- If email is UNIQUE NOT NULL, we can instead:
EXPLAIN (ANALYZE, BUFFERS)
select id
from (
select id
from users
GROUP BY email, id
) t
where id = 123;
-- better query plan. the query is still nonsense.
QUERY PLAN
Subquery Scan on t (cost=0.29..8.31 rows=1 width=4) (actual time=0.010..0.011 rows=1 loops=1)
  Buffers: shared hit=3
  -> Group (cost=0.29..8.30 rows=1 width=20) (actual time=0.009..0.010 rows=1 loops=1)
        Group Key: users.id
        Buffers: shared hit=3
        -> Index Scan using users_pkey on users (cost=0.29..8.30 rows=1 width=20) (actual time=0.008..0.009 rows=1 loops=1)
              Index Cond: (id = 123)
              Buffers: shared hit=3
Planning Time: 0.080 ms
  Buffers: shared hit=2
Execution Time: 0.028 ms