Postgres 14

By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
select version();


version


PostgreSQL 14.10 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-20), 64-bit



SELECT 1


CREATE TABLE test
(
  id INT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
  t_field TEXT
);

CREATE TABLE


INSERT INTO test (t_field)
VALUES ('bar$foo$john$doe$xxx');  -- OP's own data

INSERT 0 1


INSERT INTO test (t_field)
SELECT 
  LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
  LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
  LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
  LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
  LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT)
FROM 
  GENERATE_SERIES(1, 299999);  --<<== Vary here
   
  --
  -- Vary the GENERATE_SERIES(1, 299999) number if the fiddle run fails!
  --
  -- 299999 (+ 1 - OP) = 300,000 = large enough number to get reliable, consistent results
  -- and small enough so that the fiddle run doesn't fail too often.
  -- 

INSERT 0 299999


ANALYZE test;

ANALYZE


--
--
-- Approach 5: The slowest - a "curiosity" - uses STRING_TO_ARRAY(),
-- UNNEST() and WITH ORDINALITY. Shown in detail on a whim!
--
-- Different approaches are shown and analysed.
--
--
-- Step "-1" (minus one) - not shown in answer
--
--
-- Turn the reversed string into an array
--

SELECT 
    STRING_TO_ARRAY
    (
      REVERSE
      (
        (
          SELECT t.t_field 
           FROM test t
           WHERE t.id = 1
        )
      )
      , '$'
    );


string_to_array


{xxx,eod,nhoj,oof,rab}



SELECT 1


--
-- Step 1: UNNEST() with STRING_TO_ARRAY() - turn the array into a set of records
--

SELECT
  UNNEST
  (
    STRING_TO_ARRAY
    (
      REVERSE
      (
        (
          SELECT t.t_field 
           FROM test t
           WHERE t.id = 1
        )
      ), 
      '$'
    )
  );


unnest


xxx

eod

nhoj

oof

rab



SELECT 5


--
--
--
--
-- Step 2 - using the WITH ORDINALITY functionality to maintain the order of the 
-- various substrings within the group - i.e. by id.
--

SELECT
  t.t_field,
  t.id, x.elem, x.num
FROM test t
LEFT JOIN LATERAL
  UNNEST(STRING_TO_ARRAY
  (
    REVERSE((SELECT t_field 
             FROM test
             WHERE test.id = t.id
  )), '$'))
  WITH ORDINALITY AS x (elem, num) ON TRUE
LIMIT 10;


t_field
id
elem
num


bar$foo$john$doe$xxx
1
xxx
1

bar$foo$john$doe$xxx
1
eod
2

bar$foo$john$doe$xxx
1
nhoj
3

bar$foo$john$doe$xxx
1
oof
4

bar$foo$john$doe$xxx
1
rab
5

c0a0$eaa5a$dce98$f0e$9e4
2
4e9
1

c0a0$eaa5a$dce98$f0e$9e4
2
e0f
2

c0a0$eaa5a$dce98$f0e$9e4
2
89ecd
3

c0a0$eaa5a$dce98$f0e$9e4
2
a5aae
4

c0a0$eaa5a$dce98$f0e$9e4
2
0a0c
5



SELECT 10


-- 
-- From this post by Erwin Brandstetter
--
-- https://stackoverflow.com/questions/8760419/postgresql-unnest-with-element-number/8767450#8767450
--
-- the above can be rewritten.
--
--
-- Step 2 (bis): Simplified alternative
--

SELECT
  t.t_field,
  t.id, x.elem, x.num
FROM test t,
  UNNEST(STRING_TO_ARRAY
  (
    REVERSE((SELECT t_field 
             FROM test
             WHERE test.id = t.id
  )), '$'))
  WITH ORDINALITY AS x (elem, num)
LIMIT 10;


t_field
id
elem
num


bar$foo$john$doe$xxx
1
xxx
1

bar$foo$john$doe$xxx
1
eod
2

bar$foo$john$doe$xxx
1
nhoj
3

bar$foo$john$doe$xxx
1
oof
4

bar$foo$john$doe$xxx
1
rab
5

c0a0$eaa5a$dce98$f0e$9e4
2
4e9
1

c0a0$eaa5a$dce98$f0e$9e4
2
e0f
2

c0a0$eaa5a$dce98$f0e$9e4
2
89ecd
3

c0a0$eaa5a$dce98$f0e$9e4
2
a5aae
4

c0a0$eaa5a$dce98$f0e$9e4
2
0a0c
5



SELECT 10


--
--
--
--
-- Step 2 (ter): Even more simplified - it is my **_understanding_** that as PostgreSQL
-- is currently implemented, this will work and there are no plans to change this
-- in the forseeable future. Be aware that this solution may not continue to work!
--

SELECT
  t.t_field,
  t.id, s.elem,
  ROW_NUMBER() OVER (PARTITION BY id) AS rn
FROM test t,
  UNNEST(STRING_TO_ARRAY
  (
    REVERSE((SELECT t_field 
             FROM test
             WHERE test.id = t.id
  )), '$')) AS s (elem)
LIMIT 10;


t_field
id
elem
rn


bar$foo$john$doe$xxx
1
xxx
1

bar$foo$john$doe$xxx
1
eod
2

bar$foo$john$doe$xxx
1
nhoj
3

bar$foo$john$doe$xxx
1
oof
4

bar$foo$john$doe$xxx
1
rab
5

c0a0$eaa5a$dce98$f0e$9e4
2
4e9
1

c0a0$eaa5a$dce98$f0e$9e4
2
e0f
2

c0a0$eaa5a$dce98$f0e$9e4
2
89ecd
3

c0a0$eaa5a$dce98$f0e$9e4
2
a5aae
4

c0a0$eaa5a$dce98$f0e$9e4
2
0a0c
5



SELECT 10


--
-- LEFT JOIN LATERAL - WITH ORDINALITY
--
--
-- Step 4: final step, we combine it all. Take the last 4 elements of the array (which
-- remain unchanged) and recombine them using '$', but take the 1st (of the 
-- reversed string - [1]) and concatenate the hyphen and then reverse the lot.
-- 
--

SELECT
  (SELECT t_field FROM test WHERE test.id = tab.id),
  REVERSE(
  (STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
  || '-' || 
  STRING_AGG(elem, '$')) AS result
FROM
(
  SELECT
    t.id, x.elem, x.num
  FROM test t
  LEFT JOIN LATERAL
    UNNEST(STRING_TO_ARRAY(REVERSE((SELECT t_field 
                                    FROM test
                                    WHERE test.id = t.id
                                    )), '$'))
    WITH ORDINALITY AS x (elem, num) ON TRUE
) AS tab
WHERE tab.num > 1  -- this is the last element of the original string (REVERSE())
GROUP BY tab.id    -- required by STRING_AGG() naturally - otherwise, we'd get one huge string
LIMIT 2;


t_field
result


bar$foo$john$doe$xxx
bar$foo$john$doe-xxx

c0a0$eaa5a$dce98$f0e$9e4
c0a0$eaa5a$dce98$f0e-9e4



SELECT 2


EXPLAIN (ANALYZE, TIMING OFF)
SELECT
  REVERSE(
  (STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
  || '-' || 
  STRING_AGG(elem, '$'))
FROM
(
  SELECT
    t.id, x.elem, x.num
  FROM test t
  LEFT JOIN LATERAL
    UNNEST(STRING_TO_ARRAY(REVERSE((SELECT t_field 
                                    FROM test
                                    WHERE test.id = t.id
                                    )), '$'))
    WITH ORDINALITY AS x (elem, num) ON TRUE
) AS tab
WHERE tab.num > 1
GROUP BY tab.id;


QUERY PLAN


GroupAggregate  (cost=8.87..2600529.87 rows=300000 width=36) (actual rows=300000 loops=1)

  Group Key: t.id

  ->  Nested Loop  (cost=8.87..56529.87 rows=900000 width=36) (actual rows=1200000 loops=1)

        ->  Index Only Scan using test_pkey on test t  (cost=0.42..10021.42 rows=300000 width=4) (actual rows=300000 loops=1)

              Heap Fetches: 300000

        ->  Function Scan on unnest x  (cost=8.45..8.57 rows=3 width=32) (actual rows=4 loops=300000)

              Filter: (num > 1)

              Rows Removed by Filter: 1

              SubPlan 2

                ->  Index Scan using test_pkey on test test_1  (cost=0.42..8.44 rows=1 width=24) (actual rows=1 loops=300000)

                      Index Cond: (id = t.id)

  SubPlan 1

    ->  Index Scan using test_pkey on test  (cost=0.42..8.44 rows=1 width=32) (actual rows=1 loops=300000)

          Index Cond: (id = t.id)

Planning Time: 0.155 ms

Execution Time: 4047.734 ms



EXPLAIN


--
-- CROSS JOIN - WITH ORDINALITY
--
SELECT
  tab.f,
  REVERSE(
  (STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
  || '-' ||
  STRING_AGG(tab.elem, '$')) AS result
FROM
(
  SELECT
    t.t_field AS f,
    t.id, x.elem, x.num
  FROM test t,
    UNNEST(STRING_TO_ARRAY
    (
      REVERSE((SELECT t_field 
               FROM test
               WHERE test.id = t.id
    )), '$'))
    WITH ORDINALITY AS x (elem, num)
) AS tab
WHERE tab.num > 1
GROUP BY tab.id, tab.f
LIMIT 2;


f
result


bar$foo$john$doe$xxx
bar$foo$john$doe-xxx

c0a0$eaa5a$dce98$f0e$9e4
c0a0$eaa5a$dce98$f0e-9e4



SELECT 2


EXPLAIN (ANALYZE, TIMING OFF)
SELECT
  tab.f,
  REVERSE(
  (STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
  || '-' ||
  STRING_AGG(tab.elem, '$')) AS result
FROM
(
  SELECT
    t.t_field AS f,
    t.id, x.elem, x.num
  FROM test t,
    UNNEST(STRING_TO_ARRAY
    (
      REVERSE((SELECT t_field 
               FROM test
               WHERE test.id = t.id
    )), '$'))
    WITH ORDINALITY AS x (elem, num)
) AS tab
WHERE tab.num > 1
GROUP BY tab.id, tab.f;


QUERY PLAN


GroupAggregate  (cost=8.87..2600529.87 rows=300000 width=60) (actual rows=300000 loops=1)

  Group Key: t.id

  ->  Nested Loop  (cost=8.87..56529.87 rows=900000 width=60) (actual rows=1200000 loops=1)

        ->  Index Scan using test_pkey on test t  (cost=0.42..10021.42 rows=300000 width=28) (actual rows=300000 loops=1)

        ->  Function Scan on unnest x  (cost=8.45..8.57 rows=3 width=32) (actual rows=4 loops=300000)

              Filter: (num > 1)

              Rows Removed by Filter: 1

              SubPlan 2

                ->  Index Scan using test_pkey on test test_1  (cost=0.42..8.44 rows=1 width=24) (actual rows=1 loops=300000)

                      Index Cond: (id = t.id)

  SubPlan 1

    ->  Index Scan using test_pkey on test  (cost=0.42..8.44 rows=1 width=32) (actual rows=1 loops=300000)

          Index Cond: (id = t.id)

Planning Time: 0.150 ms

Execution Time: 4011.771 ms



EXPLAIN


--
-- CROSS JOIN - using ROW_NUMBER() OVER()
--
SELECT
  t.t_field,
  t.id, s.elem,
  ROW_NUMBER() OVER (PARTITION BY id) AS rn
FROM test t,
  UNNEST(STRING_TO_ARRAY
  (
    REVERSE((SELECT t_field 
             FROM test
             WHERE test.id = t.id
  )), '$')) AS s (elem)
LIMIT 10;


t_field
id
elem
rn


bar$foo$john$doe$xxx
1
xxx
1

bar$foo$john$doe$xxx
1
eod
2

bar$foo$john$doe$xxx
1
nhoj
3

bar$foo$john$doe$xxx
1
oof
4

bar$foo$john$doe$xxx
1
rab
5

c0a0$eaa5a$dce98$f0e$9e4
2
4e9
1

c0a0$eaa5a$dce98$f0e$9e4
2
e0f
2

c0a0$eaa5a$dce98$f0e$9e4
2
89ecd
3

c0a0$eaa5a$dce98$f0e$9e4
2
a5aae
4

c0a0$eaa5a$dce98$f0e$9e4
2
0a0c
5



SELECT 10


SELECT
  tab.f,
  REVERSE(
  (STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
  || '-' ||
  STRING_AGG(tab.elem, '$')) AS result
FROM
(
  SELECT
    t.t_field AS f,
    t.id, s.elem,
    ROW_NUMBER() OVER (PARTITION BY id) AS rn
  FROM 
    test t,
    UNNEST(STRING_TO_ARRAY
    (
      REVERSE((SELECT t_field 
               FROM test
               WHERE test.id = t.id
    )), '$')) AS s (elem)
) AS tab
WHERE rn > 1
GROUP BY tab.id, tab.f
LIMIT 2;


f
result


bar$foo$john$doe$xxx
bar$foo$john$doe-xxx

c0a0$eaa5a$dce98$f0e$9e4
c0a0$eaa5a$dce98$f0e-9e4



SELECT 2


--
-- The ROW_NUMBER() is much less performant than the WITH ORDINALITY (1.3 x)
--
EXPLAIN (ANALYZE, TIMING OFF)
SELECT
  tab.f,
  REVERSE(
  (STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
  || '-' ||
  STRING_AGG(tab.elem, '$')) AS result
FROM
(
  SELECT
    t.t_field AS f,
    t.id, s.elem,
    ROW_NUMBER() OVER (PARTITION BY id) AS rn
  FROM 
    test t,
    UNNEST(STRING_TO_ARRAY
    (
      REVERSE((SELECT t_field 
               FROM test
               WHERE test.id = t.id
    )), '$')) AS s (elem)
) AS tab
WHERE rn > 1
GROUP BY tab.id, tab.f;


QUERY PLAN


GroupAggregate  (cost=9.03..8670029.87 rows=1000000 width=60) (actual rows=300000 loops=1)

  Group Key: tab.id, tab.f

  ->  Incremental Sort  (cost=9.03..197529.87 rows=1000000 width=60) (actual rows=1200000 loops=1)

        Sort Key: tab.id, tab.f

        Presorted Key: tab.id

        Full-sort Groups: 37500  Sort Method: quicksort  Average Memory: 27kB  Peak Memory: 27kB

        ->  Subquery Scan on tab  (cost=8.87..152529.87 rows=1000000 width=60) (actual rows=1200000 loops=1)

              Filter: (tab.rn > 1)

              Rows Removed by Filter: 300000

              ->  WindowAgg  (cost=8.87..115029.87 rows=3000000 width=68) (actual rows=1500000 loops=1)

                    ->  Nested Loop  (cost=8.87..70029.87 rows=3000000 width=60) (actual rows=1500000 loops=1)

                          ->  Index Scan using test_pkey on test t  (cost=0.42..10021.42 rows=300000 width=28) (actual rows=300000 loops=1)

                          ->  Function Scan on unnest s  (cost=8.45..8.55 rows=10 width=32) (actual rows=5 loops=300000)

                                SubPlan 2

                                  ->  Index Scan using test_pkey on test test_1  (cost=0.42..8.44 rows=1 width=24) (actual rows=1 loops=300000)

                                        Index Cond: (id = t.id)

  SubPlan 1

    ->  Index Scan using test_pkey on test  (cost=0.42..8.44 rows=1 width=32) (actual rows=1 loops=300000)

          Index Cond: (id = tab.id)

Planning Time: 0.161 ms

Execution Time: 5003.977 ms



EXPLAIN
t_field	id	elem	num
bar$foo$john$doe$xxx	1	xxx	1
bar$foo$john$doe$xxx	1	eod	2
bar$foo$john$doe$xxx	1	nhoj	3
bar$foo$john$doe$xxx	1	oof	4
bar$foo$john$doe$xxx	1	rab	5
c0a0$eaa5a$dce98$f0e$9e4	2	4e9	1
c0a0$eaa5a$dce98$f0e$9e4	2	e0f	2
c0a0$eaa5a$dce98$f0e$9e4	2	89ecd	3
c0a0$eaa5a$dce98$f0e$9e4	2	a5aae	4
c0a0$eaa5a$dce98$f0e$9e4	2	0a0c	5
t_field	result
bar$foo$john$doe$xxx	bar$foo$john$doe-xxx
c0a0$eaa5a$dce98$f0e$9e4	c0a0$eaa5a$dce98$f0e-9e4