By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0. 3601568 fiddles created (47979 in the last week).
select version();
version
PostgreSQL 14.1 on x86_64-pc-linux-gnu, compiled by gcc (GCC) 8.5.0 20210514 (Red Hat 8.5.0-4), 64-bit
…
hidden batch(es)
CREATE TABLE test
(
id INT GENERATED BY DEFAULT AS IDENTITY PRIMARY KEY,
t_field TEXT
);
✓
hidden batch(es)
INSERT INTO test (t_field)
VALUES ('bar$foo$john$doe$xxx'); -- OP's own data
1 rows affected
hidden batch(es)
INSERT INTO test (t_field)
SELECT
LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT) || '$' ||
LEFT(MD5(RANDOM()::TEXT), FLOOR(RANDOM() * (5 - 3 + 1) + 3)::INT)
FROM
GENERATE_SERIES(1, 299999); --<<== Vary here
--
-- Vary the GENERATE_SERIES(1, 299999) number if the fiddle run fails!
--
-- 299999 (+ 1 - OP) = 300,000 = large enough number to get reliable, consistent results
-- and small enough so that the fiddle run doesn't fail too often.
--
299999 rows affected
hidden batch(es)
ANALYZE test;
✓
hidden batch(es)
--
--
-- Approach 5: The slowest - a "curiosity" - uses STRING_TO_ARRAY(),
-- UNNEST() and WITH ORDINALITY. Shown in detail on a whim!
--
-- Different approaches are shown and analysed.
--
✓
hidden batch(es)
--
-- Step "-1" (minus one) - not shown in answer
--
✓
hidden batch(es)
--
-- Turn the reversed string into an array
--
SELECT
STRING_TO_ARRAY
(
REVERSE
(
(
SELECT t.t_field
FROM test t
WHERE t.id = 1
)
)
, '$'
);
string_to_array
{xxx,eod,nhoj,oof,rab}
…
hidden batch(es)
--
-- Step 1: UNNEST() with STRING_TO_ARRAY() - turn the array into a set of records
--
SELECT
UNNEST
(
STRING_TO_ARRAY
(
REVERSE
(
(
SELECT t.t_field
FROM test t
WHERE t.id = 1
)
),
'$'
)
);
unnest
xxx
eod
nhoj
oof
rab
…
hidden batch(es)
--
--
--
✓
hidden batch(es)
--
-- Step 2 - using the WITH ORDINALITY functionality to maintain the order of the
-- various substrings within the group - i.e. by id.
--
SELECT
t.t_field,
t.id, x.elem, x.num
FROM test t
LEFT JOIN LATERAL
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$'))
WITH ORDINALITY AS x (elem, num) ON TRUE
LIMIT 10;
t_field
id
elem
num
bar$foo$john$doe$xxx
1
xxx
1
bar$foo$john$doe$xxx
1
eod
2
bar$foo$john$doe$xxx
1
nhoj
3
bar$foo$john$doe$xxx
1
oof
4
bar$foo$john$doe$xxx
1
rab
5
3ded$bea0$550$86916$77c
2
c77
1
3ded$bea0$550$86916$77c
2
61968
2
3ded$bea0$550$86916$77c
2
055
3
3ded$bea0$550$86916$77c
2
0aeb
4
3ded$bea0$550$86916$77c
2
ded3
5
…
hidden batch(es)
--
-- From this post by Erwin Brandstetter
--
-- https://stackoverflow.com/questions/8760419/postgresql-unnest-with-element-number/8767450#8767450
--
-- the above can be rewritten.
--
✓
hidden batch(es)
--
-- Step 2 (bis): Simplified alternative
--
SELECT
t.t_field,
t.id, x.elem, x.num
FROM test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$'))
WITH ORDINALITY AS x (elem, num)
LIMIT 10;
t_field
id
elem
num
bar$foo$john$doe$xxx
1
xxx
1
bar$foo$john$doe$xxx
1
eod
2
bar$foo$john$doe$xxx
1
nhoj
3
bar$foo$john$doe$xxx
1
oof
4
bar$foo$john$doe$xxx
1
rab
5
3ded$bea0$550$86916$77c
2
c77
1
3ded$bea0$550$86916$77c
2
61968
2
3ded$bea0$550$86916$77c
2
055
3
3ded$bea0$550$86916$77c
2
0aeb
4
3ded$bea0$550$86916$77c
2
ded3
5
…
hidden batch(es)
--
--
--
✓
hidden batch(es)
--
-- Step 2 (ter): Even more simplified - it is my **_understanding_** that as PostgreSQL
-- is currently implemented, this will work and there are no plans to change this
-- in the forseeable future. Be aware that this solution may not continue to work!
--
SELECT
t.t_field,
t.id, s.elem,
ROW_NUMBER() OVER (PARTITION BY id) AS rn
FROM test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$')) AS s (elem)
LIMIT 10;
t_field
id
elem
rn
bar$foo$john$doe$xxx
1
xxx
1
bar$foo$john$doe$xxx
1
eod
2
bar$foo$john$doe$xxx
1
nhoj
3
bar$foo$john$doe$xxx
1
oof
4
bar$foo$john$doe$xxx
1
rab
5
3ded$bea0$550$86916$77c
2
c77
1
3ded$bea0$550$86916$77c
2
61968
2
3ded$bea0$550$86916$77c
2
055
3
3ded$bea0$550$86916$77c
2
0aeb
4
3ded$bea0$550$86916$77c
2
ded3
5
…
hidden batch(es)
--
-- LEFT JOIN LATERAL - WITH ORDINALITY
--
✓
hidden batch(es)
--
-- Step 4: final step, we combine it all. Take the last 4 elements of the array (which
-- remain unchanged) and recombine them using '$', but take the 1st (of the
-- reversed string - [1]) and concatenate the hyphen and then reverse the lot.
--
--
SELECT
(SELECT t_field FROM test WHERE test.id = tab.id),
REVERSE(
(STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
|| '-' ||
STRING_AGG(elem, '$')) AS result
FROM
(
SELECT
t.id, x.elem, x.num
FROM test t
LEFT JOIN LATERAL
UNNEST(STRING_TO_ARRAY(REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$'))
WITH ORDINALITY AS x (elem, num) ON TRUE
) AS tab
WHERE tab.num > 1 -- this is the last element of the original string (REVERSE())
GROUP BY tab.id -- required by STRING_AGG() naturally - otherwise, we'd get one huge string
LIMIT 2;
t_field
result
bar$foo$john$doe$xxx
bar$foo$john$doe-xxx
3ded$bea0$550$86916$77c
3ded$bea0$550$86916-77c
…
hidden batch(es)
EXPLAIN (ANALYZE, TIMING OFF)
SELECT
REVERSE(
(STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
|| '-' ||
STRING_AGG(elem, '$'))
FROM
(
SELECT
t.id, x.elem, x.num
FROM test t
LEFT JOIN LATERAL
UNNEST(STRING_TO_ARRAY(REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$'))
WITH ORDINALITY AS x (elem, num) ON TRUE
) AS tab
WHERE tab.num > 1
GROUP BY tab.id;
-> Index Only Scan using test_pkey on test t (cost=0.42..10021.42 rows=300000 width=4) (actual rows=300000 loops=1)
Heap Fetches: 300000
-> Function Scan on unnest x (cost=8.45..8.57 rows=3 width=32) (actual rows=4 loops=300000)
Filter: (num > 1)
Rows Removed by Filter: 1
SubPlan 2
-> Index Scan using test_pkey on test test_1 (cost=0.42..8.44 rows=1 width=25) (actual rows=1 loops=300000)
Index Cond: (id = t.id)
SubPlan 1
-> Index Scan using test_pkey on test (cost=0.42..8.44 rows=1 width=32) (actual rows=1 loops=300000)
Index Cond: (id = t.id)
Planning Time: 0.150 ms
Execution Time: 3857.112 ms
…
hidden batch(es)
--
-- CROSS JOIN - WITH ORDINALITY
--
✓
hidden batch(es)
SELECT
tab.f,
REVERSE(
(STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
|| '-' ||
STRING_AGG(tab.elem, '$')) AS result
FROM
(
SELECT
t.t_field AS f,
t.id, x.elem, x.num
FROM test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$'))
WITH ORDINALITY AS x (elem, num)
) AS tab
WHERE tab.num > 1
GROUP BY tab.id, tab.f
LIMIT 2;
f
result
bar$foo$john$doe$xxx
bar$foo$john$doe-xxx
3ded$bea0$550$86916$77c
3ded$bea0$550$86916-77c
…
hidden batch(es)
EXPLAIN (ANALYZE, TIMING OFF)
SELECT
tab.f,
REVERSE(
(STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
|| '-' ||
STRING_AGG(tab.elem, '$')) AS result
FROM
(
SELECT
t.t_field AS f,
t.id, x.elem, x.num
FROM test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$'))
WITH ORDINALITY AS x (elem, num)
) AS tab
WHERE tab.num > 1
GROUP BY tab.id, tab.f;
-> Index Scan using test_pkey on test t (cost=0.42..10021.42 rows=300000 width=29) (actual rows=300000 loops=1)
-> Function Scan on unnest x (cost=8.45..8.57 rows=3 width=32) (actual rows=4 loops=300000)
Filter: (num > 1)
Rows Removed by Filter: 1
SubPlan 2
-> Index Scan using test_pkey on test test_1 (cost=0.42..8.44 rows=1 width=25) (actual rows=1 loops=300000)
Index Cond: (id = t.id)
SubPlan 1
-> Index Scan using test_pkey on test (cost=0.42..8.44 rows=1 width=32) (actual rows=1 loops=300000)
Index Cond: (id = t.id)
Planning Time: 0.179 ms
Execution Time: 3805.532 ms
…
hidden batch(es)
--
-- CROSS JOIN - using ROW_NUMBER() OVER()
--
✓
hidden batch(es)
SELECT
t.t_field,
t.id, s.elem,
ROW_NUMBER() OVER (PARTITION BY id) AS rn
FROM test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$')) AS s (elem)
LIMIT 10;
t_field
id
elem
rn
bar$foo$john$doe$xxx
1
xxx
1
bar$foo$john$doe$xxx
1
eod
2
bar$foo$john$doe$xxx
1
nhoj
3
bar$foo$john$doe$xxx
1
oof
4
bar$foo$john$doe$xxx
1
rab
5
3ded$bea0$550$86916$77c
2
c77
1
3ded$bea0$550$86916$77c
2
61968
2
3ded$bea0$550$86916$77c
2
055
3
3ded$bea0$550$86916$77c
2
0aeb
4
3ded$bea0$550$86916$77c
2
ded3
5
…
hidden batch(es)
SELECT
tab.f,
REVERSE(
(STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
|| '-' ||
STRING_AGG(tab.elem, '$')) AS result
FROM
(
SELECT
t.t_field AS f,
t.id, s.elem,
ROW_NUMBER() OVER (PARTITION BY id) AS rn
FROM
test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$')) AS s (elem)
) AS tab
WHERE rn > 1
GROUP BY tab.id, tab.f
LIMIT 2;
f
result
bar$foo$john$doe$xxx
bar$foo$john$doe-xxx
3ded$bea0$550$86916$77c
3ded$bea0$550$86916-77c
…
hidden batch(es)
--
-- The ROW_NUMBER() is much less performant than the WITH ORDINALITY (1.3 x)
--
✓
hidden batch(es)
EXPLAIN (ANALYZE, TIMING OFF)
SELECT
tab.f,
REVERSE(
(STRING_TO_ARRAY((SELECT REVERSE(t_field) FROM test WHERE test.id = tab.id), '$'))[1]
|| '-' ||
STRING_AGG(tab.elem, '$')) AS result
FROM
(
SELECT
t.t_field AS f,
t.id, s.elem,
ROW_NUMBER() OVER (PARTITION BY id) AS rn
FROM
test t,
UNNEST(STRING_TO_ARRAY
(
REVERSE((SELECT t_field
FROM test
WHERE test.id = t.id
)), '$')) AS s (elem)
) AS tab
WHERE rn > 1
GROUP BY tab.id, tab.f;