add batch remove batch split batch comment selection show hidden batches hide batch highlight batch
db<>fiddle
donate feedback about
By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE assets (
id text PRIMARY KEY
, title text
, ts tsvector GENERATED ALWAYS AS (setweight(to_tsvector('english', coalesce(title, '')), 'A')) STORED
);

INSERT INTO assets (id, title) VALUES
('a', 'Hello world!')
, ('b', 'Hello sir')
, ('c', 'I am above the world')
, ('d', 'World hello')
, ('e', 'world1, world2, world3') -- produces duplicate words
;
5 rows affected
-- would produce duplicates
SELECT regexp_split_to_table(title, '[^a-zA-Z]+') AS word
FROM assets a
WHERE a.id = 'e';
word
world
world
world
-- so remove duplicate words early
-- and remove resulting duplicates, too
SELECT a.id
, ( SELECT jsonb_agg(resultsforword)
FROM (
SELECT *
FROM (
SELECT DISTINCT ON (r.id)
r.id, r.title, r.rank
FROM (
SELECT word -- remove duplicate words early
FROM regexp_split_to_table(a.title, '[^a-zA-Z]+') word
WHERE word <> '' -- trim possible leading / trailing empty words
) w
CROSS JOIN LATERAL (
SELECT s.id, s.title
, ts_rank(s.ts, to_tsquery('english', w.word)) AS rank
FROM assets s
WHERE s.id <> a.id
AND ts_rank(s.ts, to_tsquery('english', w.word)) > 0.5
ORDER BY rank DESC
LIMIT 5 -- max. 5 best matches per word
) r
ORDER BY r.id, r.rank DESC -- take best rank for each dupe result
) r
ORDER BY r.rank DESC, r.id -- take best rank overall (id as tiebreaker)
LIMIT 5 -- max 5 overall
) resultsforword
) AS results
FROM assets a
WHERE a.id = 'e';
id results
e [{"id": "a", "rank": 0.6079271, "title": "Hello world!"}, {"id": "c", "rank": 0.6079271, "title": "I am above the world"}, {"id": "d", "rank": 0.6079271, "title": "World hello"}]