Postgres 14

By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE assets (
  id text PRIMARY KEY
, title text
, ts tsvector GENERATED ALWAYS AS (setweight(to_tsvector('english', coalesce(title, '')), 'A')) STORED
);

INSERT INTO assets (id, title) VALUES
  ('a', 'Hello world!')
, ('b', 'Hello sir')
, ('c', 'I am above the world')
, ('d', 'World hello')
, ('e', 'world1, world2, world3')  -- produces duplicate words
;
5 rows affected

-- would produce duplicates
SELECT regexp_split_to_table(title, '[^a-zA-Z]+') AS word
FROM   assets a
WHERE  a.id = 'e';


word


world

world

world




-- so remove duplicate words early
-- and remove resulting duplicates, too
SELECT a.id
    , (  SELECT jsonb_agg(resultsforword)
         FROM  (
            SELECT *
            FROM  (
               SELECT DISTINCT ON (r.id)
                      r.id, r.title, r.rank
               FROM  (
                  SELECT word        -- remove duplicate words early
                  FROM   regexp_split_to_table(a.title, '[^a-zA-Z]+') word
                  WHERE  word <> ''  -- trim possible leading / trailing empty words
                  ) w
               CROSS  JOIN LATERAL (
                  SELECT s.id, s.title
                       , ts_rank(s.ts, to_tsquery('english', w.word)) AS rank
                  FROM   assets s
                  WHERE  s.id <> a.id
                  AND    ts_rank(s.ts, to_tsquery('english', w.word)) > 0.5
                  ORDER  BY rank DESC
                  LIMIT  5                  -- max. 5 best matches per word
                  ) r
               ORDER  BY r.id, r.rank DESC  -- take best rank for each dupe result
               ) r
            ORDER  BY r.rank DESC, r.id     -- take best rank overall (id as tiebreaker)
            LIMIT  5                        -- max 5 overall 
            ) resultsforword
     ) AS results
FROM   assets a
WHERE  a.id = 'e';


id
results


e
[{"id": "a", "rank": 0.6079271, "title": "Hello world!"}, {"id": "c", "rank": 0.6079271, "title": "I am above the world"}, {"id": "d", "rank": 0.6079271, "title": "World hello"}]