By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE dataset1
(
id INTEGER PRIMARY KEY,
column4 TEXT
) ;
INSERT INTO dataset1
(id, column4)
SELECT
i, 'column 4 for id ' || i
FROM
generate_series(101, 120) AS s(i);
20 rows affected
CREATE TABLE dataset2
(
column1 TEXT
) ;
INSERT INTO dataset2
(column1)
SELECT
'SOMETHING ' || i
FROM
generate_series (1001, 1020) AS s(i) ;
20 rows affected
SELECT count(DISTINCT column4) FROM dataset1 ;
count |
---|
20 |
WITH original_keys AS
(
-- This creates tuples (id, rn),
-- where rn increases from 1 to number or rows
SELECT
id,
row_number() OVER () AS rn
FROM
dataset1
)
, shuffled_data AS
(
-- This creates tuples (column1, rn)
-- where rn moves between 1 and number of rows, but is randomly shuffled
SELECT
column1,
-- The next statement is what *shuffles* all the data
row_number() OVER (ORDER BY random()) AS rn
FROM
dataset2
)
-- You update your dataset1
-- with the shuffled data, linking back to the original keys
UPDATE
dataset1
SET
column4 = shuffled_data.column1
FROM
shuffled_data
JOIN original_keys ON original_keys.rn = shuffled_data.rn
WHERE
dataset1.id = original_keys.id ;
20 rows affected
SELECT count(DISTINCT column4) FROM dataset1 ;
count |
---|
20 |
SELECT * FROM dataset1 ;
id | column4 |
---|---|
101 | SOMETHING 1014 |
102 | SOMETHING 1019 |
103 | SOMETHING 1001 |
104 | SOMETHING 1009 |
105 | SOMETHING 1008 |
106 | SOMETHING 1013 |
107 | SOMETHING 1016 |
108 | SOMETHING 1010 |
109 | SOMETHING 1007 |
110 | SOMETHING 1020 |
111 | SOMETHING 1005 |
112 | SOMETHING 1003 |
113 | SOMETHING 1017 |
114 | SOMETHING 1018 |
115 | SOMETHING 1004 |
116 | SOMETHING 1015 |
117 | SOMETHING 1011 |
118 | SOMETHING 1002 |
119 | SOMETHING 1012 |
120 | SOMETHING 1006 |
UPDATE
dataset1
SET
column4 = shuffled_data.column1
FROM
(SELECT
column1,
-- The next statement is what *shuffles* all the data
row_number() OVER (ORDER BY random()) AS rn
FROM
dataset2
) AS shuffled_data
JOIN
(SELECT
id,
row_number() OVER () AS rn
FROM
dataset1)
AS original_keys ON original_keys.rn = shuffled_data.rn
WHERE
dataset1.id = original_keys.id ;
20 rows affected
SELECT * FROM dataset1;
id | column4 |
---|---|
101 | SOMETHING 1019 |
102 | SOMETHING 1006 |
103 | SOMETHING 1013 |
104 | SOMETHING 1010 |
105 | SOMETHING 1007 |
106 | SOMETHING 1009 |
107 | SOMETHING 1005 |
108 | SOMETHING 1017 |
109 | SOMETHING 1015 |
110 | SOMETHING 1014 |
111 | SOMETHING 1001 |
112 | SOMETHING 1004 |
113 | SOMETHING 1012 |
114 | SOMETHING 1003 |
115 | SOMETHING 1020 |
116 | SOMETHING 1011 |
117 | SOMETHING 1016 |
118 | SOMETHING 1008 |
119 | SOMETHING 1002 |
120 | SOMETHING 1018 |