add batch remove batch split batch comment selection show hidden batches hide batch highlight batch
db<>fiddle
donate feedback about
By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE dataset1
(
id INTEGER PRIMARY KEY,
column4 TEXT
) ;
INSERT INTO dataset1
(id, column4)
SELECT
i, 'column 4 for id ' || i
FROM
generate_series(101, 120) AS s(i);
20 rows affected
CREATE TABLE dataset2
(
column1 TEXT
) ;
INSERT INTO dataset2
(column1)
SELECT
'SOMETHING ' || i
FROM
generate_series (1001, 1020) AS s(i) ;
20 rows affected
SELECT count(DISTINCT column4) FROM dataset1 ;
count
20
WITH original_keys AS
(
-- This creates tuples (id, rn),
-- where rn increases from 1 to number or rows
SELECT
id,
row_number() OVER () AS rn
FROM
dataset1
)
, shuffled_data AS
(
-- This creates tuples (column1, rn)
-- where rn moves between 1 and number of rows, but is randomly shuffled
SELECT
column1,
-- The next statement is what *shuffles* all the data
row_number() OVER (ORDER BY random()) AS rn
FROM
dataset2
)
-- You update your dataset1
-- with the shuffled data, linking back to the original keys
UPDATE
dataset1
SET
column4 = shuffled_data.column1
FROM
shuffled_data
JOIN original_keys ON original_keys.rn = shuffled_data.rn
WHERE
dataset1.id = original_keys.id ;
20 rows affected
SELECT count(DISTINCT column4) FROM dataset1 ;
count
20
SELECT * FROM dataset1 ;
id column4
101 SOMETHING 1014
102 SOMETHING 1019
103 SOMETHING 1001
104 SOMETHING 1009
105 SOMETHING 1008
106 SOMETHING 1013
107 SOMETHING 1016
108 SOMETHING 1010
109 SOMETHING 1007
110 SOMETHING 1020
111 SOMETHING 1005
112 SOMETHING 1003
113 SOMETHING 1017
114 SOMETHING 1018
115 SOMETHING 1004
116 SOMETHING 1015
117 SOMETHING 1011
118 SOMETHING 1002
119 SOMETHING 1012
120 SOMETHING 1006
UPDATE
dataset1
SET
column4 = shuffled_data.column1
FROM
(SELECT
column1,
-- The next statement is what *shuffles* all the data
row_number() OVER (ORDER BY random()) AS rn
FROM
dataset2
) AS shuffled_data
JOIN
(SELECT
id,
row_number() OVER () AS rn
FROM
dataset1)
AS original_keys ON original_keys.rn = shuffled_data.rn
WHERE
dataset1.id = original_keys.id ;
20 rows affected
SELECT * FROM dataset1;
id column4
101 SOMETHING 1019
102 SOMETHING 1006
103 SOMETHING 1013
104 SOMETHING 1010
105 SOMETHING 1007
106 SOMETHING 1009
107 SOMETHING 1005
108 SOMETHING 1017
109 SOMETHING 1015
110 SOMETHING 1014
111 SOMETHING 1001
112 SOMETHING 1004
113 SOMETHING 1012
114 SOMETHING 1003
115 SOMETHING 1020
116 SOMETHING 1011
117 SOMETHING 1016
118 SOMETHING 1008
119 SOMETHING 1002
120 SOMETHING 1018