clear markdown feedback
clear markdown feedback
By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0. 3798917 fiddles created (41833 in the last week).

/* ================================================================================ Pivot example with variable number of columns in the output. ================================================================================ example data is straight forward, imagine a table with a customer identifier, an invoice date and an amount. */ DROP TABLE IF EXISTS invoice; CREATE TEMPORARY TABLE invoice ( id serial not null primary key, customer_id int not null, invoice_date date not null, amount int not null ); INSERT INTO invoice (customer_id, invoice_date, amount) values (2, '2022-06-16 04:11:50', 8706) , (2, '2022-02-12 15:26:53', 4163) , (2, '2022-12-05 23:30:36', 3473) , (2, '2022-11-01 21:16:45', 5952) , (2, '2022-10-03 09:35:34', 4170) , (2, '2022-06-09 03:22:14', 4818) , (1, '2022-12-02 09:15:26', 463) , (1, '2022-02-19 09:58:43', 2944) , (1, '2022-12-03 12:38:20', 5879) , (2, '2022-01-13 10:54:31', 4294) , (2, '2022-01-28 14:03:22', 2557) , (1, '2022-07-10 19:06:52', 6506) , (2, '2022-02-18 11:50:49', 1105) , (2, '2022-08-29 06:54:49', 6794) , (2, '2022-04-15 07:00:34', 2136) , (1, '2022-07-13 23:30:07', 1421) , (1, '2022-12-29 12:24:54', 5460) , (1, '2022-06-20 23:12:59', 4217) , (1, '2022-04-15 23:47:50', 2695) , (1, '2022-11-22 05:51:11', 3208) , (2, '2022-04-11 12:29:28', 1921) , (2, '2022-01-09 11:51:56', 6512) , (2, '2022-07-10 23:14:41', 1658) , (1, '2022-06-02 16:44:31', 3348) , (1, '2022-11-08 11:15:59', 467) , (2, '2022-09-01 20:13:31', 9222) , (2, '2022-09-02 22:11:56', 7029) , (2, '2022-06-07 11:22:30', 9933) , (1, '2022-04-11 20:58:57', 2284) , (2, '2022-12-13 06:30:01', 7945) , (2, '2022-10-19 17:16:21', 8940) , (1, '2022-02-16 06:57:09', 1202) , (2, '2022-01-17 22:02:24', 7567) , (2, '2022-11-16 17:02:20', 545) , (2, '2022-12-13 12:40:18', 6266) , (2, '2022-07-31 00:12:44', 9151) , (2, '2022-05-20 03:14:10', 7798) , (1, '2022-04-30 04:32:55', 420) , (2, '2022-12-10 02:21:36', 9770) , (1, '2022-09-28 15:55:24', 14) , (2, '2022-09-29 05:34:56', 2170) , (2, '2022-09-12 22:58:57', 2598) , (2, '2022-01-16 09:54:42', 1100) , (1, '2022-06-26 09:55:05', 7256) , (1, '2022-02-05 10:42:52', 6024) , (1, '2022-10-19 16:30:23', 5733) , (1, '2022-12-28 02:08:37', 2645) , (2, '2022-02-19 14:22:03', 7315) , (1, '2022-10-28 05:39:54', 9733) ;
49 rows affected
 hidden batch(es)


/* I'm going to explain the moving parts one by one and we will bring it all together at the end. First thing we need to do is gather all of the data we want to use as columns in our pivoted output, plus any data we want to break by on those columns. Lets just do the first part to start with and then we will come back and show how to break by other data. So lets start by just trying to sum all of the amounts by month and have a column for each month. This data is for the year 2022, and I want to make sure we dont miss any empty months in our output where we might have had no revenue, so we will use generate_series to get a data point for each month. */ SELECT generate_series::DATE dte FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month')
dte
2022-01-01
2022-02-01
2022-03-01
2022-04-01
2022-05-01
2022-06-01
2022-07-01
2022-08-01
2022-09-01
2022-10-01
2022-11-01
2022-12-01
 hidden batch(es)


/* Now, lets take our columns and get the data for each - this will give us a normal relational dataset where our pivoted "columns" will be their own column and the data for each will be another column. We select from our columns data set and LEFT OUTER JOIN so that if there was no data for that column we would still get it in our output. Notice that March does not have any amount, but we still have the row. */ WITH columns AS ( SELECT generate_series::DATE dte FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') ) SELECT columns.dte , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) GROUP BY columns.dte ORDER BY columns.dte
dte total
2022-01-01 22030
2022-02-01 22753
2022-03-01 0
2022-04-01 9456
2022-05-01 7798
2022-06-01 38278
2022-07-01 18736
2022-08-01 6794
2022-09-01 21033
2022-10-01 28576
2022-11-01 10172
2022-12-01 41901
 hidden batch(es)


/* Now comes the secret sauce, the JSONB_OBJECT_AGG() that makes our pivot work. We aggregate our data together, passing the "column" we want to use as the first argument and the data we want to aggregate using that column as the second argument. Basically the first argument is your pivoted "column" column from the data set (or a formatted version of it, like in our example) and the second arg is the "cell" of data you want for that "column". You get a json object with a key for each "column" and its value being its data. (Remember to scroll down for a more complicated example where we are aggregating this data by customer) */ WITH columns AS ( SELECT generate_series::DATE dte FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') ) , data AS ( SELECT columns.dte , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) GROUP BY columns.dte ) SELECT JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data;
pivotdata
{"2022-01": 22030, "2022-02": 22753, "2022-03": 0, "2022-04": 9456, "2022-05": 7798, "2022-06": 38278, "2022-07": 18736, "2022-08": 6794, "2022-09": 21033, "2022-10": 28576, "2022-11": 10172, "2022-12": 41901}
 hidden batch(es)


/* If your application can use the json object as it is you can stop at the last query, but if you need to get actual columns out and you dont mind creating a dynamic sql query, you can pull the data out into columns like this example. *But* this requires you to know the columns you want before hand. */ WITH columns AS ( SELECT generate_series::DATE dte FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') ) , data AS ( SELECT columns.dte , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) GROUP BY columns.dte ), result AS ( SELECT JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data ) SELECT (pivotData->>'2022-01') "jan" , (pivotData->>'2022-02') "feb" , (pivotData->>'2022-03') "mar" , (pivotData->>'2022-04') "apr" , (pivotData->>'2022-05') "may" , (pivotData->>'2022-06') "jun" , (pivotData->>'2022-07') "jul" , (pivotData->>'2022-08') "aug" , (pivotData->>'2022-09') "sep" , (pivotData->>'2022-10') "oct" , (pivotData->>'2022-11') "nov" , (pivotData->>'2022-12') "dec" FROM result ;
jan feb mar apr may jun jul aug sep oct nov dec
22030 22753 0 9456 7798 38278 18736 6794 21033 28576 10172 41901
 hidden batch(es)


/* Now for a more complicated example, lets say you want to do the same thing, but you want to get totals for each customer. You just need to pull that grouped data along for the ride. Your "columns" query needs to create a column for each month, plus the data you're grouping by. We use a cross join here with a list of all of our customers, again so that if we are missing data for one month for a customer we will still have that column in our output. */ SELECT generate_series dte , customer_id FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers
dte customer_id
2022-01-01 00:00:00+00 2
2022-01-01 00:00:00+00 1
2022-02-01 00:00:00+00 2
2022-02-01 00:00:00+00 1
2022-03-01 00:00:00+00 2
2022-03-01 00:00:00+00 1
2022-04-01 00:00:00+01 2
2022-04-01 00:00:00+01 1
2022-05-01 00:00:00+01 2
2022-05-01 00:00:00+01 1
2022-06-01 00:00:00+01 2
2022-06-01 00:00:00+01 1
2022-07-01 00:00:00+01 2
2022-07-01 00:00:00+01 1
2022-08-01 00:00:00+01 2
2022-08-01 00:00:00+01 1
2022-09-01 00:00:00+01 2
2022-09-01 00:00:00+01 1
2022-10-01 00:00:00+01 2
2022-10-01 00:00:00+01 1
2022-11-01 00:00:00+00 2
2022-11-01 00:00:00+00 1
2022-12-01 00:00:00+00 2
2022-12-01 00:00:00+00 1
 hidden batch(es)


/* Now we need to group our "data" query by the grouping column and sum for each "column". You can see some examples this time of months with no money. */ WITH columns AS ( SELECT generate_series dte , customer_id FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers ) SELECT columns.dte , columns.customer_id , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) AND columns.customer_id = invoice.customer_id GROUP BY columns.dte , columns.customer_id;
dte customer_id total
2022-12-01 00:00:00+00 1 14447
2022-04-01 00:00:00+01 2 4057
2022-09-01 00:00:00+01 1 14
2022-03-01 00:00:00+00 1 0
2022-03-01 00:00:00+00 2 0
2022-05-01 00:00:00+01 1 0
2022-06-01 00:00:00+01 2 23457
2022-01-01 00:00:00+00 2 22030
2022-12-01 00:00:00+00 2 27454
2022-10-01 00:00:00+01 2 13110
2022-08-01 00:00:00+01 2 6794
2022-08-01 00:00:00+01 1 0
2022-02-01 00:00:00+00 1 10170
2022-02-01 00:00:00+00 2 12583
2022-10-01 00:00:00+01 1 15466
2022-11-01 00:00:00+00 1 3675
2022-05-01 00:00:00+01 2 7798
2022-01-01 00:00:00+00 1 0
2022-06-01 00:00:00+01 1 14821
2022-04-01 00:00:00+01 1 5399
2022-07-01 00:00:00+01 2 10809
2022-09-01 00:00:00+01 2 21019
2022-07-01 00:00:00+01 1 7927
2022-11-01 00:00:00+00 2 6497
 hidden batch(es)


/* Now we do our final aggregation, passing our "column" heading as the first arg, the "cell" as the second arg, and the grouping by our customer_id. Again if your application can handle the json data then you may be done, but if you want to turn it back into columns see the next example. */ WITH columns AS ( SELECT generate_series dte , customer_id FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers ), data AS ( SELECT columns.dte , columns.customer_id , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) AND columns.customer_id = invoice.customer_id GROUP BY columns.dte , columns.customer_id ) SELECT customer_id , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data GROUP BY customer_id ;
customer_id pivotdata
2 {"2022-01": 22030, "2022-02": 12583, "2022-03": 0, "2022-04": 4057, "2022-05": 7798, "2022-06": 23457, "2022-07": 10809, "2022-08": 6794, "2022-09": 21019, "2022-10": 13110, "2022-11": 6497, "2022-12": 27454}
1 {"2022-01": 0, "2022-02": 10170, "2022-03": 0, "2022-04": 5399, "2022-05": 0, "2022-06": 14821, "2022-07": 7927, "2022-08": 0, "2022-09": 14, "2022-10": 15466, "2022-11": 3675, "2022-12": 14447}
 hidden batch(es)


/* And here is our final output back as columns. */ WITH columns AS ( SELECT generate_series dte , customer_id FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers ), data AS ( SELECT columns.dte , columns.customer_id , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) AND columns.customer_id = invoice.customer_id GROUP BY columns.dte , columns.customer_id ), result AS ( SELECT customer_id , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data GROUP BY customer_id ) SELECT customer_id , (pivotData->>'2022-01') "jan" , (pivotData->>'2022-02') "feb" , (pivotData->>'2022-03') "mar" , (pivotData->>'2022-04') "apr" , (pivotData->>'2022-05') "may" , (pivotData->>'2022-06') "jun" , (pivotData->>'2022-07') "jul" , (pivotData->>'2022-08') "aug" , (pivotData->>'2022-09') "sep" , (pivotData->>'2022-10') "oct" , (pivotData->>'2022-11') "nov" , (pivotData->>'2022-12') "dec" FROM result ORDER BY customer_id
customer_id jan feb mar apr may jun jul aug sep oct nov dec
1 0 10170 0 5399 0 14821 7927 0 14 15466 3675 14447
2 22030 12583 0 4057 7798 23457 10809 6794 21019 13110 6497 27454
 hidden batch(es)