Paste this into a new question or an answer at dba.stackexchange.com:
<!-- --> > /* > ================================================================================ > Pivot example with variable number of columns in the output. > ================================================================================ > > example data is straight forward, imagine a table with a customer identifier, > an invoice date and an amount. > */ > > DROP TABLE IF EXISTS invoice; > CREATE TEMPORARY TABLE invoice ( > id serial not null primary key, > customer_id int not null, > invoice_date date not null, > amount int not null > ); > > > > INSERT INTO invoice (customer_id, invoice_date, amount) > values > (2, '2022-06-16 04:11:50', 8706) > , (2, '2022-02-12 15:26:53', 4163) > , (2, '2022-12-05 23:30:36', 3473) > , (2, '2022-11-01 21:16:45', 5952) > , (2, '2022-10-03 09:35:34', 4170) > , (2, '2022-06-09 03:22:14', 4818) > , (1, '2022-12-02 09:15:26', 463) > , (1, '2022-02-19 09:58:43', 2944) > , (1, '2022-12-03 12:38:20', 5879) > , (2, '2022-01-13 10:54:31', 4294) > , (2, '2022-01-28 14:03:22', 2557) > , (1, '2022-07-10 19:06:52', 6506) > , (2, '2022-02-18 11:50:49', 1105) > , (2, '2022-08-29 06:54:49', 6794) > , (2, '2022-04-15 07:00:34', 2136) > , (1, '2022-07-13 23:30:07', 1421) > , (1, '2022-12-29 12:24:54', 5460) > , (1, '2022-06-20 23:12:59', 4217) > , (1, '2022-04-15 23:47:50', 2695) > , (1, '2022-11-22 05:51:11', 3208) > , (2, '2022-04-11 12:29:28', 1921) > , (2, '2022-01-09 11:51:56', 6512) > , (2, '2022-07-10 23:14:41', 1658) > , (1, '2022-06-02 16:44:31', 3348) > , (1, '2022-11-08 11:15:59', 467) > , (2, '2022-09-01 20:13:31', 9222) > , (2, '2022-09-02 22:11:56', 7029) > , (2, '2022-06-07 11:22:30', 9933) > , (1, '2022-04-11 20:58:57', 2284) > , (2, '2022-12-13 06:30:01', 7945) > , (2, '2022-10-19 17:16:21', 8940) > , (1, '2022-02-16 06:57:09', 1202) > , (2, '2022-01-17 22:02:24', 7567) > , (2, '2022-11-16 17:02:20', 545) > , (2, '2022-12-13 12:40:18', 6266) > , (2, '2022-07-31 00:12:44', 9151) > , (2, '2022-05-20 03:14:10', 7798) > , (1, '2022-04-30 04:32:55', 420) > , (2, '2022-12-10 02:21:36', 9770) > , (1, '2022-09-28 15:55:24', 14) > , (2, '2022-09-29 05:34:56', 2170) > , (2, '2022-09-12 22:58:57', 2598) > , (2, '2022-01-16 09:54:42', 1100) > , (1, '2022-06-26 09:55:05', 7256) > , (1, '2022-02-05 10:42:52', 6024) > , (1, '2022-10-19 16:30:23', 5733) > , (1, '2022-12-28 02:08:37', 2645) > , (2, '2022-02-19 14:22:03', 7315) > , (1, '2022-10-28 05:39:54', 9733) > ; > > <pre> > ✓ > > ✓ > 49 rows affected > </pre> <!-- --> > /* > I'm going to explain the moving parts one by one and we will bring it all > together at the end. > > First thing we need to do is gather all of the data we want to use as > columns in our pivoted output, plus any data we want to break by on > those columns. > Lets just do the first part to start with and then we will come back and show > how to break by other data. > > So lets start by just trying to sum all of the amounts by month and have a > column for each month. This data is for the year 2022, and I want to make sure > we dont miss any empty months in our output where we might have had no revenue, > so we will use generate_series to get a data point for each month. > */ > > SELECT > generate_series::DATE dte > FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') > > <pre> > | dte | > | :--------- | > | 2022-01-01 | > | 2022-02-01 | > | 2022-03-01 | > | 2022-04-01 | > | 2022-05-01 | > | 2022-06-01 | > | 2022-07-01 | > | 2022-08-01 | > | 2022-09-01 | > | 2022-10-01 | > | 2022-11-01 | > | 2022-12-01 | > </pre> <!-- --> > /* > Now, lets take our columns and get the data for each - this will give us a > normal relational dataset where our pivoted "columns" will be their own > column and the data for each will be another column. > > We select from our columns data set and LEFT OUTER JOIN so that if there was > no data for that column we would still get it in our output. > Notice that March does not have any amount, but we still have the row. > */ > > > WITH columns AS ( > SELECT > generate_series::DATE dte > FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') > ) > SELECT > columns.dte > , SUM(COALESCE(invoice.amount, 0)) total > FROM columns > LEFT OUTER > JOIN invoice > ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) > AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) > GROUP BY > columns.dte > ORDER BY > columns.dte > > <pre> > dte | total > :--------- | ----: > 2022-01-01 | 22030 > 2022-02-01 | 22753 > 2022-03-01 | 0 > 2022-04-01 | 9456 > 2022-05-01 | 7798 > 2022-06-01 | 38278 > 2022-07-01 | 18736 > 2022-08-01 | 6794 > 2022-09-01 | 21033 > 2022-10-01 | 28576 > 2022-11-01 | 10172 > 2022-12-01 | 41901 > </pre> <!-- --> > /* > Now comes the secret sauce, the JSONB_OBJECT_AGG() that makes our pivot work. > We aggregate our data together, passing the "column" we want to use as the > first argument and the data we want to aggregate using that column as > the second argument. > > Basically the first argument is your pivoted "column" column from the data set > (or a formatted version of it, like in our example) and the second arg is > the "cell" of data you want for that "column". > > You get a json object with a key for each "column" and its value being its data. > (Remember to scroll down for a more complicated example where we are > aggregating this data by customer) > */ > WITH columns AS ( > SELECT > generate_series::DATE dte > FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') > ) > , data AS ( > SELECT > columns.dte > , SUM(COALESCE(invoice.amount, 0)) total > FROM columns > LEFT OUTER > JOIN invoice > ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) > AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) > GROUP BY > columns.dte > ) > SELECT > JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData > FROM > data; > > <pre> > | pivotdata | > | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | > | {&quot;2022-01&quot;: 22030, &quot;2022-02&quot;: 22753, &quot;2022-03&quot;: 0, &quot;2022-04&quot;: 9456, &quot;2022-05&quot;: 7798, &quot;2022-06&quot;: 38278, &quot;2022-07&quot;: 18736, &quot;2022-08&quot;: 6794, &quot;2022-09&quot;: 21033, &quot;2022-10&quot;: 28576, &quot;2022-11&quot;: 10172, &quot;2022-12&quot;: 41901} | > </pre> <!-- --> > /* > If your application can use the json object as it is you can stop > at the last query, but if you need to get actual columns out and > you dont mind creating a dynamic sql query, you can pull the data > out into columns like this example. > *But* this requires you to know the columns you want before hand. > */ > > > WITH columns AS ( > SELECT > generate_series::DATE dte > FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month') > ) > , data AS ( > SELECT > columns.dte > , SUM(COALESCE(invoice.amount, 0)) total > FROM columns > LEFT OUTER > JOIN invoice > ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) > AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) > GROUP BY > columns.dte > ), result AS ( > SELECT > JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData > FROM > data > ) > SELECT > (pivotData->>'2022-01') "jan" > , (pivotData->>'2022-02') "feb" > , (pivotData->>'2022-03') "mar" > , (pivotData->>'2022-04') "apr" > , (pivotData->>'2022-05') "may" > , (pivotData->>'2022-06') "jun" > , (pivotData->>'2022-07') "jul" > , (pivotData->>'2022-08') "aug" > , (pivotData->>'2022-09') "sep" > , (pivotData->>'2022-10') "oct" > , (pivotData->>'2022-11') "nov" > , (pivotData->>'2022-12') "dec" > > FROM > result > ; > > <pre> > jan | feb | mar | apr | may | jun | jul | aug | sep | oct | nov | dec > :---- | :---- | :-- | :--- | :--- | :---- | :---- | :--- | :---- | :---- | :---- | :---- > 22030 | 22753 | 0 | 9456 | 7798 | 38278 | 18736 | 6794 | 21033 | 28576 | 10172 | 41901 > </pre> <!-- --> > /* > > Now for a more complicated example, lets say you want to do the same thing, > but you want to get totals for each customer. You just need to pull > that grouped data along for the ride. > > Your "columns" query needs to create a column for each month, plus the data > you're grouping by. We use a cross join here with a list of all of our > customers, again so that if we are missing data for one month for a customer > we will still have that column in our output. > */ > > SELECT > generate_series dte > , customer_id > FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') > CROSS JOIN > ( > SELECT DISTINCT > customer_id > FROM > invoice > ) AS customers > > <pre> > dte | customer_id > :--------------------- | ----------: > 2022-01-01 00:00:00+00 | 2 > 2022-01-01 00:00:00+00 | 1 > 2022-02-01 00:00:00+00 | 2 > 2022-02-01 00:00:00+00 | 1 > 2022-03-01 00:00:00+00 | 2 > 2022-03-01 00:00:00+00 | 1 > 2022-04-01 00:00:00+01 | 2 > 2022-04-01 00:00:00+01 | 1 > 2022-05-01 00:00:00+01 | 2 > 2022-05-01 00:00:00+01 | 1 > 2022-06-01 00:00:00+01 | 2 > 2022-06-01 00:00:00+01 | 1 > 2022-07-01 00:00:00+01 | 2 > 2022-07-01 00:00:00+01 | 1 > 2022-08-01 00:00:00+01 | 2 > 2022-08-01 00:00:00+01 | 1 > 2022-09-01 00:00:00+01 | 2 > 2022-09-01 00:00:00+01 | 1 > 2022-10-01 00:00:00+01 | 2 > 2022-10-01 00:00:00+01 | 1 > 2022-11-01 00:00:00+00 | 2 > 2022-11-01 00:00:00+00 | 1 > 2022-12-01 00:00:00+00 | 2 > 2022-12-01 00:00:00+00 | 1 > </pre> <!-- --> > /* > Now we need to group our "data" query by the grouping column and > sum for each "column". > > You can see some examples this time of months with no money. > */ > > WITH columns AS ( > SELECT > generate_series dte > , customer_id > FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') > CROSS JOIN > ( > SELECT DISTINCT > customer_id > FROM > invoice > ) AS customers > ) > SELECT > columns.dte > , columns.customer_id > , SUM(COALESCE(invoice.amount, 0)) total > FROM columns > LEFT OUTER > JOIN invoice > ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) > AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) > AND columns.customer_id = invoice.customer_id > GROUP BY > columns.dte > , columns.customer_id; > > <pre> > dte | customer_id | total > :--------------------- | ----------: | ----: > 2022-12-01 00:00:00+00 | 1 | 14447 > 2022-04-01 00:00:00+01 | 2 | 4057 > 2022-09-01 00:00:00+01 | 1 | 14 > 2022-03-01 00:00:00+00 | 1 | 0 > 2022-03-01 00:00:00+00 | 2 | 0 > 2022-05-01 00:00:00+01 | 1 | 0 > 2022-06-01 00:00:00+01 | 2 | 23457 > 2022-01-01 00:00:00+00 | 2 | 22030 > 2022-12-01 00:00:00+00 | 2 | 27454 > 2022-10-01 00:00:00+01 | 2 | 13110 > 2022-08-01 00:00:00+01 | 2 | 6794 > 2022-08-01 00:00:00+01 | 1 | 0 > 2022-02-01 00:00:00+00 | 1 | 10170 > 2022-02-01 00:00:00+00 | 2 | 12583 > 2022-10-01 00:00:00+01 | 1 | 15466 > 2022-11-01 00:00:00+00 | 1 | 3675 > 2022-05-01 00:00:00+01 | 2 | 7798 > 2022-01-01 00:00:00+00 | 1 | 0 > 2022-06-01 00:00:00+01 | 1 | 14821 > 2022-04-01 00:00:00+01 | 1 | 5399 > 2022-07-01 00:00:00+01 | 2 | 10809 > 2022-09-01 00:00:00+01 | 2 | 21019 > 2022-07-01 00:00:00+01 | 1 | 7927 > 2022-11-01 00:00:00+00 | 2 | 6497 > </pre> <!-- --> > /* > Now we do our final aggregation, passing our "column" heading as the first arg, > the "cell" as the second arg, and the grouping by our customer_id. > > Again if your application can handle the json data then you may be done, but if > you want to turn it back into columns see the next example. > */ > > WITH columns AS ( > SELECT > generate_series dte > , customer_id > FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') > CROSS JOIN > ( > SELECT DISTINCT > customer_id > FROM > invoice > ) AS customers > ), data AS ( > SELECT > columns.dte > , columns.customer_id > , SUM(COALESCE(invoice.amount, 0)) total > FROM columns > LEFT OUTER > JOIN invoice > ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) > AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) > AND columns.customer_id = invoice.customer_id > GROUP BY > columns.dte > , columns.customer_id > ) > SELECT > customer_id > , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData > FROM > data > GROUP BY > customer_id > ; > > <pre> > customer_id | pivotdata > ----------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- > 2 | {&quot;2022-01&quot;: 22030, &quot;2022-02&quot;: 12583, &quot;2022-03&quot;: 0, &quot;2022-04&quot;: 4057, &quot;2022-05&quot;: 7798, &quot;2022-06&quot;: 23457, &quot;2022-07&quot;: 10809, &quot;2022-08&quot;: 6794, &quot;2022-09&quot;: 21019, &quot;2022-10&quot;: 13110, &quot;2022-11&quot;: 6497, &quot;2022-12&quot;: 27454} > 1 | {&quot;2022-01&quot;: 0, &quot;2022-02&quot;: 10170, &quot;2022-03&quot;: 0, &quot;2022-04&quot;: 5399, &quot;2022-05&quot;: 0, &quot;2022-06&quot;: 14821, &quot;2022-07&quot;: 7927, &quot;2022-08&quot;: 0, &quot;2022-09&quot;: 14, &quot;2022-10&quot;: 15466, &quot;2022-11&quot;: 3675, &quot;2022-12&quot;: 14447} > </pre> <!-- --> > /* > And here is our final output back as columns. > */ > WITH columns AS ( > SELECT > generate_series dte > , customer_id > FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month') > CROSS JOIN > ( > SELECT DISTINCT > customer_id > FROM > invoice > ) AS customers > ), data AS ( > SELECT > columns.dte > , columns.customer_id > , SUM(COALESCE(invoice.amount, 0)) total > FROM columns > LEFT OUTER > JOIN invoice > ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) > AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) > AND columns.customer_id = invoice.customer_id > GROUP BY > columns.dte > , columns.customer_id > ), result AS ( > SELECT > customer_id > , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData > FROM > data > GROUP BY > customer_id > ) > SELECT > customer_id > , (pivotData->>'2022-01') "jan" > , (pivotData->>'2022-02') "feb" > , (pivotData->>'2022-03') "mar" > , (pivotData->>'2022-04') "apr" > , (pivotData->>'2022-05') "may" > , (pivotData->>'2022-06') "jun" > , (pivotData->>'2022-07') "jul" > , (pivotData->>'2022-08') "aug" > , (pivotData->>'2022-09') "sep" > , (pivotData->>'2022-10') "oct" > , (pivotData->>'2022-11') "nov" > , (pivotData->>'2022-12') "dec" > FROM > result > ORDER BY customer_id > > <pre> > customer_id | jan | feb | mar | apr | may | jun | jul | aug | sep | oct | nov | dec > ----------: | :---- | :---- | :-- | :--- | :--- | :---- | :---- | :--- | :---- | :---- | :--- | :---- > 1 | 0 | 10170 | 0 | 5399 | 0 | 14821 | 7927 | 0 | 14 | 15466 | 3675 | 14447 > 2 | 22030 | 12583 | 0 | 4057 | 7798 | 23457 | 10809 | 6794 | 21019 | 13110 | 6497 | 27454 > </pre> *db<>fiddle [here](https://dbfiddle.uk/?rdbms=postgres_14&fiddle=39e115cb8afd6e62c0101286ecd08a3f)*
back to fiddle