Postgres 14

By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.

Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.

/*
================================================================================
Pivot example with variable number of columns in the output.
================================================================================

example data is straight forward, imagine a table with a customer identifier, 
an invoice date and an amount.
*/

DROP TABLE IF EXISTS invoice;
CREATE TEMPORARY TABLE invoice (
  id serial not null primary key,
  customer_id int not null,
  invoice_date date not null,
  amount int not null
);



INSERT INTO invoice (customer_id, invoice_date, amount)
values
  (2, '2022-06-16 04:11:50', 8706)
, (2, '2022-02-12 15:26:53', 4163)
, (2, '2022-12-05 23:30:36', 3473)
, (2, '2022-11-01 21:16:45', 5952)
, (2, '2022-10-03 09:35:34', 4170)
, (2, '2022-06-09 03:22:14', 4818)
, (1, '2022-12-02 09:15:26', 463)
, (1, '2022-02-19 09:58:43', 2944)
, (1, '2022-12-03 12:38:20', 5879)
, (2, '2022-01-13 10:54:31', 4294)
, (2, '2022-01-28 14:03:22', 2557)
, (1, '2022-07-10 19:06:52', 6506)
, (2, '2022-02-18 11:50:49', 1105)
, (2, '2022-08-29 06:54:49', 6794)
, (2, '2022-04-15 07:00:34', 2136)
, (1, '2022-07-13 23:30:07', 1421)
, (1, '2022-12-29 12:24:54', 5460)
, (1, '2022-06-20 23:12:59', 4217)
, (1, '2022-04-15 23:47:50', 2695)
, (1, '2022-11-22 05:51:11', 3208)
, (2, '2022-04-11 12:29:28', 1921)
, (2, '2022-01-09 11:51:56', 6512)
, (2, '2022-07-10 23:14:41', 1658)
, (1, '2022-06-02 16:44:31', 3348)
, (1, '2022-11-08 11:15:59', 467)
, (2, '2022-09-01 20:13:31', 9222)
, (2, '2022-09-02 22:11:56', 7029)
, (2, '2022-06-07 11:22:30', 9933)
, (1, '2022-04-11 20:58:57', 2284)
, (2, '2022-12-13 06:30:01', 7945)
, (2, '2022-10-19 17:16:21', 8940)
, (1, '2022-02-16 06:57:09', 1202)
, (2, '2022-01-17 22:02:24', 7567)
, (2, '2022-11-16 17:02:20', 545)
, (2, '2022-12-13 12:40:18', 6266)
, (2, '2022-07-31 00:12:44', 9151)
, (2, '2022-05-20 03:14:10', 7798)
, (1, '2022-04-30 04:32:55', 420)
, (2, '2022-12-10 02:21:36', 9770)
, (1, '2022-09-28 15:55:24', 14)
, (2, '2022-09-29 05:34:56', 2170)
, (2, '2022-09-12 22:58:57', 2598)
, (2, '2022-01-16 09:54:42', 1100)
, (1, '2022-06-26 09:55:05', 7256)
, (1, '2022-02-05 10:42:52', 6024)
, (1, '2022-10-19 16:30:23', 5733)
, (1, '2022-12-28 02:08:37', 2645)
, (2, '2022-02-19 14:22:03', 7315)
, (1, '2022-10-28 05:39:54', 9733)
;

DROP TABLE



CREATE TABLE



INSERT 0 49


/*
I'm going to explain the moving parts one by one and we will bring it all 
together at the end.

First thing we need to do is gather all of the data we want to use as 
columns in our pivoted output, plus any data we want to break by on 
those columns.
Lets just do the first part to start with and then we will come back and show 
how to break by other data.

So lets start by just trying to sum all of the amounts by month and have a 
column for each month.  This data is for the year 2022, and I want to make sure
we dont miss any empty months in our output where we might have had no revenue, 
so we will use generate_series to get a data point for each month.
*/

SELECT
  generate_series::DATE dte
FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month')


dte


2022-01-01

2022-02-01

2022-03-01

2022-04-01

2022-05-01

2022-06-01

2022-07-01

2022-08-01

2022-09-01

2022-10-01

2022-11-01

2022-12-01



SELECT 12


/*
Now, lets take our columns and get the data for each - this will give us a 
normal relational dataset where our pivoted "columns" will be their own 
column and the data for each will be another column.

We select from our columns data set and LEFT OUTER JOIN so that if there was 
no data for that column we would still get it in our output. 
Notice that March does not have any amount, but we still have the row.
*/


WITH columns AS (
  SELECT
    generate_series::DATE dte
  FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month')
)
SELECT
    columns.dte
  , SUM(COALESCE(invoice.amount, 0)) total
FROM columns
LEFT OUTER
  JOIN invoice
  ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte)
  AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte)
GROUP BY
    columns.dte
ORDER BY
    columns.dte


dte
total


2022-01-01
22030

2022-02-01
22753

2022-03-01
0

2022-04-01
9456

2022-05-01
7798

2022-06-01
38278

2022-07-01
18736

2022-08-01
6794

2022-09-01
21033

2022-10-01
28576

2022-11-01
10172

2022-12-01
41901



SELECT 12


/*
Now comes the secret sauce, the JSONB_OBJECT_AGG() that makes our pivot work.  
We aggregate our data together, passing the "column" we want to use as the 
first argument and the data we want to aggregate using that column as 
the second argument.

Basically the first argument is your pivoted "column" column from the data set 
(or a formatted version of it, like in our example) and the second arg is 
the "cell" of data you want for that "column".

You get a json object with a key for each "column" and its value being its data.
(Remember to scroll down for a more complicated example where we are
aggregating this data by customer)
*/
WITH columns AS (
  SELECT
    generate_series::DATE dte
  FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month')
)
, data AS (
  SELECT
      columns.dte
    , SUM(COALESCE(invoice.amount, 0)) total
  FROM columns
  LEFT OUTER
    JOIN invoice
    ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte)
    AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte)
  GROUP BY
      columns.dte
)
SELECT
  JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData
FROM
  data;


pivotdata


{"2022-01": 22030, "2022-02": 22753, "2022-03": 0, "2022-04": 9456, "2022-05": 7798, "2022-06": 38278, "2022-07": 18736, "2022-08": 6794, "2022-09": 21033, "2022-10": 28576, "2022-11": 10172, "2022-12": 41901}



SELECT 1


/*
If your application can use the json object as it is you can stop 
at the last query, but if you need to get actual columns out and 
you dont mind creating a dynamic sql query, you can pull the data 
out into columns like this example.  
*But* this requires you to know the columns you want before hand.
*/


WITH columns AS (
  SELECT
    generate_series::DATE dte
  FROM GENERATE_SERIES('2022-01-01'::DATE, '2022-12-31', '1 month')
)
, data AS (
  SELECT
      columns.dte
    , SUM(COALESCE(invoice.amount, 0)) total
  FROM columns
  LEFT OUTER
    JOIN invoice
    ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte)
    AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte)
  GROUP BY
      columns.dte
), result AS (
SELECT
  JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData
FROM
  data
)
SELECT
    (pivotData->>'2022-01') "jan"
  , (pivotData->>'2022-02') "feb"
  , (pivotData->>'2022-03') "mar"
  , (pivotData->>'2022-04') "apr"
  , (pivotData->>'2022-05') "may"
  , (pivotData->>'2022-06') "jun"
  , (pivotData->>'2022-07') "jul"
  , (pivotData->>'2022-08') "aug"
  , (pivotData->>'2022-09') "sep"
  , (pivotData->>'2022-10') "oct"
  , (pivotData->>'2022-11') "nov"
  , (pivotData->>'2022-12') "dec"

FROM
  result
  ;


jan
feb
mar
apr
may
jun
jul
aug
sep
oct
nov
dec


22030
22753
0
9456
7798
38278
18736
6794
21033
28576
10172
41901



SELECT 1


/*

Now for a more complicated example, lets say you want to do the same thing,
but you want to get totals for each customer.  You just need to pull 
that grouped data along for the ride.

Your "columns" query needs to create a column for each month, plus the data 
you're grouping by. We use a cross join here with a list of all of our 
customers, again so that if we are missing data for one month for a customer 
we will still have that column in our output.
*/

SELECT
    generate_series dte
  , customer_id
FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month')
CROSS JOIN
  (
    SELECT DISTINCT
      customer_id
    FROM
      invoice
  ) AS customers


dte
customer_id


2022-01-01 00:00:00+00
2

2022-01-01 00:00:00+00
1

2022-02-01 00:00:00+00
2

2022-02-01 00:00:00+00
1

2022-03-01 00:00:00+00
2

2022-03-01 00:00:00+00
1

2022-04-01 00:00:00+01
2

2022-04-01 00:00:00+01
1

2022-05-01 00:00:00+01
2

2022-05-01 00:00:00+01
1

2022-06-01 00:00:00+01
2

2022-06-01 00:00:00+01
1

2022-07-01 00:00:00+01
2

2022-07-01 00:00:00+01
1

2022-08-01 00:00:00+01
2

2022-08-01 00:00:00+01
1

2022-09-01 00:00:00+01
2

2022-09-01 00:00:00+01
1

2022-10-01 00:00:00+01
2

2022-10-01 00:00:00+01
1

2022-11-01 00:00:00+00
2

2022-11-01 00:00:00+00
1

2022-12-01 00:00:00+00
2

2022-12-01 00:00:00+00
1



SELECT 24


/*
Now we need to group our "data" query by the grouping column and 
sum for each "column".

You can see some examples this time of months with no money.
*/

WITH columns AS (
  SELECT
       generate_series dte
     , customer_id
  FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month')
  CROSS JOIN
    (
      SELECT DISTINCT
        customer_id
      FROM
        invoice
    ) AS customers
)
SELECT
    columns.dte
  , columns.customer_id
  , SUM(COALESCE(invoice.amount, 0)) total
FROM columns
LEFT OUTER
  JOIN invoice
  ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte)
  AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte)
  AND columns.customer_id = invoice.customer_id
GROUP BY
    columns.dte
  , columns.customer_id;


dte
customer_id
total


2022-12-01 00:00:00+00
1
14447

2022-04-01 00:00:00+01
2
4057

2022-09-01 00:00:00+01
1
14

2022-03-01 00:00:00+00
1
0

2022-03-01 00:00:00+00
2
0

2022-05-01 00:00:00+01
1
0

2022-06-01 00:00:00+01
2
23457

2022-01-01 00:00:00+00
2
22030

2022-12-01 00:00:00+00
2
27454

2022-10-01 00:00:00+01
2
13110

2022-08-01 00:00:00+01
2
6794

2022-08-01 00:00:00+01
1
0

2022-02-01 00:00:00+00
1
10170

2022-02-01 00:00:00+00
2
12583

2022-10-01 00:00:00+01
1
15466

2022-11-01 00:00:00+00
1
3675

2022-05-01 00:00:00+01
2
7798

2022-01-01 00:00:00+00
1
0

2022-06-01 00:00:00+01
1
14821

2022-04-01 00:00:00+01
1
5399

2022-07-01 00:00:00+01
2
10809

2022-09-01 00:00:00+01
2
21019

2022-07-01 00:00:00+01
1
7927

2022-11-01 00:00:00+00
2
6497



SELECT 24


/*
Now we do our final aggregation, passing our "column" heading as the first arg, 
the "cell" as the second arg, and the grouping by our customer_id.

Again if your application can handle the json data then you may be done, but if 
you want to turn it back into columns see the next example.
*/

WITH columns AS (
  SELECT
       generate_series dte
     , customer_id
  FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month')
  CROSS JOIN
    (
      SELECT DISTINCT
        customer_id
      FROM
        invoice
    ) AS customers
), data AS (
  SELECT
      columns.dte
    , columns.customer_id
    , SUM(COALESCE(invoice.amount, 0)) total
  FROM columns
  LEFT OUTER
    JOIN invoice
    ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte)
    AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte)
    AND columns.customer_id = invoice.customer_id
  GROUP BY
      columns.dte
    , columns.customer_id
)
SELECT
    customer_id
  , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData
FROM
  data
GROUP BY
  customer_id
;


customer_id
pivotdata


2
{"2022-01": 22030, "2022-02": 12583, "2022-03": 0, "2022-04": 4057, "2022-05": 7798, "2022-06": 23457, "2022-07": 10809, "2022-08": 6794, "2022-09": 21019, "2022-10": 13110, "2022-11": 6497, "2022-12": 27454}

1
{"2022-01": 0, "2022-02": 10170, "2022-03": 0, "2022-04": 5399, "2022-05": 0, "2022-06": 14821, "2022-07": 7927, "2022-08": 0, "2022-09": 14, "2022-10": 15466, "2022-11": 3675, "2022-12": 14447}



SELECT 2


/*
And here is our final output back as columns.
*/
WITH columns AS (
  SELECT
       generate_series dte
     , customer_id
  FROM generate_series('2022-01-01'::date, '2022-12-31', '1 month')
  CROSS JOIN
    (
      SELECT DISTINCT
        customer_id
      FROM
        invoice
    ) AS customers
), data AS (
  SELECT
      columns.dte
    , columns.customer_id
    , SUM(COALESCE(invoice.amount, 0)) total
  FROM columns
  LEFT OUTER
    JOIN invoice
    ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte)
    AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte)
    AND columns.customer_id = invoice.customer_id
  GROUP BY
      columns.dte
    , columns.customer_id
), result AS (
  SELECT
    customer_id
    , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData
  FROM
    data
  GROUP BY
    customer_id
)
SELECT
    customer_id
  , (pivotData->>'2022-01') "jan"
  , (pivotData->>'2022-02') "feb"
  , (pivotData->>'2022-03') "mar"
  , (pivotData->>'2022-04') "apr"
  , (pivotData->>'2022-05') "may"
  , (pivotData->>'2022-06') "jun"
  , (pivotData->>'2022-07') "jul"
  , (pivotData->>'2022-08') "aug"
  , (pivotData->>'2022-09') "sep"
  , (pivotData->>'2022-10') "oct"
  , (pivotData->>'2022-11') "nov"
  , (pivotData->>'2022-12') "dec"
FROM
  result
ORDER BY customer_id


customer_id
jan
feb
mar
apr
may
jun
jul
aug
sep
oct
nov
dec


1
0
10170
0
5399
0
14821
7927
0
14
15466
3675
14447

2
22030
12583
0
4057
7798
23457
10809
6794
21019
13110
6497
27454



SELECT 2

dte	total
2022-01-01	22030
2022-02-01	22753
2022-03-01	0
2022-04-01	9456
2022-05-01	7798
2022-06-01	38278
2022-07-01	18736
2022-08-01	6794
2022-09-01	21033
2022-10-01	28576
2022-11-01	10172
2022-12-01	41901

dte	customer_id
2022-01-01 00:00:00+00	2
2022-01-01 00:00:00+00	1
2022-02-01 00:00:00+00	2
2022-02-01 00:00:00+00	1
2022-03-01 00:00:00+00	2
2022-03-01 00:00:00+00	1
2022-04-01 00:00:00+01	2
2022-04-01 00:00:00+01	1
2022-05-01 00:00:00+01	2
2022-05-01 00:00:00+01	1
2022-06-01 00:00:00+01	2
2022-06-01 00:00:00+01	1
2022-07-01 00:00:00+01	2
2022-07-01 00:00:00+01	1
2022-08-01 00:00:00+01	2
2022-08-01 00:00:00+01	1
2022-09-01 00:00:00+01	2
2022-09-01 00:00:00+01	1
2022-10-01 00:00:00+01	2
2022-10-01 00:00:00+01	1
2022-11-01 00:00:00+00	2
2022-11-01 00:00:00+00	1
2022-12-01 00:00:00+00	2
2022-12-01 00:00:00+00	1

dte	customer_id	total
2022-12-01 00:00:00+00	1	14447
2022-04-01 00:00:00+01	2	4057
2022-09-01 00:00:00+01	1	14
2022-03-01 00:00:00+00	1	0
2022-03-01 00:00:00+00	2	0
2022-05-01 00:00:00+01	1	0
2022-06-01 00:00:00+01	2	23457
2022-01-01 00:00:00+00	2	22030
2022-12-01 00:00:00+00	2	27454
2022-10-01 00:00:00+01	2	13110
2022-08-01 00:00:00+01	2	6794
2022-08-01 00:00:00+01	1	0
2022-02-01 00:00:00+00	1	10170
2022-02-01 00:00:00+00	2	12583
2022-10-01 00:00:00+01	1	15466
2022-11-01 00:00:00+00	1	3675
2022-05-01 00:00:00+01	2	7798
2022-01-01 00:00:00+00	1	0
2022-06-01 00:00:00+01	1	14821
2022-04-01 00:00:00+01	1	5399
2022-07-01 00:00:00+01	2	10809
2022-09-01 00:00:00+01	2	21019
2022-07-01 00:00:00+01	1	7927
2022-11-01 00:00:00+00	2	6497

customer_id	pivotdata
2	{"2022-01": 22030, "2022-02": 12583, "2022-03": 0, "2022-04": 4057, "2022-05": 7798, "2022-06": 23457, "2022-07": 10809, "2022-08": 6794, "2022-09": 21019, "2022-10": 13110, "2022-11": 6497, "2022-12": 27454}
1	{"2022-01": 0, "2022-02": 10170, "2022-03": 0, "2022-04": 5399, "2022-05": 0, "2022-06": 14821, "2022-07": 7927, "2022-08": 0, "2022-09": 14, "2022-10": 15466, "2022-11": 3675, "2022-12": 14447}