By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
CREATE TABLE employee (
id int PRIMARY KEY -- !
, name text -- do NOT use char(n) !
, division_id int
);
INSERT INTO employee VALUES
(1, 'John', 1),
(2, 'Amber', 2),
(3, 'Mike', 1),
(4, 'Jimmy', 1),
(5, 'Kathy', 2);
CREATE TABLE attendance (
id int PRIMARY KEY --!
, employee_id int NOT NULL REFERENCES employee -- FK!
, activity_type int
, created_at timestamptz NOT NULL
);
INSERT INTO attendance VALUES
(1, 1, 1,'2020-11-18 07:10:25 +00:00'),
(2, 2, 1,'2020-11-18 07:30:25 +00:00'),
(3, 3, 1,'2020-11-18 07:50:25 +00:00'),
(4, 2, 2,'2020-11-18 19:10:25 +00:00'),
(5, 3, 2,'2020-11-18 19:22:38 +00:00'),
(6, 1, 2,'2020-11-18 20:01:05 +00:00'),
(7, 1, 1,'2020-11-19 07:11:23 +00:00'),
(8, 1, 2,'2020-11-19 16:21:53 +00:00'),
(9, 1, 1,'2020-11-19 19:11:23 +00:00'),
(10, 1, 2,'2020-11-19 20:21:53 +00:00'),
(11, 1, 1,'2020-11-20 07:41:38 +00:00'),
(12, 1, 2,'2020-11-20 08:52:01 +00:00');
TABLE attendance;
TABLE employee;
5 rows affected
12 rows affected
id | employee_id | activity_type | created_at |
---|---|---|---|
1 | 1 | 1 | 2020-11-18 07:10:25+00 |
2 | 2 | 1 | 2020-11-18 07:30:25+00 |
3 | 3 | 1 | 2020-11-18 07:50:25+00 |
4 | 2 | 2 | 2020-11-18 19:10:25+00 |
5 | 3 | 2 | 2020-11-18 19:22:38+00 |
6 | 1 | 2 | 2020-11-18 20:01:05+00 |
7 | 1 | 1 | 2020-11-19 07:11:23+00 |
8 | 1 | 2 | 2020-11-19 16:21:53+00 |
9 | 1 | 1 | 2020-11-19 19:11:23+00 |
10 | 1 | 2 | 2020-11-19 20:21:53+00 |
11 | 1 | 1 | 2020-11-20 07:41:38+00 |
12 | 1 | 2 | 2020-11-20 08:52:01+00 |
id | name | division_id |
---|---|---|
1 | John | 1 |
2 | Amber | 2 |
3 | Mike | 1 |
4 | Jimmy | 1 |
5 | Kathy | 2 |
-- time zones fixed (among other things):
SELECT *
FROM ( -- complete employee/date grid for division in range
SELECT g.d::date AS the_date, id AS employee_id, name, division_id
FROM (
SELECT generate_series(MIN(created_at) AT TIME ZONE 'Asia/Kuala_Lumpur'
, MAX(created_at) AT TIME ZONE 'Asia/Kuala_Lumpur'
, interval '1 day'
)
FROM attendance
) g(d)
CROSS JOIN employee e
WHERE e.division_id = 1
) de
LEFT JOIN ( -- checkins & checkouts per employee/date for division in range
SELECT employee_id, ts::date AS the_date
, array_agg(id) as rows
, min(ts) FILTER (WHERE activity_type = 1) AS min_check_in
, max(ts) FILTER (WHERE activity_type = 2) AS max_check_out
, array_agg(ts::time) FILTER (WHERE activity_type = 1) AS check_ins
, array_agg(ts::time) FILTER (WHERE activity_type = 2) AS check_outs
FROM (
SELECT a.id, a.employee_id, a.activity_type, a.created_at AT TIME ZONE 'Asia/Kuala_Lumpur' AS ts -- convert to timestamp
FROM employee e
JOIN attendance a ON a.employee_id = e.id
-- WHERE a.created_at >= timestamp '2020-11-20' AT TIME ZONE 'Asia/Kuala_Lumpur' -- "sargable" expressions
-- AND a.created_at < timestamp '2020-11-21' AT TIME ZONE 'Asia/Kuala_Lumpur' -- exclusive upper bound (includes all of 2020-11-20);
AND e.division_id = 1
ORDER BY a.employee_id, a.created_at, a.activity_type -- optional to guarantee sorted arrays
) sub
GROUP BY 1, 2
) a USING (the_date, employee_id)
ORDER BY 1, 2;
the_date | employee_id | name | division_id | rows | min_check_in | max_check_out | check_ins | check_outs |
---|---|---|---|---|---|---|---|---|
2020-11-18 | 1 | John | 1 | {1} | 2020-11-18 15:10:25 | null | {15:10:25} | null |
2020-11-18 | 3 | Mike | 1 | {3} | 2020-11-18 15:50:25 | null | {15:50:25} | null |
2020-11-18 | 4 | Jimmy | 1 | null | null | null | null | null |
2020-11-19 | 1 | John | 1 | {6,7} | 2020-11-19 15:11:23 | 2020-11-19 04:01:05 | {15:11:23} | {04:01:05} |
2020-11-19 | 3 | Mike | 1 | {5} | null | 2020-11-19 03:22:38 | null | {03:22:38} |
2020-11-19 | 4 | Jimmy | 1 | null | null | null | null | null |
2020-11-20 | 1 | John | 1 | {8,9,10,11,12} | 2020-11-20 03:11:23 | 2020-11-20 16:52:01 | {03:11:23,15:41:38} | {00:21:53,04:21:53,16:52:01} |
2020-11-20 | 3 | Mike | 1 | null | null | null | null | null |
2020-11-20 | 4 | Jimmy | 1 | null | null | null | null | null |
-- original
SELECT
d::date AT TIME ZONE 'Asia/Kuala_Lumpur' AS created_date,
e.id,
e.name,
e.division_id,
ARRAY_AGG(
a.id
) as rows,
MIN(a.created_at) FILTER (WHERE a.activity_type = 1) as min_time_in,
MAX(a.created_at) FILTER (WHERE a.activity_type = 2) as max_time_out,
ARRAY_AGG(
CASE
WHEN a.activity_type = 1
THEN a.created_at
ELSE NULL
END
) as check_ins,
ARRAY_AGG(
CASE
WHEN a.activity_type = 2
THEN a.created_at
ELSE NULL
END
) as check_outs
FROM (SELECT MIN(created_at), MAX(created_at) FROM attendance) AS r(startdate,enddate)
, generate_series(
startdate::timestamp AT TIME ZONE 'Asia/Kuala_Lumpur',
enddate::timestamp AT TIME ZONE 'Asia/Kuala_Lumpur',
interval '1 day') g(d)
CROSS JOIN employee e
LEFT JOIN attendance a ON a.created_at::date = d::date AND e.id = a.employee_id
where created_at >= timestamp '2020-11-20' AT TIME ZONE 'Asia/Kuala_Lumpur' and created_at < timestamp '2020-11-20' AT TIME ZONE 'Asia/Kuala_Lumpur' + interval '1 day'
and division_id = 1
GROUP BY
created_date
created_date | id | name | division_id | rows | min_time_in | max_time_out | check_ins | check_outs |
---|---|---|---|---|---|---|---|---|
2020-11-19 08:00:00 | 1 | John | 1 | {8,9,10} | 2020-11-19 19:11:23+00 | 2020-11-19 20:21:53+00 | {NULL,"2020-11-19 19:11:23+00",NULL} | {"2020-11-19 16:21:53+00",NULL,"2020-11-19 20:21:53+00"} |