By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
Help with an interesting Postgres question: Why isn't an Index Only Scan used on a partition accessed via the parent table?.
create schema new_york_citibike;
create table new_york_citibike.citibike_trips(
starttime timestamptz,
start_station_id int,
tripduration interval
);
select setseed(.42);
insert into new_york_citibike.citibike_trips
select now()-random()*'1 week'::interval as starttime,
random()*3 as start_station_id,
random()*'4 hours'::interval as tripduration
from generate_series(1,4,1)
returning *;
CREATE SCHEMA
CREATE TABLE
setseed |
---|
SELECT 1
starttime | start_station_id | tripduration |
---|---|---|
2024-10-26 10:24:32.352195+00 | 1 | 03:44:47.110507 |
2024-10-31 04:25:04.656435+00 | 0 | 00:51:19.64743 |
2024-10-28 04:45:23.826656+00 | 1 | 02:18:07.194556 |
2024-10-26 08:33:32.541909+00 | 2 | 03:52:40.097185 |
INSERT 0 4
SELECT
starttime,
start_station_id,
tripduration,
(SELECT AVG(tripduration)
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips
WHERE start_station_id = outer_trips.start_station_id
) AS avg_duration_for_station,
tripduration -
(SELECT AVG(tripduration)
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips
WHERE start_station_id = outer_trips.start_station_id
) AS difference_from_avg
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips AS outer_trips
LIMIT 25;
starttime | start_station_id | tripduration | avg_duration_for_station | difference_from_avg |
---|---|---|---|---|
2024-10-26 10:24:32.352195+00 | 1 | 03:44:47.110507 | 03:01:27.152532 | 00:43:19.957975 |
2024-10-31 04:25:04.656435+00 | 0 | 00:51:19.64743 | 00:51:19.64743 | 00:00:00 |
2024-10-28 04:45:23.826656+00 | 1 | 02:18:07.194556 | 03:01:27.152532 | -00:43:19.957976 |
2024-10-26 08:33:32.541909+00 | 2 | 03:52:40.097185 | 03:52:40.097185 | 00:00:00 |
SELECT 4
--removing "self-reference" returns global, instead of per-station averages
--same for all rows
SELECT
starttime,
start_station_id,
tripduration,
(SELECT AVG(tripduration)
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips
--WHERE start_station_id = outer_trips.start_station_id
) AS avg_duration_for_station,
tripduration -
(SELECT AVG(tripduration)
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips
--WHERE start_station_id = outer_trips.start_station_id
) AS difference_from_avg
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips AS outer_trips
LIMIT 25;
starttime | start_station_id | tripduration | avg_duration_for_station | difference_from_avg |
---|---|---|---|---|
2024-10-26 10:24:32.352195+00 | 1 | 03:44:47.110507 | 02:41:43.51242 | 01:03:03.598087 |
2024-10-31 04:25:04.656435+00 | 0 | 00:51:19.64743 | 02:41:43.51242 | -01:50:23.86499 |
2024-10-28 04:45:23.826656+00 | 1 | 02:18:07.194556 | 02:41:43.51242 | -00:23:36.317864 |
2024-10-26 08:33:32.541909+00 | 2 | 03:52:40.097185 | 02:41:43.51242 | 01:10:56.584765 |
SELECT 4
SELECT
starttime,
start_station_id,
tripduration,
avg(tripduration)over(w1) as avg_duration_for_station,
tripduration - avg(tripduration)over(w1) AS difference_from_avg
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips AS outer_trips
WINDOW w1 AS (partition by start_station_id)
LIMIT 25;
starttime | start_station_id | tripduration | avg_duration_for_station | difference_from_avg |
---|---|---|---|---|
2024-10-31 04:25:04.656435+00 | 0 | 00:51:19.64743 | 00:51:19.64743 | 00:00:00 |
2024-10-26 10:24:32.352195+00 | 1 | 03:44:47.110507 | 03:01:27.152532 | 00:43:19.957975 |
2024-10-28 04:45:23.826656+00 | 1 | 02:18:07.194556 | 03:01:27.152532 | -00:43:19.957976 |
2024-10-26 08:33:32.541909+00 | 2 | 03:52:40.097185 | 03:52:40.097185 | 00:00:00 |
SELECT 4
SELECT starttime,
start_station_id,
tripduration,
avg_duration_for_station,
tripduration - avg_duration_for_station AS difference_from_avg
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips AS outer_trips
LEFT JOIN (SELECT start_station_id,
AVG(tripduration) AS avg_duration_for_station
FROM /*bigquery-public-data.*/new_york_citibike.citibike_trips
GROUP BY start_station_id
) AS station USING (start_station_id)
LIMIT 25;
starttime | start_station_id | tripduration | avg_duration_for_station | difference_from_avg |
---|---|---|---|---|
2024-10-26 10:24:32.352195+00 | 1 | 03:44:47.110507 | 03:01:27.152532 | 00:43:19.957975 |
2024-10-31 04:25:04.656435+00 | 0 | 00:51:19.64743 | 00:51:19.64743 | 00:00:00 |
2024-10-28 04:45:23.826656+00 | 1 | 02:18:07.194556 | 03:01:27.152532 | -00:43:19.957976 |
2024-10-26 08:33:32.541909+00 | 2 | 03:52:40.097185 | 03:52:40.097185 | 00:00:00 |
SELECT 4