By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
CREATE TABLE users (
user_id INT UNSIGNED PRIMARY KEY,
activated_at DATETIME NOT NULL,
INDEX idx_cohort_start_date ((DATE(activated_at)))
);
INSERT INTO users (user_id, activated_at) VALUES
(1, '2013-06-20 01:00:00'),
(2, '2013-06-20 02:00:00'),
(3, '2013-06-20 03:00:00'),
(4, '2013-06-20 04:00:00'),
(5, '2013-06-20 05:00:00'),
(6, '2013-06-21 06:00:00'),
(7, '2013-06-21 07:00:00'),
(8, '2013-06-22 07:00:00');
CREATE TABLE events (
user_id INT UNSIGNED,
event_type ENUM('signup_flow', 'engagement') NOT NULL,
occurred_at DATETIME NOT NULL,
PRIMARY KEY (user_id, event_type, occurred_at),
FOREIGN KEY (user_id) REFERENCES users (user_id)
);
INSERT INTO events VALUES
(1, 'signup_flow', '2013-06-20 01:00:00'),
(2, 'signup_flow', '2013-06-20 02:00:00'),
(3, 'signup_flow', '2013-06-20 03:00:00'),
(4, 'signup_flow', '2013-06-20 04:00:00'),
(5, 'signup_flow', '2013-06-20 05:00:00'),
(6, 'signup_flow', '2013-06-21 06:00:00'),
(7, 'signup_flow', '2013-06-21 07:00:00'),
(8, 'signup_flow', '2013-06-22 07:00:00'),
(1, 'engagement', '2013-06-28 01:00:00'),
(2, 'engagement', '2013-06-28 01:00:00'),
(3, 'engagement', '2013-06-28 01:00:00'),
(4, 'engagement', '2013-06-28 01:00:00'),
(5, 'engagement', '2013-06-28 01:00:00'),
Records: 8 Duplicates: 0 Warnings: 0
Records: 18 Duplicates: 0 Warnings: 0
WITH cohorts AS (
SELECT
DATE(activated_at) AS cohort_start_date,
COUNT(*) AS total_users
FROM users
GROUP BY 1
),
weekly_stats AS (
SELECT
DATE(u.activated_at) AS cohort_start_date,
YEARWEEK(e.occurred_at, 0) AS year_week,
COUNT(DISTINCT e.user_id) AS active_users
FROM users u
JOIN events e ON u.user_id = e.user_id
WHERE e.event_type = 'engagement'
GROUP BY cohort_start_date, year_week
)
SELECT
cohorts.cohort_start_date,
cohorts.total_users AS total_users,
weekly_stats.year_week,
weekly_stats.active_users,
weekly_stats.active_users / cohorts.total_users * 100 AS retention_rate
FROM cohorts
JOIN weekly_stats
ON cohorts.cohort_start_date = weekly_stats.cohort_start_date
ORDER BY cohort_start_date, year_week;
cohort_start_date | total_users | year_week | active_users | retention_rate |
---|---|---|---|---|
2013-06-20 | 5 | 201325 | 5 | 100.0000 |
2013-06-20 | 5 | 201326 | 3 | 60.0000 |
2013-06-20 | 5 | 201327 | 1 | 20.0000 |
2013-06-21 | 2 | 201330 | 1 | 50.0000 |
/*
* If there's the possibility of very few people activating their accounts
* on a given date and then never "engaging", then you need a LEFT JOIN
* between cohorts and weekly_stats
*/
WITH cohorts AS (
SELECT
DATE(activated_at) AS cohort_start_date,
COUNT(*) AS total_users
FROM users
GROUP BY 1
),
weekly_stats AS (
SELECT
DATE(u.activated_at) AS cohort_start_date,
YEARWEEK(e.occurred_at, 0) AS year_week,
COUNT(DISTINCT e.user_id) AS active_users
FROM users u
JOIN events e ON u.user_id = e.user_id
WHERE e.event_type = 'engagement'
GROUP BY cohort_start_date, year_week
)
SELECT
cohorts.cohort_start_date,
cohorts.total_users AS total_users,
weekly_stats.year_week,
weekly_stats.active_users,
weekly_stats.active_users / cohorts.total_users * 100 AS retention_rate
FROM cohorts
LEFT JOIN weekly_stats
ON cohorts.cohort_start_date = weekly_stats.cohort_start_date
ORDER BY cohort_start_date, year_week;
cohort_start_date | total_users | year_week | active_users | retention_rate |
---|---|---|---|---|
2013-06-20 | 5 | 201325 | 5 | 100.0000 |
2013-06-20 | 5 | 201326 | 3 | 60.0000 |
2013-06-20 | 5 | 201327 | 1 | 20.0000 |
2013-06-21 | 2 | 201330 | 1 | 50.0000 |
2013-06-22 | 1 | null | null | null |