add batch remove batch split batch comment selection show hidden batches hide batch highlight batch
db<>fiddle
donate feedback about
By using db<>fiddle, you agree to license everything you submit by Creative Commons CC0.
CREATE TABLE users (
user_id INT UNSIGNED PRIMARY KEY,
activated_at DATETIME NOT NULL,
INDEX idx_cohort_start_date ((DATE(activated_at)))
);
INSERT INTO users (user_id, activated_at) VALUES
(1, '2013-06-20 01:00:00'),
(2, '2013-06-20 02:00:00'),
(3, '2013-06-20 03:00:00'),
(4, '2013-06-20 04:00:00'),
(5, '2013-06-20 05:00:00'),
(6, '2013-06-21 06:00:00'),
(7, '2013-06-21 07:00:00'),
(8, '2013-06-22 07:00:00');

CREATE TABLE events (
user_id INT UNSIGNED,
event_type ENUM('signup_flow', 'engagement') NOT NULL,
occurred_at DATETIME NOT NULL,
PRIMARY KEY (user_id, event_type, occurred_at),
FOREIGN KEY (user_id) REFERENCES users (user_id)
);
INSERT INTO events VALUES
(1, 'signup_flow', '2013-06-20 01:00:00'),
(2, 'signup_flow', '2013-06-20 02:00:00'),
(3, 'signup_flow', '2013-06-20 03:00:00'),
(4, 'signup_flow', '2013-06-20 04:00:00'),
(5, 'signup_flow', '2013-06-20 05:00:00'),
(6, 'signup_flow', '2013-06-21 06:00:00'),
(7, 'signup_flow', '2013-06-21 07:00:00'),
(8, 'signup_flow', '2013-06-22 07:00:00'),
(1, 'engagement', '2013-06-28 01:00:00'),
(2, 'engagement', '2013-06-28 01:00:00'),
(3, 'engagement', '2013-06-28 01:00:00'),
(4, 'engagement', '2013-06-28 01:00:00'),
(5, 'engagement', '2013-06-28 01:00:00'),
Records: 8  Duplicates: 0  Warnings: 0
Records: 18  Duplicates: 0  Warnings: 0
WITH cohorts AS (
SELECT
DATE(activated_at) AS cohort_start_date,
COUNT(*) AS total_users
FROM users
GROUP BY 1
),
weekly_stats AS (
SELECT
DATE(u.activated_at) AS cohort_start_date,
YEARWEEK(e.occurred_at, 0) AS year_week,
COUNT(DISTINCT e.user_id) AS active_users
FROM users u
JOIN events e ON u.user_id = e.user_id
WHERE e.event_type = 'engagement'
GROUP BY cohort_start_date, year_week
)
SELECT
cohorts.cohort_start_date,
cohorts.total_users AS total_users,
weekly_stats.year_week,
weekly_stats.active_users,
weekly_stats.active_users / cohorts.total_users * 100 AS retention_rate
FROM cohorts
JOIN weekly_stats
ON cohorts.cohort_start_date = weekly_stats.cohort_start_date
ORDER BY cohort_start_date, year_week;
cohort_start_date total_users year_week active_users retention_rate
2013-06-20 5 201325 5 100.0000
2013-06-20 5 201326 3 60.0000
2013-06-20 5 201327 1 20.0000
2013-06-21 2 201330 1 50.0000
/*
* If there's the possibility of very few people activating their accounts
* on a given date and then never "engaging", then you need a LEFT JOIN
* between cohorts and weekly_stats
*/
WITH cohorts AS (
SELECT
DATE(activated_at) AS cohort_start_date,
COUNT(*) AS total_users
FROM users
GROUP BY 1
),
weekly_stats AS (
SELECT
DATE(u.activated_at) AS cohort_start_date,
YEARWEEK(e.occurred_at, 0) AS year_week,
COUNT(DISTINCT e.user_id) AS active_users
FROM users u
JOIN events e ON u.user_id = e.user_id
WHERE e.event_type = 'engagement'
GROUP BY cohort_start_date, year_week
)
SELECT
cohorts.cohort_start_date,
cohorts.total_users AS total_users,
weekly_stats.year_week,
weekly_stats.active_users,
weekly_stats.active_users / cohorts.total_users * 100 AS retention_rate
FROM cohorts
LEFT JOIN weekly_stats
ON cohorts.cohort_start_date = weekly_stats.cohort_start_date
ORDER BY cohort_start_date, year_week;
cohort_start_date total_users year_week active_users retention_rate
2013-06-20 5 201325 5 100.0000
2013-06-20 5 201326 3 60.0000
2013-06-20 5 201327 1 20.0000
2013-06-21 2 201330 1 50.0000
2013-06-22 1 null null null