SQL Cookbook
Practical recipes for common behavioral analytics patterns. Each recipe is self-contained — copy and paste into a DuckDB session with the extension loaded.
Funnel Recipes
Basic Conversion Funnel
SELECT user_id,
window_funnel(INTERVAL '1 hour', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as furthest_step
FROM events
GROUP BY user_id;
Funnel Drop-off Report
Aggregate per-user funnel results into a conversion report showing where users abandon:
WITH funnels AS (
SELECT user_id,
window_funnel(INTERVAL '1 hour', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'checkout',
event_type = 'purchase'
) as step
FROM events GROUP BY user_id
)
SELECT
step as reached_step,
COUNT(*) as users,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) as pct
FROM funnels
GROUP BY step
ORDER BY step;
Funnel by Date
Track daily funnel conversion rates:
SELECT
event_time::DATE as day,
COUNT(*) as total_users,
SUM(CASE WHEN step >= 1 THEN 1 ELSE 0 END) as viewed,
SUM(CASE WHEN step >= 2 THEN 1 ELSE 0 END) as carted,
SUM(CASE WHEN step >= 3 THEN 1 ELSE 0 END) as purchased,
ROUND(100.0 * SUM(CASE WHEN step >= 3 THEN 1 ELSE 0 END) /
NULLIF(SUM(CASE WHEN step >= 1 THEN 1 ELSE 0 END), 0), 1) as conversion_pct
FROM (
SELECT user_id, MIN(event_time) as event_time,
window_funnel(INTERVAL '1 hour', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as step
FROM events
GROUP BY user_id
)
GROUP BY day
ORDER BY day;
Strict Funnel (No Repeated Steps)
Use strict_increase mode to require strictly increasing timestamps between
steps — no duplicate timestamps allowed:
SELECT user_id,
window_funnel(INTERVAL '1 hour', 'strict_increase', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as step
FROM events GROUP BY user_id;
Funnel with Re-entry
Allow the funnel to restart when the first condition fires again, tracking the best attempt:
SELECT user_id,
window_funnel(INTERVAL '1 hour', 'allow_reentry', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as step
FROM events GROUP BY user_id;
Funnel by Segment
Compare funnel performance across user segments (device, campaign, etc.):
SELECT
device_type,
COUNT(*) as users,
ROUND(AVG(step), 2) as avg_step,
SUM(CASE WHEN step = 4 THEN 1 ELSE 0 END) as completed,
ROUND(100.0 * SUM(CASE WHEN step = 4 THEN 1 ELSE 0 END) / COUNT(*), 1) as pct
FROM (
SELECT user_id, device_type,
window_funnel(INTERVAL '1 hour', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'checkout',
event_type = 'purchase'
) as step
FROM events GROUP BY user_id, device_type
)
GROUP BY device_type
ORDER BY pct DESC;
Session Recipes
Basic Session Assignment
SELECT user_id, event_time,
sessionize(event_time, INTERVAL '30 minutes') OVER (
PARTITION BY user_id ORDER BY event_time
) as session_id
FROM events;
Session Metrics (Duration, Page Count, Bounce Rate)
WITH sessionized AS (
SELECT user_id, event_time, page_url,
sessionize(event_time, INTERVAL '30 minutes') OVER (
PARTITION BY user_id ORDER BY event_time
) as session_id
FROM events
)
SELECT
user_id,
session_id,
COUNT(*) as page_views,
MIN(event_time) as started_at,
MAX(event_time) as ended_at,
EXTRACT(EPOCH FROM MAX(event_time) - MIN(event_time)) as duration_sec,
CASE WHEN COUNT(*) = 1 THEN true ELSE false END as is_bounce
FROM sessionized
GROUP BY user_id, session_id;
Sessions Per User Per Day
WITH sessionized AS (
SELECT user_id, event_time,
sessionize(event_time, INTERVAL '30 minutes') OVER (
PARTITION BY user_id ORDER BY event_time
) as session_id
FROM events
)
SELECT
user_id,
event_time::DATE as day,
COUNT(DISTINCT session_id) as sessions
FROM sessionized
GROUP BY user_id, day
ORDER BY user_id, day;
Average Session Duration by Day
WITH sessionized AS (
SELECT user_id, event_time,
sessionize(event_time, INTERVAL '30 minutes') OVER (
PARTITION BY user_id ORDER BY event_time
) as session_id
FROM events
),
session_stats AS (
SELECT
user_id, session_id,
MIN(event_time)::DATE as day,
EXTRACT(EPOCH FROM MAX(event_time) - MIN(event_time)) as duration_sec
FROM sessionized
GROUP BY user_id, session_id
)
SELECT
day,
COUNT(*) as sessions,
ROUND(AVG(duration_sec), 0) as avg_duration_sec,
ROUND(AVG(duration_sec) / 60.0, 1) as avg_duration_min
FROM session_stats
GROUP BY day
ORDER BY day;
Entry Page Analysis
Identify the first page of each session for channel attribution:
WITH sessionized AS (
SELECT user_id, event_time, page_url, referrer,
sessionize(event_time, INTERVAL '30 minutes') OVER (
PARTITION BY user_id ORDER BY event_time
) as session_id
FROM events
)
SELECT
page_url as entry_page,
COUNT(*) as sessions,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) as pct
FROM (
SELECT DISTINCT ON (user_id, session_id) user_id, session_id, page_url, referrer
FROM sessionized
ORDER BY user_id, session_id, event_time
)
GROUP BY page_url
ORDER BY sessions DESC;
Retention Recipes
Weekly Cohort Retention
SELECT
cohort_week,
COUNT(*) as cohort_size,
SUM(CASE WHEN r[1] THEN 1 ELSE 0 END) as week_0,
SUM(CASE WHEN r[2] THEN 1 ELSE 0 END) as week_1,
SUM(CASE WHEN r[3] THEN 1 ELSE 0 END) as week_2,
SUM(CASE WHEN r[4] THEN 1 ELSE 0 END) as week_3,
ROUND(100.0 * SUM(CASE WHEN r[2] THEN 1 ELSE 0 END) /
NULLIF(SUM(CASE WHEN r[1] THEN 1 ELSE 0 END), 0), 1) as w1_pct,
ROUND(100.0 * SUM(CASE WHEN r[3] THEN 1 ELSE 0 END) /
NULLIF(SUM(CASE WHEN r[1] THEN 1 ELSE 0 END), 0), 1) as w2_pct,
ROUND(100.0 * SUM(CASE WHEN r[4] THEN 1 ELSE 0 END) /
NULLIF(SUM(CASE WHEN r[1] THEN 1 ELSE 0 END), 0), 1) as w3_pct
FROM (
SELECT user_id, cohort_week,
retention(
activity_date >= cohort_week AND activity_date < cohort_week + INTERVAL '7 days',
activity_date >= cohort_week + INTERVAL '7 days' AND activity_date < cohort_week + INTERVAL '14 days',
activity_date >= cohort_week + INTERVAL '14 days' AND activity_date < cohort_week + INTERVAL '21 days',
activity_date >= cohort_week + INTERVAL '21 days' AND activity_date < cohort_week + INTERVAL '28 days'
) as r
FROM activity GROUP BY user_id, cohort_week
)
GROUP BY cohort_week
ORDER BY cohort_week;
Day-1 / Day-7 / Day-30 Retention
Classic mobile/SaaS retention metrics:
SELECT
signup_date,
COUNT(*) as new_users,
SUM(CASE WHEN r[1] THEN 1 ELSE 0 END) as day_0,
ROUND(100.0 * SUM(CASE WHEN r[2] THEN 1 ELSE 0 END) /
NULLIF(COUNT(*), 0), 1) as d1_pct,
ROUND(100.0 * SUM(CASE WHEN r[3] THEN 1 ELSE 0 END) /
NULLIF(COUNT(*), 0), 1) as d7_pct,
ROUND(100.0 * SUM(CASE WHEN r[4] THEN 1 ELSE 0 END) /
NULLIF(COUNT(*), 0), 1) as d30_pct
FROM (
SELECT user_id, signup_date,
retention(
activity_date = signup_date,
activity_date = signup_date + INTERVAL '1 day',
activity_date = signup_date + INTERVAL '7 days',
activity_date = signup_date + INTERVAL '30 days'
) as r
FROM user_activity GROUP BY user_id, signup_date
)
GROUP BY signup_date
ORDER BY signup_date;
Retention by Segment
Compare retention across user segments (plan type, acquisition channel, etc.):
SELECT
plan_type,
COUNT(*) as users,
ROUND(100.0 * SUM(CASE WHEN r[2] THEN 1 ELSE 0 END) /
NULLIF(SUM(CASE WHEN r[1] THEN 1 ELSE 0 END), 0), 1) as w1_retention,
ROUND(100.0 * SUM(CASE WHEN r[3] THEN 1 ELSE 0 END) /
NULLIF(SUM(CASE WHEN r[1] THEN 1 ELSE 0 END), 0), 1) as w2_retention
FROM (
SELECT user_id, plan_type,
retention(
activity_week = signup_week,
activity_week = signup_week + INTERVAL '7 days',
activity_week = signup_week + INTERVAL '14 days'
) as r
FROM user_activity GROUP BY user_id, plan_type
)
GROUP BY plan_type
ORDER BY w1_retention DESC;
Sequence Pattern Recipes
Detect a Specific Event Sequence
Did the user view a product then add it to cart (with any events in between)?
SELECT user_id,
sequence_match('(?1).*(?2)', event_time,
event_type = 'page_view',
event_type = 'add_to_cart'
) as viewed_then_carted
FROM events GROUP BY user_id;
Time-Constrained Pattern
User signed up and completed onboarding within 10 minutes:
SELECT user_id,
sequence_match('(?1).*(?t<=600)(?2)', event_time,
event_type = 'signup',
event_type = 'onboarding_complete'
) as fast_onboarder
FROM events GROUP BY user_id;
Count Repeated Patterns
How many times does each user repeat the browse → cart cycle?
SELECT user_id,
sequence_count('(?1).*(?2)', event_time,
event_type = 'page_view',
event_type = 'add_to_cart'
) as browse_cart_cycles
FROM events
GROUP BY user_id
ORDER BY browse_cart_cycles DESC;
Multi-Step Pattern with Wildcards
Detect users who viewed, added to cart, then purchased — but NOT immediately (at least one event between cart and purchase):
SELECT user_id,
sequence_match('(?1).*(?2).(?3)', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as indirect_purchase
FROM events GROUP BY user_id;
Get Matched Timestamps
Retrieve the exact timestamps when each step in a 3-step pattern matched:
SELECT user_id,
sequence_match_events('(?1).*(?2).*(?3)', event_time,
event_type = 'signup',
event_type = 'first_purchase',
event_type = 'second_purchase'
) as milestone_times
FROM events GROUP BY user_id;
Time Between Pattern Steps
Calculate the time between matched pattern steps:
WITH matched AS (
SELECT user_id,
sequence_match_events('(?1).*(?2).*(?3)', event_time,
event_type = 'signup',
event_type = 'first_purchase',
event_type = 'review'
) as ts
FROM events GROUP BY user_id
)
SELECT user_id,
ts[1] as signup_time,
ts[2] as purchase_time,
ts[3] as review_time,
EXTRACT(EPOCH FROM ts[2] - ts[1]) / 3600.0 as hours_to_purchase,
EXTRACT(EPOCH FROM ts[3] - ts[2]) / 3600.0 as hours_to_review
FROM matched
WHERE len(ts) = 3;
User Flow Recipes
Forward Flow — What Happens Next?
After Home → Product, what do users do next?
SELECT
COALESCE(next_page, '(end of session)') as next_page,
COUNT(*) as users,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) as pct
FROM (
SELECT user_id,
sequence_next_node('forward', 'first_match', event_time, page,
page = 'Home', page = 'Home', page = 'Product'
) as next_page
FROM events GROUP BY user_id
)
GROUP BY next_page
ORDER BY users DESC;
Backward Flow — What Led Here?
What page do users visit immediately before reaching the Checkout page?
SELECT
COALESCE(prev_page, '(start of session)') as prev_page,
COUNT(*) as users,
ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 1) as pct
FROM (
SELECT user_id,
sequence_next_node('backward', 'first_match', event_time, page,
page = 'Checkout', page = 'Checkout'
) as prev_page
FROM events GROUP BY user_id
)
GROUP BY prev_page
ORDER BY users DESC;
Last-Match Flow
Using last_match base to find the next page after the last occurrence of
the Home → Product pattern:
SELECT user_id,
sequence_next_node('forward', 'last_match', event_time, page,
page = 'Home', page = 'Home', page = 'Product'
) as next_page_after_last
FROM events GROUP BY user_id;
Combined Analysis Recipes
A/B Test Behavioral Comparison
Compare funnel depth and conversion speed between test groups:
WITH funnel AS (
SELECT user_id, test_group,
window_funnel(INTERVAL '2 hours', event_time,
event_type = 'signup',
event_type = 'profile_setup',
event_type = 'first_action'
) as step
FROM events GROUP BY user_id, test_group
),
speed AS (
SELECT user_id, test_group,
sequence_match('(?1).*(?t<=1800)(?2).*(?t<=1800)(?3)', event_time,
event_type = 'signup',
event_type = 'profile_setup',
event_type = 'first_action'
) as completed_fast
FROM events GROUP BY user_id, test_group
)
SELECT
f.test_group,
COUNT(*) as users,
ROUND(AVG(f.step), 2) as avg_depth,
SUM(CASE WHEN f.step = 3 THEN 1 ELSE 0 END) as completed,
ROUND(100.0 * SUM(CASE WHEN f.step = 3 THEN 1 ELSE 0 END) / COUNT(*), 1) as complete_pct,
SUM(CASE WHEN s.completed_fast THEN 1 ELSE 0 END) as fast_completers
FROM funnel f
JOIN speed s ON f.user_id = s.user_id
GROUP BY f.test_group;
Session + Funnel Combined
Analyze funnel performance per session:
WITH sessionized AS (
SELECT *, sessionize(event_time, INTERVAL '30 minutes') OVER (
PARTITION BY user_id ORDER BY event_time
) as session_id
FROM events
)
SELECT user_id, session_id,
window_funnel(INTERVAL '1 hour', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as step
FROM sessionized
GROUP BY user_id, session_id
ORDER BY user_id, session_id;
Power Users Detection
Find users with the most repeated behavioral patterns:
SELECT user_id,
sequence_count('(?1).*(?2)', event_time,
event_type = 'search',
event_type = 'page_view'
) as search_browse_cycles,
sequence_count('(?1).*(?2)', event_time,
event_type = 'page_view',
event_type = 'add_to_cart'
) as browse_cart_cycles,
window_funnel(INTERVAL '24 hours', event_time,
event_type = 'page_view',
event_type = 'add_to_cart',
event_type = 'purchase'
) as funnel_depth
FROM events
GROUP BY user_id
ORDER BY browse_cart_cycles DESC
LIMIT 20;
Querying Parquet Files Directly
All recipes work with any DuckDB-supported file format:
SELECT user_id,
window_funnel(INTERVAL '1 hour', event_time,
event_type = 'view', event_type = 'cart', event_type = 'purchase'
) as step
FROM read_parquet('s3://my-bucket/events/*.parquet')
WHERE event_time >= '2024-01-01'
GROUP BY user_id;
Pattern Syntax Quick Reference
| Pattern | Meaning |
|---|---|
(?N) | Match event where condition N is true (1-indexed) |
. | Match exactly one event (any) |
.* | Match zero or more events (any) |
(?t<=N) | At most N seconds since previous match |
(?t>=N) | At least N seconds since previous match |
(?t<N) | Less than N seconds since previous match |
(?t>N) | More than N seconds since previous match |
(?t==N) | Exactly N seconds since previous match |
(?t!=N) | Not exactly N seconds since previous match |
Common patterns:
(?1).*(?2) -- cond1 then cond2, any gap
(?1)(?2) -- cond1 immediately followed by cond2
(?1).*(?t<=3600)(?2) -- cond1 then cond2 within 1 hour
(?1).(?2) -- cond1, one event, then cond2
(?1).*(?2).*(?3) -- three-step sequence