โ ๊ณผ์ 1. ์ ์ ์ ๊ณ์ ์์ฑ ํ ํ์ ๊ฐ์ ์๋ฃ ์ ํ์จ (location, device๋ณ)
# ๊ณผ์ ์ค๋ช
์ ์ ์ ๊ณ์ ์์ฑ (create_user) ํ ํ์๊ฐ์ ์๋ฃ (complete_signup) ์ ํ์จ์ ์ฌ์ฉ์ ์์น(location), ๊ธฐ๊ธฐ(device) ๋ณ๋ก ๊ตฌํด์ฃผ์ธ์. ์์ซ์ ๋์งธ์๋ฆฌ๊น์ง ์ถ๋ ฅํ๋ฉฐ, ์ฒ์ ์ ์ ๊ฐ ๊ณ์ ์ ์์ฑํ ํ ํ๋ฃจ ์์ ์๋ฃ๋์์ด์ผ ํฉ๋๋ค. (๋ฐฑ๋ถ๋ฅ ๋ก ๊ตฌํด์ฃผ์ธ์)
# ๊ฒฐ๊ณผ ์ถ๋ ฅ ์์
# ์ ๋ต ์ฝ๋
WITH creation_user AS (
SELECT user_id,
occurred_at,
location,
device
FROM yammer_event
WHERE event_name = 'create_user'
), completion_signup AS (
SELECT user_id,
occurred_at
FROM yammer_event
WHERE event_name = 'complete_signup'
)
SELECT location,
device,
round(count(DISTINCT s.user_id)*100.0 / count(DISTINCT c.user_id), 2) AS creation_signup_conversion
FROM creation_user c
LEFT JOIN completion_signup s
ON c.user_id = s.user_id
AND c.occurred_at <= s.occurred_at
AND timestampdiff(HOUR, c.occurred_at, s.occurred_at) <= 24
GROUP BY location, device
# ์ฝ๋ ์ค๋ช
left join์ ์ผ์ชฝ์ ์์นํ๋ creation_user ๋ถ๋ถ์๋ง location๊ณผ device๋ฅผ ์ ์ด์ฃผ์๋ค
์ค๋ฅธ์ชฝ์ ์์นํ๋ completion_signup์๋ location๊ณผ device๋ฅผ ์๋ตํ๊ณ ๊ผญ ํ์ํ user_id์ occurred_at๋ง ์ ์ด์ฃผ์๋ค
timestampdiff(์๊ฐ๋จ์, ์์์๊ฐ, ๋์๊ฐ)
์๊ฐ์ฐจ์ด ๊ตฌํ๊ธฐ
โ ๊ณผ์ 2. ํ์๊ฐ์ ์ ํ์จ ๋จ๊ณ๋ณ ๋ถ์
# ๊ณผ์ ์ค๋ช
์ ์ ์ ๊ณ์ ์์ฑ (create_user) ํ ์ด๋ฉ์ผ ์ ๋ ฅ ํ๋ฉด ์ง์ (enter_email)๋ฅ , ์ด๋ฉ์ผ ์ ๋ ฅ ํ๋ฉด ์ง์ (enter_email) ํ ๊ฐ์ธ ์ ๋ณด ์ ๋ ฅ ํ๋ฉด(enter_info) ์ง์ ๋ฅ , ๊ฐ์ธ ์ ๋ณด ์ ๋ ฅ ํ๋ฉด(enter_info) ์ง์ ํ ํ์๊ฐ์ ์๋ฃ ์ ํ์จ(complete_signup), ์ ์ ์ ๊ณ์ ์์ฑ ํ ํ์๊ฐ์ ์๋ฃ ์ ํ์จ์ ๊ฐ๊ฐ ๊ตฌํด์ฃผ์ธ์. ์์ซ์ ๋์์ง๋ฆฌ๊น์ง ์ถ๋ ฅํ๋ฉฐ, ๋ชจ๋ ๋จ๊ณ๋ ์ฒ์ ์ ์ ๊ฐ ๊ณ์ ์ ์์ฑํ ํ ํ๋ฃจ ์์ ์๋ฃ๋์์ด์ผ ํฉ๋๋ค. (๋ฐฑ๋ถ๋ฅ ๋ก ๊ตฌํด์ฃผ์ธ์)
# ๊ฒฐ๊ณผ ์ถ๋ ฅ ์์
# ์ ๋ต ์ฝ๋
WITH creation_user AS (
SELECT user_id,
occurred_at AS creation_time,
device,
location
FROM yammer_event
WHERE event_name = 'create_user'
), entering_email AS (
SELECT user_id,
occurred_at AS email_time
FROM yammer_event
WHERE event_name = 'enter_email'
), entering_info AS (
SELECT user_id,
occurred_at AS info_time
FROM yammer_event
WHERE event_name = 'enter_info'
), completion_signup AS (
SELECT user_id,
occurred_at AS signup_time
FROM yammer_event
WHERE event_name = 'complete_signup'
), full_funnel AS (
SELECT cu.user_id,
ee.user_id AS ee_id,
ei.user_id AS ei_id,
cs.user_id AS cs_id,
creation_time,
email_time,
info_time,
signup_time
FROM creation_user cu
LEFT JOIN entering_email ee
ON cu.user_id = ee.user_id AND cu.creation_time <= ee.email_time AND timestampdiff(HOUR, cu.creation_time, ee.email_time) <= 24
LEFT JOIN entering_info ei
ON cu.user_id = ei.user_id AND ee.email_time <= ei.info_time AND timestampdiff(HOUR, ee.email_time, ei.info_time) <= 24
LEFT JOIN completion_signup cs
ON cu.user_id = cs.user_id AND ei.info_time <= cs.signup_time AND timestampdiff(HOUR, ei.info_time, cs.signup_time) <= 24
AND timestampdiff(HOUR, cu.creation_time, cs.signup_time) <= 24
)
SELECT round(count(DISTINCT ee_id)*100.0 / count(DISTINCT user_id), 2) AS email_entry_rate,
round(count(DISTINCT ei_id)*100.0 / count(DISTINCT ee_id), 2) AS info_entry_rate,
round(count(DISTINCT cs_id)*100.0 / count(DISTINCT ei_id), 2) AS signup_completion_rate,
round(count(DISTINCT cs_id)*100.0 / count(DISTINCT user_id), 2) AS overall_signup_rate
FROM full_funnel
# ํต์ฌ ๊ฐ๋
from ์๋ ํ ์ด๋ธ
left join ์๋ก์ด ํ ์ด๋ธ1, 2, 3 ... ์ ํ ๋
on์์ ์๋ ํ ์ด๋ธ.user_id = ์๋ก์ดํ ์ด๋ธ.user_id๋ฅผ ์งํํ์๋ค
โก๏ธ๋ชจ๋ ๋จ๊ณ๋ ์ฒ์ ์ ์ ๊ฐ ๊ณ์ ์ ์์ฑํ ํ ํ๋ฃจ ์์ ์๋ฃ๋์์ด์ผ ํฉ๋๋ค
์ด ๋ถ๋ถ์ ๊ตฌํํ๊ธฐ ์ํด์
๊ฐ๊ฐ์ ์์ ๋จ๊ณ๋ง๋ค 24์๊ฐ ์ด๋ด, cu.creation_time๊ณผ cs.signup_time์ 24์๊ฐ ์ด๋ด ์กฐ๊ฑด์ ๊ฑธ์ด์ค ์๋ ์๋ค
๋๋
cu.creation_time๊ณผ ๊ฐ๊ฐ์ ๋จ๊ณ๋ง๋ค 24์๊ฐ ์ด๋ด ์กฐ๊ฑด์ผ๋ก๋ ๊ฑธ์ด์ค ์ ์๋ค
โ ๊ณผ์ 3. ํ์๊ฐ์ ์ ํ์จ ๋ฐ ์ดํ ํ๋ ์ ํ์จ ๋ถ์
์ ์ ์ ๊ณ์ ์์ฑ (create_user) ํ ํ์๊ฐ์ ์๋ฃ (complete_signup) ์ ํ์จ, ํ์๊ฐ์ ์๋ฃ ํ ํํ์ด์ง ์ง์ (home_page)์จ, ํํ์ด์ง ์ง์ ํ ๋ฉ์ธ์ง ์ข์์ ํด๋ฆญ๋ฅ (like_message), ํ์๊ฐ์ ํ ๋ฉ์ธ์ง ์ข์์ ํด๋ฆญ ์ ํ์จ์ ๊ฐ๊ฐ ๊ตฌํด์ฃผ์ธ์. ์์ซ์ ๋์์ง๋ฆฌ๊น์ง ์ถ๋ ฅํ๋ฉฐ, ๋ชจ๋ ๋จ๊ณ๋ ์ฒ์ ์ ์ ๊ฐ ๊ณ์ ์ ์์ฑํ ํ 30๋ถ ์์ ์๋ฃ๋์์ด์ผ ํฉ๋๋ค. (๋ฐฑ๋ถ๋ฅ ๋ก ๊ตฌํด์ฃผ์ธ์)
# full_funnel ์ถ๋ ฅํด๋ณด๊ธฐ
# 3-3) ํผ๋ ๋ถ์ : ํ์๊ฐ์
์ ํ์จ ๋ฐ ์ดํ ํ๋ ์ ํ์จ ๋ถ์
WITH creation_user AS (
SELECT user_id,
occurred_at AS creation_time,
device,
location
FROM yammer_event
WHERE event_name = 'create_user'
), completion_signup AS (
SELECT user_id,
occurred_at AS signup_time
FROM yammer_event
WHERE event_name = 'complete_signup'
), homepage_enter AS (
SELECT user_id,
occurred_at AS homepage_time
FROM yammer_event
WHERE event_name = 'home_page'
), like_msg AS (
SELECT user_id,
occurred_at AS like_message_time
FROM yammer_event
WHERE event_name = 'like_message'
), full_funnel AS (
SELECT cu.user_id,
cs.user_id AS cs_id,
he.user_id AS he_id,
lm.user_id AS lm_id,
creation_time,
signup_time,
homepage_time,
like_message_time
FROM creation_user cu
LEFT JOIN completion_signup cs ON cu.user_id = cs.user_id
AND timestampdiff(MINUTE, cu.creation_time, cs.signup_time) <= 30
AND cu.creation_time <= cs.signup_time
LEFT JOIN homepage_enter he ON cu.user_id = he.user_id
AND timestampdiff(MINUTE, cu.creation_time, he.homepage_time) <= 30
AND cs.signup_time <= he.homepage_time
LEFT JOIN like_msg lm ON cu.user_id = lm.user_id
AND timestampdiff(MINUTE, cu.creation_time, lm.like_message_time) <= 30
AND he.homepage_time <= lm.like_message_time
)
SELECT *
FROM full_funnel
# ์คํ๊ฒฐ๊ณผ
# ์ ๋ต ์ฝ๋
# 3-3) ํผ๋ ๋ถ์ : ํ์๊ฐ์
์ ํ์จ ๋ฐ ์ดํ ํ๋ ์ ํ์จ ๋ถ์
WITH creation_user AS (
SELECT user_id,
occurred_at AS creation_time,
device,
location
FROM yammer_event
WHERE event_name = 'create_user'
), completion_signup AS (
SELECT user_id,
occurred_at AS signup_time
FROM yammer_event
WHERE event_name = 'complete_signup'
), homepage_enter AS (
SELECT user_id,
occurred_at AS homepage_time
FROM yammer_event
WHERE event_name = 'home_page'
), like_msg AS (
SELECT user_id,
occurred_at AS like_message_time
FROM yammer_event
WHERE event_name = 'like_message'
), full_funnel AS (
SELECT cu.user_id,
cs.user_id AS cs_id,
he.user_id AS he_id,
lm.user_id AS lm_id,
creation_time,
signup_time,
homepage_time,
like_message_time
FROM creation_user cu
LEFT JOIN completion_signup cs ON cu.user_id = cs.user_id
AND timestampdiff(MINUTE, cu.creation_time, cs.signup_time) <= 30
AND cu.creation_time <= cs.signup_time
LEFT JOIN homepage_enter he ON cu.user_id = he.user_id
AND timestampdiff(MINUTE, cu.creation_time, he.homepage_time) <= 30
AND cs.signup_time <= he.homepage_time
LEFT JOIN like_msg lm ON cu.user_id = lm.user_id
AND timestampdiff(MINUTE, cu.creation_time, lm.like_message_time) <= 30
AND he.homepage_time <= lm.like_message_time
)
SELECT round(count(DISTINCT cs_id)*100.0 / count(DISTINCT user_id), 2) AS creation_signup_rate,
round(count(DISTINCT he_id)*100.0 / count(DISTINCT cs_id), 2) AS signup_homepage_entry_rate,
round(count(DISTINCT lm_id)*100.0 / count(DISTINCT he_id), 2) AS homepage_entry_like_message_rate,
round(count(DISTINCT lm_id)*100.0 / count(DISTINCT cs_id), 2) AS signup_like_message_rate
FROM full_funnel
# ์ ๋ต ์ฝ๋ ์คํ๊ฒฐ๊ณผ
โ ๊ณผ์ 4. ์ผ๋ณ๋ก ํ์ ๊ฐ์ ํ๋ก์ธ์ค์ ๊ฐ ๋จ๊ณ ์๋ฃ ํ๊ท ์๊ฐ
# ๋ฌธ์
์ผ๋ณ๋ก ํ์ ๊ฐ์ ํ๋ก์ธ์ค์ ๊ฐ ๋จ๊ณ(๊ณ์ ์์ฑ๋ถํฐ ์ด๋ฉ์ผ ์ ๋ ฅ, ๊ฐ์ธ ์ ๋ณด ์ ๋ ฅ, ๊ฐ์ ์๋ฃ๊น์ง)๋ฅผ ์๋ฃํ๋ ๋ฐ ๊ฑธ๋ฆฐ ํ๊ท ์๊ฐ์ ๊ตฌํด์ฃผ์ธ์.
์ ์ ์ ๊ณ์ ์์ฑ (create_user) ํ ์ด๋ฉ์ผ ์ ๋ ฅ ํ๋ฉด ์ง์ (enter_email) ๊น์ง ๊ฑธ๋ฆฐ ์๊ฐ, ์ด๋ฉ์ผ ์ ๋ ฅ ํ๋ฉด ์ง์ (enter_email) ํ ๊ฐ์ธ ์ ๋ณด ์ ๋ ฅ ํ๋ฉด(enter_info) ์ง์ ๊น์ง ๊ฑธ๋ฆฐ ์๊ฐ, ๊ฐ์ธ ์ ๋ณด ์ ๋ ฅ ํ๋ฉด(enter_info) ์ง์ ํ ํ์๊ฐ์ ์๋ฃ ์ ํ๊น์ง ๊ฑธ๋ฆฐ ์๊ฐ(complete_signup), ์ ์ ์ ๊ณ์ ์์ฑ ํ ํ์๊ฐ์ ์๋ฃ๊น์ง ๊ฑธ๋ฆฐ ์๊ฐ์ ํ๊ท ์ ๊ฐ๊ฐ ๊ตฌํด์ฃผ์ธ์.
์์ซ์ ๋์์ง๋ฆฌ๊น์ง ์ถ๋ ฅํ๋ฉฐ, ๋ชจ๋ ๋จ๊ณ๋ ์ฒ์ ์ ์ ๊ฐ ๊ณ์ ์ ์์ฑํ ํ ํ๋ฃจ ์์ ์๋ฃ๋์์ด์ผ ํฉ๋๋ค. (๋ฐฑ๋ถ๋ฅ ๋ก ๊ตฌํด์ฃผ์ธ์)
# ๋ฌธ์ ์คํ๊ฒฐ๊ณผ
# ์ ๋ต ์ฝ๋
# 3-4) ํผ๋ ๋ถ์ : ์ผ๋ณ๋ก ํ์ ๊ฐ์
ํ๋ก์ธ์ค์ ๊ฐ ๋จ๊ณ ์๋ฃ ํ๊ท ์๊ฐ
WITH creation_user AS (
SELECT user_id
, occurred_at AS creation_time
, device
, location
FROM yammer_event
WHERE event_name = 'create_user'
), entering_email AS (
SELECT user_id
, occurred_at AS email_time
FROM yammer_event
WHERE event_name = 'enter_email'
), entering_info AS (
SELECT user_id
, occurred_at AS info_time
FROM yammer_event
WHERE event_name = 'enter_info'
), completion_signup AS (
SELECT user_id
, occurred_at AS signup_time
FROM yammer_event
WHERE event_name = 'complete_signup'
)
SELECT date_format(cu.creation_time, '%Y-%m-%d') AS creation_date,
round(avg(timestampdiff(SECOND, cu.creation_time, ee.email_time)),2) AS email_entry_time,
round(avg(timestampdiff(SECOND, ee.email_time, ei.info_time)),2) AS info_entry_time,
round(avg(timestampdiff(SECOND, ei.info_time, cs.signup_time)),2) AS signup_completion_time,
round(avg(timestampdiff(SECOND, cu.creation_time, cs.signup_time)),2) AS creation_signup_time
FROM creation_user cu
LEFT JOIN entering_email ee ON cu.user_id = ee.user_id
AND timestampdiff(HOUR, cu.creation_time, ee.email_time) <= 24
AND cu.creation_time <= ee.email_time
LEFT JOIN entering_info ei ON cu.user_id = ei.user_id
AND timestampdiff(HOUR, cu.creation_time, ei.info_time) <= 24
AND ee.email_time <= ei.info_time
LEFT JOIN completion_signup cs ON cu.user_id = cs.user_id
AND timestampdiff(HOUR, cu.creation_time, cs.signup_time) <= 24
AND ei.info_time <= cs.signup_time
GROUP BY date_format(cu.creation_time, '%Y-%m-%d')
# ์ ๋ต ์ฝ๋ ์คํ ๊ฒฐ๊ณผ
# ํต์ฌ ๊ฐ๋
round
avg(์ง๊ณํจ์)
timestampdiff๋ฅผ ํ๋ฒ์ ์ฌ์ฉํ์๋ค