< groupby ๊ฐ๋ ๋ฐ ์ ์ฉํ ๊ตฌ๋ฌธ >
# group by
1) ์ฌ์ฉ์ด์
์ง๊ณํจ์๋ฅผ ์ฌ์ฉํ๊ธฐ ์ํด์
2) ๋ฌธ๋ฒ
select ์ปฌ๋ผ,
์ง๊ณํจ์
group by ์ปฌ๋ผ
# ํ์ฌ ๋ฐ์ดํฐ๋ฒ ์ด์ค ์์น ๊ตฌํ๊ธฐ
select database()
# ๋ฐ์ดํฐ๋ฒ ์ด์ค ์ ํํ๊ธฐ
use ๋ฐ์ดํฐ๋ฒ ์ด์ค;
< exists ๊ฐ๋ >
# exists ๋ฌธ๋ฒ
์์ ํ ์ด๋ธ ๋ง๋ค๊ธฐ
CREATE TABLE test_user(
id varchar(50),
name varchar(50),
email varchar(50)
)
CREATE TABLE test_info(
id varchar(50),
city varchar(50),
age int
)
ํ ์ด๋ธ์ ๋ฐ์ดํฐ ์ฝ์ ํ๊ธฐ
INSERT INTO test_user
VALUES
('user1', 'A', 'A@A.com'),
('user2', 'B', 'B@B.com'),
('user3', 'C', 'C@C.com'),
('user4', 'D', 'D@D.com'),
('user5', 'E', 'E@E.com');
INSERT INTO test_info
VALUES
('user1', 'Seoul', 15),
('user2', 'Jeju', 20),
('user3', 'Japan', 30);
EXISTS๋ ์๋ธ์ฟผ๋ฆฌ(subquery)์์ ๋ฐํ๋ ๊ฒฐ๊ณผ๊ฐ ์กด์ฌํ๋์ง ์ฌ๋ถ๋ฅผ ํ์ธํ๋ ์กฐ๊ฑด ์ฐ์ฐ์์ ๋๋ค.
EXISTS๋ฅผ ์ฌ์ฉํ์ฌ ์๋ธ์ฟผ๋ฆฌ๋ฅผ ์กฐํฉํ๋ฉด ๋ฐ์ดํฐ๋ฅผ ํํฐ๋งํ๊ฑฐ๋ ๊ด๋ จ๋ ๋ฐ์ดํฐ๋ฅผ ํ์ธํ๋ ๋ฐ ์ ์ฉํ๊ฒ ์ฌ์ฉํ ์ ์์ต๋๋ค.
# test_user ํ ์ด๋ธ์์ test_infoํ ์ด๋ธ์ id๊ฐ๊ณผ ๊ฐ์ ํ ์ถ๋ ฅํ๊ธฐ
SELECT *
FROM test_user a
WHERE EXISTS (
SELECT *
FROM test_info
WHERE a.id = id
)
# test_user ํ ์ด๋ธ์์ test_infoํ ์ด๋ธ์ id๊ฐ๊ณผ ๋ค๋ฅธ ํ ์ถ๋ ฅํ๊ธฐ
SELECT *
FROM test_user a
WHERE NOT EXISTS (
SELECT *
FROM test_info
WHERE a.id = id
)
# exists()๋ง ์ฌ์ฉํ๋ค๋ฉด exists()์์ ์กฐ๊ฑด์ ๋ง๋ ๊ฒ์ด๋ฉด 1(true), 0(false)์ผ๋ก ์ถ๋ ฅํ๊ธฐ
SELECT *,
EXISTS (
SELECT *
FROM test_info
WHERE a.id = id
) AS cnt
FROM test_user a
์ฐธ๊ณ ์๋ฃ
[MySQL] EXISTS ์ฌ์ฉ๋ฒ, ์์ , ํ์ฉํ๊ธฐ (์กฐ๊ฑด ๋ง์กฑ ๊ฒ์)
EXISTS๋ ์๋ธ์ฟผ๋ฆฌ(subquery)์์ ๋ฐํ๋ ๊ฒฐ๊ณผ๊ฐ ์กด์ฌํ๋์ง ์ฌ๋ถ๋ฅผ ํ์ธํ๋ ์กฐ๊ฑด ์ฐ์ฐ์์ ๋๋ค. EXISTS๋ฅผ ์ฌ์ฉํ์ฌ ์๋ธ์ฟผ๋ฆฌ๋ฅผ ์กฐํฉํ๋ฉด ๋ฐ์ดํฐ๋ฅผ ํํฐ๋งํ๊ฑฐ๋ ๊ด๋ จ๋ ๋ฐ์ดํฐ๋ฅผ ํ์ธํ๋ ๋ฐ ์ ์ฉ
jh-tr.tistory.com
< 4-1. ํ์ฑ ์ ์ ์ ๋ฆฌํ ์ ๊ณ์ฐ >
๋ชฉํ: Facebook SQL ์ธํฐ๋ทฐ์ ๋์จ ํ์ฑ ์ ์ ์ ๋ฆฌํ ์ ์ SQL๋ก ๊ณ์ฐ
- ํ์ต ๋ชฉํ : ์ฝํธํธ ๋ฆฌํ
์
์ ํ์์ฑ์ ๋ํด ์ดํดํ๊ณ ์ด๋ฅผ SQL๋ก ๊ตฌํํ ์ ์๋ค.
- [ํ์ด์ค๋ถ] ํ์ฑ ์ ์ ๋ฆฌํ ์ ๊ตฌํ๊ธฐ
https://datalemur.com/questions/user-retention
๊ฒฐ๊ณผ: ์ฌ์ดํธ์์ ๋ฌธ์ ๋ฅผ ํ๊ณ ์ ๋ต SQL ์ฝ๋๋ฅผ ์ ์ถ
โป ํด๋น ์ฌ์ดํธ์์๋ PostgreSQL๋ก ์์ฑํ๊ฒ ๋์ด์๋ค
โป ํ์๋ Mysql๋ฌธ๋ฒ์ ์์ฑํ ํ PostgreSQL ์ผ๋ก ๋ฐ๊ฟ์ ์ ์ถํจ
SELECT *
FROM user_actions
[ ์ ๋ต ์ฝ๋ ]
( test๋ฐ์ดํฐ๋ฒ ์ด์ค์ user_actions ํ ์ด๋ธ )
SELECT month(curr_month.event_date) AS MONTH,
count(DISTINCT user_id) AS monthly_active_users
FROM user_actions AS curr_month
WHERE EXISTS (
SELECT last_month.user_id
FROM user_actions AS last_month
WHERE curr_month.user_id = last_month.user_id
AND month(last_month.event_date) = month(date_sub(curr_month.event_date, INTERVAL 1 month))
)
AND month(curr_month.event_date) = 7
AND year(curr_month.event_date) = 2022
GROUP BY month(curr_month.event_date)
[ ์ถ๋ ฅ ๊ฒฐ๊ณผ ]
< 4-2. ์ฝํธํธ ๋ฆฌํ ์ ๊ณ์ฐ >
๋ชฉํ: ์ธ์ ์์ ์ฝํธํธ ๋ฆฌํ ์ (๊ณ ๊ฐ์ ์)๋ฅผ ๊ตฌํ ๊ฒ์ ๊ธฐ๋ฐ์ผ๋ก ์ฌ๊ตฌ๋งค ํด๋์ ๋ฆฌํ ์ ์ ๋น์จ๋ก ๊ตฌํ๊ธฐ
- ํ์ต ๋ชฉํ : ์ฝํธํธ ๋ฆฌํ ์ ์ ํ์์ฑ์ ๋ํด ์ดํดํ๊ณ ์ด๋ฅผ SQL๋ก ๊ตฌํํ ์ ์๋ค.
- solvesql์ ๊ตฌ๊ธ ๋ก๊ทธ์ธ์ผ๋ก ๊ฐ์ ํ๋ค.
- ํ๋ ์ด๊ทธ๋ผ์ด๋๋ก ๋ค์ด๊ฐ๋ค.
- ๋ฐ์ดํฐ ๋ฒ ์ด์ค์์ UK E-Commerce Orders๋ฅผ ํด๋ฆญํ๋ค.
๊ฒฐ๊ณผ: ์๋ณ ์ฌ๊ตฌ๋งค ๋ฆฌํ ์ ์ ๋ฐฑ๋ถ์จ (์์์ ๋๋ฒ์งธ์ง๋ฆฌ๊น์ง)๋ก ๊ตฌํด SQL ์ฝ๋๋ฅผ ์ ์ถ
( test๋ฐ์ดํฐ๋ฒ ์ด์ค์ user_actions ํ ์ด๋ธ )
SELECT *
FROM sales_transaction_v
< ํต์ฌ ๊ฐ๋ >
# ๋ฌธ์์ด์ ๋ ์ง ํ์์ผ๋ก ๋ฐ๊พธ๊ธฐ
STR_TO_DATE(Date, '%m/%d/%Y') as Date
# ํ์ฌ ๊ฐ ํ์ ๊ทธ๋๋ก ๋งค๊ฐ๋ณ์์ ์ ๋ฌ
# %Y / %m / %d
date_format(๋ ์ง์ปฌ๋ผ, '%Y-%m-%d')
%Y
4์๋ฆฌ ๋ ๋
%m
2์๋ฆฌ ์
%d
2์๋ฆฌ ์ผ
%01
01์ด๋ผ๋ ๊ณ ์ ๋๊ฐ
# ํต์ฌ๊ตฌ๋ฌธ
count( distinct / case when ๊ตฌ๋ฌธ / date_add )
count(distinct ์ปฌ๋ผ) * 100.0 / count(distinct ์ปฌ๋ผ)
# ์ธ์์ ๊ตฌํ๊ธฐ
[ ์ ๋ต ์ฝ๋ ]
WITH first AS (
SELECT CustomerNo,
TransactionNo,
str_to_date(Date, '%m/%d/%Y') AS Date,
min(str_to_date(Date, '%m/%d/%Y')) OVER (PARTITION BY CustomerNo ORDER BY str_to_date(Date, '%m/%d/%Y'))
AS first_order_date
FROM sales_transaction_v
WHERE CustomerNo IS NOT NULL
), mon AS (
SELECT CustomerNo,
TransactionNo,
Date,
date_format(Date, '%Y-%m-%01') AS order_month,
date_format(first_order_date, '%Y-%m-%01') AS first_order_month
FROM FIRST
)
SELECT first_order_month,
count(DISTINCT CustomerNo) AS month0,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 1 month) = order_month THEN CustomerNo ELSE NULL END) AS month1,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 2 month) = order_month THEN CustomerNo ELSE NULL END) AS month2,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 3 month) = order_month THEN CustomerNo ELSE NULL END) AS month3,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 4 month) = order_month THEN CustomerNo ELSE NULL END) AS month4,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 5 month) = order_month THEN CustomerNo ELSE NULL END) AS month5,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 6 month) = order_month THEN CustomerNo ELSE NULL END) AS month6,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 7 month) = order_month THEN CustomerNo ELSE NULL END) AS month7,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 8 month) = order_month THEN CustomerNo ELSE NULL END) AS month8,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 9 month) = order_month THEN CustomerNo ELSE NULL END) AS month9,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 10 month) = order_month THEN CustomerNo ELSE NULL END) AS month10,
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 11 month) = order_month THEN CustomerNo ELSE NULL END) AS month11
FROM mon
GROUP BY first_order_month
ORDER BY first_order_month
[ ์ถ๋ ฅ๊ฒฐ๊ณผ ]
# ๋น์จ ๊ตฌํ๊ธฐ
[ ์ ๋ต ์ฝ๋ ]
WITH first AS (
SELECT CustomerNo,
TransactionNo,
str_to_date(Date, '%m/%d/%Y') AS Date,
min(str_to_date(Date, '%m/%d/%Y')) OVER (PARTITION BY CustomerNo ORDER BY str_to_date(Date, '%m/%d/%Y'))
AS first_order_date
FROM sales_transaction_v
WHERE CustomerNo IS NOT NULL
), mon AS (
SELECT CustomerNo,
TransactionNo,
Date,
date_format(Date, '%Y-%m-%01') AS order_month,
date_format(first_order_date, '%Y-%m-%01') AS first_order_month
FROM FIRST
)
SELECT first_order_month,
round(count(DISTINCT CustomerNo)*100.0 / count(DISTINCT CustomerNo), 2) AS month0,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 1 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month1,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 2 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month2,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 3 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month3,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 4 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month4,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 5 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month5,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 6 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month6,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 7 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month7,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 8 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month8,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 9 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month9,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 10 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month10,
round(
count(DISTINCT CASE WHEN date_add(first_order_month, INTERVAL 11 month) = order_month THEN CustomerNo ELSE NULL END)*100.0
/ count(DISTINCT CustomerNo), 2) AS month11
FROM mon
GROUP BY first_order_month
ORDER BY first_order_month
[ ์ถ๋ ฅ ๊ฒฐ๊ณผ ]