MYSQL练习:用户消费行为分析
练习相关资料:
数据源:https://pan.baidu.com/s/11ZtjKnv5-nwyf6cMyk_3JQ 提取:yu63
课程参考连接:《秦路数据分析第5周(12-15节)》https://www.bilibili.com/video/BV1cE411F7fa?p=12
建表逻辑:
1)CMD命令方式:参见B站连接
2)Kettle方式:https://www.jianshu.com/p/4f0a10ea170e
3)Navicat:创建Userbuy数据库,点击菜单“表”——>点击导入向导——>涉及各列字段类型
数据库版本:MYSQL 8.0.18
一、分析维度:
1-统计不同月份的下单人数 |
2-统计用户三月份的回购率和复购率 |
2-1计算各月份的复购率 |
3-统计男女的消费频次是否有差异 |
4-统计多次消费的用户,第一次和最后一次消费时间的间隔 |
5-统计不同年龄段的用户消费金额是否有差异 |
6-统计消费的二八法则,消费的top20%用户,贡献了多少额度 |
二、实操
1-统计不同月份的下单人数
1)方法1
-- 方法1
SELECT DATE_FORMAT(PAIDTIME,'%Y-%M'),Count(DISTINCT userid)
FROM order_info_utf
where ISPAID = '已支付'
GROUP BY DATE_FORMAT(PAIDTIME,'%Y-%M');
-- 2)方法2
SELECT month(PAIDTIME),Count(DISTINCT userid)
FROM order_info_utf
where ISPAID = '已支付'
GROUP BY month(PAIDTIME);
2-统计用户三月份的回购率和复购率
-- 1)复购率
select count(tt.userid)'三月购买总用户数',count(tt.userid_count)'多次购买用户数',(count(tt.userid_count)/count(tt.userid))复购率
from
(SELECT t.userid,case when t.userid_count>1 then 1 else null end userid_count
FROM
(SELECT userid,count(userid)userid_count
from order_info_utf
where ispaid= '已支付' and month(paidtime)='03'
group by userid)t)tt;
-- 2)回购率
SELECT t1.m,COUNT(t1.m),COUNT(t2.m),round((COUNT(t2.m)/COUNT(t1.m)),4)回购率
from
(SELECT USERID,DATE_FORMAT(PAIDTIME,'%y-%m-01')as m
from order_info_utf
where ispaid= '已支付'
GROUP BY USERID,DATE_FORMAT(PAIDTIME,'%y-%m-01'))t1
LEFT JOIN
(SELECT USERID,DATE_FORMAT(PAIDTIME,'%y-%m-01')as m
from order_info_utf
where ispaid= '已支付'
GROUP BY USERID,DATE_FORMAT(PAIDTIME,'%y-%m-01'))t2
on t1.userid=t2.userid and date(t1.m)=date(DATE_sub(t2.m,INTERVAL 1 month))
GROUP BY t1.m;
1)复购率
2)回购率
2-1计算各月份的复购率
SELECT cm as 月份,count(cm)用户数,count(if(ct>1,1,null))复购用户数,ROUND(count(if(ct>1,1,null))/count(cm),2)复购率
from
(SELECT month(paidtime) as cm,USERID,count(USERID)as ct
from order_info_utf
where ISPAID = '已支付'
GROUP BY month(paidtime),USERID)t
GROUP BY cm;
3-统计男女的消费频次是否有差异
其中 where sex is not null=where sex<>''
SELECT t2.sex,avg(ct)
from
(SELECT userid,count(1) as ct
from order_info_utf
where ispaid='已支付'
GROUP BY userid)t1
INNER JOIN
(SELECT userid,sex
from user_info_utf as u
where sex is not null )t2
on t1.userid=t2.userid
GROUP BY t2.sex;
4-统计多次消费的用户,第一次和最后一次消费时间的间隔
SELECT userid,max(paidtime),min(paidtime),DATEDIFF(max(paidtime),min(paidtime))
from order_info_utf
where ispaid='已支付'
group by userid
having count(*)>1;
5-统计不同年龄段的用户消费金额是否有差异
1)先用with创建age表,把user_info_utf表中的年龄段进行划分(通过case when)
2)将 age表,order_info_utf表中已支付的人内连接(inner join),统计不同年龄段的消费额
with age as(
SELECT age1.userid,
(CASE
when age1.age BETWEEN 10 and 19 then '10-19岁'
when age1.age between 20 and 29 then '20-29岁'
when age1.age between 30 and 39 then '30-39岁'
when age1.age between 40 and 49 then '40-49岁'
when age1.age between 50 and 59 then '50-59岁'
when age1.age between 60 and 69 then '60-69岁'
when age1.age between 70 and 79 then '70-79岁'
else '80以上' end) NLD
from
(SELECT userid,(year(CURDATE())-year(birth))age
from user_info_utf
where birth is not null)age1)
SELECT NLD,ROUND(AVG(price),2)
from (SELECT * from order_info_utf where ISPAID = '已支付')t2
INNER JOIN age
on t2.userid=age.userid
group by NLD
order by NLD ASC
6-统计消费的二八法则,消费的top20%用户,贡献了多少额度
-- 方法1
select sum(total) from -- top20%用户贡献的总额度
(select *,row_number()over(order by total desc)as '排序' from
(select userid,round(sum(cast(price as float)),2) total from order_info_utf where ISPAID = '已支付' group by USERID
) aa) tt
where 排序< (select count(distinct USERID)* 0.2 from order_info_utf where ISPAID = '已支付');
-- 方法2
/*先计算count*0.2(即20%的头部用户的人数),得出人数约为17000,然后看17000人的消费金额*/
select count(userId),sum(total) from (
select userId,sum(price) as total from order_info_utf o
where isPaid = '已支付'
group by userId
order by total desc
limit 17000)t ;
1)方法1
2)方法