-- 数据准备 create database taobao; use taobao; create table UserBehavior( user_id int, item_id int, item_category int, behavior_type varchar(10), user_geohash varchar(10), times datetime, amount decimal(5,2) ); show variables like '%secure%';-- 查看安全路径,随后将表格粘贴到该安全路径下, load data infile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/UserBehavior.csv" into table UserBehavior fields terminated by ',' ignore 1 lines; select * from UserBehavior limit 10; select count(*) from UserBehavior;#1048575 -- 数据清洗 -- 缺失值处理 select count(user_id), count(item_id), count(item_category), count(behavior_type), count(user_geohash), count(times), count(amount) from UserBehavior; -- 异常值检查 select min(times),max(times),min(amount),max(amount) from userbehavior; -- 重复记录处理 select distinct * from userbehavior; -- 字段处理:根据times字段增加计算字段用户行为日期、周和小时,排除后续分析不需要的user_geohash字段,并将筛选后的结果保存到新表 #保存方式一:直接导出表格csv文件 select user_id, item_id, item_category, behavior_type, date(times) as 日期, hour(times) as 小时, date_format(times,'%w') as 星期, amount from (select distinct * from userbehavior) as t #从去重的表里面去查询 limit 100; #保存方式二:代码保存(至安全路径) select user_id, item_id, item_category, behavior_type, date(times) as 日期, hour(times) as 小时, date_format(times,'%w') as 星期, amount from (select distinct * from userbehavior) as t limit 100 into outfile "C:/ProgramData/MySQL/MySQL Server 8.0/Uploads/t2.csv"-- 保存到安全路径并命名为t2 fields terminated by ',' lines terminated by '\r\n'; #保存方式三:视图 #视图:优点 1.简化SQL语句,快速提取,将复杂查询语句进行封装;2.安全,保护数据,设置访问权限 让不同级别用户看到不同的视图;缺点 操作视图比直接操作基础表要慢 create view userbehavior_new as select user_id, item_id, item_category, behavior_type, date(times) as 日期, hour(times) as 小时, date_format(times,'%w') as 星期, amount from (select distinct * from userbehavior) as t; select * from userbehavior_new limit 100; select count(*) from userbehavior_new;#962097 -- 数据预览 select count(distinct user_id) as 用户数, count(distinct item_id) as 商品数, count(distinct item_category) as 类目数 from userbehavior_new; select behavior_type, count(*) as 行为次数 from userbehavior_new group by behavior_type; -- 数据分析 -- 1.流量指标分析 -- 每日PV、UV、人均浏览量、成交量、销售额 select 日期, sum(behavior_type="pv") as 浏览量, count(distinct user_id) as 访客数, sum(behavior_type="pv")/count(distinct user_id) as 人均浏览量, sum(behavior_type="buy") as 成交量, sum(if(behavior_type="buy",amount,0)) as 销售额 from userbehavior_new group by 日期; -- 周一到周日PV、UV、人均浏览量、成交量、销售额 select 星期, sum(behavior_type="pv") as 浏览量, count(distinct user_id) as 访客数, sum(behavior_type="pv")/count(distinct user_id) as 人均浏览量, sum(behavior_type="buy") as 成交量, sum(if(behavior_type="buy",amount,0)) as 销售额 from userbehavior_new group by 星期; -- 每小时PV、UV、人均浏览量、成交量、销售额 select 小时, sum(behavior_type="pv") as 浏览量, count(distinct user_id) as 访客数, sum(behavior_type="pv")/count(distinct user_id) as 人均浏览量, sum(behavior_type="buy") as 成交量, sum(if(behavior_type="buy",amount,0)) as 销售额 from userbehavior_new group by 小时; -- 2.行为转化分析(转化率=当前行为用户数/上一行为用户数) select behavior_type, count(distinct user_id) as 用户人数, lag(count(distinct user_id),1) over(order by behavior_type desc) as 上一行为用户人数, ifnull(count(distinct user_id)/lag(count(distinct user_id),1) over(order by behavior_type desc),1) as 转化率 from userbehavior_new group by behavior_type; -- 浏览—加购—购买的转化率 select behavior_type, count(distinct user_id) as 用户人数, lag(count(distinct user_id),1) over(order by behavior_type desc) as 上一行为用户人数, ifnull(count(distinct user_id)/lag(count(distinct user_id),1) over(order by behavior_type desc),1) as 转化率 from userbehavior_new where behavior_type in ('pv','cart','buy') group by behavior_type; -- 每日浏览—加购—购买的转化率 # 行转列的方法 -- charlly 整理 select 日期, sum(if(behavior_type="pv",用户人数,0)) as 浏览人数, -- 用if条件判断将行转化成列,然后对其进行聚合 sum(if(behavior_type="cart",用户人数,0)) as 加购人数, sum(if(behavior_type="cart",用户人数,0))/sum(if(behavior_type="pv",用户人数,0)) as 浏览_加购转化率, sum(if(behavior_type="buy",用户人数,0)) as 购买人数, sum(if(behavior_type="buy",用户人数,0))/sum(if(behavior_type="cart",用户人数,0)) as 加购_购买转化率 from (select 日期, behavior_type, count(distinct user_id) as 用户人数 from userbehavior_new where behavior_type in ('pv','cart','buy') group by 日期, behavior_type) as t group by 日期; -- 每日浏览—加购—购买的转化率 老师答案 select 日期, sum(if(behavior_type='pv',用户人数,0)) as 浏览人数, sum(if(behavior_type='cart',用户人数,0)) as 加购人数, sum(if(behavior_type='buy',用户人数,0)) as 购买人数, sum(if(behavior_type='cart',用户人数,0))/sum(if(behavior_type='pv',用户人数,0)) as 浏览_加购转化率, sum(if(behavior_type='buy',用户人数,0))/sum(if(behavior_type='cart',用户人数,0)) as 加购_够买转化率 from (select 日期, behavior_type, count(distinct user_id) as 用户人数 from userbehavior_new where behavior_type in ('pv','cart','buy') group by 日期,behavior_type) as t group by 日期;