2021.11.18Zeppelin电子商务消费行为分析代码记录

目录

 6.1找出顾客最常用的信用卡

 6.2找出客户资料中排名前五的职位名称

 6.3在美国女性最常用的信用卡

 6.4按性别和国家进行客户统计

 7.1计算每月总收入

 7.2计算每个季度的总收入 

 7.3按年计算总收入

 7.4按工作日计算总收入

 7.5按时间段计算总收入(需要清理数据)

 7.5按季度、时间段计算总收入(需要清理数据)

 7.6按时间段计算平均消费 

 7.7按工作日计算平均消费

 7.8计算年、月、日的交易总数 

 7.9找出交易量最大的10个客户

 7.10找出消费最多的前10位顾客

 7.11统计该期间交易数量最少的用户

 7.12计算每个季度的独立客户总数

 7.13计算每周的独立客户总数

 7.14计算整个活动客户平均花费的最大值1

 7.14计算整个活动客户平均花费的最大值2

 7.15统计每月花费最多的客户

 7.16统计每月访问次数最多的客户

 7.17按总价找出最受欢迎的5种产品

 7.18根据购买频率找出最畅销的5种产品

 7.19根据客户数量找出最受欢迎的5种产品

 8.1按客流量找出最受欢迎的商店

 8.2根据顾客消费价格找出最受欢迎的商店 

 8.3根据顾客交易情况找出最受欢迎的商店

 8.4根据商店和唯一的顾客id获取最受欢迎的产品

 8.5获取每个商店的员工与顾客比

 8.6按年和月计算每家店的收入

 8.7按店铺制作总收益饼图

 8.8找出每个商店最繁忙的时间段

 8.9找出每家店的忠实顾客

 8.10根据每位员工的最高收入找出明星商店

 9.1在ext_store_review中找出存在冲突的交易映射关系

 9.2了解客户评价的覆盖率

 9.3根据评分了解客户的分布情况

 9.4根据交易了解客户的分布情况

 9.5客户给出的最佳评价是否总是同一家门店


 6.1找出顾客最常用的信用卡

%hive
select country, credit_type, count(distinct credit_no) num from vw_customer_details group by country,credit_type order by num desc

 6.2找出客户资料中排名前五的职位名称

%hive
-- use shopping
-- select * from vw_customer_details
select job,count(*) pn from  vw_customer_details group by job  order by pn desc limit 5

 6.3在美国女性最常用的信用卡

%hive
select credit_type,count(*) pn from vw_customer_details where country='United States' and gender='Female' group by  credit_type order by pn desc limit 3

 6.4按性别和国家进行客户统计

%hive
select country,gender,count(*) from vw_customer_details group by country,gender 

 7.1计算每月总收入

%hive
select purchase_month,sum(price) per_month from transaction_details group by purchase_month

 7.2计算每个季度的总收入 

%hive
with
t1 as 
(select  price,purchase_date,concat_ws("-",substring(purchase_date,1,4),cast(ceil(month(purchase_date)/3.0) as string)) year_quarter from  transaction_details)
select year_quarter,sum(price) from t1 group by year_quarter

 7.3按年计算总收入

%hive
with
t1 as 
(select price,substr(purchase_date,1,4) year from transaction_details)
select year,sum(price) from t1 group by year

 7.4按工作日计算总收入

%hive
with
t1 as
(select price,date_format(purchase_date,'u') weekday from  transaction_details)
select weekday,sum(price) from t1 where weekday in(1,2,3,4,5) group by weekday 

 7.5按时间段计算总收入(需要清理数据)

%hive
-- select * from  transaction_details
with 
t1 as
(select price,purchase_time, if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select price,purchase_time,time_format,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select price,purchase_time,time_format,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"early morning",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"morning",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"noon",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"afternoon",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"early evening","night"))))) hour_type
from t2)
select hour_type,sum(price) from t3 group by hour_type

 7.5按季度、时间段计算总收入(需要清理数据)

%hive
with 
t1 as
(select price,purchase_time, purchase_date,if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select price,purchase_time,time_format,purchase_date,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select price,purchase_time,time_format,purchase_date,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"5-8",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"8-11",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"11-13",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"13-17",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"17-22","22-5"))))) hour_type
from t2),
t4 as 
(select price,purchase_date,concat_ws("-",substring(purchase_date,1,4),cast(ceil(month(purchase_date)/3.0) as string)) year_quarter,hour_type from  t3) 
select year_quarter,hour_type,sum(price) from t4 group by year_quarter,hour_type

 7.6按时间段计算平均消费 

%hive
with 
t1 as
(select price,purchase_time, if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select price,purchase_time,time_format,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select price,purchase_time,time_format,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"early morning",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"morning",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"noon",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"afternoon",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"early evening","night"))))) hour_type
from t2)

 7.7按工作日计算平均消费

%hive 
with 
t1 as
(select price,date_format(purchase_date,'u') weekday from transaction_details)
select avg (price),weekday from t1 where weekday in (1,2,3,4,5) 
group by weekday

 7.8计算年、月、日的交易总数 

%hive
select purchase_date,sum(price) from transaction_details
group by purchase_date

 7.9找出交易量最大的10个客户

%hive
with
t1 as(
select customer_id,count(transaction_id) tran_cnt from transaction_details group by customer_id order by tran_cnt limit 10)
select t1.customer_id,concat(first_name,"",last_name)from t1 left join vw_customer_details vcd on t1.customer_id=vcd.customer_id

 7.10找出消费最多的前10位顾客

%hive
with
t1 as(
select customer_id,sum(price) price_sum from transaction_details group by customer_id order by price_sum limit 10)
select t1.customer_id,concat(first_name,"",last_name)from t1 left join vw_customer_details vcd on t1.customer_id=vcd.customer_id

 7.11统计该期间交易数量最少的用户

%hive
with
t1 as(
select customer_id,sum(price) price_sum from transaction_details group by customer_id order by price_sum asc limit 1)
select t1.customer_id,concat(first_name,"",last_name)from t1 left join vw_customer_details vcd on t1.customer_id=vcd.customer_id

 7.12计算每个季度的独立客户总数

 

%hive
with
t1 as (
select transaction_id,
concat_ws('-',substr(purchase_date,1,4),cast(ceil(month(purchase_date)/3.0) as string))  as  year_quarter 
from transaction_details)
select year_quarter,count(transaction_id) from t1 group by year_quarter order by year_quarter

 7.13计算每周的独立客户总数

%hive
with
t1 as 
(select transaction_id,concat_ws('-',substr(purchase_date,1,4),cast(weekofyear(purchase_date)as string)) as weeknum from transaction_details)
select weeknum,count( transaction_id) from t1 group by weeknum

 7.14计算整个活动客户平均花费的最大值1

%hive
select customer_id,avg(price) price_avg
from transaction_details
group by customer_id
order by price_avg desc 
limit 1

 7.14计算整个活动客户平均花费的最大值2

%hive
with 
t1 as (select customer_id,avg(price) price_avg from  transaction_details
group by customer_id)
select max(price_avg) from t1 

 7.15统计每月花费最多的客户

%hive
with
t1 as 
(select  customer_id,purchase_month,sum(price) price_sum from transaction_details group by purchase_month,customer_id),
t2 as 
(select rank() over(partition by purchase_month order by price_sum desc ) rn_sum,
purchase_month,
price_sum,
customer_id
from t1)
select purchase_month,price_sum,customer_id from t2 where rn_sum=1;

 7.16统计每月访问次数最多的客户

%hive
with
t1 as 
(select  customer_id,purchase_month,sum(price) price_sum,count(transaction_id) tran_cnt from transaction_details group by purchase_month,customer_id),
t2 as (select
rank() over(partition by purchase_month order by price_sum desc ) rn_sum,
rank() over(partition by purchase_month order by tran_cnt desc ) rn_cnt,
purchase_month,
price_sum,
tran_cnt,
customer_id
from t1),
t3 as(
select purchase_month,'max' a,price_sum as value,customer_id from t2 where rn_sum=1
union all
select purchase_month,'visit' a,tran_cnt as value,customer_id from t2 where rn_cnt=1)
select purchase_month,a,value from t3 group by purchase_month,a,value

 7.17按总价找出最受欢迎的5种产品

%hive
select product,sum(price)  pro_price from transaction_details group by product order by pro_price desc limit 5

 7.18根据购买频率找出最畅销的5种产品

%hive 
select product,count(transaction_id) pro_count from transaction_details group by product order by pro_count desc limit 5

 7.19根据客户数量找出最受欢迎的5种产品

%hive
select product,count(distinct customer_id) cus_num from transaction_details group by product order by cus_num desc limit 5

 8.1按客流量找出最受欢迎的商店

%hive
with
t1 as (
select store_id,count(distinct customer_id) cnt from transaction_details group by store_id order by cnt desc limit 1)
select t1.store_id,esd.store_name from t1 left join ext_store_details esd on t1.store_id=esd.store_id

MONTH MAX VISIT  

%hive
create table tmp_user as
with
t1 as 
(select  customer_id,purchase_month,sum(price) price_sum,count(transaction_id) tran_cnt from transaction_details group by purchase_month,customer_id),
t2 as (select
rank() over(partition by purchase_month order by price_sum desc ) rn_sum,
rank() over(partition by purchase_month order by tran_cnt desc ) rn_cnt,
purchase_month,
price_sum,
tran_cnt,
customer_id
from t1),
t3 as(
select purchase_month,'max' a,price_sum as value,customer_id from t2 where rn_sum=1
union all
select purchase_month,'visit' a,tran_cnt as value,customer_id from t2 where rn_cnt=1)
select purchase_month,a,value from t3 group by purchase_month,a,value

%hive
select purchase_month,
max(case when a='max' then value else 0 end ) as max ,
max(case when a='visit' then value else 0 end ) as visit,
max(if(a='max',value,0)) as maxif,
max(if(a='visit',value,0)) as visitif
from tmp_user group by purchase_month;

 8.2根据顾客消费价格找出最受欢迎的商店 

%hive
select esd.store_name, td.store_id,sum(price) sum_price  from transaction_details td right join ext_store_details esd on td.store_id=esd.store_id group by esd.store_name,td.store_id order by  sum_price  desc  limit 1

 8.3根据顾客交易情况找出最受欢迎的商店

%hive
select esd.store_name, td.store_id,count(transaction_id) cnt_tran  from transaction_details td right join ext_store_details esd on td.store_id=esd.store_id group by esd.store_name,td.store_id order by  cnt_tran   desc  limit 1

 8.4根据商店和唯一的顾客id获取最受欢迎的产品

%hive
with 
t1 as 
(select store_id,product,count(distinct(customer_id))  frq_num from transaction_details group by store_id,product),
t2 as 
(select store_id,product,frq_num,rank() over( partition by store_id order by frq_num desc ) rn
from t1)
select  t2.store_id,esd.store_name,t2.product from t2 left join ext_store_details esd on esd.store_id=t2.store_id
where t2.rn=1

 8.5获取每个商店的员工与顾客比

%hive
with 
t1 as 
(select store_id,count(distinct customer_id) visitor_num from transaction_details
group by store_id)
select t1.store_id,esd.store_name, round(esd.employee_number/t1.visitor_num,2) from t1 
left join ext_store_details esd on t1.store_id=esd.store_id

 8.6按年和月计算每家店的收入

%hive
with
t1 as 
(select price,substr(purchase_month,1,7) ym from transaction_details)
select ym,sum(price) from t1 group by ym

%hive
with
t1 as 
(select store_id,price,substr(purchase_month,1,7) ym from transaction_details)
select store_id,ym,sum(price) from t1 group by ym,store_id

 8.7按店铺制作总收益饼图

%hive
select store_id,sum(price) from transaction_details group by store_id

 8.8找出每个商店最繁忙的时间段

%hive
with 
t1 as(
select store_id,price,purchase_time, if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select store_id,price,purchase_time,time_format,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select store_id,price,purchase_time,time_format,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"5-8",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"8-11",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"11-13",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"13-17",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"17-22","22-5"))))) hour_type
from t2),
t4 as(
select store_id,hour_type,sum(price) sum from t3 group by store_id,hour_type
),
t5 as
(select store_id,hour_type,sum,dense_rank() over(partition by store_id order by sum desc) rk from t4)
select store_id,hour_type,sum from t5
where rk=1

 8.9找出每家店的忠实顾客

%hive
with
t1 as 
(select store_id,customer_id,count(customer_id) cnt from transaction_details group by store_id,customer_id),
t2 as 
(select store_id,customer_id,dense_rank() over(partition by store_id order by cnt desc) rk from t1)
select  store_id,customer_id,rk from t2
where rk=1

 8.10根据每位员工的最高收入找出明星商店

%hive
with 
t1 as 
(select store_id,sum(price) sp from transaction_details group by store_id),
t2 as
(select t1.store_id,esd.store_name,sp,esd.employee_number,(sp/esd.employee_number) wage from t1 
join ext_store_details esd on t1.store_id =esd.store_id),
t3 as 
(select store_id,store_name,sp,employee_number,wage,dense_rank() over(order by wage desc) rk from t2)
select store_id,store_name,sp,employee_number,wage,rk from t3  
where rk=1

 9.1在ext_store_review中找出存在冲突的交易映射关系

%hive
select transaction_id from ext_store_review 
group by transaction_id
having count(1)>1

 9.2了解客户评价的覆盖率

%hive
select store_id,count(1) from ext_store_review group by store_id

 9.3根据评分了解客户的分布情况

%hive
select store_id,review_score,count(1) from ext_store_review group by store_id,review_score

 9.4根据交易了解客户的分布情况

%hive
select store_id,customer_id,count(1) from  transaction_details group by store_id,customer_id

 9.5客户给出的最佳评价是否总是同一家门店

%hive
with
t1 as
(select store_id,review_score from ext_store_review  where review_score='5')
select store_id,count(1) from t1 group by store_id 
上一篇:记录react redux store中item的值改变而页面未渲染的坑


下一篇:集成VUEX+sessionStorage保存登入信息