目录
9.1在ext_store_review中找出存在冲突的交易映射关系
6.1找出顾客最常用的信用卡
%hive
select country, credit_type, count(distinct credit_no) num from vw_customer_details group by country,credit_type order by num desc
6.2找出客户资料中排名前五的职位名称
%hive
-- use shopping
-- select * from vw_customer_details
select job,count(*) pn from vw_customer_details group by job order by pn desc limit 5
6.3在美国女性最常用的信用卡
%hive
select credit_type,count(*) pn from vw_customer_details where country='United States' and gender='Female' group by credit_type order by pn desc limit 3
6.4按性别和国家进行客户统计
%hive
select country,gender,count(*) from vw_customer_details group by country,gender
7.1计算每月总收入
%hive
select purchase_month,sum(price) per_month from transaction_details group by purchase_month
7.2计算每个季度的总收入
%hive
with
t1 as
(select price,purchase_date,concat_ws("-",substring(purchase_date,1,4),cast(ceil(month(purchase_date)/3.0) as string)) year_quarter from transaction_details)
select year_quarter,sum(price) from t1 group by year_quarter
7.3按年计算总收入
%hive
with
t1 as
(select price,substr(purchase_date,1,4) year from transaction_details)
select year,sum(price) from t1 group by year
7.4按工作日计算总收入
%hive
with
t1 as
(select price,date_format(purchase_date,'u') weekday from transaction_details)
select weekday,sum(price) from t1 where weekday in(1,2,3,4,5) group by weekday
7.5按时间段计算总收入(需要清理数据)
%hive
-- select * from transaction_details
with
t1 as
(select price,purchase_time, if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select price,purchase_time,time_format,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select price,purchase_time,time_format,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"early morning",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"morning",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"noon",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"afternoon",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"early evening","night"))))) hour_type
from t2)
select hour_type,sum(price) from t3 group by hour_type
7.5按季度、时间段计算总收入(需要清理数据)
%hive
with
t1 as
(select price,purchase_time, purchase_date,if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select price,purchase_time,time_format,purchase_date,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select price,purchase_time,time_format,purchase_date,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"5-8",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"8-11",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"11-13",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"13-17",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"17-22","22-5"))))) hour_type
from t2),
t4 as
(select price,purchase_date,concat_ws("-",substring(purchase_date,1,4),cast(ceil(month(purchase_date)/3.0) as string)) year_quarter,hour_type from t3)
select year_quarter,hour_type,sum(price) from t4 group by year_quarter,hour_type
7.6按时间段计算平均消费
%hive
with
t1 as
(select price,purchase_time, if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select price,purchase_time,time_format,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select price,purchase_time,time_format,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"early morning",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"morning",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"noon",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"afternoon",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"early evening","night"))))) hour_type
from t2)
7.7按工作日计算平均消费
%hive
with
t1 as
(select price,date_format(purchase_date,'u') weekday from transaction_details)
select avg (price),weekday from t1 where weekday in (1,2,3,4,5)
group by weekday
7.8计算年、月、日的交易总数
%hive
select purchase_date,sum(price) from transaction_details
group by purchase_date
7.9找出交易量最大的10个客户
%hive
with
t1 as(
select customer_id,count(transaction_id) tran_cnt from transaction_details group by customer_id order by tran_cnt limit 10)
select t1.customer_id,concat(first_name,"",last_name)from t1 left join vw_customer_details vcd on t1.customer_id=vcd.customer_id
7.10找出消费最多的前10位顾客
%hive
with
t1 as(
select customer_id,sum(price) price_sum from transaction_details group by customer_id order by price_sum limit 10)
select t1.customer_id,concat(first_name,"",last_name)from t1 left join vw_customer_details vcd on t1.customer_id=vcd.customer_id
7.11统计该期间交易数量最少的用户
%hive
with
t1 as(
select customer_id,sum(price) price_sum from transaction_details group by customer_id order by price_sum asc limit 1)
select t1.customer_id,concat(first_name,"",last_name)from t1 left join vw_customer_details vcd on t1.customer_id=vcd.customer_id
7.12计算每个季度的独立客户总数
%hive
with
t1 as (
select transaction_id,
concat_ws('-',substr(purchase_date,1,4),cast(ceil(month(purchase_date)/3.0) as string)) as year_quarter
from transaction_details)
select year_quarter,count(transaction_id) from t1 group by year_quarter order by year_quarter
7.13计算每周的独立客户总数
%hive
with
t1 as
(select transaction_id,concat_ws('-',substr(purchase_date,1,4),cast(weekofyear(purchase_date)as string)) as weeknum from transaction_details)
select weeknum,count( transaction_id) from t1 group by weeknum
7.14计算整个活动客户平均花费的最大值1
%hive
select customer_id,avg(price) price_avg
from transaction_details
group by customer_id
order by price_avg desc
limit 1
7.14计算整个活动客户平均花费的最大值2
%hive
with
t1 as (select customer_id,avg(price) price_avg from transaction_details
group by customer_id)
select max(price_avg) from t1
7.15统计每月花费最多的客户
%hive
with
t1 as
(select customer_id,purchase_month,sum(price) price_sum from transaction_details group by purchase_month,customer_id),
t2 as
(select rank() over(partition by purchase_month order by price_sum desc ) rn_sum,
purchase_month,
price_sum,
customer_id
from t1)
select purchase_month,price_sum,customer_id from t2 where rn_sum=1;
7.16统计每月访问次数最多的客户
%hive
with
t1 as
(select customer_id,purchase_month,sum(price) price_sum,count(transaction_id) tran_cnt from transaction_details group by purchase_month,customer_id),
t2 as (select
rank() over(partition by purchase_month order by price_sum desc ) rn_sum,
rank() over(partition by purchase_month order by tran_cnt desc ) rn_cnt,
purchase_month,
price_sum,
tran_cnt,
customer_id
from t1),
t3 as(
select purchase_month,'max' a,price_sum as value,customer_id from t2 where rn_sum=1
union all
select purchase_month,'visit' a,tran_cnt as value,customer_id from t2 where rn_cnt=1)
select purchase_month,a,value from t3 group by purchase_month,a,value
7.17按总价找出最受欢迎的5种产品
%hive
select product,sum(price) pro_price from transaction_details group by product order by pro_price desc limit 5
7.18根据购买频率找出最畅销的5种产品
%hive
select product,count(transaction_id) pro_count from transaction_details group by product order by pro_count desc limit 5
7.19根据客户数量找出最受欢迎的5种产品
%hive
select product,count(distinct customer_id) cus_num from transaction_details group by product order by cus_num desc limit 5
8.1按客流量找出最受欢迎的商店
%hive
with
t1 as (
select store_id,count(distinct customer_id) cnt from transaction_details group by store_id order by cnt desc limit 1)
select t1.store_id,esd.store_name from t1 left join ext_store_details esd on t1.store_id=esd.store_id
MONTH MAX VISIT
%hive
create table tmp_user as
with
t1 as
(select customer_id,purchase_month,sum(price) price_sum,count(transaction_id) tran_cnt from transaction_details group by purchase_month,customer_id),
t2 as (select
rank() over(partition by purchase_month order by price_sum desc ) rn_sum,
rank() over(partition by purchase_month order by tran_cnt desc ) rn_cnt,
purchase_month,
price_sum,
tran_cnt,
customer_id
from t1),
t3 as(
select purchase_month,'max' a,price_sum as value,customer_id from t2 where rn_sum=1
union all
select purchase_month,'visit' a,tran_cnt as value,customer_id from t2 where rn_cnt=1)
select purchase_month,a,value from t3 group by purchase_month,a,value
%hive
select purchase_month,
max(case when a='max' then value else 0 end ) as max ,
max(case when a='visit' then value else 0 end ) as visit,
max(if(a='max',value,0)) as maxif,
max(if(a='visit',value,0)) as visitif
from tmp_user group by purchase_month;
8.2根据顾客消费价格找出最受欢迎的商店
%hive
select esd.store_name, td.store_id,sum(price) sum_price from transaction_details td right join ext_store_details esd on td.store_id=esd.store_id group by esd.store_name,td.store_id order by sum_price desc limit 1
8.3根据顾客交易情况找出最受欢迎的商店
%hive
select esd.store_name, td.store_id,count(transaction_id) cnt_tran from transaction_details td right join ext_store_details esd on td.store_id=esd.store_id group by esd.store_name,td.store_id order by cnt_tran desc limit 1
8.4根据商店和唯一的顾客id获取最受欢迎的产品
%hive
with
t1 as
(select store_id,product,count(distinct(customer_id)) frq_num from transaction_details group by store_id,product),
t2 as
(select store_id,product,frq_num,rank() over( partition by store_id order by frq_num desc ) rn
from t1)
select t2.store_id,esd.store_name,t2.product from t2 left join ext_store_details esd on esd.store_id=t2.store_id
where t2.rn=1
8.5获取每个商店的员工与顾客比
%hive
with
t1 as
(select store_id,count(distinct customer_id) visitor_num from transaction_details
group by store_id)
select t1.store_id,esd.store_name, round(esd.employee_number/t1.visitor_num,2) from t1
left join ext_store_details esd on t1.store_id=esd.store_id
8.6按年和月计算每家店的收入
%hive
with
t1 as
(select price,substr(purchase_month,1,7) ym from transaction_details)
select ym,sum(price) from t1 group by ym
%hive
with
t1 as
(select store_id,price,substr(purchase_month,1,7) ym from transaction_details)
select store_id,ym,sum(price) from t1 group by ym,store_id
8.7按店铺制作总收益饼图
%hive
select store_id,sum(price) from transaction_details group by store_id
8.8找出每个商店最繁忙的时间段
%hive
with
t1 as(
select store_id,price,purchase_time, if(purchase_time like '%M',from_unixtime(unix_timestamp(purchase_time,'hh:mm aa'),'HH:mm'),purchase_time) time_format from transaction_details),
t2 as (
select store_id,price,purchase_time,time_format,
(cast(split(time_format,":")[0] as decimal(4,2))+
cast(split(time_format,":")[1] as decimal(4,2))/60) purchase_time_in_hrs
from t1),
t3 as(
select store_id,price,purchase_time,time_format,
if(purchase_time_in_hrs>5 and purchase_time_in_hrs<=8,"5-8",
if(purchase_time_in_hrs>8 and purchase_time_in_hrs<=11,"8-11",
if(purchase_time_in_hrs>11 and purchase_time_in_hrs<=13,"11-13",
if(purchase_time_in_hrs>13 and purchase_time_in_hrs<=17,"13-17",
if(purchase_time_in_hrs>17 and purchase_time_in_hrs<=22,"17-22","22-5"))))) hour_type
from t2),
t4 as(
select store_id,hour_type,sum(price) sum from t3 group by store_id,hour_type
),
t5 as
(select store_id,hour_type,sum,dense_rank() over(partition by store_id order by sum desc) rk from t4)
select store_id,hour_type,sum from t5
where rk=1
8.9找出每家店的忠实顾客
%hive
with
t1 as
(select store_id,customer_id,count(customer_id) cnt from transaction_details group by store_id,customer_id),
t2 as
(select store_id,customer_id,dense_rank() over(partition by store_id order by cnt desc) rk from t1)
select store_id,customer_id,rk from t2
where rk=1
8.10根据每位员工的最高收入找出明星商店
%hive
with
t1 as
(select store_id,sum(price) sp from transaction_details group by store_id),
t2 as
(select t1.store_id,esd.store_name,sp,esd.employee_number,(sp/esd.employee_number) wage from t1
join ext_store_details esd on t1.store_id =esd.store_id),
t3 as
(select store_id,store_name,sp,employee_number,wage,dense_rank() over(order by wage desc) rk from t2)
select store_id,store_name,sp,employee_number,wage,rk from t3
where rk=1
9.1在ext_store_review中找出存在冲突的交易映射关系
%hive
select transaction_id from ext_store_review
group by transaction_id
having count(1)>1
9.2了解客户评价的覆盖率
%hive
select store_id,count(1) from ext_store_review group by store_id
9.3根据评分了解客户的分布情况
%hive
select store_id,review_score,count(1) from ext_store_review group by store_id,review_score
9.4根据交易了解客户的分布情况
%hive
select store_id,customer_id,count(1) from transaction_details group by store_id,customer_id
9.5客户给出的最佳评价是否总是同一家门店
%hive
with
t1 as
(select store_id,review_score from ext_store_review where review_score='5')
select store_id,count(1) from t1 group by store_id