(36)DWD层业务数据导入脚本

1. 编写脚本 1 )在 /home/atguigu/bin 目录下创建脚本 ods_to_dwd_db.sh [atguigu@hadoop102 bin]$ vim ods_to_dwd_db.sh 在脚本中填写如下内容 #!/bin/bash APP=gmall hive=/opt/module/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$2" ] ;then do_date=$2 else do_date=`date -d "-1 day" +%F` fi sql1=" set mapreduce.job.queuename=hive; set hive.exec.dynamic.partition.mode=nonstrict; SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; insert overwrite table ${APP}.dwd_dim_sku_info partition(dt='$do_date') select sku.id, sku.spu_id, sku.price, sku.sku_name, sku.sku_desc, sku.weight, sku.tm_id, ob.tm_name, sku.category3_id, c2.id category2_id, c1.id category1_id, c3.name category3_name, c2.name category2_name, c1.name category1_name, spu.spu_name, sku.create_time from ( select * from ${APP}.ods_sku_info where dt='$do_date' )sku join ( select * from ${APP}.ods_base_trademark where dt='$do_date' )ob on sku.tm_id=ob.tm_id join ( select * from ${APP}.ods_spu_info where dt='$do_date' )spu on spu.id = sku.spu_id join ( select * from ${APP}.ods_base_category3 where dt='$do_date' )c3 on sku.category3_id=c3.id join ( select * from ${APP}.ods_base_category2 where dt='$do_date' )c2 on c3.category2_id=c2.id join ( select * from ${APP}.ods_base_category1 where dt='$do_date' )c1 on c2.category1_id=c1.id; insert overwrite table ${APP}.dwd_dim_coupon_info partition(dt='$do_date') select id, coupon_name, coupon_type, condition_amount, condition_num, activity_id, benefit_amount, benefit_discount, create_time, range_type, spu_id, tm_id, category3_id, limit_num, operate_time, expire_time from ${APP}.ods_coupon_info where dt='$do_date'; insert overwrite table ${APP}.dwd_dim_activity_info partition(dt='$do_date') select id, activity_name, activity_type, start_time, end_time, create_time from ${APP}.ods_activity_info where dt='$do_date'; insert overwrite table ${APP}.dwd_fact_order_detail partition(dt='$do_date') select id, order_id, user_id, sku_id, sku_num, order_price, sku_num, create_time, province_id, source_type, source_id, original_amount_d, if(rn=1,final_total_amount-(sum_div_final_amount final_amount_d),final_amount_d), if(rn=1,feight_fee-(sum_div_feight_fee-feight_fee_d),feight_fee_d), if(rn=1,benefit_reduce_amount-(sum_div_benefit_reduce_amount benefit_reduce_amount_d),benefit_reduce_amount_d) from ( select od.id, od.order_id, od.user_id, od.sku_id, od.sku_name, od.order_price, od.sku_num, od.create_time, oi.province_id, od.source_type, od.source_id, round(od.order_price*od.sku_num,2) original_amount_d, round(od.order_price*od.sku_num/oi.original_total_amount*oi.final_total_amount,2) final_amount_d, round(od.order_price*od.sku_num/oi.original_total_amount*oi.feight_fee,2) feight_fee_d, round(od.order_price*od.sku_num/oi.original_total_amount*oi.benefit_reduce_amount, 2) benefit_reduce_amount_d, row_number() over(partition by od.order_id order by od.id desc) rn, oi.final_total_amount, oi.feight_fee, oi.benefit_reduce_amount, sum(round(od.order_price*od.sku_num/oi.original_total_amount*oi.final_total_amount ,2)) over(partition by od.order_id) sum_div_final_amount, sum(round(od.order_price*od.sku_num/oi.original_total_amount*oi.feight_fee,2)) over(partition by od.order_id) sum_div_feight_fee, sum(round(od.order_price*od.sku_num/oi.original_total_amount*oi.benefit_reduce_amo unt,2)) over(partition by od.order_id) sum_div_benefit_reduce_amount from ( select * from ${APP}.ods_order_detail where dt='$do_date' ) od join ( select * from ${APP}.ods_order_info where dt='$do_date' ) oi on od.order_id=oi.id )t1; insert overwrite table ${APP}.dwd_fact_payment_info partition(dt='$do_date') select pi.id, pi.out_trade_no, pi.order_id, pi.user_id, pi.alipay_trade_no, pi.total_amount, pi.subject, pi.payment_type, pi.payment_time, oi.province_id from ( select * from ${APP}.ods_payment_info where dt='$do_date' )pi join ( select id, province_id from ${APP}.ods_order_info where dt='$do_date' )oi on pi.order_id = oi.id; insert overwrite table ${APP}.dwd_fact_order_refund_info partition(dt='$do_date') select id, user_id, order_id, sku_id, refund_type, refund_num, refund_amount, refund_reason_type, create_time from ${APP}.ods_order_refund_info where dt='$do_date'; insert overwrite table ${APP}.dwd_fact_comment_info partition(dt='$do_date') select id, user_id, sku_id, spu_id, order_id, appraise, create_time from ${APP}.ods_comment_info where dt='$do_date'; insert overwrite table ${APP}.dwd_fact_cart_info partition(dt='$do_date') select id, user_id, sku_id, cart_price, sku_num, sku_name, create_time, operate_time, is_ordered, order_time, source_type, source_id from ${APP}.ods_cart_info where dt='$do_date'; insert overwrite table ${APP}.dwd_fact_favor_info partition(dt='$do_date') select id, user_id, sku_id, spu_id, is_cancel, create_time, cancel_time from ${APP}.ods_favor_info where dt='$do_date'; insert overwrite table ${APP}.dwd_fact_coupon_use partition(dt) select if(new.id is null,old.id,new.id), if(new.coupon_id is null,old.coupon_id,new.coupon_id), if(new.user_id is null,old.user_id,new.user_id), if(new.order_id is null,old.order_id,new.order_id), if(new.coupon_status is null,old.coupon_status,new.coupon_status), if(new.get_time is null,old.get_time,new.get_time), if(new.using_time is null,old.using_time,new.using_time), if(new.used_time is null,old.used_time,new.used_time), date_format(if(new.get_time is null,old.get_time,new.get_time),'yyyy-MM-dd') from ( select id, coupon_id, user_id, order_id, coupon_status, get_time, using_time, used_time from ${APP}.dwd_fact_coupon_use where dt in ( select date_format(get_time,'yyyy-MM-dd') from ${APP}.ods_coupon_use where dt='$do_date' ) )old full outer join ( select id, coupon_id, user_id, order_id, coupon_status, get_time, using_time, used_time from ${APP}.ods_coupon_use where dt='$do_date' )new on old.id=new.id; insert overwrite table ${APP}.dwd_fact_order_info partition(dt) select if(new.id is null,old.id,new.id), if(new.order_status is null,old.order_status,new.order_status), if(new.user_id is null,old.user_id,new.user_id), if(new.out_trade_no is null,old.out_trade_no,new.out_trade_no), if(new.tms['1001'] is null,old.create_time,new.tms['1001']),--1001 对应未支付状态 if(new.tms['1002'] is null,old.payment_time,new.tms['1002']), if(new.tms['1003'] is null,old.cancel_time,new.tms['1003']), if(new.tms['1004'] is null,old.finish_time,new.tms['1004']), if(new.tms['1005'] is null,old.refund_time,new.tms['1005']), if(new.tms['1006'] is null,old.refund_finish_time,new.tms['1006']), if(new.province_id is null,old.province_id,new.province_id), if(new.activity_id is null,old.activity_id,new.activity_id), if(new.original_total_amount is null,old.original_total_amount,new.original_total_amount), if(new.benefit_reduce_amount is null,old.benefit_reduce_amount,new.benefit_reduce_amount), if(new.feight_fee is null,old.feight_fee,new.feight_fee), if(new.final_total_amount is null,old.final_total_amount,new.final_total_amount), date_format(if(new.tms['1001'] is null,old.create_time,new.tms['1001']),'yyyy MM-dd') from ( select id, order_status, user_id, out_trade_no, create_time, payment_time, cancel_time, finish_time, refund_time, refund_finish_time, province_id, activity_id, original_total_amount, benefit_reduce_amount, feight_fee, final_total_amount from ${APP}.dwd_fact_order_info where dt in ( select date_format(create_time,'yyyy-MM-dd') from ${APP}.ods_order_info where dt='$do_date' ) )old full outer join ( select info.id, info.order_status, info.user_id, info.out_trade_no, info.province_id, act.activity_id, log.tms, info.original_total_amount, info.benefit_reduce_amount, info.feight_fee, info.final_total_amount from ( select order_id, str_to_map(concat_ws(',',collect_set(concat(order_status,'=',operate_time))),',',' =') tms from ${APP}.ods_order_status_log where dt='$do_date' group by order_id )log join ( select * from ${APP}.ods_order_info where dt='$do_date' )info on log.order_id=info.id left join ( select * from ${APP}.ods_activity_order where dt='$do_date' )act on log.order_id=act.order_id )new on old.id=new.id; " sql2=" insert overwrite table ${APP}.dwd_dim_base_province select bp.id, bp.name, bp.area_code, bp.iso_code, bp.region_id, br.region_name from ${APP}.ods_base_province bp join ${APP}.ods_base_region br on bp.region_id=br.id; " sql3=" insert overwrite table ${APP}.dwd_dim_user_info_his_tmp select * from ( select id, name, birthday, gender, email, user_level, create_time, operate_time, '$do_date' start_date, '9999-99-99' end_date from ${APP}.ods_user_info where dt='$do_date' union all select uh.id, uh.name, uh.birthday, uh.gender, uh.email, uh.user_level, uh.create_time, uh.operate_time, uh.start_date, if(ui.id is not null and uh.end_date='9999-99-99', date_add(ui.dt,-1), uh.end_date) end_date from ${APP}.dwd_dim_user_info_his uh left join ( select * from ${APP}.ods_user_info where dt='$do_date' ) ui on uh.id=ui.id )his order by his.id, start_date; insert overwrite table ${APP}.dwd_dim_user_info_his select * from ${APP}.dwd_dim_user_info_his_tmp; " case $1 in "first"){ $hive -e "$sql1$sql2" };; "all"){ $hive -e "$sql1$sql3" };; esac 2 )增加脚本执行权限 [atguigu@hadoop102 bin]$ chmod 777 ods_to_dwd_db.sh 2. 脚本使用说明 1 )初次导入 ( 1 )时间维度表 参照 4.4.5 节数据装载 ( 2 )用户维度表 参照 4.4.15 节拉链表初始化 ( 3 )其余表 初次导入时,脚本的第一个参数应为 first ,线上环境不传第二个参数,自动获取前一天 日期。 [atguigu@hadoop102 bin]$ ods_to_dwd_db.sh first 2020-06-14 2 )每日定时导入 每日定时导入,脚本的第一个参数应为 all ,线上环境不传第二个参数,自动获取前一 天日期。 [atguigu@hadoop102 bin]$ ods_to_dwd_db.sh all 2020-06-15 (注:脚本的sql的整理)
上一篇:尚硅谷_尚硅谷离线数据仓库项目(阿里云离线数仓)_笔记


下一篇:学 Win32 汇编[13]: 定义符号常量(=、EQU、TEXTEQU)