一、建表语句
create table dws_bhv_habo_measure_lostrate_mb_di( version_flag bigint comment '版本标签 2:web丢失 3ios直播丢失 4安卓直播丢失', query_version string comment '查询版本号', is_video int comment '直播标签 1直播 0非直播', sumcount bigint comment '总数', actual_count bigint comment '实际上报数', lost_rate string comment '丢失率') partitioned by ( dt int comment '分区')
二、模型代码
set hive.exec.dynamic.partition.mode = nonstrict; insert overwrite table taeget_tab partition(dt) select version_flag, query_version, is_video, sum(should) as sumcount, sum(actual) as actual_count, 1 - sum(actual) * 1.00 / sum(should) as lost_rate, dt from( select dt, did, biz_type, biz_name, query_version, version_flag, is_video, (max(req_idx) - min(req_idx) + 1) as should, --应该 count(distinct req_idx) as actual --实际 from( --对各个客户端条件打标签 select dt ,did ,req_idx ,is_video ,biz_type ,biz_name --统一Web和App的字段 ,IF(label_map['label_loss_02']='1' ,sdk_version,app_version) query_version ,case when label_map['label_loss_03']='1' then 3 --ios直播丢失率条件 when label_map['label_loss_04']='1' then 4 --android直播丢失率条件 when label_map['label_loss_02']='1' then 2 --web丢失率条件 else -1 end as version_flag from( select dt, event_time, video_time, did, gif, sdk_version, app_version, biz_type, biz_name, client_type, is_video, label_map, req_idx from base_table where dt = ${dt} and client_type not in(-1,8, 9) and biz_type = 7 )base_tab ) label_tab group by dt, biz_type, biz_name, query_version, did, version_flag, is_video ) out_tab group by dt, version_flag, query_version, is_video