装饰器
十分相似的三个函数
# ============= Guangdiantong (GDT) =============
filehome = greenspan_file_name
fileall = os.listdir(filehome)

# Raw columns kept from every GDT export.
select_col = [
    'ftime', 'advertiser_id', 'agent_id', 'industry_name_level1',
    'industry_name_level2', 'is_ocpa', 'flow_name_level2',
    'product_type', 'desttype', 'creative_size', 'creative_size_name',
    'acttion_track_type', 'trace_cnt', 'exposure_cnt', 'click_cnt',
    'real_cost',
]
# Pivot dimensions ('月' = month, '年度周' = week of year, '是否OCPA' = OCPA flag).
greenspan_idx_col = [
    '月', '年度周', 'advertiser_id', 'agent_id', 'industry_name_level1',
    'industry_name_level2', 'product_type', 'desttype', 'creative_size',
    'creative_size_name', 'flow_name_level2', 'acttion_track_type',
    '是否OCPA',
]
# Metrics summed per dimension combination.
greenspan_val = ['trace_cnt', 'exposure_cnt', 'click_cnt', 'real_cost']


def clear_data(filehome, col):
    """Load one GDT CSV, clean it and return its summed pivot.

    Args:
        filehome: Path of the CSV file to read.
        col: Columns to keep from the raw file.

    Returns:
        A DataFrame grouped by ``greenspan_idx_col`` with the
        ``greenspan_val`` columns summed, index reset to columns.
    """
    frame = pd.read_csv(filehome)
    frame = frame.loc[:, col]
    # Normalise missing / placeholder markers to 0.
    frame.replace(
        [np.nan, np.inf, 'NA', '-', 'nan'],
        [0, 0, 0, 0, 0],
        inplace=True,
    )
    frame['advertiser_id'] = frame['advertiser_id'].astype('int64').astype(str)
    frame['agent_id'] = frame['agent_id'].astype(str)

    # Keep only advertisers listed in the externally defined revenueday_id.
    keep_rows = frame['advertiser_id'].isin(revenueday_id)
    frame = frame.loc[keep_rows, :]

    # These is_ocpa values map to '否' (no); anything else maps to '是' (yes).
    not_ocpa = frame['is_ocpa'].isin(
        [0, 888888888888, '', '0', '888888888888']
    )
    frame['是否OCPA'] = np.where(not_ocpa, '否', '是')

    # Unlike the other platforms, the metric columns are not cast to float here.
    frame['ftime'] = pd.DatetimeIndex(frame['ftime'].astype(str))
    # %W is the Monday-based week number starting at 0; shift it to start at 1.
    frame['年度周'] = frame['ftime'].apply(
        lambda day: int(day.strftime("%W")) + 1
    )
    frame['月'] = frame['ftime'].apply(lambda day: day.strftime("%m"))

    # Every grouping dimension is compared as text.
    frame.loc[:, greenspan_idx_col] = frame.loc[:, greenspan_idx_col].apply(
        lambda column: column.astype('str')
    )
    return pd.pivot_table(
        frame,
        index=greenspan_idx_col,
        values=greenspan_val,
        aggfunc='sum',
        fill_value=0,
    ).reset_index()


# NOTE: this name is reused further down for the Moments directory.
files_home_pyq = greenspan_file_name
greenspan_pivot = pd.concat([
    clear_data(filehome=files_home_pyq + name, col=select_col)
    for name in os.listdir(files_home_pyq)
])

# ============= Official Accounts (GZH) =============
filehome = gzh_file_name
fileall = os.listdir(filehome)

# Raw columns kept from every GZH export.
select_col = [
    '数据日期', '广告主id', '广告主appid', '服务商id', '一级行业', '二级行业',
    '广告类型', '扣费类型', '广告位', '素材规格', 'is_ocpm',
    '落地页类型', '曝光量', '曝光量UV', '点击量', '点击量UV', '消耗',
    '商品指标', '落地页分享次数',
]
# Pivot dimensions; '投放类型' is derived inside clear_data.
idx_pyq = [
    '月', '年度周', '广告主id', '广告主appid', '服务商id', '一级行业',
    '二级行业', '广告类型', '投放类型', '广告位', '素材规格', 'is_ocpm',
    '落地页类型',
]
# Metrics summed per dimension combination.
val_pyq = [
    '曝光量', '曝光量UV', '点击量', '点击量UV', '消耗', '商品指标',
    '落地页分享次数',
]


def clear_data(filehome, col):
    """Load one GZH CSV (GBK encoded), clean it and return its summed pivot.

    Rebinds the ``clear_data`` defined for the previous platform.

    Args:
        filehome: Path of the CSV file to read.
        col: Columns to keep from the raw file.

    Returns:
        A DataFrame grouped by ``idx_pyq`` with the ``val_pyq`` columns
        summed, index reset to columns.
    """
    frame = pd.read_csv(filehome, encoding="gbk")
    # Some exports label the spend column with its unit; unify the name.
    frame.rename(columns={'消耗(元)': '消耗'}, inplace=True)
    frame = frame.loc[:, col]
    # Normalise missing / placeholder markers to 0.
    frame.replace([np.nan, '-', ' ', ''], [0, 0, 0, 0], inplace=True)
    frame['广告主id'] = frame['广告主id'].astype('int64').astype(str)

    # Keep only advertisers listed in the externally defined revenueday_id.
    keep_rows = frame['广告主id'].isin(revenueday_id)
    frame = frame.loc[keep_rows, :]

    frame['数据日期'] = pd.DatetimeIndex(frame['数据日期'].astype(str))
    # %W is the Monday-based week number starting at 0; shift it to start at 1.
    frame['年度周'] = frame['数据日期'].apply(
        lambda day: int(day.strftime("%W")) + 1
    )
    frame['月'] = frame['数据日期'].apply(lambda day: day.strftime("%m"))

    # '竞价' (bidding) for CPC rows, or CPM rows on the '激励小视频' slot;
    # every other row is '排期' (scheduled).
    billed_cpc = frame['扣费类型'].isin(['CPC'])
    billed_cpm = frame['扣费类型'].isin(['CPM'])
    incentive_slot = frame['广告位'].isin(['激励小视频'])
    frame['投放类型'] = np.where(
        billed_cpc | (billed_cpm & incentive_slot), '竞价', '排期'
    )

    metric_cols = [
        '曝光量', '曝光量UV', '点击量', '点击量UV', '消耗', '商品指标',
        '落地页分享次数',
    ]
    frame.loc[:, metric_cols] = frame.loc[:, metric_cols].apply(
        lambda column: column.astype('float')
    )
    # Every grouping dimension is compared as text.
    frame.loc[:, idx_pyq] = frame.loc[:, idx_pyq].apply(
        lambda column: column.astype('str')
    )
    return pd.pivot_table(
        frame,
        index=idx_pyq,
        values=val_pyq,
        aggfunc='sum',
        fill_value=0,
    ).reset_index()


files_home_gzh = gzh_file_name
datagzh = pd.concat([
    clear_data(filehome=files_home_gzh + name, col=select_col)
    for name in os.listdir(files_home_gzh)
])

# ============= Moments (PYQ) =============
# Raw columns kept from every PYQ export.
select_col = [
    '数据日期', 'uid', 'appid', '一级行业', '二级行业', '推广目标', '投放类型',
    '素材类型', '是否oCPM', '城市分级',
    '素材落地页', '文字链落地页', '总曝光uv', '总曝光pv', '总点击uv',
    '总点击pv', '财务收入',
    '关注uv', '销售线索收集+提交表单uv', '订单量', '总互动点击uv',
    '不感兴趣uv', '评论uv', '点赞uv', '落地页分享次数',
]
# Pivot dimensions.
idx_pyq = [
    '月', '年度周', 'uid', 'appid', '一级行业', '二级行业', '推广目标',
    '投放类型', '素材类型', '是否oCPM', '城市分级', '素材落地页',
    '文字链落地页',
]
# Metrics summed per dimension combination.
val_pyq = [
    '总曝光uv', '总曝光pv', '总点击uv', '总点击pv', '财务收入', '关注uv',
    '销售线索收集+提交表单uv',
    '订单量', '总互动点击uv', '不感兴趣uv', '评论uv', '点赞uv',
    '落地页分享次数',
]


def clear_data(filehome, col):
    """Load one PYQ CSV (GBK encoded), clean it and return its summed pivot.

    Rebinds the ``clear_data`` defined for the previous platform.

    Args:
        filehome: Path of the CSV file to read.
        col: Columns to keep from the raw file.

    Returns:
        A DataFrame grouped by ``idx_pyq`` with the ``val_pyq`` columns
        summed, index reset to columns.
    """
    frame = pd.read_csv(filehome, encoding="gbk")
    frame = frame.loc[:, col]
    # Normalise missing / placeholder markers to 0.
    frame.replace([np.nan, '-', ' ', ''], [0, 0, 0, 0], inplace=True)
    frame['uid'] = frame['uid'].astype('int64').astype(str)

    # Keep only advertisers listed in the externally defined revenueday_id.
    keep_rows = frame['uid'].isin(revenueday_id)
    frame = frame.loc[keep_rows, :]

    frame['数据日期'] = pd.DatetimeIndex(frame['数据日期'].astype(str))
    # %W is the Monday-based week number starting at 0; shift it to start at 1.
    frame['年度周'] = frame['数据日期'].apply(
        lambda day: int(day.strftime("%W")) + 1
    )
    frame['月'] = frame['数据日期'].apply(lambda day: day.strftime("%m"))

    metric_cols = [
        '总曝光uv', '总曝光pv', '总点击uv', '总点击pv', '财务收入', '关注uv',
        '销售线索收集+提交表单uv', '订单量', '总互动点击uv', '不感兴趣uv',
        '评论uv', '点赞uv', '落地页分享次数',
    ]
    frame.loc[:, metric_cols] = frame.loc[:, metric_cols].apply(
        lambda column: column.astype('float')
    )
    # Every grouping dimension is compared as text.
    frame.loc[:, idx_pyq] = frame.loc[:, idx_pyq].apply(
        lambda column: column.astype('str')
    )
    return pd.pivot_table(
        frame,
        index=idx_pyq,
        values=val_pyq,
        aggfunc='sum',
        fill_value=0,
    ).reset_index()


files_home_pyq = pyq_file_name
datapyq = pd.concat([
    clear_data(filehome=files_home_pyq + name, col=select_col)
    for name in os.listdir(files_home_pyq)
])
装饰器封装
def _read_plat_file(data, group_by_keys, value_keys):
    """Build a decorator that adds shared cleaning and pivoting around a step.

    Args:
        data: DataFrame holding at least the ``ftime`` and ``uid`` columns.
            ``ftime`` must be castable to int and read as ``YYYYMMDD``.
        group_by_keys: Columns used as the pivot index.
        value_keys: Columns summed by the pivot.

    Returns:
        A decorator. Applied to ``func(frame) -> frame`` it produces a
        zero-argument callable that cleans ``data``, runs ``func`` on the
        cleaned frame and returns the summed pivot with its index reset.
    """
    def processing_data(func):
        def wrapper():
            # replace() without inplace returns a new frame, so the caller's
            # `data` is never modified and the wrapper can safely be run more
            # than once on the same input.
            frame = data.replace([np.nan, '-', ' ', ''], [0, 0, 0, 0])
            day = pd.to_datetime(
                frame['ftime'].astype(int).astype(str), format="%Y%m%d"
            )
            frame = frame.assign(
                ftime=day,
                uid=frame['uid'].astype('int64').astype(str),
                # %W is the Monday-based week number starting at 0; shift it
                # so the first (partial) week of the year is week 1.
                f_yw=day.dt.strftime("%W").astype('int64') + 1,
                f_m=day.dt.strftime("%m"),
            )
            frame = func(frame)
            return pd.pivot_table(
                frame,
                index=group_by_keys,
                values=value_keys,
                aggfunc='sum',
                fill_value=0,
            ).reset_index()
        return wrapper
    return processing_data


def get_gdt_pivot(data, group_by_keys, value_keys):
    """Return the summed pivot for GDT data.

    Adds an ``is_ocpa`` column that is '否' (no) when ``is_ocpm`` holds one
    of the listed values and '是' (yes) otherwise, then pivots.
    """
    @_read_plat_file(data, group_by_keys, value_keys)
    def _gdt_file(data_assign):
        # NOTE(review): the flag is read from `is_ocpm` but stored as
        # `is_ocpa`; confirm the GDT input really carries an `is_ocpm` column.
        not_ocpa = data_assign['is_ocpm'].isin(
            [0, 888888888888, '', '0', '888888888888']
        )
        return data_assign.assign(is_ocpa=np.where(not_ocpa, '否', '是'))

    return _gdt_file()


def get_gzh_pivot(data, group_by_keys, value_keys):
    """Return the summed pivot for GZH data.

    Rewrites ``is_ocpm`` to '是' (yes) where it equals 't' and '否' (no)
    everywhere else, then pivots.
    """
    @_read_plat_file(data, group_by_keys, value_keys)
    def _gzh_file(data_assign):
        flagged = data_assign['is_ocpm'].isin(['t'])
        return data_assign.assign(is_ocpm=np.where(flagged, '是', '否'))

    return _gzh_file()


def get_pyq_pivot(data, group_by_keys, value_keys):
    """Return the summed pivot for PYQ data.

    Rewrites ``is_ocpm`` to '是' (yes) where it equals 't' and '否' (no)
    everywhere else, then pivots.
    """
    @_read_plat_file(data, group_by_keys, value_keys)
    def _pyq_file(data_assign):
        flagged = data_assign['is_ocpm'].isin(['t'])
        return data_assign.assign(is_ocpm=np.where(flagged, '是', '否'))

    return _pyq_file()


def get_pivot_function_package(plat, data, group_by_keys, value_keys):
    """Dispatch to the pivot builder registered for ``plat``.

    Args:
        plat: Platform code, one of 'GDT', 'GZH' or 'PYQ'.
        data: Raw DataFrame for that platform.
        group_by_keys: Columns used as the pivot index.
        value_keys: Columns summed by the pivot.

    Returns:
        The pivoted DataFrame produced by the platform's builder.

    Raises:
        KeyError: If ``plat`` is not a registered platform code.
    """
    function_dict = {
        'GDT': get_gdt_pivot,
        'GZH': get_gzh_pivot,
        'PYQ': get_pyq_pivot,
    }
    try:
        builder = function_dict[plat]
    except KeyError:
        # Same exception type as a plain lookup, with an actionable message.
        raise KeyError(
            "unknown platform {!r}; expected one of {}".format(
                plat, sorted(function_dict)
            )
        ) from None
    return builder(data, group_by_keys, value_keys)