装饰器

装饰器

十分相似的三个函数

#  =============广点通=============
   # Directory that holds the raw GDT CSV exports, and its current listing.
   filehome = greenspan_file_name
   fileall = os.listdir(filehome)

   # Columns kept from every raw export.
   select_col = ['ftime', 'advertiser_id', 'agent_id', 'industry_name_level1', 'industry_name_level2',
                 'is_ocpa', 'flow_name_level2',
                 'product_type', 'desttype', 'creative_size', 'creative_size_name',
                 'acttion_track_type', 'trace_cnt', 'exposure_cnt', 'click_cnt', 'real_cost']

   # Grouping keys and summed measures of the pivot.
   greenspan_idx_col = ['月', '年度周', 'advertiser_id', 'agent_id', 'industry_name_level1', 'industry_name_level2',
                        'product_type', 'desttype', 'creative_size', 'creative_size_name', 'flow_name_level2',
                        'acttion_track_type', '是否OCPA']
   greenspan_val = ['trace_cnt', 'exposure_cnt', 'click_cnt', 'real_cost']

   def clear_data(filehome, col):
       """Load one GDT export and aggregate it by ``greenspan_idx_col``.

       Args:
           filehome: Path of the CSV file to read.
           col: Column labels to keep from the raw file.

       Returns:
           A DataFrame with one row per combination of ``greenspan_idx_col``
           and the summed ``greenspan_val`` columns (index reset).

       Reads the module-level names ``revenueday_id``, ``greenspan_idx_col``
       and ``greenspan_val``.
       """
       missing_markers = [np.nan, np.inf, 'NA', '-', 'nan']
       data = pd.read_csv(filehome).loc[:, col]
       data = data.replace(missing_markers, [0] * len(missing_markers))
       data['advertiser_id'] = data['advertiser_id'].astype('int64').astype(str)
       data['agent_id'] = data['agent_id'].astype(str)
       # Copy the filtered rows so the derived columns below are written to an
       # independent frame instead of a slice of the unfiltered one.
       data = data.loc[data['advertiser_id'].isin(revenueday_id), :].copy()
       data['是否OCPA'] = np.where(
           data['is_ocpa'].isin([0, 888888888888, '', '0', '888888888888']), '否', '是')
       data['ftime'] = pd.DatetimeIndex(data['ftime'].astype(str))
       # %W is the Monday-based week number starting at 0; shift it to start at 1.
       data['年度周'] = data['ftime'].dt.strftime("%W").astype(int) + 1
       data['月'] = data['ftime'].dt.strftime("%m")
       # Whole-column assignment replaces the columns, so the string dtype
       # always takes effect for the grouping keys.
       data[greenspan_idx_col] = data[greenspan_idx_col].astype(str)
       data_pivot = pd.pivot_table(data, index=greenspan_idx_col, values=greenspan_val,
                                   aggfunc='sum', fill_value=0).reset_index()
       return data_pivot

   files_home_pyq = greenspan_file_name
   # os.path.join keeps the path valid whether or not the directory string
   # ends with a separator.
   greenspan_pivot = pd.concat([clear_data(filehome=os.path.join(files_home_pyq, file), col=select_col)
                                for file in os.listdir(files_home_pyq)])


#  ===================公众号 ===============
    # Directory that holds the raw official-account (GZH) CSV exports, and its listing.
    filehome = gzh_file_name
    fileall = os.listdir(filehome)

    # Columns kept from every raw export (after '消耗(元)' is renamed to '消耗').
    select_col = ['数据日期', '广告主id', '广告主appid', '服务商id', '一级行业', '二级行业', '广告类型', '扣费类型',
                  '广告位', '素材规格', 'is_ocpm',
                  '落地页类型', '曝光量', '曝光量UV', '点击量', '点击量UV', '消耗', '商品指标', '落地页分享次数']

    # Grouping keys and summed measures of the pivot.
    idx_pyq = ['月', '年度周', '广告主id', '广告主appid', '服务商id', '一级行业', '二级行业', '广告类型', '投放类型',
               '广告位', '素材规格', 'is_ocpm', '落地页类型']
    val_pyq = ['曝光量', '曝光量UV', '点击量', '点击量UV', '消耗', '商品指标', '落地页分享次数']

    def clear_data(filehome, col):
        """Load one GZH export (GBK-encoded) and aggregate it by ``idx_pyq``.

        Args:
            filehome: Path of the CSV file to read.
            col: Column labels to keep from the raw file.

        Returns:
            A DataFrame with one row per combination of ``idx_pyq`` and the
            summed ``val_pyq`` columns (index reset).

        Reads the module-level names ``revenueday_id``, ``idx_pyq`` and
        ``val_pyq``.
        """
        missing_markers = [np.nan, '-', ' ', '']
        data = pd.read_csv(filehome, encoding="gbk").rename(columns={'消耗(元)': '消耗'})
        data = data.loc[:, col].replace(missing_markers, [0] * len(missing_markers))
        data['广告主id'] = data['广告主id'].astype('int64').astype(str)
        # Copy the filtered rows so the derived columns below are written to an
        # independent frame instead of a slice of the unfiltered one.
        data = data.loc[data['广告主id'].isin(revenueday_id), :].copy()
        data['数据日期'] = pd.DatetimeIndex(data['数据日期'].astype(str))
        # %W is the Monday-based week number starting at 0; shift it to start at 1.
        data['年度周'] = data['数据日期'].dt.strftime("%W").astype(int) + 1
        data['月'] = data['数据日期'].dt.strftime("%m")
        # '竞价' for CPC billing, or CPM billing on the '激励小视频' slot; '排期' otherwise.
        is_cpc = data['扣费类型'].isin(['CPC'])
        is_cpm_reward = data['扣费类型'].isin(['CPM']) & data['广告位'].isin(['激励小视频'])
        data['投放类型'] = np.where(is_cpc | is_cpm_reward, '竞价', '排期')
        # Whole-column assignment replaces the columns, so the float / string
        # dtypes always take effect.
        data[val_pyq] = data[val_pyq].astype('float')
        data[idx_pyq] = data[idx_pyq].astype(str)
        data_pivot = pd.pivot_table(data, index=idx_pyq, values=val_pyq,
                                    aggfunc='sum', fill_value=0).reset_index()
        return data_pivot

    files_home_gzh = gzh_file_name
    # os.path.join keeps the path valid whether or not the directory string
    # ends with a separator.
    datagzh = pd.concat([clear_data(filehome=os.path.join(files_home_gzh, file), col=select_col)
                         for file in os.listdir(files_home_gzh)])


#  ============= 朋友圈 =============
  # Columns kept from every raw Moments (PYQ) export.
  select_col = ['数据日期', 'uid', 'appid', '一级行业', '二级行业', '推广目标', '投放类型', '素材类型', '是否oCPM', '城市分级',
                '素材落地页', '文字链落地页', '总曝光uv', '总曝光pv', '总点击uv', '总点击pv', '财务收入',
                '关注uv', '销售线索收集+提交表单uv', '订单量', '总互动点击uv', '不感兴趣uv', '评论uv', '点赞uv', '落地页分享次数']

  # Grouping keys and summed measures of the pivot.
  idx_pyq = ['月', '年度周', 'uid', 'appid', '一级行业', '二级行业', '推广目标', '投放类型', '素材类型', '是否oCPM',
             '城市分级', '素材落地页', '文字链落地页']
  val_pyq = ['总曝光uv', '总曝光pv', '总点击uv', '总点击pv', '财务收入', '关注uv', '销售线索收集+提交表单uv',
             '订单量', '总互动点击uv', '不感兴趣uv', '评论uv', '点赞uv', '落地页分享次数']

  def clear_data(filehome, col):
      """Load one PYQ export (GBK-encoded) and aggregate it by ``idx_pyq``.

      Args:
          filehome: Path of the CSV file to read.
          col: Column labels to keep from the raw file.

      Returns:
          A DataFrame with one row per combination of ``idx_pyq`` and the
          summed ``val_pyq`` columns (index reset).

      Reads the module-level names ``revenueday_id``, ``idx_pyq`` and
      ``val_pyq``.
      """
      missing_markers = [np.nan, '-', ' ', '']
      data = pd.read_csv(filehome, encoding="gbk").loc[:, col]
      data = data.replace(missing_markers, [0] * len(missing_markers))
      data['uid'] = data['uid'].astype('int64').astype(str)
      # Copy the filtered rows so the derived columns below are written to an
      # independent frame instead of a slice of the unfiltered one.
      data = data.loc[data['uid'].isin(revenueday_id), :].copy()
      data['数据日期'] = pd.DatetimeIndex(data['数据日期'].astype(str))
      # %W is the Monday-based week number starting at 0; shift it to start at 1.
      data['年度周'] = data['数据日期'].dt.strftime("%W").astype(int) + 1
      data['月'] = data['数据日期'].dt.strftime("%m")
      # Whole-column assignment replaces the columns, so the float / string
      # dtypes always take effect.
      data[val_pyq] = data[val_pyq].astype('float')
      data[idx_pyq] = data[idx_pyq].astype(str)
      data_pivot = pd.pivot_table(data, index=idx_pyq, values=val_pyq,
                                  aggfunc='sum', fill_value=0).reset_index()
      return data_pivot

  files_home_pyq = pyq_file_name
  # os.path.join keeps the path valid whether or not the directory string
  # ends with a separator.
  datapyq = pd.concat([clear_data(filehome=os.path.join(files_home_pyq, file), col=select_col)
                       for file in os.listdir(files_home_pyq)])

  

装饰器封装

def _read_plat_file(data, group_by_keys, value_keys):
    """Build a decorator that surrounds a transform with shared clean and pivot steps.

    The decorated function is replaced by a zero-argument callable. Calling it:

    1. replaces NaN, '-', ' ' and '' with 0,
    2. parses ``ftime`` (``YYYYMMDD`` values) into datetimes and turns ``uid``
       into a string,
    3. adds ``f_yw`` (Monday-based ``%W`` week number plus one) and ``f_m``
       (two-digit month string),
    4. passes the resulting frame to the decorated function, and
    5. pivots that function's result, summing ``value_keys`` per ``group_by_keys``.

    Args:
        data: DataFrame containing at least the ``ftime`` and ``uid`` columns.
        group_by_keys: Column labels used as the pivot index.
        value_keys: Column labels that are summed.

    Returns:
        A decorator. The function it wraps must accept one DataFrame and
        return a DataFrame.
    """
    def processing_data(func):
        def wrapper():
            # replace() without inplace returns a new frame, so the caller's
            # ``data`` is left untouched and the wrapper can be called again.
            cleaned = data.replace([np.nan, '-', ' ', ''], [0, 0, 0, 0])
            cleaned['ftime'] = pd.to_datetime(
                cleaned['ftime'].astype(int).astype(str), format="%Y%m%d")
            cleaned['uid'] = cleaned['uid'].astype('int64').astype(str)
            data_assign = cleaned.assign(
                f_yw=cleaned['ftime'].dt.strftime("%W").astype(int) + 1,
                f_m=cleaned['ftime'].dt.strftime("%m"),
            )
            data_assign = func(data_assign)
            return pd.pivot_table(data_assign,
                                  index=group_by_keys,
                                  values=value_keys,
                                  aggfunc='sum',
                                  fill_value=0).reset_index()

        return wrapper

    return processing_data


def get_gdt_pivot(data, group_by_keys, value_keys):
    """Return the pivot for GDT data, adding an ``is_ocpa`` flag first.

    ``is_ocpa`` is '否' when the row's ``is_ocpm`` value is one of
    0, 888888888888, '', '0' or '888888888888', and '是' otherwise.
    The cleaning and pivoting are delegated to ``_read_plat_file``.
    """
    # NOTE(review): the flag is read from 'is_ocpm' but stored as 'is_ocpa' --
    # confirm the source column name against the input schema.
    non_ocpa_markers = [0, 888888888888, '', '0', '888888888888']

    def _gdt_file(data_assign):
        not_ocpa = data_assign['is_ocpm'].isin(non_ocpa_markers)
        return data_assign.assign(is_ocpa=np.where(not_ocpa, '否', '是'))

    build_pivot = _read_plat_file(data, group_by_keys, value_keys)(_gdt_file)
    return build_pivot()


def get_gzh_pivot(data, group_by_keys, value_keys):
    """Return the pivot for GZH data, normalising ``is_ocpm`` first.

    ``is_ocpm`` becomes '是' where the original value is 't' and '否'
    otherwise. The cleaning and pivoting are delegated to ``_read_plat_file``.
    """
    def _gzh_file(data_assign):
        is_true = data_assign['is_ocpm'].isin(['t'])
        return data_assign.assign(is_ocpm=np.where(is_true, '是', '否'))

    build_pivot = _read_plat_file(data, group_by_keys, value_keys)(_gzh_file)
    return build_pivot()


def get_pyq_pivot(data, group_by_keys, value_keys):
    """Return the pivot for PYQ data, normalising ``is_ocpm`` first.

    ``is_ocpm`` becomes '是' where the original value is 't' and '否'
    otherwise. The cleaning and pivoting are delegated to ``_read_plat_file``.
    """
    def _pyq_file(data_assign):
        is_true = data_assign['is_ocpm'].isin(['t'])
        return data_assign.assign(is_ocpm=np.where(is_true, '是', '否'))

    build_pivot = _read_plat_file(data, group_by_keys, value_keys)(_pyq_file)
    return build_pivot()


def get_pivot_function_package(plat, data,  group_by_keys, value_keys):
    """Dispatch to the pivot builder registered for ``plat``.

    Args:
        plat: Platform code; one of 'GDT', 'GZH' or 'PYQ'.
        data: DataFrame handed to the selected builder.
        group_by_keys: Pivot index columns, forwarded unchanged.
        value_keys: Pivot value columns, forwarded unchanged.

    Returns:
        Whatever the selected builder returns.

    Raises:
        KeyError: If ``plat`` is not one of the registered codes.
    """
    builders = dict(
        GDT=get_gdt_pivot,
        GZH=get_gzh_pivot,
        PYQ=get_pyq_pivot,
    )
    selected = builders[plat]
    return selected(data, group_by_keys, value_keys)

  

 

上一篇:Flink计算PV,UV的案例及问题分析


下一篇:storm的并发度