o2o优惠券预测--经验分享(三)

今日分感谢大神们
下面我们直接上代码

def GetUserMerchantRelateInfo(feature):
    #4.user_merchant:
    #times_user_buy_merchant_before. 
    feature3 = feature
    all_user_merchant = feature3[['user_id','merchant_id']]
    all_user_merchant.drop_duplicates(inplace=True)
    
    #只保留销售了商品的商户id
    t = feature3[['user_id','merchant_id','date']]
    t = t[t.date!='null'][['user_id','merchant_id']]
    
    #用户一共买了这家商户的多少商品
    t['user_merchant_buy_total'] = 1
    t = t.groupby(['user_id','merchant_id']).agg('sum').reset_index()
    t.drop_duplicates(inplace=True)

    t1 = feature3[['user_id','merchant_id','coupon_id']]
    t1 = t1[t1.coupon_id!='null'][['user_id','merchant_id']]
    
    #用户一共收到一个商户的多少优惠券
    t1['user_merchant_received'] = 1
    t1 = t1.groupby(['user_id','merchant_id']).agg('sum').reset_index()
    t1.drop_duplicates(inplace = True)

    t2 = feature3[['user_id','merchant_id','date','date_received']]
    t2 = t2[(t2.date!='null')&(t2.date_received!='null')][['user_id','merchant_id']]
    
    #用户在一家商户中使用优惠券购买的商品的数目
    t2['user_merchant_buy_use_coupon'] = 1
    t2 = t2.groupby(['user_id','merchant_id']).agg('sum').reset_index()
    t2.drop_duplicates(inplace = True)

    #用户在一家商家的所有记录总数
    t3 = feature3[['user_id','merchant_id']]
    t3['user_merchant_any'] = 1
    t3 = t3.groupby(['user_id','merchant_id']).agg('sum').reset_index()
    t3.drop_duplicates(inplace = True)

    t4 = feature3[['user_id','merchant_id','date','coupon_id']]
    t4 = t4[(t4.date!='null')&(t4.coupon_id=='null')][['user_id','merchant_id']]
    
    #用户没有使用优惠券购买的商品的数目
    t4['user_merchant_buy_common'] = 1
    t4 = t4.groupby(['user_id','merchant_id']).agg('sum').reset_index()
    t4.drop_duplicates(inplace = True)

    user_merchant3 = pd.merge(all_user_merchant,t,on=['user_id','merchant_id'],how='left')
    user_merchant3 = pd.merge(user_merchant3,t1,on=['user_id','merchant_id'],how='left')
    user_merchant3 = pd.merge(user_merchant3,t2,on=['user_id','merchant_id'],how='left')
    user_merchant3 = pd.merge(user_merchant3,t3,on=['user_id','merchant_id'],how='left')
    user_merchant3 = pd.merge(user_merchant3,t4,on=['user_id','merchant_id'],how='left')
    
    #都是针对一家商户和一个用户
    user_merchant3.user_merchant_buy_use_coupon = user_merchant3.user_merchant_buy_use_coupon.replace(np.nan,0)
    user_merchant3.user_merchant_buy_common = user_merchant3.user_merchant_buy_common.replace(np.nan,0)
    #y优惠券的转换率,用户使用了的优惠券/一共收到的优惠券
    user_merchant3['user_merchant_coupon_transfer_rate'] = user_merchant3.user_merchant_buy_use_coupon.astype('float') / user_merchant3.user_merchant_received.astype('float')
    #用户使用优惠券的概率,在一家商户使用优惠券购买的商品/在一家商户购买商品的总数
    user_merchant3['user_merchant_coupon_buy_rate'] = user_merchant3.user_merchant_buy_use_coupon.astype('float') / user_merchant3.user_merchant_buy_total.astype('float')
    #用户在商户消费的概率 用户在商户购买的总数/在一家商户浏览的总次数
    user_merchant3['user_merchant_rate'] = user_merchant3.user_merchant_buy_total.astype('float') / user_merchant3.user_merchant_any.astype('float')
    #用户在一家商户不适用优惠券购买的概率 普通购买的商品数/购买商品的总数
    user_merchant3['user_merchant_common_buy_rate'] = user_merchant3.user_merchant_buy_common.astype('float') / user_merchant3.user_merchant_buy_total.astype('float')
    return user_merchant3

训练集和测试集的构造

def get_label(s):
    s = s.split(':')
    if s[0]=='null':
        return 0
    elif (date(int(s[0][0:4]),int(s[0][4:6]),int(s[0][6:8]))-date(int(s[1][0:4]),int(s[1][4:6]),int(s[1][6:8]))).days<=15:
        return 1
    else:
        return -1 
def GenerateData(dataset, feature, label=True):
    # 获取各个特征处理后的结果
    coupon_feature = GetCouponFeature(dataset, feature)
    merchant_feature = GetMerchantFeature(feature)
    user_feature = GetUserRelateInfo(feature)
    user_merchant = GetUserMerchantRelateInfo(feature)
    other_feature = GetOtherFeature(dataset)

    dataset = pd.merge(coupon_feature, merchant_feature,
                       on='merchant_id', how='left')
    dataset = pd.merge(dataset, user_feature, on='user_id', how='left')
    dataset = pd.merge(dataset, user_merchant, on=[
                       'user_id', 'merchant_id'], how='left')
    dataset = pd.merge(dataset, other_feature, on=[
                       'user_id', 'coupon_id', 'date_received'], how='left')
    dataset.drop_duplicates(inplace=True)

    dataset.user_merchant_buy_total = dataset.user_merchant_buy_total.replace(
        np.nan, 0)
    dataset.user_merchant_any = dataset.user_merchant_any.replace(np.nan, 0)
    dataset.user_merchant_received = dataset.user_merchant_received.replace(
        np.nan, 0)
    dataset['is_weekend'] = dataset.day_of_week.apply(
        lambda x: 1 if x in (6, 7) else 0)
    weekday_dummies = pd.get_dummies(dataset.day_of_week)
    weekday_dummies.columns = [
        'weekday'+str(i+1) for i in range(weekday_dummies.shape[1])]
    dataset = pd.concat([dataset, weekday_dummies], axis=1)

    # 如果是训练集要记得处理label标签值  但是在测试集中不用处理label标签 注意off_train和off_test字段
    if label:
        dataset['label'] = dataset.date.astype(
            'str') + ':' + dataset.date_received.astype('str')
        dataset.label = dataset.label.apply(get_label)
        dataset.drop(['merchant_id', 'day_of_week', 'date', 'date_received',
                     'coupon_count'], axis=1, inplace=True)

    else:
        dataset.drop(['merchant_id', 'day_of_week', 'coupon_count'],
                 axis=1, inplace=True)
        
    # 所有的表都要一起处理null
    dataset = dataset.replace('null', np.nan)

    return dataset

特征处理及保存

GenerateData1 = GenerateData(dataset1, feature1)
GenerateData2 = GenerateData(dataset2, feature2)
GenerateData3 = GenerateData(dataset3, feature3, False)

GenerateData1.to_csv('./GenerateData1.csv', index=None)
GenerateData2.to_csv('./GenerateData2.csv', index=None)
GenerateData3.to_csv('./GenerateData3.csv', index=None)


上一篇:Java调用微信支付


下一篇:csp-s模拟测试56(10.2)Merchant「二分」·Equation「树状数组」