Python 统计会员N日(内)留存率

一、导入库

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import openpyxl
import datetime as dt

二、导入数据

df=pd.read_excel(os.getcwd() + os.path.sep + "data.xlsx")

三、实现

df.sort_values('memberid').head(5)
def retention(data,n,dur=False):
    result = {}
    dates = sorted(set(data['购买日期']))
    new_member = data[['memberid','购买日期']].groupby('memberid').min()
    for date in dates:
        date_pay_p = set(new_member[new_member['购买日期']==date].index)   # 当日首次下单用户
        daten = date + dt.timedelta(days=n)
        if dur == True:
            daten_pay_p = set(df[(df['购买日期'] > date) & (df['购买日期']<=daten)]['memberid']) # 首次下单后n日内(含第n日)再次下单用户
        else:
            daten_pay_p = set(df[df['购买日期']==daten]['memberid']) # 首次下单第n日再次下单用户
        day_count = len(date_pay_p)
        nday_count = len(daten_pay_p)
        retention_count = len(date_pay_p.intersection(daten_pay_p))
        if len(date_pay_p) == 0:
            retention_rate = np.nan
        else:
            retention_rate = retention_count/day_count
        result[date] = [day_count,nday_count,retention_count,retention_rate]
    result = pd.DataFrame.from_dict(result).T
    result.columns=['day_count','nday_count','retention_count','retention_rate']
    return(result)

四、结果呈现

retention_table = retention(df,3)
retention_table['Month'] = pd.to_datetime(retention_table.index).month
retention_table.groupby(retention_table['Month'])['retention_rate'].mean().plot()

五、数据及源码

阿里云盘:https://www.aliyundrive.com/s/R1y61iU6zdT

上一篇:python数据框处理


下一篇:WEB安全-SQL注入漏洞测试(宽字节)