营销渠道客户转化分析(归因分析)

1.背景及问题

现某IT产品销售公司,有一定量的小公司水平的用户,这些用户在做出购买时,会接触到销售公司的多个营销渠道,不同的渠道上投入怎样分配,以实现营销效益的最大化,便成为了很多公司的市场营销部门亟需解决的问题。

即:找出转化率最高的渠道路径或方式


2.思路步骤

  • 线性模型分析

  • 马尔科夫链分析

  • 可视化马尔科夫链

转换率计算

  • 第一次点击 用户访问路径上的第一个触点获取所有贡献值

  • 最后一次点击 用户购买之前最后一个触点获取所有贡献值

  • 线性模型分析 用户访问路径上的所有触点平分贡献值

  • 马尔科夫链 马尔科夫链的转移矩阵 -> 每个触点的移除效应-> 触点贡献值


3.数据集介绍

营销渠道客户转化分析(归因分析)

Id: 某IT产品销售公司的客户,客户类型是小公司

Segment: 客户的画像

Channel:客户生命周期中触及过的渠道; DM(直邮),EM(电子邮件), PHONE(电话)和 WEB(产品销售官网浏览记录)

Date: 客户触及渠道的日期,触及时间长度为1年

Pur_flag: 等于1表示该客户在接触完相应渠道后,完成了IT产品的购买


4.代码及具体步骤

导入模块

import numpy as np
import pandas as pd
import networkx as nx
from pprint import pprint
import os
import matplotlib.pyplot as plt
os.chdir(r'C:/Users/pc/Desktop/数据分析项目/客户转化分析/')
# 导入数据,将第四列解析为日期格式

df = pd.read_csv(r'./ChannelAttribute.csv', parse_dates=[3])
df.head()
id segment channel date pur_flag
0 20398764672 Tier 2 DM 2018-03-19 0
1 20408399343 Tier 2 WEB 2017-09-27 0
2 20438922645 Tier 2 WEB 2017-11-15 0
3 20225918468 Tier 2 DM 2017-05-24 0
4 20278581048 Tier 3 DM 2018-04-23 0

创建路径数据

def create_path_with_value(data, element):
    path = []
    path.append('start')
    df2 = data.loc[data['id'] == element, :].sort_values(['id', 'date'], ascending=[False, True])

    for i in range(len(df2)):
        path.append(df2.iloc[i]['channel'])

    if df2['pur_flag'].unique() == 1:
        path.append('conversion')
        conv = 1
        conv_null = 0
    else:
        path.append('null')
        conv = 0
        conv_null = 1
    return [path, conv, conv_null]


final_path, conv, conv_null = [], [], []
for element in df['id'].unique():
    rst = create_path_with_value(df, element)
    final_path.append(rst[0])
    conv.append(rst[1])
    conv_null.append(rst[2])
# 路径数据
path_data = pd.DataFrame({'path': final_path, 'conv': conv, 'conv_nulls': conv_null})
path_data.head(10)
path conv conv_nulls
0 [start, WEB, EM, DM, null] 0 1
1 [start, EM, WEB, DM, null] 0 1
2 [start, WEB, EM, DM, null] 0 1
3 [start, DM, EM, WEB, null] 0 1
4 [start, EM, WEB, DM, null] 0 1
5 [start, PHONE, EM, DM, WEB, null] 0 1
6 [start, PHONE, WEB, DM, null] 0 1
7 [start, DM, PHONE, WEB, null] 0 1
8 [start, WEB, EM, DM, conversion] 1 0
9 [start, PHONE, WEB, DM, null] 0 1

归因分析(最后一次点击、第一次点击和线性模型)

def create_last_click_stats_pair(data):
    temp_path, temp_conv = [], []
    for i in range(len(data)):
        temp_path.append(data.iloc[i]['path'][-2])
        temp_conv.append(data.iloc[i]['conv'])
    return pd.DataFrame({'touch': temp_path, 'Last_Conv': temp_conv})


def create_first_order_states_pair(data):
    temp_path, temp_conv = [], []
    for i in range(len(data)):
        temp_path.append(data.iloc[i]['path'][1])
        temp_conv.append(data.iloc[i]['conv'])
    return pd.DataFrame({'touch': temp_path, 'First_Conv': temp_conv})


def create_linear_click_stats_pair(data):
    temp_path, temp_conv = [], []
    for i in range(len(data)):
        if len(data.iloc[i]['path'])==6:
            for j in range(1,5):
                temp_path.append(data.iloc[i]['path'][j])
                temp_conv.append(data.iloc[i]['conv'] / (6 - 2))
        elif len(data.iloc[i]['path'])==5:
            for j in range(1,4):
                temp_path.append(data.iloc[i]['path'][j])
                temp_conv.append(data.iloc[i]['conv'] / (5 - 2))
        elif len(data.iloc[i]['path'])==4:
            for j in range(1,3):
                temp_path.append(data.iloc[i]['path'][j])
                temp_conv.append(data.iloc[i]['conv'] / (4 - 2))
        else:
            for j in range(1, 2):
                temp_path.append(data.iloc[i]['path'][j])
                temp_conv.append(data.iloc[i]['conv'] / (3 - 2))
    return pd.DataFrame({'touch': temp_path, 'Linear_Conv': temp_conv})
last_touch = create_last_click_stats_pair(path_data).groupby('touch')['Last_Conv'].sum().reset_index()
linear_touch = create_linear_click_stats_pair(path_data).groupby('touch')['Linear_Conv'].sum().reset_index()
first_touch = create_first_order_states_pair(path_data).groupby('touch')['First_Conv'].sum().reset_index()
lst = last_touch.set_index('touch').iloc[:, 0:].apply(lambda x: x / x.sum())
li = linear_touch.set_index('touch').iloc[:, 0:].apply(lambda x: x / x.sum())
fst = first_touch.set_index('touch').iloc[:, 0:].apply(lambda x: x / x.sum())
dfs = [fst, lst, li]
dfs = [df for df in dfs]
dfs[0].join(dfs[1:])
First_Conv Last_Conv Linear_Conv
touch
DM 0.341152 0.744850 0.504964
EM 0.278233 0.097915 0.198478
PHONE 0.094440 0.014768 0.050488
WEB 0.286175 0.142467 0.246070



线性模型分析结论: 相比于其他渠道,DM(直邮)是转化率较优的渠道



马尔科夫链

# 手动计算状态转移矩阵
def split_states(data):
    temp_data = []
    for i in range(len(data)):
        path = data.iloc[i]['path']
        state_pairs, values = [], []
        for j in range(len(path)-1):
            state_pairs.append((path[j], path[j+1]))
            values.append(1)

        temp_data.append([state_pairs, values])
    return temp_data
temps = split_states(path_data)
temps[0:3]
[[[('start', 'WEB'), ('WEB', 'EM'), ('EM', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]],
 [[('start', 'EM'), ('EM', 'WEB'), ('WEB', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]],
 [[('start', 'WEB'), ('WEB', 'EM'), ('EM', 'DM'), ('DM', 'null')],
  [1, 1, 1, 1]]]
def transition_maxtrix(data):
    state_pairs, values = [], []
    for i in range(len(data)):
        for j, z in zip(data[i][0], data[i][1]):
            state_pairs.append(j)
            values.append(z)
    temp_df = pd.DataFrame({'state_pairs': state_pairs, 'values': values})
    grp_df = temp_df.groupby('state_pairs')['values'].sum().reset_index()
    grp_df[['start', 'end']] = grp_df['state_pairs'].apply(pd.Series)

    table = pd.crosstab(grp_df['end'], grp_df['start'], values=grp_df['values'], aggfunc=np.sum, normalize='columns')\
        .applymap(lambda x: "{:3.2f}".format(x))
    return table
# 输出状态转移矩阵
tmp = transition_maxtrix(temps)
tmp1 = tmp.transpose()
tmp1
end DM EM PHONE WEB conversion null
start
DM 0.00 0.09 0.01 0.12 0.21 0.57
EM 0.43 0.00 0.02 0.41 0.04 0.09
PHONE 0.18 0.11 0.00 0.69 0.01 0.01
WEB 0.58 0.22 0.03 0.00 0.05 0.12
start 0.14 0.29 0.32 0.25 0.00 0.00

计算渠道的移除效应

def channel_remove(data,channel_removed):
    state_pairs, values = [], []
    for i in range(len(data)):
        for j, z in zip(data[i][0], data[i][1]):
            state_pairs.append(j)
            values.append(z)
    temp_df = pd.DataFrame({'state_pairs': state_pairs, 'values': values})
    grp_df = temp_df.groupby('state_pairs')['values'].sum().reset_index()
    grp_df[['start', 'end']] = grp_df['state_pairs'].apply(pd.Series)
    temp = grp_df.copy()
    grp_df['start'] = grp_df['start'].replace(channel_removed, 'unknown')
    grp_df['end'] = grp_df['end'].replace(channel_removed, 'unknown')
    return [grp_df, temp]


# 筛选出成功转化路径

path_data_pur = path_data[path_data['conv']==1]
temps = split_states(path_data_pur)

conversion =[]
columns = ['start', 'end', 'values_x', 'values_y', 'perct']

# 所有渠道
channels_list = list(df['channel'].unique())

df_dummy1 = pd.DataFrame({'start': ['start', 'conversion', 'null'],
                          'end': ['start', 'conversion', 'null'],
                          'values_x': [0, 0, 0],
                          'values_y': [0, 0, 0],
                          'perct': [0, 1, 1]})
dy_dummy = pd.DataFrame(df_dummy1, columns=columns)


df_dummy2 = pd.DataFrame({'start': ['start', 'conversion', 'null'],
                         'end': ['start', 'conversion', 'null']})

# 逐个计算移除单个渠道后的总转化数

for chnl in channels_list:
    df_remove = channel_remove(temps, chnl)[0]
    df_noremove = channel_remove(temps, chnl)[1]
    
    df_temp = df_remove.groupby('start')['values'].sum().reset_index()
    df_temp = pd.merge(df_remove, df_temp, on='start', how='left')
    df_temp['perct'] = df_temp['values_x']/df_temp['values_y']

    df_temp = pd.DataFrame(df_temp, columns=columns)

    df_temp = pd.concat([df_temp, dy_dummy], axis=0)

    df_ini = pd.DataFrame(df_noremove, columns=['start', 'end'])

    df_temp2 = pd.concat([df_ini, df_dummy2], axis=0)

    df_temp = pd.merge(df_temp2, df_temp, on=['start', 'end'], how='left')

#     用0填充由于左连接出现的NaN
    df_temp['values_x'].fillna(0, inplace=True)
    df_temp['values_y'].fillna(0, inplace=True)
    df_temp['perct'].fillna(0, inplace=True)

    df_trans1 = pd.crosstab(df_temp['start'], df_temp['end'], values=df_temp['perct'], aggfunc=np.sum)

    df_trans1.update(df_trans1[['DM', 'EM', 'PHONE', 'WEB', 'conversion', 'null', 'start']].fillna(0))
    
#     转化为numpy矩阵
    df_trans_mat = np.matrix(df_trans1)

    inist_n1 = pd.crosstab(df_temp['start'], df_temp['end'], values=df_temp['values_x'], aggfunc=np.sum)
    inist_n1.update(inist_n1[['DM', 'EM', 'PHONE', 'WEB', 'conversion', 'null', 'start']].fillna(0))

    inist_mat = np.matrix(inist_n1.iloc[-1])
    
#     矩阵乘积
    mat = inist_mat*df_trans_mat

# 取出估计出来的转化数
    conversion.append(mat[0,4])


# 计算单个渠道的移除效应
chnl_conversion = pd.DataFrame({'channel': channels_list, 'conv': conversion})

df_remove = channel_remove(temps, chnl)[0]
df_noremove = channel_remove(temps, chnl)[1]
tot_conv = df_remove['values'].sum()

chnl_conversion['impact'] = (tot_conv-chnl_conversion['conv'])/tot_conv

tot_impact = chnl_conversion['impact'].sum()

chnl_conversion['convet_rate']= chnl_conversion['impact']/tot_impact

chnl_conversion
channel conv impact convet_rate
0 DM 1003.039274 0.961468 0.265241
1 WEB 2724.928034 0.895320 0.246993
2 EM 2856.954317 0.890248 0.245594
3 PHONE 3179.825240 0.877845 0.242172




可视化马尔科夫链

states = ['start', 'DM', 'EM', 'PHONE', 'WEB', 'conversion', 'null']

def _get_markov_edges(Q):
    edges = {}
    for col in Q.columns:
        for idx in Q.index:
            edges[(idx,col)] = Q.loc[idx,col]
    return edges
edges_wts = _get_markov_edges(tmp1)
edges_wts
{('DM', 'DM'): '0.00',
 ('EM', 'DM'): '0.43',
 ('PHONE', 'DM'): '0.18',
 ('WEB', 'DM'): '0.58',
 ('start', 'DM'): '0.14',
 ('DM', 'EM'): '0.09',
 ('EM', 'EM'): '0.00',
 ('PHONE', 'EM'): '0.11',
 ('WEB', 'EM'): '0.22',
 ('start', 'EM'): '0.29',
 ('DM', 'PHONE'): '0.01',
 ('EM', 'PHONE'): '0.02',
 ('PHONE', 'PHONE'): '0.00',
 ('WEB', 'PHONE'): '0.03',
 ('start', 'PHONE'): '0.32',
 ('DM', 'WEB'): '0.12',
 ('EM', 'WEB'): '0.41',
 ('PHONE', 'WEB'): '0.69',
 ('WEB', 'WEB'): '0.00',
 ('start', 'WEB'): '0.25',
 ('DM', 'conversion'): '0.21',
 ('EM', 'conversion'): '0.04',
 ('PHONE', 'conversion'): '0.01',
 ('WEB', 'conversion'): '0.05',
 ('start', 'conversion'): '0.00',
 ('DM', 'null'): '0.57',
 ('EM', 'null'): '0.09',
 ('PHONE', 'null'): '0.01',
 ('WEB', 'null'): '0.12',
 ('start', 'null'): '0.00'}
# 移除0转化概率的边
for key, value in list(edges_wts.items()):
    if value == '0.00':
        edges_wts.pop(key)

pprint(edges_wts)
{('DM', 'EM'): '0.09',
 ('DM', 'PHONE'): '0.01',
 ('DM', 'WEB'): '0.12',
 ('DM', 'conversion'): '0.21',
 ('DM', 'null'): '0.57',
 ('EM', 'DM'): '0.43',
 ('EM', 'PHONE'): '0.02',
 ('EM', 'WEB'): '0.41',
 ('EM', 'conversion'): '0.04',
 ('EM', 'null'): '0.09',
 ('PHONE', 'DM'): '0.18',
 ('PHONE', 'EM'): '0.11',
 ('PHONE', 'WEB'): '0.69',
 ('PHONE', 'conversion'): '0.01',
 ('PHONE', 'null'): '0.01',
 ('WEB', 'DM'): '0.58',
 ('WEB', 'EM'): '0.22',
 ('WEB', 'PHONE'): '0.03',
 ('WEB', 'conversion'): '0.05',
 ('WEB', 'null'): '0.12',
 ('start', 'DM'): '0.14',
 ('start', 'EM'): '0.29',
 ('start', 'PHONE'): '0.32',
 ('start', 'WEB'): '0.25'}
# 用networkx绘制马尔科夫链

os.environ["PATH"] += os.pathsep + './graphviz-2.38/release/bin/'

G = nx.MultiDiGraph()

# 增加节点状态
G.add_nodes_from(states)
print('Nodes:\n{G.nodes()}\n')

# 边表示转换概率
for k, v in edges_wts.items():
    tmp_origin, tmp_destination = k[0], k[1]
    G.add_edge(tmp_origin, tmp_destination, weight=v, label=v)
print('Edges:')
pprint(G.edges(data=True))

pos = nx.drawing.nx_pydot.graphviz_layout(G, prog='dot')
nx.draw_networkx(G, pos)

# 创建边标签
edge_labels = {(n1,n2):d['label'] for n1,n2,d in G.edges(data=True)}
nx.draw_networkx_edge_labels(G , pos, edge_labels=edge_labels)
nx.drawing.nx_pydot.write_dot(G, 'customer_markov.dot')
Nodes:
{G.nodes()}

Edges:
OutMultiEdgeDataView([('DM', 'WEB', {'label': '0.12', 'weight': '0.12'}), ('DM', 'EM', {'label': '0.09', 'weight': '0.09'}), ('DM', 'null', {'label': '0.57', 'weight': '0.57'}), ('DM', 'PHONE', {'label': '0.01', 'weight': '0.01'}), ('DM', 'conversion', {'label': '0.21', 'weight': '0.21'}), ('start', 'EM', {'label': '0.29', 'weight': '0.29'}), ('start', 'DM', {'label': '0.14', 'weight': '0.14'}), ('start', 'PHONE', {'label': '0.32', 'weight': '0.32'}), ('start', 'WEB', {'label': '0.25', 'weight': '0.25'}), ('EM', 'DM', {'label': '0.43', 'weight': '0.43'}), ('EM', 'WEB', {'label': '0.41', 'weight': '0.41'}), ('EM', 'null', {'label': '0.09', 'weight': '0.09'}), ('EM', 'PHONE', {'label': '0.02', 'weight': '0.02'}), ('EM', 'conversion', {'label': '0.04', 'weight': '0.04'}), ('WEB', 'DM', {'label': '0.58', 'weight': '0.58'}), ('WEB', 'EM', {'label': '0.22', 'weight': '0.22'}), ('WEB', 'null', {'label': '0.12', 'weight': '0.12'}), ('WEB', 'PHONE', {'label': '0.03', 'weight': '0.03'}), ('WEB', 'conversion', {'label': '0.05', 'weight': '0.05'}), ('PHONE', 'WEB', {'label': '0.69', 'weight': '0.69'}), ('PHONE', 'DM', {'label': '0.18', 'weight': '0.18'}), ('PHONE', 'null', {'label': '0.01', 'weight': '0.01'}), ('PHONE', 'EM', {'label': '0.11', 'weight': '0.11'}), ('PHONE', 'conversion', {'label': '0.01', 'weight': '0.01'})])

营销渠道客户转化分析(归因分析)

上一篇:SQLSERVER 查看服务器IP地址的命令


下一篇:达梦数据库常用功能及命令记录--持续更新