1. model pipeline拆解
# -*- coding: utf-8 -*-
import pandas as pd
import lightgbm as lgb
from sklearn import metrics
from woe.eval import eval_segment_metrics
# Typical settings; these normally do not need to be changed.
# NOTE(review): presumably passed to LightGBM as training parameters -- the
# usage is not visible in this part of the file; confirm against the caller.
params = dict(
    boosting_type='gbdt',
    objective='binary',
    metric='auc',
    num_leaves=6,
    max_bin=10,
    learning_rate=0.01,
    is_unbalance=False,
    verbose=0,
)
# Label column name; it is also one of the entries in drop_cols below.
LABEL = 'is_later_30d_loss'

# The threshold is determined from the segment analysis.
THRESHOLD = 0.5034

feature_list = [
    "chat_7d_cnt",
    "chat_cnt_self_expression",
    "chat_cnt_text",
]
drop_cols = ["uid", LABEL, "pt"]
keep_cols = ['uid', 'score']

# Test-set data that is waiting to be scored.
data_input_path = '../data/raw_data.txt'
data_output_path = '../data/predict_data.csv'
model_file_path = 'model.txt'
if __name_