房态预测

import os
import time

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 模型处理模块
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# 常规模型
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# 集成学习和stacking模型
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
import xgboost as xgb

# 评价标准模块
from sklearn import metrics
from sklearn.metrics import accuracy_score,roc_auc_score,recall_score,precision_score, classification_report

import warnings
warnings.filterwarnings('ignore')
%matplotlib inline


# Load the merged order training set; the three date columns are parsed as
# datetimes while reading.
data = pd.read_csv(
    r"E:\Excersise\ML\Trip\order_train_merage.csv",
    parse_dates=["orderdate", "arrival", "etd"],
)
# Bare expressions such as the one below only display something when run as
# notebook cells (the file uses an IPython magic); they do not modify `data`.
data.head()

# Per column: [number of missing values, share of missing values].
data.apply(
    lambda col: [col.isnull().sum(), col.isnull().sum() / col.size],
    axis=0,
)

# Remove, in place, every row that contains at least one missing value.
data.dropna(inplace=True)

# Frequency of each value of the target column.
data["label"].value_counts()

# Number of rows that duplicate an earlier row.
data.duplicated().sum()

# Summary of the object-typed columns.
data.describe(include="object")

# One-hot encode the two categorical columns and append the indicator columns
# to the frame. The original `hotelbelongto` / `supplierchannel` columns are
# not removed here.
dummies = pd.get_dummies(data["hotelbelongto"], prefix="hotelbelongto")
dummies_1 = pd.get_dummies(data["supplierchannel"], prefix="supplierchannel")
data = pd.concat([data, dummies, dummies_1], axis="columns")
data.head()

# ADASYN adaptive oversampling of the training data.
from imblearn.over_sampling import ADASYN

sample = ADASYN()
# Features (every column except "label") and the target are both handed to the
# sampler as arrays.
# NOTE(review): the feature matrix is built from all remaining columns of
# `data`; if the parsed date columns and the raw categorical columns are still
# present at this point they are passed along too — confirm they are dropped or
# encoded before resampling.
X_resampled, y_resampled = sample.fit_resample(
    data.drop(columns="label").values,
    data["label"].values,
)

# Candidate classifiers keyed by the short name under which their result is
# stored. Every estimator is created with its constructor defaults.
model_name_param_dict = {
    'LR': LogisticRegression(),
    'DT': DecisionTreeClassifier(),
    'AdaBoost': AdaBoostClassifier(),
    'GBDT': GradientBoostingClassifier(),
    'RF': RandomForestClassifier(),
    # xgboost is imported in this file only as the alias `xgb`, so the class
    # has to be reached through it; a bare `XGBClassifier` is undefined.
    'XGBoost': xgb.XGBClassifier(),
}

# NOTE(review): `train_model` and the X_train / y_train / X_test / y_test
# splits are not defined in the visible part of this file — confirm they are
# created before this loop runs.
# Train each candidate in turn and keep whatever `train_model` returns,
# indexed by model name.
result = {}
for model_name, model in model_name_param_dict.items():
    result[model_name] = train_model(
        X_train, y_train, X_test, y_test, model, model_name
    )

  

上一篇:机器学习基础1


下一篇:SlowFast-入门1-动作识别-部署与测试