#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2022/1/1 13:49
# @Author : @linlianqin
# @Site :
# @File : naivyBates.py
# @Software: PyCharm
# @description:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import MultinomialNB
import csv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import joblib
from dataProcess import loaddatasets
from paths import abs_path
import numpy as np
# 处理数据
def loadDataSets(xlspath):
    """Load features and labels and also build a combined integer table.

    Args:
        xlspath: Path handed straight to ``loaddatasets``.

    Returns:
        A 3-tuple ``(dataSet, datas, labels)`` where ``datas`` and ``labels``
        are exactly what ``loaddatasets`` returned and ``dataSet`` is
        ``datas`` with ``labels`` appended as one extra trailing column,
        cast to ``int``.
    """
    features, targets = loaddatasets(xlspath)
    # Turn the labels into a single column so they can be stacked
    # horizontally next to the feature columns.
    # NOTE(review): assumes `targets` is a NumPy array (it must expose
    # .reshape) and `features` is 2-D with one row per label -- confirm
    # against dataProcess.loaddatasets.
    row_count = len(targets)
    target_column = targets.reshape((row_count, 1))
    combined = np.hstack([features, target_column])
    return combined.astype(int), features, targets
# 训练
def train(traffic_feature, traffic_target):
    """Fit a Bernoulli naive Bayes classifier on standardised features.

    The data is split 90/10 (``random_state=0``) into training and test
    partitions. The ``StandardScaler`` is fitted on the training partition
    only and then applied to both partitions, so no statistics of the
    held-out rows leak into preprocessing.

    Args:
        traffic_feature: Feature matrix, one row per sample.
        traffic_target: Labels, one per row of ``traffic_feature``.

    Returns:
        A 3-tuple ``(model, feature_test, target_test)``: the fitted
        ``BernoulliNB`` model, the scaled held-out features and the
        matching held-out labels.
    """
    print('traffic_feature=', traffic_feature)
    print('traffic_target=', traffic_target)

    # Split BEFORE scaling: the row assignment depends only on
    # random_state, so the partitions are the same rows as before, but the
    # scaler no longer sees the test rows when estimating mean / variance.
    feature_train, feature_test, target_train, target_test = train_test_split(
        traffic_feature, traffic_target, test_size=0.1, random_state=0)

    scaler = StandardScaler()
    scaler.fit(feature_train)
    feature_train = scaler.transform(feature_train)
    feature_test = scaler.transform(feature_test)

    # NOTE(review): the fitted scaler is neither returned nor persisted, so
    # callers that later predict on new raw data cannot reproduce this
    # scaling. Returning it would change the interface, so it is left as is.
    model = BernoulliNB()
    model.fit(feature_train, target_train)
    return model, feature_test, target_test
# 预测
def predict(model, feature_test):
    """Return ``model.predict(feature_test)`` unchanged.

    Args:
        model: Any object exposing a ``predict`` method.
        feature_test: Samples to classify, passed through as-is.

    Returns:
        Whatever ``model.predict`` returns (presumably one label per
        sample, e.g. ``[1, 2, 3]``).
    """
    return model.predict(feature_test)
# 评估
def evalue(model, predict_labels, true_labels):
    """Print and return accuracy, confusion matrix and classification report.

    Args:
        model: Unused; kept so existing callers do not break.
        predict_labels: Labels produced by the classifier.
        true_labels: Ground-truth labels in the same order.

    Returns:
        A 3-tuple ``(acc, conf_mat, report)`` with the values returned by
        ``accuracy_score``, ``confusion_matrix`` and
        ``classification_report`` respectively.
    """
    # All three metrics are called as (y_true, y_pred). The accuracy call
    # previously had the two swapped, which was inconsistent with the
    # other two calls.
    acc = accuracy_score(true_labels, predict_labels)
    print("准确率:", acc)
    conf_mat = confusion_matrix(true_labels, predict_labels)
    print("混淆矩阵:", conf_mat)
    report = classification_report(true_labels, predict_labels)
    print("模型分析报告:", report)
    return acc, conf_mat, report
# 保存模型
def save_model(model, path):
    """Serialise ``model`` to ``path`` with joblib.

    Args:
        model: Object to persist.
        path: Destination file name handed to ``joblib.dump``.
    """
    joblib.dump(value=model, filename=path)
# 加载模型
def load_model(path):
    """Load and return an object previously written with joblib.

    Args:
        path: File name handed to ``joblib.load``.

    Returns:
        The deserialised object.
    """
    # NOTE(review): joblib files are pickle-based, so only load files that
    # come from a trusted source.
    return joblib.load(path)
if __name__ == '__main__':
    # End-to-end demo: load the data, train, evaluate, save the model,
    # reload it and classify one hand-written sample.
    import os  # local import: only the script entry point builds paths

    # Build paths with os.path.join instead of hard-coded "\\" separators
    # so the script also runs on non-Windows systems.
    data_dir = os.path.join(abs_path, "data")

    print("加载数据集......")
    xlsPath = os.path.join(data_dir, "min_datas.xlsx")
    dataSet, datas, labels = loadDataSets(xlsPath)

    print("开始训练......")
    model, feature_test, target_test = train(datas, labels)

    print("测试模型,测试集")
    predict_labels = predict(model, feature_test)

    print("评估模型......")
    acc, conf_mat, report = evalue(model, predict_labels, target_test)

    print("保存模型")
    # The accuracy (two decimals) is embedded in the model file name.
    path = os.path.join(data_dir, "naivyBates_%.2f.pkl" % acc)
    save_model(model, path)

    print("调用模型进行预测")
    # The input must be 2-D: one inner list per sample.
    # NOTE(review): train() standardises the features before fitting, but
    # this sample is passed to the model unscaled -- confirm this is
    # intended.
    testVec = [[0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]]
    new_model = load_model(path)
    predict_results = new_model.predict(testVec)
    print("待测数据:", testVec)
    print("预测结果:", predict_results)