ML实战:手动实现异常检测算法
- 这里使用了改良版本,即采用多元高斯分布函数来建模,从而能够更好地适应特征之间的相关性
代码实现
Detection类
import numpy as np
from math import exp,pow,pi
np.set_printoptions(suppress=True)
class Detection:
    """Anomaly detector based on a multivariate Gaussian density estimate.

    ``fit`` estimates the per-feature mean and the (biased, divide-by-m)
    covariance matrix of the training set. ``predict`` evaluates the
    multivariate Gaussian density of a single sample and flags it as
    abnormal when that density falls below ``epsilon``.
    """

    def __init__(self, x, epsilon=1e-3):
        '''
        :param x: training set, array-like of shape (m, n) with one sample per row
        :param epsilon: density threshold; samples whose density is below it
            are reported as abnormal. The default equals the previously
            hard-coded literal ``10e-4`` (which is 1e-3, not 1e-4).
        :raises ValueError: if ``x`` is not a non-empty 2-D array

        Attributes set here or by ``fit``:
            n   -- number of features
            u   -- mean of every feature (``None`` until ``fit`` is called)
            cov -- covariance matrix (``None`` until ``fit`` is called)
        '''
        self.x = np.asarray(x, dtype=float)
        if self.x.ndim != 2 or self.x.shape[0] == 0:
            raise ValueError("x must be a non-empty 2-D array of shape (m, n)")
        self.n = self.x.shape[1]
        self.epsilon = epsilon
        self.u = None
        self.cov = None
        # Derived quantities cached by fit() so predict() does not redo the
        # matrix inversion and determinant for every sample.
        self._cov_inv = None
        self._norm = None

    def fit(self):
        """Estimate the Gaussian parameters from the training set.

        Stores the feature means in ``self.u`` and the covariance matrix in
        ``self.cov``, and caches the inverse covariance and the density
        normalisation constant used by ``predict``.

        :raises numpy.linalg.LinAlgError: if the covariance matrix is singular
        :raises ValueError: if the determinant of ``2*pi*cov`` is negative
        """
        m = len(self.x)
        self.u = np.sum(self.x, axis=0) / m
        centered = self.x - self.u
        self.cov = np.matmul(centered.T, centered) / m
        self._cov_inv = np.linalg.inv(self.cov)
        # sqrt(det(2*pi*cov)) is the denominator of the Gaussian density.
        self._norm = pow(np.linalg.det(2 * pi * self.cov), 0.5)

    def predict(self, x):
        """Classify a single sample.

        :param x: one sample with ``n`` features (flattened before use)
        :return: 1 if the sample's density is below ``epsilon`` (abnormal),
            otherwise 0 (normal)
        :raises RuntimeError: if ``fit`` has not been called yet
        """
        if self._cov_inv is None:
            raise RuntimeError("fit() must be called before predict()")
        # Flatten so the quadratic form below is always a scalar, even when
        # the sample is passed as a (1, n) row.
        diff = np.asarray(x, dtype=float).ravel() - self.u
        # Multivariate Gaussian density of the sample.
        p = exp(-0.5 * (diff @ self._cov_inv @ diff)) / self._norm
        return 1 if p < self.epsilon else 0
主函数
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import sys
from Detection_class import Detection
from sklearn.model_selection import train_test_split
np.set_printoptions(suppress=True)
# Demo script: fit the Gaussian anomaly detector on normal samples only, then
# compare its predictions on a mixed test set with the ground truth.
color = ['blue', 'red']
label = ['Normal', 'Abnormal']

# Generate data: 1000 normal samples plus three abnormal clusters of 10 each.
x, y = make_blobs(n_samples=[1000, 10, 10, 10], n_features=2,
                  random_state=np.random.randint(0, 30))
x_postive = x[y == 0, :]
x_error = x[y != 0, :]
x_train, x_test = train_test_split(x_postive, test_size=0.3,
                                   random_state=np.random.randint(0, 30))
# Test set = held-out normal samples followed by every abnormal sample.
y_test = np.array([0] * len(x_test) + [1] * len(x_error))
x_test = np.r_[x_test, x_error]

# Fit the detector's parameters on the training set.
detection = Detection(x_train)
detection.fit()

# Predict every test sample, then print how many were classified as normal.
y_predict = np.array([detection.predict(sample) for sample in x_test])
print(np.count_nonzero(y_predict == 0))

# Visualise the ground truth.
plt.figure(figsize=(12, 5), dpi=80)
for i in range(2):
    plt.scatter(x_test[y_test == i, 0], x_test[y_test == i, 1],
                marker='o', s=8, c=color[i], label=label[i])
plt.title('Real Data')
plt.legend(loc=[1, 0])
# Raw strings keep the Windows backslashes literal without relying on
# invalid escape sequences such as "\p" or "\m".
plt.savefig(r'E:\python\ml\ml by myself\Abnormal_Detection\Abnormal_Detection_real.png')

# Visualise the predictions on a NEW figure; otherwise the points and title
# would be drawn on top of the "Real Data" plot saved above.
plt.figure(figsize=(12, 5), dpi=80)
for i in range(2):
    plt.scatter(x_test[y_predict == i, 0], x_test[y_predict == i, 1],
                marker='o', s=8, c=color[i], label=label[i])
plt.title('Predict Result')
plt.legend(loc=[1, 0])
plt.savefig(r'E:\python\ml\ml by myself\Abnormal_Detection\Abnormal_Detection_predict.png')
sys.exit(0)
结果