Neural Network Binary Classification Dataset Exercises

Neural Networks (Binary Classification)

1. Iris Dataset

from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import matplotlib.pyplot as plt
# Load the dataset
data=load_iris()       # the iris dataset
X=data.data            # features (the 'data' key)
Y=data.target          # labels (the 'target' key)
# print(X.shape)  # (150, 4)
# print(Y)
# Keep only classes 0 and 1 (drop class 2); all four feature columns are kept
X=X[Y!=2,0:]
Y=Y[Y!=2]
print(X.shape)  # (100, 4)
print(X)
print(Y)
# Min-max feature scaling (suofang)
def suofang(x):
    xmin=np.min(x,axis=0)
    xmax=np.max(x,axis=0)
    s=(x-xmin)/(xmax-xmin)
    return s
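# Optional check: suofang matches sklearn's MinMaxScaler (imported above),
# since both compute (x - min) / (max - min) per column:
print(np.allclose(suofang(X), MinMaxScaler().fit_transform(X)))   # expected: True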
x=suofang(X)
y=Y
print(y)

# Shuffle the samples
m=x.shape[0]
np.random.seed(4)
order=np.random.permutation(m)
x=x[order]
y=y[order]
print(x.shape)
print(y.shape)

# Keep a 1-D copy of the labels for plotting, prepend the bias column, then split
ya=y                             # 1-D labels, used by tu() below
xx=np.c_[np.ones(len(x)),x]      # prepend a column of ones (bias term)
a=int(len(x)*0.7)                # 70/30 train/test split point
y=np.c_[ya]                      # labels as a column vector
trainx=xx[:a]
trainy=y[:a]
testx=xx[a:]
testy=y[a:]
# Linear model (defined for completeness; forwardp below inlines the dot products)
def model(x,theta):
    return x.dot(theta)
# Sigmoid activation
def sigmoid(z):
    return 1/(1+np.exp(-z))
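# Caveat: np.exp(-z) overflows for large negative z. A safer variant (an
# optional sketch; the code below keeps the simple version) clips z first:
def sigmoid_stable(z):
    return 1/(1+np.exp(-np.clip(z,-500,500)))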
# Cross-entropy cost; np.mean replaces the original -1/m*np.sum, which relied
# on the global m (the full-dataset size, not the batch size)
def cost(h,y):
    return -np.mean(y*np.log(h)+(1-y)*np.log(1-h))
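# Caveat: cost() returns inf/nan if h reaches exactly 0 or 1. A common guard
# (an optional sketch; eps is an arbitrary choice) clips h away from 0 and 1:
def cost_safe(h,y,eps=1e-12):
    h=np.clip(h,eps,1-eps)
    return -np.mean(y*np.log(h)+(1-y)*np.log(1-h))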
# Forward propagation through the two hidden layers
def forwardp(a1,theta1,theta2,theta3):
    z1=a1.dot(theta1)
    a2=sigmoid(z1)
    z2 = a2.dot(theta2)
    a3 = sigmoid(z2)
    z3 = a3.dot(theta3)
    a4 = sigmoid(z3)
    return a2,a3,a4
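# Shape walk-through with trainx (70, 5) and k=7, l=10 (the values used below):
#   a1 (70,5)  . theta1 (5,7)  -> a2 (70,7)
#   a2 (70,7)  . theta2 (7,10) -> a3 (70,10)
#   a3 (70,10) . theta3 (10,1) -> a4 (70,1), the predicted probabilities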
    
# Backpropagation: compute the deltas and take one gradient-descent step
def backp(a1,a2,a3,a4,y,theta1,theta2,theta3,alpha=0.005):
    m,n=a1.shape
    sigma4=a4-y
    sigma3=sigma4.dot(theta3.T)*a3*(1-a3)
    sigma2=sigma3.dot(theta2.T)*a2*(1-a2)

    dt3=1/m*a3.T.dot(sigma4)
    dt2 = 1 / m * a2.T.dot(sigma3)
    dt1 = 1 / m * a1.T.dot(sigma2)

    theta3=theta3-alpha*dt3
    theta2 = theta2 - alpha * dt2
    theta1 = theta1 - alpha * dt1

    return theta3,theta2,theta1
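# Why sigma4 = a4 - y: with a sigmoid output and cross-entropy cost, the
# sigmoid derivative cancels against the cost's derivative, so dJ/dz3 = a4 - y.
# Each hidden-layer delta then follows the chain rule:
#   sigma_l = (sigma_{l+1} . theta_l^T) * a_l * (1 - a_l)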
    
# Gradient descent: k and l are the two hidden-layer widths
def gradeDecline(a1,y,nums,k,l):
    m,n=a1.shape
    j=np.zeros(nums)
    # Random initialization, commented out in the original; note that the zero
    # initialization used below keeps all hidden units of a layer identical:
    # np.random.seed(4)
    # theta1 = 2*np.random.rand(n,k)-1
    # theta2 = 2*np.random.rand(k,l)-1
    # theta3 = 2*np.random.rand(l,1)-1
    theta1=np.zeros((n,k))
    theta2=np.zeros((k,l))
    theta3=np.zeros((l,1))
    for i in range(nums):
        a2,a3,a4=forwardp(a1,theta1,theta2,theta3)
        j[i]=cost(a4,y)
        theta3,theta2,theta1=backp(a1,a2,a3,a4,y,theta1,theta2,theta3,alpha=0.005)

    return theta1,theta2,theta3,j,a4
    
# Accuracy: fraction of thresholded predictions (cutoff 0.5) matching the labels
def accuracy(a1,a4,y):
    m, n = a1.shape
    count=0
    for i in range(m):
        if np.where(a4[i]>0.5,1,0)==y[i]:
            count+=1

    return count/m
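# Equivalent vectorized form (an optional alternative to the loop above):
def accuracy_vec(a4,y):
    return np.mean((a4>0.5).astype(int)==y)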
    
# trainx and trainy are the bias-augmented, shuffled slices built above
theta1,theta2,theta3,j,a4=gradeDecline(trainx,trainy,100000,7,10)
print(theta1,theta2,theta3)
# With n=5 inputs (bias + 4 features), k=7, l=10:
# theta1 is (5, 7), theta2 is (7, 10), theta3 is (10, 1)
print('Training accuracy:',accuracy(trainx,a4,trainy)*100,'%')
a2,a3,a44=forwardp(testx,theta1,theta2,theta3)
print('Test accuracy:',accuracy(testx,a44,testy)*100,'%')
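# Optional: the same split via sklearn (an alternative to the manual slicing
# above; its internal shuffling differs, so accuracies won't match exactly):
from sklearn.model_selection import train_test_split
trx,tex,try_,tey=train_test_split(xx,y,test_size=0.3,random_state=4)
print(trx.shape,tex.shape)   # (70, 5) (30, 5)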

# Plot the cost curve
plt.plot(j)
plt.show()

# Scatter plot of the two classes plus a straight line (note: this is just an
# anti-diagonal across the feature range, not the learned decision boundary)
def tu(x,y):
    plt.scatter(x[y == 0, 1], x[y == 0, 2])
    plt.scatter(x[y == 1, 1], x[y == 1, 2])
    x1min = x[:, 1].min()
    x1max = x[:, 1].max()
    x2min = x[:, 2].min()
    x2max = x[:, 2].max()
    plt.plot([x1min,x1max],[x2max,x2min])
# Call with the pre-concatenation x and the 1-D label array ya (not the column vector y)
tu(x,ya)

plt.show()

Cost curve: [figure]
Scatter plot with dividing line: [figure]

2. Apple Dataset

import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
# Deep learning (DL) is a research direction within machine learning (ML),
# introduced to move ML closer to its original goal, artificial intelligence (AI).
# It learns the internal regularities and representation levels of sample data,
# which helps interpret data such as text, images, and sound; the ultimate aim
# is to give machines a human-like ability to analyze and learn. Its results in
# speech and image recognition far exceed earlier techniques, and it has been
# applied in search, data mining, machine translation, NLP, multimedia, speech,
# recommendation and personalization, and related fields.
# Task: a dataset from an orchard (apple.txt) has two feature columns, apple
# size and apple weight; the y label is the apple's grade: 1 = premium, 0 = ordinary.
# Write a neural network from scratch to train on and predict this data, covering:
# preprocessing, cost function, activation function, gradient descent, training,
# and prediction. The specific requirements follow as comments below.

# Load the dataset
data=np.loadtxt('apple.txt',delimiter=',')
x=data[:,:-1]
y=data[:,-1]
print(x)
print(y)

# Feature scaling
def suofang(x):
    xmin=np.min(x,axis=0)
    xmax=np.max(x,axis=0)
    s=(x-xmin)/(xmax-xmin)
    return s

x=suofang(x)
# Prepend the bias column
m,n=x.shape
xx=np.c_[np.ones(m),x]
# Shuffle the data
def wash(x,y):
    m,n=x.shape
    np.random.seed(4)
    order=np.random.permutation(m)
    x=x[order]
    y=y[order]
    return x,y
xx,y=wash(xx,y)
# Split into training (70%) and test (30%) sets
a=int(len(x)*0.7)

trainx=xx[:a]
trainy=y[:a]
testx=xx[a:]
testy=y[a:]

trainy=np.c_[trainy]
testy=np.c_[testy]
# Sigmoid activation and its derivative. Note: with grad=True the argument must
# already be an activation a = sigmoid(z), since a*(1-a) is the derivative
# expressed in terms of the activation, not of the raw z.
def model(x,theta):
    z = x.dot(theta)
    return z
def sigmoid(z,grad=False):
    if grad==True:
        return z*(1-z)
    return 1/(1+np.exp(-z))

# Forward propagation
def forwardp(a1,theta1,theta2,theta3):
    z1 = a1.dot(theta1)
    a2 = sigmoid(z1)
    z2 = a2.dot(theta2)
    a3 = sigmoid(z2)
    z4 = a3.dot(theta3)
    a4 = sigmoid(z4)
    return a2,a3,a4
# Cross-entropy cost
def cost(a4,y):
    return -np.mean(y*np.log(a4)+(1-y)*np.log(1-a4))

# Backpropagation; m is the batch size (the original read the global m, the
# full-dataset row count, even after the train/test split)
def backp(a4,a3,a2,a1,y,theta1,theta2,theta3,alpha):
    m=len(y)
    sigma4=a4-y
    sigma3 = sigma4.dot(theta3.T) * sigmoid(a3,grad=True)
    sigma2 = sigma3.dot(theta2.T) * sigmoid(a2, grad=True)
    dt3 = 1 / m * a3.T.dot(sigma4)
    dt2 = 1 / m * a2.T.dot(sigma3)
    dt1 = 1 / m * a1.T.dot(sigma2)
    theta3 = theta3-alpha * dt3
    theta2 = theta2 - alpha * dt2
    theta1 = theta1 - alpha * dt1
    return theta1,theta2,theta3,a4
# Gradient descent, recording the cost at every iteration
def gradeDecline(a1,y,nums,k,l):
    m,n=a1.shape
    j=np.zeros(nums)
    np.random.seed(4)
    theta1=2*np.random.rand(n,k)-1
    theta2=2*np.random.rand(k,l)-1
    theta3=2*np.random.rand(l,1)-1
    for i in range(nums):
        a2,a3,a4=forwardp(a1, theta1, theta2, theta3)
        j[i]=cost(a4,y)
        theta1,theta2,theta3,a4=backp(a4,a3,a2,a1,y,theta1,theta2,theta3,alpha=0.1)
    return theta1,theta2,theta3,j,a4
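# Optional sanity check: compare one analytic gradient entry against a central
# finite difference (a minimal sketch; eps and the checked entry are arbitrary):
def grad_check(a1,y,theta1,theta2,theta3,eps=1e-4):
    mm=len(y)
    a2,a3,a4=forwardp(a1,theta1,theta2,theta3)
    analytic=(1/mm*a3.T.dot(a4-y))[0,0]        # dJ/dtheta3[0,0] from backprop
    tp=theta3.copy(); tp[0,0]+=eps
    tm=theta3.copy(); tm[0,0]-=eps
    numeric=(cost(forwardp(a1,theta1,theta2,tp)[2],y)
             -cost(forwardp(a1,theta1,theta2,tm)[2],y))/(2*eps)
    return analytic,numeric                    # the two should nearly agree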
# Accuracy
def score(a1,y,a4):
    m,n=a1.shape
    count=0
    for i in range(m):
        if (np.where(a4[i]>0.5,1,0)==y[i]):
            count+=1
    acc=count/m
    return acc
# Main: run gradient descent to train the model
theta1,theta2,theta3,j,a4=gradeDecline(trainx,trainy,10000,4,2)
print(j)
print('Training accuracy:',score(trainx,trainy,a4)*100,'%')
# Predict on the test set
a2,a3,a44=forwardp(testx,theta1,theta2,theta3)
# Plot the cost curve
plt.plot(j)
plt.show()
# Compute and print the test-set accuracy
print('Test accuracy:',score(testx,testy,a44)*100,'%')
# Recorded outputs: 81.81818181818183 % and 50.0 %

# Scatter plot plus a straight reference line (not the learned boundary)
def tu(x,y):
    plt.scatter(x[y == 0, 1], x[y == 0, 2])
    plt.scatter(x[y == 1, 1], x[y == 1, 2])
    x1min = x[:, 1].min()
    x1max = x[:, 1].max()
    x2min = x[:, 2].min()
    x2max = x[:, 2].max()
    plt.plot([x1min,x1max],[x2max,x2min])
# Here x is the bias-augmented xx (columns 1 and 2 are the two features) and
# y must be the 1-D label array (not a column vector)
tu(xx,y)
plt.show()
Contents of apple.txt:
0.697,0.46,1
0.774,0.376,1
0.634,0.264,1
0.608,0.318,1
0.556,0.215,1
0.403,0.237,1
0.481,0.149,1
0.437,0.211,1
0.666,0.091,0
0.243,0.267,0
0.245,0.057,0
0.343,0.099,0
0.639,0.161,0
0.657,0.198,0
0.36,0.37,0
0.593,0.042,0
0.719,0.103,0

Cost curve: [figure]
Scatter plot with dividing line: [figure]

3. Watermelon Dataset

import numpy as np
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif']=['SimHei']   # a font that can render Chinese labels
# Watermelon data (the same 17 samples as apple.txt above)
x1 = [0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
x2 = [0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
y = [1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]


# Prepend the bias column; make y a column vector
xx=np.c_[np.ones(len(x1)),x1,x2]
yy=np.c_[y]

# Shuffle
np.random.seed(4)
order=np.random.permutation(len(xx))
xxx=xx[order]
yyy=yy[order]
m,n=xx.shape

def model(x,theta):
    return x.dot(theta)

def sigmoid(z):
    return 1/(1+np.exp(-z))

def cost(a5,y):
    return -np.mean(y*np.log(a5)+(1-y)*np.log(1-a5))

def frontpropagation(a1,theta1,theta2,theta3,theta4):
    z1=model(a1,theta1)
    a2=sigmoid(z1)
    z2=model(a2,theta2)
    a3=sigmoid(z2)
    z3=model(a3,theta3)
    a4=sigmoid(z3)
    z4 = model(a4, theta4)
    a5 = sigmoid(z4)
    return a2,a3,a4,a5

def backpropagation(y,theta1,theta2,theta3,theta4,a1,a2,a3,a4,a5,alpha):
    m=len(a5)
    sigma5 = a5 - y
    sigma4 = sigma5.dot(theta4.T) * a4 * (1 - a4)
    sigma3 = sigma4.dot(theta3.T) * a3 * (1 - a3)
    sigma2 = sigma3.dot(theta2.T) * a2 * (1 - a2)

    dt4 = 1 / m * a4.T.dot(sigma5)
    dt3 = 1 / m * a3.T.dot(sigma4)
    dt2 = 1 / m * a2.T.dot(sigma3)
    dt1 = 1 / m * a1.T.dot(sigma2)

    theta4 = theta4 - alpha * dt4
    theta3 = theta3 - alpha * dt3
    theta2 = theta2 - alpha * dt2
    theta1 = theta1 - alpha * dt1

    return theta1,theta2,theta3,theta4



# Gradient descent for the deeper network (hidden layer widths 6, 5, 7)
def gradeDecline(a1,y,alpha,nums):
    m,n=a1.shape
    j=np.zeros(nums)
    theta1=np.zeros((n,6))
    theta2 = np.zeros((6,5))
    theta3 = np.zeros((5,7))
    theta4 = np.zeros((7,1))
    for i in range(nums):
        a2,a3,a4,a5=frontpropagation(a1,theta1,theta2,theta3,theta4)
        j[i]=cost(a5,y)
        theta1,theta2,theta3,theta4=backpropagation(
            y, theta1, theta2, theta3, theta4, a1, a2, a3, a4, a5, alpha)

    return theta1,theta2,theta3,theta4,j



theta1,theta2,theta3,theta4,j=gradeDecline(xxx,yyy,0.01,10000)
print(j)
print(theta1)
print(theta2)
print(theta3)
print(theta4)
plt.plot(j)
plt.show()
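# Note: zero-initialized thetas make all hidden units within a layer identical
# (they receive identical gradients), which limits this deeper network. A common
# alternative (a sketch, not what gradeDecline above uses) is small random init:
def rand_init(rows,cols,seed=None):
    if seed is not None:
        np.random.seed(seed)
    return 2*np.random.rand(rows,cols)-1    # uniform values in (-1, 1)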

Cost curve: [figure]

4. Neural Networks with sklearn.neural_network

import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix,classification_report
import warnings
warnings.filterwarnings('ignore')
x1=[0.697,0.774,0.634,0.608,0.556,0.403,0.481,0.437,0.666,0.243,0.245,0.343,0.639,0.657,0.360,0.593,0.719]
x2=[0.460,0.376,0.264,0.318,0.215,0.237,0.149,0.211,0.091,0.267,0.057,0.099,0.161,0.198,0.370,0.042,0.103]
y=[1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0]

# Prepend the bias column (optional here: MLPClassifier fits its own intercepts)
xx=np.c_[np.ones(len(x1)),x1,x2]
yy=np.c_[y]
# Fit the sklearn neural network
clf=MLPClassifier(alpha=0.02)
clf.fit(xx,yy.ravel())   # ravel() avoids the column-vector shape warning
print('Predicted y:',clf.predict([[1,0.64,0.28]]))
print('Accuracy:',clf.score(xx,yy)*100,'%')
print('Weights:',clf.coefs_)
print('Intercepts:',clf.intercepts_)
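# The metrics imported above give more detail than raw accuracy (evaluated on
# the training data here, since this script makes no train/test split):
pred=clf.predict(xx)
print(confusion_matrix(yy.ravel(),pred))
print(classification_report(yy.ravel(),pred))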
