ML06 My_PCA

ML in practice: implementing the PCA dimensionality-reduction algorithm by hand

  • Here we implement the PCA algorithm by hand and apply it to speed up handwritten digit recognition with a neural network. The math the class follows is summarized right below.
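For reference, the class below follows the standard SVD-on-covariance formulation of PCA. In my notation (chosen to match the attribute names used in the code), with $X$ the normalized $m$-row training matrix and $k$ the number of retained components:

$$\Sigma = \frac{1}{m}X^{\top}X, \qquad \Sigma = U S V^{\top}, \qquad U_{\mathrm{reduce}} = U_{[:,\,1:k]}$$
$$Z = U_{\mathrm{reduce}}^{\top}X^{\top}, \qquad X_{\mathrm{approx}} = (U_{\mathrm{reduce}}Z)^{\top}$$
$$\text{retained variance} = 1 - \frac{\sum_{i}\lVert x^{(i)} - x_{\mathrm{approx}}^{(i)}\rVert^{2}}{\sum_{i}\lVert x^{(i)}\rVert^{2}}$$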

Code implementation

The PCA class
import numpy as np
from sklearn.preprocessing import Normalizer
import sys
class PCA:
    def __init__(self, x, features=None, rate=None):
        '''
        :param x: training set
        :param features: number of features (components) to keep
        :param rate: minimum fraction of variance that must be retained

        Attributes set by fit():
            z        -- the training set after PCA projection
            u        -- basis vectors of the reduced space
            xapprox  -- samples reconstructed from the compressed representation
            variance -- fraction of variance actually retained by this PCA
        '''
        # Exactly one of `features` and `rate` must be supplied
        if features is None and rate is None:
            print('You should input a standard!')
            sys.exit(0)
        if features is not None and rate is not None:
            print('You should not input two standards!')
            sys.exit(0)
        transfer = Normalizer()
        self.x = transfer.fit_transform(x)
        self.features = features
        self.rate = rate
        self.variance = 0

    def fit_features(self, features):
        # PCA reduction that keeps a fixed number of components
        m = len(self.x)
        cov = np.matmul(self.x.T, self.x) / m            # covariance-style matrix
        U, s, V = np.linalg.svd(cov)
        self.u = U[:, :features]                         # basis of the reduced space
        self.z = np.matmul(self.u.T, self.x.T)           # projected training set
        self.xapprox = np.matmul(self.u, self.z).T       # reconstruction from z
        temp = self.x - self.xapprox
        self.variance = 1 - np.sum(temp * temp) / np.sum(self.x * self.x)

    def fit_rate(self):
        # PCA reduction driven by the retention rate: grow the number of
        # components one at a time until the retained variance reaches `rate`
        self.features = 0
        while self.variance < self.rate:
            self.features += 1
            self.fit_features(self.features)

    def cost(self, x):
        # How much of dataset x is still retained after projecting it onto the learned basis
        transfer = Normalizer()
        x = transfer.fit_transform(x)
        z = np.matmul(self.u.T, x.T)
        xapprox = np.matmul(self.u, z).T
        temp = x - xapprox
        return 1 - np.sum(temp * temp) / np.sum(x * x)

    def predict(self, x):
        # Project dataset x onto the reduced space
        transfer = Normalizer()
        x = transfer.fit_transform(x)
        return np.matmul(self.u.T, x.T).T

    def fit(self):
        # Fit using whichever criterion was supplied
        if self.features is not None:
            self.fit_features(self.features)
        else:
            self.fit_rate()
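Before the full pipeline, here is a minimal usage sketch of the class, assuming it is saved as PCA_class.py (the file name the main script imports from); keeping 2 components is an arbitrary choice for illustration:

# Quick sanity check of the PCA class (illustrative only)
from sklearn import datasets
from PCA_class import PCA

digits = datasets.load_digits()
pca = PCA(digits.data, features=2)      # keep just 2 components for illustration
pca.fit()
print('retained variance with 2 components:', round(pca.variance, 4))
print('shape after projection:', pca.predict(digits.data).shape)   # (1797, 2)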
Main script
# Use the hand-written PCA to speed up digit recognition with a neural network

from PCA_class import PCA
from sklearn import datasets
import numpy as np
import sys
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

np.set_printoptions(suppress=True)

# Load the data
digits = datasets.load_digits()
x = digits.data
y = digits.target
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.03, random_state=np.random.randint(0, 30))
X = np.arange(1, len(x_test) + 1)   # x-axis positions for the result plot

# Call fit() to learn the projection, keeping 99% of the variance
pca=PCA(x_train,rate=0.99)
pca.fit()

# Apply the learned PCA projection to both the training and test sets
x_train_pca=pca.predict(x_train)
x_test_pca=pca.predict(x_test)

# Inspect how well PCA performs
print('The original number of features:', str(len(x_train[0])))
print('After PCA, the number of features:', str(pca.features))
print('After PCA, x_test still retains ' + str(round(pca.cost(x_test) * 100, 2)) + '% of its variance')

# Train the neural network on the PCA-reduced data
clf = MLPClassifier(hidden_layer_sizes=(50,), activation='logistic', solver='adam', learning_rate_init=0.0001, max_iter=3000)
clf.fit(x_train_pca,y_train)
y_predict=clf.predict(x_test_pca)

# Visualize the predictions against the true labels
plt.figure(figsize=(30, 8), dpi=80)
plt.scatter(X, y_test, label='real', marker='s', color='blue')
plt.scatter(X, y_predict, label='predict', marker='x', color='red')
plt.legend(loc=[1, 0])
plt.grid(True, linestyle='--', alpha=0.5)
plt.yticks(np.unique(y_test))
plt.xticks(X)
plt.xlabel('index of tests')
plt.ylabel('target')
plt.savefig(r'E:\python\ml\ml by myself\PCA\PCA_myself_on_NeuralNetwork')

sys.exit(0)
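The script above stops at sys.exit(0), so to actually measure the speed-up this post is after, a rough timing comparison can be run separately. This is only a sketch, not part of the original code; it assumes the x_train, x_train_pca and y_train variables defined above:

# Rough timing comparison on raw vs. PCA-reduced features (illustrative;
# results depend on the machine and on the random initialisation)
import time
from sklearn.neural_network import MLPClassifier

def timed_fit(train_x, train_y):
    clf = MLPClassifier(hidden_layer_sizes=(50,), activation='logistic',
                        solver='adam', learning_rate_init=0.0001, max_iter=3000)
    start = time.time()
    clf.fit(train_x, train_y)
    return time.time() - start

print('training time on raw features:', round(timed_fit(x_train, y_train), 2), 's')
print('training time on PCA features:', round(timed_fit(x_train_pca, y_train), 2), 's')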

Results

PCA performance

[figure: console output of the PCA performance check]

After applying PCA to speed up the neural network

[figure: prediction-vs-truth plot for the neural network trained on PCA features]
