RBF network

1. Radial basis function

An RBF (radial basis function) encodes a kind of distance: $\mu_m$ is a center point, and the distance from a point $x$ to each center is used as a feature transform.

The output model can be chosen freely according to the task (for example SVM, logistic regression, etc.).

The key design decisions are: how to choose the centers, and which distance function to use.
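
Putting these pieces together, one common form of the hypothesis (a sketch assuming the Gaussian RBF used in the code below, with the weights learned by the output model) is

$$h(x) = \mathrm{Output}\left(\sum_{m=1}^{M} w_m \exp\left(-\beta\,\lVert x-\mu_m\rVert^2\right)\right)$$

where each $\exp(-\beta\lVert x-\mu_m\rVert^2)$ is one RBF feature and $\mathrm{Output}$ is whatever model sits on top (here, logistic regression).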

[figure: RBF network]

2. Choose the centers with k-means, then fit logistic regression

import numpy as np
from matplotlib import pyplot as plt
from sklearn.base import BaseEstimator
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score


class KMeansRBF(BaseEstimator):
    def __init__(self, n_clusters, beta, C):
        self.n_clusters = n_clusters  # number of RBF centers (k-means clusters)
        self.beta = beta              # width of the Gaussian RBF
        self.C = C                    # regularization strength of logistic regression

    def fit(self, X, y):
        # Step 1: pick the centers with k-means
        km = KMeans(n_clusters=self.n_clusters)
        km.fit(X)
        self.ct = km.cluster_centers_
        # Step 2: transform X into RBF features and fit logistic regression
        G = self._nFeature(self.ct, X)
        self.lg = LogisticRegression(C=self.C)
        self.lg.fit(G, y)
        return self

    def predict(self, X):
        G = self._nFeature(self.ct, X)
        return self.lg.predict(G)

    def predict_proba(self, X):
        G = self._nFeature(self.ct, X)
        return self.lg.predict_proba(G)

    def _nFeature(self, cts, X):
        # G[i, m] = RBF(x_i, mu_m): one feature per center
        G = np.zeros((X.shape[0], cts.shape[0]))
        for xi, x in enumerate(X):
            for ci, c in enumerate(cts):
                G[xi, ci] = self._kernel(x, c)
        return G

    def _kernel(self, x1, x2):
        # Gaussian RBF: exp(-beta * ||x1 - x2||^2)
        x = x1 - x2
        return np.exp(-self.beta * np.dot(x, x))

    def get_params(self, deep=True):
        return {'n_clusters': self.n_clusters, 'beta': self.beta, 'C': self.C}

    def set_params(self, **parameters):
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def plot(self, X, y):
        # Scatter the two classes and mark the k-means centers
        pos = np.where(y == 1)
        neg = np.where(y == -1)
        x1 = X[pos[0], :]
        x2 = X[neg[0], :]
        plt.figure()
        plt.plot(x1[:, 0], x1[:, 1], 'o')
        plt.plot(x2[:, 0], x2[:, 1], 'o')
        plt.plot(self.ct[0, 0], self.ct[0, 1], 'ro')
        plt.plot(self.ct[1, 0], self.ct[1, 1], 'ro')
        # Evaluate the model on a grid and mark points where the two class
        # probabilities are nearly equal, i.e. the decision boundary
        xmax = np.max(X[:, 0]) + 5
        xmin = np.min(X[:, 0]) - 5
        ymax = np.max(X[:, 1]) + 5
        ymin = np.min(X[:, 1]) - 5
        numx = int((xmax - xmin) * 10)
        numy = int((ymax - ymin) * 10)
        total = numx * numy
        lx = np.linspace(xmin, xmax, numx)
        ly = np.linspace(ymin, ymax, numy)
        mgrid = np.meshgrid(lx, ly)
        px = np.hstack((mgrid[0].reshape(total, 1), mgrid[1].reshape(total, 1)))
        pre = self.predict_proba(px)
        ind = np.where(abs(pre[:, 1] - pre[:, 0]) < 0.01)
        px = px[ind]
        plt.plot(px[:, 0], px[:, 1], 'yo')
        plt.show()


if __name__ == '__main__':
    # Two Gaussian blobs labeled +1 and -1
    x1 = np.random.normal(10, 6.0, (80, 2))
    x2 = np.random.normal(-10, 6.0, (80, 2))
    X = np.vstack((x1, x2))
    y = np.zeros((160, 1))
    y[range(0, 80), 0] = y[range(0, 80), 0] + 1
    y[range(80, 160), 0] = y[range(80, 160), 0] - 1
    y = np.ravel(y)
    # Select beta by 5-fold cross-validation
    betas = np.linspace(0.001, 0.1, 100)
    k = range(100)
    score = np.zeros((100, 1), 'float')
    bestbeta = 0.001
    maxscore = -1
    for i, beta in enumerate(betas):
        krbf = KMeansRBF(2, beta, 1)
        scores = cross_val_score(krbf, X, y, scoring="accuracy", cv=5)
        score[i, 0] = scores.mean()
        if score[i, 0] > maxscore:
            maxscore = score[i, 0]
            bestbeta = beta
    plt.figure()
    plt.plot(k, score, 'b-')
    plt.show()
    print(bestbeta)
    # Refit on all data with the best beta and plot the decision boundary
    krbf = KMeansRBF(2, bestbeta, 1)
    krbf.fit(X, y)
    krbf.plot(X, y)
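
Because KMeansRBF exposes get_params/set_params, the manual loop over betas above could also be handled by scikit-learn's GridSearchCV. A minimal sketch, assuming the KMeansRBF class and the X, y arrays from the script above are in scope:

import numpy as np
from sklearn.model_selection import GridSearchCV

# Sketch only: same beta grid as the manual loop, same 5-fold accuracy criterion.
param_grid = {'beta': np.linspace(0.001, 0.1, 100)}
search = GridSearchCV(KMeansRBF(n_clusters=2, beta=0.001, C=1),
                      param_grid, scoring='accuracy', cv=5)
search.fit(X, y)
print(search.best_params_['beta'], search.best_score_)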

3. Results

The value of beta in the Gaussian is critical.

Since the exponent grows quickly, a large beta means that only points very close to a center are judged to belong to the same class as that center.
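
For intuition, a quick check of how fast $\exp(-\beta d^2)$ falls off with distance (a small illustrative snippet; the distances below are chosen arbitrarily):

import numpy as np

# How fast exp(-beta * d^2) decays for the two beta values compared below.
for beta in (1.0, 0.001):
    for d in (1.0, 3.0, 10.0):
        print(beta, d, np.exp(-beta * d ** 2))
# With beta = 1 the feature is already ~1e-4 at d = 3 and essentially 0 at d = 10;
# with beta = 0.001 it is still ~0.9 at d = 10, so far-away points keep influence.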

[figures: RBF network decision boundaries with beta = 1 (left) and beta = 0.001 (right)]