吴恩达第七周作业:K-means(Python 实现)
最新版 K-means 修正,或许可以解决部分学习者遇到的问题
声明:本文参考了其他博客,但加入了自己的修正
主函数1
# Main script 1: run K-means on the 2-D example data set.
# NOTE(review): the helper functions called below are defined in the helper
# section of this file; they must be defined (or imported) before this script
# is executed.
import numpy as np
import scipy.io as scio  # `scio` was used below without ever being imported

DataFile1 = 'ex7data2.mat'  # MATLAB data file to read
parameter_Data = scio.loadmat(DataFile1)
X = parameter_Data['X']  # X: 300x2 according to the original author's note
K = 3  # number of clusters
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])  # hand-picked starting centroids
idx = findClosestCentroids(X, initial_centroids)  # cluster label of every sample
centroids = computeCentroids(X, idx, K)  # centroids recomputed from those labels
idx, centroids_all = runKmeans(X, initial_centroids, 20)  # 20 K-means iterations
plotData(X, centroids_all, idx)  # plot the clusters and the centroid history
random_initial_centroids = kMeansInitCentroids(X, K)  # random initial centroids
工具函数部分
import random
import numpy as np
import matplotlib.pyplot as plt
def findClosestCentroids(X, initial_centroids):
    """Assign every sample to its nearest centroid.

    Distances are squared Euclidean distances taken over *all* feature
    columns, so the function works for 2-D points as well as for rows with
    more features (for example RGB pixels).

    Args:
        X: Array of shape (m, n) holding m samples with n features.
        initial_centroids: Array of shape (K, n) holding the K centroids.

    Returns:
        Float array of shape (m, 1) with the 1-based label (1..K) of the
        closest centroid for each sample. Ties go to the lowest-numbered
        centroid. A centroid whose distance is NaN is never selected unless
        every distance of that sample is NaN, in which case the label is 1.
    """
    X = np.asarray(X, dtype=float)
    centroids = np.asarray(initial_centroids, dtype=float)
    # (m, 1, n) - (1, K, n) broadcasts to (m, K, n); summing the squares over
    # the feature axis leaves an (m, K) table of squared distances.
    diffs = X[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    sq_dist = np.sum(diffs ** 2, axis=2)
    # argmin would pick a NaN entry as the minimum; treat NaN as "infinitely
    # far" so such centroids are skipped.
    sq_dist = np.where(np.isnan(sq_dist), np.inf, sq_dist)
    nearest = np.argmin(sq_dist, axis=1)
    return (nearest + 1).astype(float).reshape(-1, 1)
def computeCentroids(X, idx, K):
    """Recompute every centroid as the mean of the samples assigned to it.

    Args:
        X: Array of shape (m, n) holding the samples.
        idx: 1-based cluster label of every sample, shape (m,) or (m, 1).
        K: Number of clusters.

    Returns:
        Float array of shape (K, n). Row ``k`` is the mean of the samples
        labelled ``k + 1``. A cluster with no samples gets a row of NaN
        (no division-by-zero warning is emitted).
    """
    X = np.asarray(X, dtype=float)
    n = X.shape[1]
    # Flatten the labels and convert them to 0-based integer row indices.
    labels = (np.asarray(idx, dtype=float).reshape(-1) - 1).astype(int)
    sums = np.zeros((K, n))
    counts = np.zeros((K, 1))
    # np.add.at accumulates correctly when the same index occurs repeatedly,
    # which plain fancy-index "+=" does not.
    np.add.at(sums, labels, X)
    np.add.at(counts, labels, 1)
    centroids = np.full((K, n), np.nan)
    # Divide only where a cluster has members; empty clusters stay NaN.
    np.divide(sums, counts, out=centroids, where=counts > 0)
    return centroids
def runKmeans(X, centroids, max_iters):
    """Run the K-means loop for a fixed number of iterations.

    Each iteration assigns every sample to its closest centroid and then
    recomputes the centroids from that assignment.

    Args:
        X: Array of samples, one row per sample.
        centroids: Initial centroids, one row per cluster.
        max_iters: Number of iterations to perform.

    Returns:
        A tuple ``(idx, centroids_all)``. ``idx`` is the assignment computed
        in the last iteration (i.e. relative to the centroids *before* the
        final update). ``centroids_all`` is a list that starts with the
        initial centroids followed by the centroids after each iteration.
        With ``max_iters <= 0`` no update is made and ``idx`` is the
        assignment relative to the initial centroids.
    """
    K = len(centroids)
    centroids_all = [centroids]
    centroid_i = centroids
    idx = None
    for _ in range(max_iters):
        idx = findClosestCentroids(X, centroid_i)
        centroid_i = computeCentroids(X, idx, K)
        centroids_all.append(centroid_i)
    if idx is None:
        # The loop never ran; still return a valid assignment instead of
        # failing on an unbound variable.
        idx = findClosestCentroids(X, centroid_i)
    return idx, centroids_all
def plotData(X, centroids, idx=None):
    """
    Show the clustered samples together with the path of the centroids.

    :param X: ndarray, all samples; columns 0 and 1 are used as x and y
    :param centroids: iterable of ndarrays, the centroids of every round
    :param idx: ndarray, cluster label of each sample, passed to the scatter
        plot as the colour argument
    :return: None
    """
    plt.scatter(X[..., 0], X[..., 1], c=idx)
    # One entry per round: the x (resp. y) coordinates of that round's centroids.
    xs = [step[..., 0] for step in centroids]
    ys = [step[..., 1] for step in centroids]
    plt.plot(xs, ys, 'rx--')
    plt.show()
def kMeansInitCentroids(X, K):
    """Pick K rows of X at random to serve as initial centroids.

    Rows are selected by index instead of shuffling X in place, so the order
    of the caller's data is left untouched (an earlier version used
    ``numpy.random.shuffle`` and scrambled the data).

    Args:
        X: Array of samples, one row per sample.
        K: Number of centroids to pick.

    Returns:
        Array with K rows taken from X. The rows come from K distinct
        indices whenever ``K <= len(X)``, so the same sample is never picked
        twice; only when more centroids than samples are requested are
        indices repeated.
    """
    m = X.shape[0]
    # Sampling with replacement could select one sample twice, giving
    # duplicate centroids and therefore clusters that stay empty.
    chosen = np.random.choice(m, K, replace=K > m)
    return X[chosen]
# Main script 2 (image compression)
# NOTE(review): `io` is not imported anywhere in the visible source;
# presumably `from skimage import io` -- confirm before running.
A = io.imread('bird_small.png')  # (128, 128, 3) according to the original note
A = A / 255  # scale channel values; assumes 8-bit channels -- TODO confirm
height, width, channels = A.shape
X = A.reshape(height * width, channels)  # one row per pixel
K = 16  # number of colours kept
centroids = kMeansInitCentroids(X, K)
plt.imshow(A)
plt.show()
idx, centroids_all = runKmeans(X, centroids, 10)  # idx: one label per pixel
centroids = centroids_all[-1]  # centroids after the last iteration
# Replace every pixel by the centroid of its cluster. Labels are 1-based,
# hence the "- 1"; the lookup is done for all pixels at once.
img = centroids[idx.ravel().astype(int) - 1]
img = img.reshape(A.shape)  # restore the image layout
plt.imshow(img)
plt.show()