## 吴恩达第七周作业 K-means python(实现)

吴恩达第七周作业 K-means python(实现)

最新版 K-means 修正,或许可以解决部分学习者遇到的问题

声明:本文参考了其他博客,并在此基础上做了自己的修正

主函数1

# Main script 1: run K-means on the 2-D example dataset and plot the result.
# NOTE(review): findClosestCentroids, computeCentroids, runKmeans, plotData and
# kMeansInitCentroids must already be defined or imported when this runs.
import numpy as np
import scipy.io as scio  # `scio` was used below without ever being imported

DataFile1 = 'ex7data2.mat'  # MATLAB .mat file to read
parameter_Data = scio.loadmat(DataFile1)
X = parameter_Data['X']  # per the original author's note: 300x2
K = 3  # number of clusters
# Hand-picked starting positions, one row per cluster.
initial_centroids = np.array([[3, 3], [6, 2], [8, 5]])
# One manual assignment step followed by one manual update step.
idx = findClosestCentroids(X, initial_centroids)
centroids = computeCentroids(X, idx, K)
# Full run: 20 K-means iterations starting from the hand-picked centroids.
idx, centroids_all = runKmeans(X, initial_centroids, 20)
plotData(X, centroids_all, idx)
# Randomly drawn starting centroids (not used further in this script).
random_initial_centroids = kMeansInitCentroids(X, K)

工具函数部分

import random

import numpy as np
import matplotlib.pyplot as plt



def findClosestCentroids(X, initial_centroids):
    """Assign every sample to its nearest centroid.

    The squared Euclidean distance is computed over *all* feature columns,
    so the function works for 2-D points as well as e.g. 3-column RGB data.

    :param X: ndarray of shape (m, n), one sample per row
    :param initial_centroids: ndarray of shape (K, n), one centroid per row
    :return: float ndarray of shape (m, 1) holding 1-based cluster labels
             (1..K); ties go to the lowest-numbered centroid
    :raises ValueError: if ``initial_centroids`` has no rows
    """
    samples = np.asarray(X, dtype=float)
    centers = np.asarray(initial_centroids, dtype=float)
    if centers.shape[0] == 0:
        raise ValueError("initial_centroids must contain at least one centroid")

    # Broadcast (m, 1, n) against (1, K, n) to get every sample/centroid pair.
    deltas = samples[:, np.newaxis, :] - centers[np.newaxis, :, :]
    sq_dist = np.sum(deltas ** 2, axis=2)  # shape (m, K)

    # A NaN distance (e.g. from a NaN centroid) must never win; argmin would
    # otherwise pick it. If every distance is NaN the sample falls back to
    # cluster 1.
    sq_dist = np.where(np.isnan(sq_dist), np.inf, sq_dist)

    nearest = np.argmin(sq_dist, axis=1)  # 0-based, first minimum wins
    return (nearest + 1).astype(float).reshape(-1, 1)
def computeCentroids(X,idx,K):
    """Recompute each centroid as the mean of the samples assigned to it.

    :param X: ndarray of shape (m, n), one sample per row
    :param idx: 1-based cluster label per sample, shape (m, 1) or (m,)
    :param K: number of clusters
    :return: float ndarray of shape (K, n); the row of a cluster that received
             no samples is NaN (no mean exists for it)
    """
    samples = np.asarray(X, dtype=float)
    n = samples.shape[1]
    # Convert the 1-based labels to 0-based row indices.
    rows = (np.asarray(idx, dtype=float).reshape(-1) - 1).astype(int)

    sums = np.zeros((K, n))
    counts = np.zeros(K)
    # ufunc.at accumulates correctly when the same row index repeats.
    np.add.at(sums, rows, samples)
    np.add.at(counts, rows, 1)

    # Divide only where there is something to average, so empty clusters
    # yield NaN without triggering a 0/0 RuntimeWarning.
    centroids = np.full((K, n), np.nan)
    occupied = counts > 0
    centroids[occupied] = sums[occupied] / counts[occupied, np.newaxis]
    return centroids



def runKmeans(X, centroids, max_iters):
    """Run K-means for a fixed number of iterations.

    Each iteration assigns samples to the current centroids and then moves
    every centroid to the mean of its samples. A cluster that receives no
    samples keeps its previous position instead of turning into NaN.

    :param X: ndarray of shape (m, n), one sample per row
    :param centroids: initial centroids, one row per cluster
    :param max_iters: number of assignment/update iterations to perform
    :return: tuple ``(idx, centroids_all)`` where ``idx`` is the label array
             from the last assignment step and ``centroids_all`` is a list
             holding the initial centroids followed by the centroids after
             each iteration (length ``max_iters + 1``)
    """
    K = len(centroids)
    centroids_all = [centroids]
    centroid_i = centroids
    idx = None

    for _ in range(max_iters):
        idx = findClosestCentroids(X, centroid_i)
        # Empty clusters are handled explicitly below, so silence the 0/0
        # warning the mean computation may raise for them.
        with np.errstate(divide="ignore", invalid="ignore"):
            updated = computeCentroids(X, idx, K)
        # Labels are 1-based; a label that never occurs marks an empty cluster.
        empty = ~np.isin(np.arange(1, K + 1), idx)
        if empty.any():
            updated[empty] = np.asarray(centroid_i)[empty]
        centroid_i = updated
        centroids_all.append(centroid_i)

    if idx is None:
        # max_iters == 0: still return a valid assignment rather than
        # failing on an unbound variable.
        idx = findClosestCentroids(X, centroid_i)

    return idx, centroids_all


def plotData(X, centroids, idx=None):
    """
    Scatter the samples and overlay the centroid positions as a dashed path.

    :param X: ndarray of samples; columns 0 and 1 are used as plot coordinates
    :param centroids: iterable of centroid arrays (e.g. one per iteration);
                      columns 0 and 1 of each entry are plotted
    :param idx: optional per-sample values handed to the scatter plot as colours
    :return: None
    """
    plt.scatter(X[..., 0], X[..., 1], c=idx)
    # Collect the first and second coordinate of every centroid entry in order.
    path_x = [step[..., 0] for step in centroids]
    path_y = [step[..., 1] for step in centroids]
    plt.plot(path_x, path_y, 'rx--')
    plt.show()

def kMeansInitCentroids(X,K):
    """Pick K distinct rows of X at random to serve as initial centroids.

    Row indices are sampled without replacement, so the same sample is never
    chosen twice. X itself is only indexed, never shuffled, so the caller's
    data order is left untouched (the author notes that an earlier version
    used numpy.random.shuffle() and scrambled the data).

    :param X: ndarray of shape (m, n), one sample per row
    :param K: number of centroids to draw
    :return: ndarray of shape (K, n) containing the selected rows
    :raises ValueError: if K is larger than the number of rows in X
    """
    m = X.shape[0]
    if K > m:
        raise ValueError(
            "cannot pick K={} distinct samples from only {} rows".format(K, m)
        )
    picks = np.random.choice(m, K, replace=False)
    return X[picks]

# Main script 2 (image compression)
# NOTE(review): `io` is not imported anywhere in this file; it presumably
# refers to an image-reading module that provides imread — confirm and import
# it before running.
A = io.imread('bird_small.png')  # per the original author's note: (128, 128, 3)
A = A / 255  # presumably rescales 8-bit channel values to [0, 1] — confirm
image_shape = A.shape
# One row per pixel, one column per channel; sizes are taken from the image
# instead of being hard-coded (assumes a channel-last image array).
X = A.reshape(-1, image_shape[-1])
K = 16  # number of colours kept in the compressed image
centroids = kMeansInitCentroids(X, K)
plt.imshow(A)
plt.show()
idx, centroids_all = runKmeans(X, centroids, 10)  # idx: one 1-based label per pixel
centroids = centroids_all[-1]  # centroids after the final iteration
# Replace every pixel by the centroid of its cluster. Labels are 1-based, so
# shift to 0-based row indices before indexing.
labels = np.asarray(idx).reshape(-1).astype(int) - 1
img = centroids[labels]
img = img.reshape(image_shape)  # restore the original image layout
plt.imshow(img)
plt.show()
上一篇:美赛python学习d13——K_means聚类算法


下一篇:聚类之K-means算法理论及代码实现