K-means聚类乱七八糟的尝试

参考资料:https://blog.csdn.net/ten_sory/article/details/81016748

计算不同样本间的距离来判断他们的相近关系,相近的就会放到同一个类别去

  1. 选择K值(即需要分成的类别),最好根据聚类的结果和K的函数关系来判断K值是多少
  2. 选择聚类点:1.随机选取 多次取均值 3.bisecting K-means

step1:选定要聚类的类别数目k(如上例的k=3类),选择k个中心点。

step2:针对每个样本点,找到距离其最近的中心点(寻找组织),距离同一中心点最近的点为一个类,这样完成了一次聚类。

step3:判断聚类前后的样本点的类别情况是否相同,如果相同,则算法终止,否则进入step4。

step4:针对每个类别中的样本点,计算这些样本点的中心点,当做该类的新的中心点,继续step2。

我的尝试:随机数产生点,自主定义了三个点,将距离三个点近的放入同一个类中,在同一个类中选取点与中心点,以及聚类附近的点进行距离比较,如果更近的话就会替代当前中心点,当中心点不再变化时跳出循环得到结果。


import random
import matplotlib.pyplot as plt
import math
import numpy as np
def get_point():
    list = []
    list_1 = []
    list_2 = []
    list_3 = []
    x = []
    y = []
    i = 0
    while i < 30:
        list_1.append([random.randrange(0, 10), random.randrange(0, 10)])
        list_2.append([random.randrange(30, 40), random.randrange(10, 20)])
        list_3.append([random.randrange(10, 20), random.randrange(50, 70)])
        i = i + 1
    list = list_1 + list_2 + list_3    # 得到坐标集合
    j = 0
    while j < len(list):
        x.append(list[j][0])
        y.append(list[j][1])
        j += 1
    plt.plot(x, y)
    plt.show()
    return list

def get_distance(a, b):
    c = np.array(a) - np.array(b)
    dis = math.hypot(c[0], c[1])    # 算出距离
    return dis

def k_means(cluster1, cluster2, cluster3, center_point):
    while True:
        i = 0
        while i < len(cluster1):
            cluster = center_point
            dis1 = get_distance(cluster1[i], center_point[0])
            j = 1
            while j < len(cluster1):
                dis4 = get_distance(cluster1[i], cluster1[j])
                if min(dis1, dis4) != dis1:
                    del center_point[0]
                    center_point.insert(0, cluster1[j])
                j += 1
            i += 1
        k = 0
        while k < len(cluster2):
            cluster = center_point
            dis2 = get_distance(cluster2[k], center_point[1])
            j = 1
            while j < len(cluster2):
                dis5 = get_distance(cluster2[k], cluster2[j])
                if min(dis2, dis5) != dis2:
                    del center_point[1]
                    center_point.insert(1, cluster2[j])
                j += 1
            k += 1
        l = 0
        while l < len(cluster3):
            cluster = center_point
            dis3 = get_distance(cluster3[l], center_point[2])
            j = 1
            while j < len(cluster3):
                dis6 = get_distance(cluster3[l], cluster3[j])
                if min(dis3, dis6) != dis3:
                    del center_point[2]
                    center_point.insert(2, cluster3[j])
                j += 1
            l += 1
        if center_point[0] == cluster[0] and center_point[1] == cluster[1] and center_point[2] == cluster[2]:
            break
    return cluster

def get_cluster(points, center_point):
    cluster1 = []
    cluster2 = []
    cluster3 = []
    x1 = []
    y1 = []
    x2 = []
    y2 = []
    x3 = []
    y3 = []
    i = 0
    while i < len(points):
        dis1 = get_distance(points[i], center_point[0])
        dis2 = get_distance(points[i], center_point[1])
        dis3 = get_distance(points[i], center_point[2])
        if dis1 == min(dis1, dis2, dis3):
            cluster1.append(points[i])
        elif dis2 == min(dis1, dis2, dis3):
            cluster2.append(points[i])
        elif dis3 == min(dis1, dis2, dis3):
            cluster3.append(points[i])
        i += 1
    j = 0
    while j < len(cluster1):
        x1.append(cluster1[j][0])
        y1.append(cluster1[j][1])
        j += 1
    j = 0
    while j < len(cluster2):
        x2.append(cluster2[j][0])
        y2.append(cluster2[j][1])
        j += 1
    j = 0
    while j < len(cluster3):
        x3.append(cluster3[j][0])
        y3.append(cluster3[j][1])
        j += 1
    plt.plot(x1, y1, 'r')
    plt.plot(x2, y2, 'g')
    plt.plot(x3, y3, 'b')
    plt.show()

    return cluster1, cluster2, cluster3


if __name__ == '__main__':
    center_point = [[5, 5], [30, 10], [20, 60]]
    points = get_point()
    li = get_cluster(points, center_point)
    center_point = k_means(li[0], li[1], li[2], center_point)
    print(center_point)

上一篇:盒子模型及弹性盒子


下一篇:CentOS7安装部署MongoDB