参考资料:https://blog.csdn.net/ten_sory/article/details/81016748
计算不同样本间的距离来判断他们的相近关系,相近的就会放到同一个类别去
- 选择K值(即需要分成的类别),最好根据聚类的结果和K的函数关系来判断K值是多少
- 选择聚类点:1.随机选取 2.多次取均值 3.bisecting K-means
step1:选定要聚类的类别数目k(如上例的k=3类),选择k个中心点。
step2:针对每个样本点,找到距离其最近的中心点(寻找组织),距离同一中心点最近的点为一个类,这样完成了一次聚类。
step3:判断聚类前后的样本点的类别情况是否相同,如果相同,则算法终止,否则进入step4。
step4:针对每个类别中的样本点,计算这些样本点的中心点,当做该类的新的中心点,继续step2。
我的尝试:用随机数产生样本点,并自主定义三个初始中心点;先将每个样本点放入距离最近的中心点所在的类中;然后在同一个类中,把各点到当前中心点的距离与它到类内其他点的距离进行比较,如果后者更近,就用那个更近的点替代当前中心点;当中心点不再变化时跳出循环,得到结果。
import random
import matplotlib.pyplot as plt
import math
import numpy as np
def get_point(show=True):
    """Generate 90 random 2-D integer sample points in three groups.

    Thirty points are drawn from each of three rectangular regions:
    x in [0, 10), y in [0, 10); x in [30, 40), y in [10, 20);
    x in [10, 20), y in [50, 70).

    Args:
        show: When true (the default), plot all points with matplotlib and
            show the figure before returning. Pass ``False`` to generate
            the data without touching matplotlib.

    Returns:
        A list of ``[x, y]`` pairs: the 30 points of the first region,
        followed by those of the second and then the third.
    """
    group_1 = []
    group_2 = []
    group_3 = []
    for _ in range(30):
        # One point per region on every iteration, so a seeded ``random``
        # yields the same sequence of draws regardless of ``show``.
        group_1.append([random.randrange(0, 10), random.randrange(0, 10)])
        group_2.append([random.randrange(30, 40), random.randrange(10, 20)])
        group_3.append([random.randrange(10, 20), random.randrange(50, 70)])

    # Full coordinate set, grouped by region.
    points = group_1 + group_2 + group_3

    if show:
        xs = [point[0] for point in points]
        ys = [point[1] for point in points]
        plt.plot(xs, ys)
        plt.show()
    return points
def get_distance(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Both arguments are converted to numpy arrays and subtracted; only the
    first two components of the difference enter the distance.
    """
    offset = np.array(a) - np.array(b)
    # Length of the 2-D offset vector.
    return math.hypot(offset[0], offset[1])
def _closest_center(point, centers):
    """Return the index of the center nearest to ``point`` (ties: lowest index)."""
    # Squared distances order the same way as true distances, so the
    # square root is not needed for a nearest-center comparison.
    return min(
        range(len(centers)),
        key=lambda index: sum(
            (p - c) ** 2 for p, c in zip(point, centers[index])
        ),
    )


def _centroid(members):
    """Return the coordinate-wise mean of a non-empty list of points."""
    count = len(members)
    return [sum(axis) / count for axis in zip(*members)]


def k_means(cluster1, cluster2, cluster3, center_point, max_iter=100):
    """Refine three cluster centers with the standard K-means iteration.

    The three input clusters are taken as the initial assignment of the
    points. Each round then:

    1. moves every center to the mean of the points currently assigned to
       it (a center with no points keeps its previous position);
    2. reassigns every point to its nearest center;
    3. stops as soon as the assignment no longer changes.

    Args:
        cluster1: Points initially assigned to ``center_point[0]``.
        cluster2: Points initially assigned to ``center_point[1]``.
        cluster3: Points initially assigned to ``center_point[2]``.
        center_point: Mutable list of the current centers. It is updated
            in place with the refined centers.
        max_iter: Upper bound on the number of rounds, as a safety net.

    Returns:
        ``center_point`` itself, now holding the refined centers. Centers
        that received points are lists of floats (means); centers that
        never received a point keep their original coordinates. When no
        points are supplied the centers are returned unchanged.
    """
    points = list(cluster1) + list(cluster2) + list(cluster3)
    labels = (
        [0] * len(cluster1) + [1] * len(cluster2) + [2] * len(cluster3)
    )
    # Work on copies so the convergence test compares real snapshots and
    # never an alias of the list being modified.
    centers = [list(center) for center in center_point]

    if points and centers:
        for _ in range(max_iter):
            # Update step: move each center to the mean of its members.
            for index in range(len(centers)):
                members = [
                    point
                    for point, label in zip(points, labels)
                    if label == index
                ]
                if members:
                    centers[index] = _centroid(members)

            # Assignment step: attach each point to its nearest center.
            new_labels = [_closest_center(point, centers) for point in points]
            if new_labels == labels:
                break
            labels = new_labels

    # Write the result back into the caller's list and return that list.
    center_point[:] = centers
    return center_point
def get_cluster(points, center_point):
    """Split ``points`` into three clusters by nearest center and plot them.

    Each point goes to the first of the three centers whose distance
    equals the minimum of the three distances. The clusters are then
    drawn in red, green and blue and the figure is shown.

    Args:
        points: Sequence of ``[x, y]`` points.
        center_point: Sequence holding the three cluster centers.

    Returns:
        A tuple ``(cluster1, cluster2, cluster3)`` of point lists.
    """
    groups = ([], [], [])
    for point in points:
        distances = (
            get_distance(point, center_point[0]),
            get_distance(point, center_point[1]),
            get_distance(point, center_point[2]),
        )
        nearest = min(distances)
        # First center that matches the minimum wins.
        for group, distance in zip(groups, distances):
            if distance == nearest:
                group.append(point)
                break

    # Collect the coordinates of every cluster before drawing anything.
    coordinates = [
        ([member[0] for member in group], [member[1] for member in group])
        for group in groups
    ]
    for (xs, ys), colour in zip(coordinates, ('r', 'g', 'b')):
        plt.plot(xs, ys, colour)
    plt.show()
    return groups
if __name__ == '__main__':
    # Demo: generate random points, assign them to three hand-picked
    # starting centers, refine the centers and print the result.
    center_point = [[5, 5], [30, 10], [20, 60]]
    points = get_point()
    cluster1, cluster2, cluster3 = get_cluster(points, center_point)
    center_point = k_means(cluster1, cluster2, cluster3, center_point)
    print(center_point)