层次聚类【python,机器学习,算法】

"""Agglomerative (bottom-up) hierarchical clustering with centroid linkage.

Every point starts as its own cluster; the two clusters whose centroids are
closest (Euclidean distance) are merged repeatedly until only ``k`` clusters
remain.  Running the file as a script clusters three synthetic 2-D blobs and
plots the result.
"""
import numpy as np


def initialize_clusters(data):
    """Step 1: put every data point into its own singleton cluster.

    Args:
        data: Iterable of points (e.g. the rows of a 2-D array).

    Returns:
        A list of clusters, each cluster being a one-element list ``[point]``.
    """
    return [[point] for point in data]


def compute_distances(clusters):
    """Step 2: compute the Euclidean distance between all cluster centroids.

    Args:
        clusters: List of clusters; each cluster is a list of equally sized
            1-D points.

    Returns:
        A square array ``d`` where ``d[i][j]`` is the distance between the
        centroids (coordinate-wise means) of clusters ``i`` and ``j``.  The
        diagonal is zero.
    """
    if len(clusters) == 0:
        return np.zeros((0, 0))
    # Compute each centroid exactly once instead of once per (i, j) pair.
    centroids = np.array([np.mean(cluster, axis=0) for cluster in clusters])
    # Broadcasting yields every pairwise difference: shape (n, n, dim).
    deltas = centroids[:, np.newaxis, :] - centroids[np.newaxis, :, :]
    return np.sqrt(np.sum(deltas ** 2, axis=-1))


def find_closest_clusters(distances):
    """Find the pair of distinct clusters with the smallest distance.

    Args:
        distances: Square distance matrix as returned by
            ``compute_distances``.

    Returns:
        A tuple ``(i, j)`` with ``i != j`` indexing the closest pair, or
        ``None`` when there is no such pair (fewer than two clusters, or no
        distance below infinity).  Ties are resolved in favour of the first
        pair in row-major order.
    """
    masked = np.array(distances, dtype=float)  # copy: the caller's matrix stays untouched
    if masked.ndim != 2 or masked.shape[0] < 2:
        return None
    # A cluster must never be paired with itself, and NaN entries can never
    # be "the smallest", so both are excluded from the search.
    np.fill_diagonal(masked, np.inf)
    masked[np.isnan(masked)] = np.inf
    # argmin on the flattened matrix returns the first minimum in row-major order.
    i, j = divmod(int(np.argmin(masked)), masked.shape[1])
    if masked[i, j] == np.inf:
        return None
    return i, j


def merge_clusters(clusters, closest_clusters):
    """Step 3: merge the two closest clusters into a single new cluster.

    Args:
        clusters: Current list of clusters.
        closest_clusters: Tuple ``(i, j)`` of the two cluster indices to merge.

    Returns:
        A new list holding every untouched cluster in its original order,
        followed by the merged cluster.  The input list is not modified.
    """
    i, j = closest_clusters
    merged_cluster = clusters[i] + clusters[j]
    # Carry over the clusters that were not part of the merge ...
    new_clusters = [
        cluster for idx, cluster in enumerate(clusters) if idx not in (i, j)
    ]
    # ... and append the merged one; its centroid is recomputed on the next pass.
    new_clusters.append(merged_cluster)
    return new_clusters


def hierarchical_clustering(data, k):
    """Cluster ``data`` into at most ``k`` groups by repeated merging.

    Args:
        data: Iterable of 1-D points (e.g. an array of shape ``(n, dim)``).
        k: Target number of clusters; must be at least 1.

    Returns:
        A list of clusters, each a list of points.  If ``data`` has fewer
        than ``k`` points, every point stays in its own cluster.

    Raises:
        ValueError: If ``k`` is smaller than 1, or if no pair of clusters
            with a finite distance is left to merge.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    # Start with every data point as an independent cluster.
    clusters = initialize_clusters(data)
    # Keep merging the two most similar clusters until k remain.
    while len(clusters) > k:
        closest_clusters = find_closest_clusters(compute_distances(clusters))
        if closest_clusters is None:
            raise ValueError(
                "cannot merge further: no finite distance between clusters"
            )
        clusters = merge_clusters(clusters, closest_clusters)
    return clusters


def plot_clusters(clusters):
    """Scatter-plot 2-D clusters, one colour per cluster.

    Args:
        clusters: List of clusters whose points have at least two coordinates;
            only the first two are drawn.
    """
    # Imported here so the clustering functions stay usable without matplotlib.
    import matplotlib.pyplot as plt

    plt.figure(figsize=(8, 6))
    colors = ["red", "green", "blue", "yellow"]
    for idx, cluster in enumerate(clusters):
        points = np.asarray(cluster)
        # One scatter call per cluster; colours repeat if there are more
        # clusters than palette entries.
        plt.scatter(
            x=points[:, 0], y=points[:, 1], color=colors[idx % len(colors)]
        )
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.title("Hierarchical Clustering")
    plt.show()


def main():
    """Cluster three synthetic Gaussian blobs into four groups and plot them."""
    # Fix the random seed so every run generates the same sample data.
    np.random.seed(0)
    cluster1 = np.random.randn(30, 2) + np.array([0, 7])
    cluster2 = np.random.randn(30, 2) + np.array([8, 0])
    cluster3 = np.random.randn(30, 2) + np.array([8, 8])
    # Stack the blobs vertically (row-wise) into a single data set.
    data = np.vstack([cluster1, cluster2, cluster3])

    # Run the hierarchical clustering algorithm.
    k = 4
    clusters = hierarchical_clustering(data, k)

    # Print the clustering result.
    for idx, cluster in enumerate(clusters):
        print(f"Cluster {idx + 1}: ", cluster)

    # Draw the clustering result.
    plot_clusters(clusters)


if __name__ == "__main__":
    main()
上一篇:【自然语言处理】【Scaling Law】Observational Scaling Laws:跨不同模型构建Scaling Law-五、Observational Scaling其他应用(讨论)


下一篇:手撕设计模式——克隆对象之原型模式