# 使用的数据集如下:https://download.csdn.net/download/qq_41938259/12141002
# 这是I/O
# 以下是代码:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def findDistance(x, y):
    """Return the Euclidean distance between two points.

    Args:
        x: First point; an array, a matrix row, or a plain sequence of numbers.
        y: Second point, broadcast-compatible with ``x``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    # Coerce up front so plain lists/tuples work as well as arrays/matrices.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.sum(np.power(diff, 2)))
def findPoints(data, k):
    """Draw ``k`` random starting centroids inside the bounding box of ``data``.

    Args:
        data: 2-D array-like of shape (m, n), one sample per row.
        k: Number of centroids to generate.

    Returns:
        A ``k x n`` ``np.matrix``; column ``i`` is sampled uniformly from
        ``[min(data[:, i]), max(data[:, i]))``.
    """
    data = np.asarray(data, dtype=float)
    _, n = data.shape
    lower = data.min(axis=0)
    span = data.max(axis=0) - lower
    # Sampling as (n, k) and transposing consumes the random stream one
    # feature column at a time, so seeded runs stay reproducible.
    offsets = np.random.rand(n, k).T
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    return np.asmatrix(lower + span * offsets)
def kMeans(data, k):
    """Cluster the rows of ``data`` into ``k`` groups with k-means.

    Repeats two steps until no sample changes cluster: assign every sample to
    its nearest centroid, then move each centroid to the mean of its samples.

    Args:
        data: 2-D array-like of shape (m, n), one sample per row.
        k: Number of clusters.

    Returns:
        A tuple ``(points, cluster)``. ``points`` is a ``k x n`` ``np.matrix``
        of centroids. ``cluster`` is an ``m x 2`` ``np.matrix``: column 0 holds
        each sample's centroid index, column 1 the squared distance to it.
    """
    data = np.asarray(data)
    m, _ = np.shape(data)
    # np.asmatrix replaces np.mat, which was removed in NumPy 2.0.
    cluster = np.asmatrix(np.zeros((m, 2)))
    # Mark every sample as unassigned. Starting at 0 would make a first pass
    # that puts every sample in cluster 0 look like convergence.
    cluster[:, 0] = -1
    points = findPoints(data, k)

    changed = True
    while changed:
        changed = False

        # Assignment step: nearest centroid for each sample.
        for i in range(m):
            minDistance = np.inf
            minIndex = -1
            for j in range(k):
                distance = findDistance(points[j, :], data[i, :])
                if distance < minDistance:
                    minDistance = distance
                    minIndex = j
            if cluster[i, 0] != minIndex:
                changed = True
            cluster[i, :] = minIndex, minDistance ** 2

        # Update step: move each centroid to the mean of its members.
        labels = np.asarray(cluster[:, 0]).ravel()
        for p in range(k):
            members = data[labels == p]
            # An empty cluster keeps its previous centroid; averaging zero
            # rows would yield NaN and poison every later distance check.
            if len(members):
                points[p, :] = np.mean(members, axis=0)

    return points, cluster
if __name__ == '__main__':
    # Load the CSV and keep only the two columns to cluster (value, price).
    data = pd.read_csv("E:\\result.csv")
    data = pd.DataFrame({'x': data['value'], 'y': data['price']})
    data = data.to_numpy()

    k = 2
    centroids, assignments = kMeans(data, k)
    # kMeans returns np.matrix objects; flatten to plain arrays for plotting.
    labels = np.asarray(assignments[:, 0]).ravel()
    centroids = np.asarray(centroids)

    fig = plt.figure(figsize=(10, 10), dpi=100)
    ax = fig.add_subplot(111)
    ax.set_xlabel("$value$")
    ax.set_xticks(range(0, 250000, 25000))
    ax.set_ylabel("$price$")
    ax.set_yticks(range(0, 85000, 5000))
    ax.set_title('K-means')

    for i in range(k):
        # Plot only this cluster's members. No explicit colour is given, so
        # each scatter call takes the next colour from the Axes colour cycle
        # and the clusters are visually distinct.
        pts = data[labels == i, :]
        ax.scatter(pts[:, 0], pts[:, 1], marker='o', s=90, alpha=0.2)

    # Centroids drawn as large red stars on top of the samples.
    ax.scatter(centroids[:, 0], centroids[:, 1], marker='*', s=900, color='r', alpha=0.9)
    plt.show()
# END
# TIM往事如风33470348 发布了134 篇原创文章 · 获赞 20 · 访问量 2万+ 私信 关注