K-Means聚类算法

from pyspark.sql import Row
from pyspark.ml.clustering import KMeans,KMeansModel
from pyspark.ml.linalg import Vectors


def f(x):
    """Convert one split input record into the fields of a training row.

    Args:
        x: Sequence of string fields from one ';'-separated input line.
            Fields 2, 24, 28 and 29 must contain numeric text; field 22 is
            kept verbatim as the label.

    Returns:
        dict with two keys: 'features', a dense vector built from fields
        2, 24, 28 and 29, and 'label', field 22 as a string.

    Raises:
        IndexError: if the record has fewer than 30 fields.
        ValueError: if one of the feature fields is not numeric text.
    """
    feature_columns = (2, 24, 28, 29)
    # The fields are already strings, so convert them to floats explicitly
    # rather than relying on the vector constructor to coerce text.
    values = [float(x[i]) for i in feature_columns]
    return {
        'features': Vectors.dense(values),
        'label': str(x[22]),
    }


# Read the ';'-delimited text file, turn every line into a Row through f(),
# and build the DataFrame used for training.
# NOTE(review): `spark` is not defined in the visible code -- presumably the
# SparkSession supplied by the pyspark shell; confirm before running as a script.
data = (
    spark.sparkContext
    .textFile("file:///home/hw17685187119/student2.txt")
    .map(lambda line: line.split(';'))
    .map(lambda fields: Row(**f(fields)))
    .toDF()
)

# Fit a 3-cluster K-Means model on the 'features' column; predictions will be
# written to the 'prediction' column when the model transforms a DataFrame.
kmeansmodel = KMeans(
    k=3,
    featuresCol='features',
    predictionCol='prediction',
).fit(data)


# Assign every training row to a cluster and print its feature vector together
# with the assigned cluster index.  transform() appends 'prediction' after the
# existing 'features' and 'label' columns, so position 1 holds the label, not
# the cluster; the fields are therefore read by name instead of by position.
results = kmeansmodel.transform(data).collect()
for row in results:
    print(str(row['features']) + ' is predcted as cluster' + str(row['prediction']))


# Print the coordinates of each fitted cluster center, one center per line.
results2 = kmeansmodel.clusterCenters()
for center in results2:
    print(center)

 

# Report the K-Means cost of the model on the training data.  A bare
# expression is only echoed in an interactive shell, so print() is used to
# make the value visible when this runs as a script as well.
# NOTE: computeCost() was deprecated in Spark 2.4 and removed in Spark 3.0;
# on newer versions read kmeansmodel.summary.trainingCost instead.
print(kmeansmodel.computeCost(data))

上一篇:【Matlab疾病分类】模糊逻辑分类叶病严重程度分级系统【含GUI源码 194期】


下一篇:【交通标志识别】基于matlab GUI BP神经网络交通标志识别(带面板)【含Matlab源码 1647期】