【Python机器学习】K近邻算法预测电影类型

1.数据可视化

import pandas as pd
import  numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Font with CJK glyphs so the Chinese axis labels and point labels render.
# "simhei.ttf" is a relative path, so it is looked up in the working directory.
font = FontProperties(fname=r"simhei.ttf", size=14)

# Hand-entered sample points: x goes on the axis labelled "亲吻次数" (kisses),
# y on the axis labelled "打斗次数" (fights); labels[i] names the point (x[i], y[i]).
x = [5,3,31,59,60,80]
y = [100,95,105,2,3,10]
labels = ["《战狼》","《红海行动》","《碟中谍6》","《前任3》","《春娇与志明》","《泰坦尼克号》"]

plt.scatter(x, y, s=120)
plt.xlabel("亲吻次数", fontproperties=font)
plt.ylabel("打斗次数", fontproperties=font)
plt.xticks(range(0, 150, 10))
plt.yticks(range(0, 150, 10))

# Walk coordinates and titles in lockstep instead of keeping a manual index.
for x_i, y_i, label in zip(x, y, labels):
    plt.annotate(label, xy=(x_i, y_i), xytext=(x_i, y_i), fontproperties=font)

# Render the figure; without this a non-interactive run never displays it.
plt.show()

【Python机器学习】K近邻算法预测电影类型

2.使用numpy计算欧氏距离

# Read the movie spreadsheet into a DataFrame.
data = pd.read_excel(io='moive.xlsx')
# Bare expression: shows the table when this is run as a notebook cell.
data

【Python机器学习】K近邻算法预测电影类型

# Per-feature offset of every row from the row labelled 6, which serves as
# the reference point for the distance.
fight_gap = data['打斗次数'] - data.loc[6, "打斗次数"]
kiss_gap = data['接吻次数'] - data.loc[6, "接吻次数"]

# Euclidean distance over the two features, stored as a new column.
dis = np.sqrt(fight_gap ** 2 + kiss_gap ** 2)
data['距离'] = dis
# Bare expression: shows the table when this is run as a notebook cell.
data

【Python机器学习】K近邻算法预测电影类型

3.判断距离

# Sort by distance and let the three nearest movies vote on the genre.
# The distances are measured from row 6, so row 6 itself is at distance 0;
# drop it first, otherwise it always occupies one of the three neighbour slots.
types = data.drop(index=6).sort_values(by='距离').iloc[:3]["电影类型"]
# value_counts orders genres by frequency, so the first index is the majority.
new_mv_type = types.value_counts().index[0]
print('电影类型为:', new_mv_type)

【Python机器学习】K近邻算法预测电影类型

4.使用scikit-learn的API

from sklearn.neighbors import KNeighborsClassifier

# 1. Load the data. This uses the same workbook name as every other snippet
#    in this file, so all sections run against one file.
mv_df = pd.read_excel("moive.xlsx", sheet_name=0)

# 2. Training features: .loc slices are label-based and inclusive, so this
#    takes rows 0..5 and every column from "打斗次数" through "接吻次数".
x = mv_df.loc[:5,"打斗次数":"接吻次数"].values

# 3. Training targets: the genre label of the same six rows.
y = mv_df.loc[:5,"电影类型"].values

# 4. Build the classifier. k=3 matches the hand-rolled version (top three
#    neighbours); an odd k also avoids 2-2 ties that an even k such as 4 allows.
knn_cls = KNeighborsClassifier(n_neighbors=3)

# 5. Fit on the six labelled movies.
knn_cls.fit(x,y)

# 6. Predict the genre of one new sample.
# NOTE(review): the sample must list its features in the same column order as
# the slice in step 2 — confirm that [5, 29] matches the spreadsheet layout.
knn_cls.predict([[5,29]])

【Python机器学习】K近邻算法预测电影类型

5.源码

import pandas as pd
import  numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
# Font with CJK glyphs so the Chinese axis labels and point labels render.
# "simhei.ttf" is a relative path, so it is looked up in the working directory.
font = FontProperties(fname=r"simhei.ttf", size=14)

# Hand-entered sample points: x goes on the axis labelled "亲吻次数" (kisses),
# y on the axis labelled "打斗次数" (fights); labels[i] names the point (x[i], y[i]).
x = [5,3,31,59,60,80]
y = [100,95,105,2,3,10]
labels = ["《战狼》","《红海行动》","《碟中谍6》","《前任3》","《春娇与志明》","《泰坦尼克号》"]

plt.scatter(x, y, s=120)
plt.xlabel("亲吻次数", fontproperties=font)
plt.ylabel("打斗次数", fontproperties=font)
plt.xticks(range(0, 150, 10))
plt.yticks(range(0, 150, 10))

# Walk coordinates and titles in lockstep instead of keeping a manual index.
for x_i, y_i, label in zip(x, y, labels):
    plt.annotate(label, xy=(x_i, y_i), xytext=(x_i, y_i), fontproperties=font)

# Render the figure; without this a plain script run never displays it.
plt.show()
# Load the movie table and echo it for inspection.
data = pd.read_excel('moive.xlsx')
print(data)

# Row label 6 is the movie being classified; all distances are measured from it.
query_idx = 6
dis = np.sqrt(
    (data['打斗次数'] - data.loc[query_idx, "打斗次数"]) ** 2
    + (data['接吻次数'] - data.loc[query_idx, "接吻次数"]) ** 2
)
data['距离'] = dis

# The query row is at distance 0 from itself, so remove it before choosing
# neighbours; otherwise it always occupies one of the three slots.
types = data.drop(index=query_idx).sort_values(by='距离').iloc[:3]["电影类型"]
# value_counts orders genres by frequency, so the first index is the majority.
new_mv_type = types.value_counts().index[0]
print('电影类型为:', new_mv_type)

# Same classification done with scikit-learn's KNN implementation.
from sklearn.neighbors import KNeighborsClassifier
# 1. Load the data.
mv_df = pd.read_excel("moive.xlsx",sheet_name=0)
# 2. Training features: .loc slices are label-based and inclusive, so this
#    takes rows 0..5 and every column from "打斗次数" through "接吻次数".
x = mv_df.loc[:5,"打斗次数":"接吻次数"].values
# 3. Training targets: the genre label of the same six rows.
y = mv_df.loc[:5,"电影类型"].values
# 4. Build the classifier. k=3 matches the hand-rolled version (top three
#    neighbours); an odd k also avoids 2-2 ties that an even k such as 4 allows.
knn_cls = KNeighborsClassifier(n_neighbors=3)
# 5. Fit on the six labelled movies.
knn_cls.fit(x,y)
# 6. Predict one new sample and print the result; in a script a bare
#    expression would silently discard it.
# NOTE(review): the sample must list its features in the same column order as
# the slice in step 2 — confirm that [5, 29] matches the spreadsheet layout.
print('电影类型为:', knn_cls.predict([[5,29]])[0])
上一篇:linux怎么将一个文件移动到另一个目录下


下一篇:JSP运行原理