协同过滤推荐算法实现。
接上篇日志,获得了新数据的索引之后,模型的输入基本上就搞定了。
模型是基于MF算法的,原理请移步:https://zhuanlan.zhihu.com/p/69662980
总之,模型的训练需要用户和item的交互数据,而测试只需要用户和item的id,就能预测用户对每个item的喜好程度,并用score量化。有了每个item的score,系统就可以取score最高的前K个item推荐给用户。
不过实现的推荐算法精度并不高,只有8%左右,和随机推荐的效果也差不多了。
代码实现:
import warnings

import numpy as np
import torch
from torch import nn
from torch.nn import functional as F

from rec.model_FM import FM as rec_model
# Width of each user/item embedding vector (passed to myModel as embedding_size).
latent_len = 64
# Row counts of the user and item embedding tables.
user_num = 33278
item_num = 172946
# Id of the user to produce recommendations for (entry point for callers).
user = 20
# Number of items kept from the ranked candidate list.
k = 30
# --model setup----------------------------------------------------------------------------------
class myModel(nn.Module):
    """Recommender wrapper that owns the user and item embedding tables.

    The module looks up the embeddings for the given ids and hands both the
    raw ids and the embeddings to the imported ``rec_model`` scorer; whatever
    the scorer returns is returned unchanged.

    Args:
        embedding_size: Width of each user/item embedding vector.
        user_num: Number of rows in the user embedding table.
        item_num: Number of rows in the item embedding table.
    """

    def __init__(self, embedding_size, user_num, item_num):
        super().__init__()

        # Scorer imported from the project; built first so that parameter
        # registration order stays rec_model -> embeddings -> bias.
        self.rec_model = rec_model(embedding_size, user_num, item_num)

        self.user_embeddings = nn.Embedding(user_num, embedding_size)
        self.item_embeddings = nn.Embedding(item_num, embedding_size)

        # Replace the default embedding init: draw Xavier-uniform values,
        # rescale every row to unit L2 norm, then copy into the tables.
        user_init = torch.empty(user_num, embedding_size, dtype=torch.float32)
        item_init = torch.empty(item_num, embedding_size, dtype=torch.float32)
        nn.init.xavier_uniform_(user_init)
        nn.init.xavier_uniform_(item_init)
        self.user_embeddings.weight.data.copy_(
            F.normalize(user_init, p=2, dim=1)
        )
        self.item_embeddings.weight.data.copy_(
            F.normalize(item_init, p=2, dim=1)
        )

        # One-element parameter registered on the module; forward() does not
        # read it.
        self.bias = nn.Parameter(torch.zeros(1, dtype=torch.float32))

    def forward(self, u_ids, i_ids):
        """Return the scorer's output for paired user ids and item ids.

        Args:
            u_ids: Integer tensor of user ids.
            i_ids: Integer tensor of item ids, paired with ``u_ids``.

        Returns:
            The value produced by ``rec_model`` for these ids and their
            embeddings (presumably one score per pair -- confirm against
            the scorer's implementation).
        """
        # One embedding row is looked up per id.
        user_vecs = self.user_embeddings(u_ids)
        item_vecs = self.item_embeddings(i_ids)
        return self.rec_model(u_ids, i_ids, user_vecs, item_vecs)
# Build the network. It is wrapped in DataParallel, which prefixes every
# state-dict key with "module."; presumably the checkpoint was saved from a
# wrapped model as well -- confirm against the training script.
model = myModel(latent_len, user_num, item_num)
model = nn.DataParallel(model)
#--load model--------------------------------
model_path = 'model_recommendation.pt'
model_dict_pretrained = torch.load(model_path, map_location='cpu')
cur_model_dict = model.state_dict()
# Keep only checkpoint entries whose names exist in the current model.
shared_dict = {
    name: tensor
    for name, tensor in model_dict_pretrained.items()
    if name in cur_model_dict
}
# Parameters absent from the checkpoint keep their random initial values.
# Surface that instead of silently serving near-random recommendations.
not_restored = [name for name in cur_model_dict if name not in shared_dict]
if not_restored:
    warnings.warn(
        "%d of %d model entries were not found in %s and keep their "
        "initial values: %s"
        % (len(not_restored), len(cur_model_dict), model_path, not_restored)
    )
cur_model_dict.update(shared_dict)
model.load_state_dict(cur_model_dict)
# Inference only: put layers such as dropout/batch-norm (if the scorer has
# any) into evaluation behaviour.
model.eval()
#--get user_ids,item_ids------------------------------------------------
# Candidate item indices are read from disk and scored for a single user.
data = np.load('indexT.npy')
item_ids = torch.from_numpy(data)
# Pair the requested user id with every candidate: one int64 entry per item.
user_ids = torch.full((len(item_ids),), user, dtype=torch.long)
#--get top K item_ids-----------------------------------------------------------
# Score every candidate for the requested user. Gradients are not needed for
# inference, so skip building the autograd graph.
with torch.no_grad():
    scores = model(user_ids, item_ids)
scores = scores.detach().cpu().numpy()
# argsort() is ascending; negate the scores so the highest-scoring candidates
# come first, then keep the first k positions.
rank_list = (-scores).argsort()[:k]
# Entries of item_ids at the top-k positions (indices into the original data).
rec_list = item_ids[rank_list]
#print(rec_list)
# Map those indices back to the original item ids.
rec_list = rec_list.detach().cpu().numpy()
index = np.load("recipe_index_list_new.npy")
for i in rec_list:
    # Printed as a one-element list, matching the original output format.
    print([index[i]])
代码是在学长的代码基础上完成的。协同过滤推荐是上学期的课设内容,但是当时我只是把代码跑通,根本没有深入,所以暑期实训依然一头雾水。因此,这次的推荐算法是在管理同学和学长的帮助下实现的。
接下来的工作是利用flask实现算法的嵌入。