код:
from surprise import Dataset, evaluate
from surprise import KNNBasic
from collections import defaultdict
from surprise import Reader
import os
import datetime
# Print the start time; a matching timestamp is printed at the end of the
# script so the total run time can be read off the output.
print(datetime.datetime.now())

# Each line of the ratings file is parsed as "user item rating", split on a
# single space.
file_path = os.path.expanduser('ratings.txt')
reader = Reader(line_format='user item rating ', sep=' ')
data = Dataset.load_from_file(file_path, reader)

# Use every loaded rating for training (no hold-out split).
trainingset = data.build_full_trainset()

# User-based KNN with Pearson similarity.
sim_options = {
    'name': 'pearson',
    'min_support': 5,
    'user_based': True,
}
algo = KNNBasic(k=25, min_k=5, sim_options=sim_options, verbose=True)
algo.fit(trainingset)

# The anti-testset is built from the training set itself with a placeholder
# rating of 0, and the fitted model is asked for an estimate on each entry.
# NOTE(review): per the Surprise docs this is every (user, item) pair that is
# absent from the training set, which can be very large -- confirm memory use.
testset = trainingset.build_anti_testset(fill=0)
predictions = algo.test(testset)
def get_top15_recommendations(predictions, topN=15):
    """Return the ``topN`` highest-estimated items for each user.

    Args:
        predictions: Iterable of 5-element records
            ``(uid, iid, true_r, est, details)``. Only ``uid``, ``iid`` and
            ``est`` are used.
        topN: Maximum number of items kept per user (default 15).

    Returns:
        A ``defaultdict(list)`` mapping each user id to a list of
        ``(iid, est)`` tuples, sorted by ``est`` in descending order and
        truncated to at most ``topN`` entries. Items with equal estimates
        keep their input order (the sort is stable).
    """
    top_recs = defaultdict(list)

    # Group every (item, estimate) pair under its user.
    for uid, iid, _true_r, est, _details in predictions:
        top_recs[uid].append((iid, est))

    # Rank each user's items best-first and keep only the leading topN.
    # Reassigning an existing key does not resize the dict, so doing it
    # while iterating over items() is safe.
    for uid, user_ratings in top_recs.items():
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        top_recs[uid] = user_ratings[:topN]

    return top_recs
# Function-call form of print works on both Python 2 and Python 3 and matches
# the other print calls in this script.
print("processing")
top15_recommendations = get_top15_recommendations(predictions)

# Write one line per user: "<uid> <(iid, est)>", where the tuple is the last
# (i.e. lowest-estimated) entry of that user's top list -- the user's
# threshold. The with-block guarantees the file is flushed and closed.
with open("thresholds_movielens.txt", 'w') as f:
    for uid, user_ratings in top15_recommendations.items():
        f.write("{} {}\n".format(uid, user_ratings[-1]))

# Print the finish time of the run.
print(datetime.datetime.now())
Набор данных доступен по адресу:
https://grouplens.org/datasets/movielens/1m/
Распределение должно быть таким:
https://ai2-s2-public.s3.amazonaws.com/figures/2017-08-08/b3471c62c0d3735b36f551fae6427b6d573411c0/7-Figure4-1.png