# Association rule analysis
import datetime

import pandas
from apyori import apriori

# Smallest number of movies an itemset must contain to be reported.
MIN_RULE_LENGTH = 2

# Map every movieId to its title so mined itemsets can be printed readably.
movie = pandas.read_csv('Data/movies.csv')
movie_dic = dict(zip(movie['movieId'], movie['title']))

# Build one transaction per user: the list of movieIds found in that user's
# rows of the ratings file.
df = pandas.read_csv('Data/ratings.csv')
transactions = df.groupby('userId')['movieId'].apply(list).tolist()

# NOTE(review): apyori's documented keyword arguments are min_support,
# min_confidence, min_lift and max_length. The ``min_length=2`` that used to
# be passed here does not appear among them, so the length floor is applied
# explicitly on the returned records instead -- confirm against the installed
# apyori version.
rules = apriori(transactions, min_support=0.2, min_confidence=0.5, min_lift=3)
results = [rec for rec in rules if len(rec.items) >= MIN_RULE_LENGTH]

# Print each surviving record, followed by the titles of the movies in it.
for rec in results:
    print(rec)
    print(';'.join([movie_dic[item] for item in rec.items]))
# Frequent pattern mining
from pymining import itemmining

# Build the FP-tree input from the per-user transactions, then mine itemsets
# with FP-growth using a minimum support of 30.
fp_input = itemmining.get_fptree(transactions)
report = itemmining.fpgrowth(fp_input, min_support=30, pruning=True)

# Only itemsets containing at least six movies are printed, as a
# semicolon-separated list of titles.
for itemset in report:
    if len(itemset) < 6:
        continue
    titles = [movie_dic[movie_id] for movie_id in itemset]
    print(';'.join(titles))