# Manhattan distance
def manhattan(rating1, rating2):
    """Return the Manhattan distance between two rating dicts.

    Both arguments are dicts mapping a key to a numeric rating. Only keys
    present in both dicts contribute; the result is the sum of the absolute
    differences of their ratings.

    Returns 0 when the two dicts share no keys.
    """
    return sum(
        abs(rating1[key] - rating2[key])
        for key in rating1
        if key in rating2
    )
# Euclidean distance
def euclid(rating1, rating2):
    """Return the Euclidean distance between two rating dicts.

    Only keys present in both dicts contribute. The result is the square
    root of the sum of squared rating differences over those shared keys.

    Returns 0.0 when the two dicts share no keys.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    from math import sqrt

    squared_total = sum(
        (rating1[key] - rating2[key]) ** 2
        for key in rating1
        if key in rating2
    )
    return sqrt(squared_total)
# Minkowski distance
def minkowski(rating1, rating2, r):
    """Return the Minkowski distance of order ``r`` between two rating dicts.

    Only keys present in both dicts contribute. The result is
    ``(sum(|x - y| ** r)) ** (1 / r)`` over those shared keys, so ``r = 1``
    gives the Manhattan distance and ``r = 2`` the Euclidean distance.

    Returns 0.0 when the two dicts share no keys (for positive ``r``).
    Raises ZeroDivisionError when ``r`` is 0.
    """
    powered_total = sum(
        pow(abs(rating1[key] - rating2[key]), r)
        for key in rating1
        if key in rating2
    )
    # 1.0 keeps the exponent a float even under integer division rules.
    return pow(powered_total, 1.0 / r)
# Pearson correlation coefficient
def pearson(rating1, rating2):
    """Return the Pearson correlation coefficient of two rating dicts.

    Only keys present in both dicts contribute. The coefficient is computed
    in a single pass from running sums:

        (sum_xy - sum_x * sum_y / n)
        / (sqrt(sum_x2 - sum_x ** 2 / n) * sqrt(sum_y2 - sum_y ** 2 / n))

    Returns 0 when the coefficient is undefined: the dicts share no keys,
    or the shared ratings of either dict have no variance.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    from math import sqrt

    sum_x = sum_y = sum_xy = sum_x2 = sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            x = rating1[key]
            y = rating2[key]
            n += 1
            sum_x += x
            sum_y += y
            sum_xy += x * y
            sum_x2 += x ** 2
            sum_y2 += y ** 2

    # No shared keys: avoid dividing by zero below.
    if n == 0:
        return 0

    # Divide by a float so integer ratings are never truncated.
    count = float(n)
    spread_x = sum_x2 - (sum_x ** 2) / count
    spread_y = sum_y2 - (sum_y ** 2) / count

    # Floating-point rounding can push a zero spread slightly negative,
    # which would make sqrt() raise; treat that the same as zero.
    if spread_x <= 0 or spread_y <= 0:
        return 0

    covariance = sum_xy - (sum_x * sum_y) / count
    return covariance / (sqrt(spread_x) * sqrt(spread_y))
# Cosine similarity
# The numerator is the dot product of the two rating vectors x and y.
def cosine_similarity(rating1, rating2):
    """Return the cosine similarity of two rating dicts.

    Only keys present in both dicts contribute, to the dot product and to
    both vector norms. The result is ``dot(x, y) / (|x| * |y|)`` over those
    shared keys.

    Returns 0 when the similarity is undefined: the dicts share no keys, or
    the shared ratings of either dict are all zero.
    """
    # Imported locally so the function does not depend on a module-level
    # import being present.
    from math import sqrt

    dot = 0
    norm_sq_x = 0
    norm_sq_y = 0
    for key in rating1:
        if key in rating2:
            x = rating1[key]
            y = rating2[key]
            dot += x * y
            norm_sq_x += x ** 2
            norm_sq_y += y ** 2

    denominator = sqrt(norm_sq_x) * sqrt(norm_sq_y)
    # A zero-length vector has no direction; report "no similarity"
    # instead of dividing by zero.
    if denominator == 0:
        return 0
    return dot / denominator