import numpy as np
import operator
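# test: the query sample to classify, shape (1, feature_size)
# train_x: training samples, shape (samples, feature_size)
# train_y: training sample labels, shape (samples, 1)
# k: number of nearest neighbours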
def knn(test, train_x, train_y, k):
    """Classify ``test`` by majority vote among its ``k`` nearest samples.

    Distance is Euclidean. When several labels receive the same number of
    votes, the label met first while walking the neighbours from nearest to
    farthest wins (the vote dict keeps insertion order and ``sorted`` is
    stable).

    Args:
        test: Feature vector of the query point, shape ``(feature_size,)``
            or ``(1, feature_size)``.
        train_x: NumPy array of training features, shape
            ``(samples, feature_size)``.
        train_y: Training labels, one per sample. May be a plain sequence,
            a 1-D array, or a ``(samples, 1)`` column array.
        k: Number of nearest neighbours that vote. Values larger than the
            number of training samples are clamped to that number.

    Returns:
        The label that collected the most votes.

    Raises:
        IndexError: If no neighbour votes (``k < 1`` or no training samples).
    """
    m = train_x.shape[0]  # number of training samples

    # Broadcasting subtracts every training row from `test`; an explicit
    # np.tile copy of `test` is not needed.
    diff_matrix = np.asarray(test) - train_x
    distances = (diff_matrix ** 2).sum(axis=1) ** 0.5  # Euclidean distances
    sorted_distances_index = distances.argsort()  # indices, nearest first

    # Indexing a (samples, 1) label array yields 1-element arrays, which are
    # unhashable and cannot be used as dict keys; flatten so each label is a
    # scalar. Non-array label sequences are used as given.
    labels = train_y.ravel() if isinstance(train_y, np.ndarray) else train_y

    class_dt = {}
    # Never ask for more neighbours than there are samples.
    for i in range(min(k, m)):
        y_ = labels[sorted_distances_index[i]]
        class_dt[y_] = class_dt.get(y_, 0) + 1

    sorted_class_y = sorted(
        class_dt.items(), key=operator.itemgetter(1), reverse=True
    )
    # Diagnostic output of the vote tally, kept so console output is unchanged.
    print(class_dt.items())
    print(sorted_class_y)
    return sorted_class_y[0][0]
# Note: for the usage of numpy.tile(), see the article "A simple explanation of the tile() function in NumPy".