Self-made KNN algorithms

最近生统课上的作业，要求不调包、自己写一个 KNN 算法，用于预测鸢尾花（iris）数据集中样本的品种。代码如下：

# Six hand-written observations to classify. The column names match the four
# measurement columns of the built-in iris data; rows are named sample1..sample6.
test_samples <- data.frame(
  Sepal.Length = c(6.1, 5.9, 6.7, 5.6, 7.0, 6.5),
  Sepal.Width  = c(2.5, 5.0, 4.0, 3.1, 3.6, 3.2),
  Petal.Length = c(1.7, 2.0, 6.5, 1.5, 6.3, 4.8),
  Petal.Width  = c(0.3, 1.2, 2.2, 0.1, 2.5, 1.5),
  row.names    = paste0("sample", seq_len(6))
)
test_samples  # auto-printed when the script runs at top level


# Euclidean distance between two equal-length numeric inputs.
#
# Args:
#   vector1: numeric vector.
#   vector2: numeric vector of the same length as vector1. A one-row numeric
#            data frame also works, because `-`, `^` and sum() are defined
#            for data frames.
#
# Returns:
#   A single non-negative number, sqrt(sum((vector1 - vector2)^2)).
cal.dist <- function(vector1, vector2) {
  v.diff <- vector1 - vector2
  # The square root belongs outside the sum: sqrt() of each squared term only
  # yields |difference|, which would make this a Manhattan distance instead.
  sqrt(sum(v.diff^2))
}


# Classify one observation by majority vote among its k nearest training rows.
#
# Args:
#   train: data frame with numeric feature columns and at least one
#          non-numeric column; the first non-numeric column is the label.
#   test:  a single observation, given as a one-row data frame or a vector.
#          When it carries names covering every numeric column of `train`,
#          values are matched by name; otherwise they are matched by position.
#   k:     number of neighbours that vote (default 3); values larger than
#          nrow(train) are capped at nrow(train).
#
# Returns:
#   The winning label as a character string. When several labels share the
#   top count, the one that comes first in table() order wins.
k.nearest.neighbors <- function(train, test, k = 3) {
  is.num <- vapply(train, is.numeric, logical(1))
  if (!any(is.num) || all(is.num)) {
    stop("`train` needs at least one numeric feature column and one label column",
         call. = FALSE)
  }
  if (nrow(train) < 1L) {
    stop("`train` must contain at least one row", call. = FALSE)
  }
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 1)

  # drop = FALSE keeps a data frame even with a single feature column
  train.numeric <- train[, is.num, drop = FALSE]
  # The first non-numeric column is treated as the class label
  train.label <- train[[which(!is.num)[1L]]]

  # Line the test values up with the training features by name when possible,
  # so extra or reordered columns cannot silently corrupt the distance.
  feature.names <- names(train.numeric)
  if (!is.null(names(test)) && all(feature.names %in% names(test))) {
    test <- test[feature.names]
  }
  test.vec <- as.numeric(unlist(test, use.names = FALSE))
  if (length(test.vec) != length(feature.names)) {
    stop("`test` must hold exactly one value per numeric column of `train`",
         call. = FALSE)
  }

  # Distance from the test observation to every training row
  distances <- apply(as.matrix(train.numeric), MARGIN = 1, cal.dist, test.vec)

  # Vote among the k closest rows; order() keeps the original row order on ties
  k <- min(as.integer(k), nrow(train))
  nearest <- order(distances, decreasing = FALSE)[seq_len(k)]
  votes <- table(train.label[nearest])
  names(votes)[which.max(votes)]
}

# Main script: classify every row of test_samples against the iris data and
# print one predicted label per row.
# seq_len() makes the loop a no-op for an empty frame, where 1:nrow() would
# iterate over c(1, 0).
for (i in seq_len(nrow(test_samples))) {
  line <- test_samples[i, ]
  print(k.nearest.neighbors(iris, line, k = 3))   # print the output vote results
}

预测结果如下：

setosa
setosa
virginica
setosa
virginica
versicolor

Self-made KNN algorithms

推荐阅读更多精彩内容