Naïve Bayes classification

  1. import the data
data(Titanic)
str(Titanic)
# 'table' num [1:4, 1:2, 1:2, 1:2] 0 0 35 0 0 0 17 0 118 154 ...
# - attr(*, "dimnames")=List of 4
# ..$ Class   : chr [1:4] "1st" "2nd" "3rd" "Crew"
# ..$ Sex     : chr [1:2] "Male" "Female"
# ..$ Age     : chr [1:2] "Child" "Adult"
# ..$ Survived: chr [1:2] "No" "Yes"
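
The str() output only shows the structure of the 4-way table. As an optional inspection step (not part of the original workflow), the table can be flattened with base R's ftable(), and sum() gives the total number of recorded cases:
# Optional: flatten the 4-way contingency table for easier reading
ftable(Titanic, row.vars = c("Class", "Sex"), col.vars = c("Age", "Survived"))
# Total number of passengers and crew in the table
sum(Titanic)
# [1] 2201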
  2. convert the array into a data frame
countsToCases <- function(x, countcol = "Freq") {
  # Get the row indices to pull from x
  idx <- rep.int(seq_len(nrow(x)), x[[countcol]])
  # Drop count column
  x[[countcol]] <- NULL
  # Get the rows from x
  x[idx, ]
}

caseTita <- countsToCases(as.data.frame(Titanic))
head(caseTita)
# Class  Sex   Age Survived
# 3     3rd Male Child       No
# 3.1   3rd Male Child       No
# 3.2   3rd Male Child       No
# 3.3   3rd Male Child       No
# 3.4   3rd Male Child       No
# 3.5   3rd Male Child       No

nrow(caseTita)
# [1] 2201
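
The 2201 rows match the total of the Freq column, which is a quick sanity check on the expansion. As an alternative sketch (assuming the tidyr package is installed), tidyr::uncount() performs the same counts-to-cases expansion:
# Sanity check: total frequency equals the number of expanded cases
sum(as.data.frame(Titanic)$Freq)
# [1] 2201
# Alternative sketch with tidyr (assumes the package is installed)
library(tidyr)
caseTita2 <- uncount(as.data.frame(Titanic), Freq)
nrow(caseTita2)
# [1] 2201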
  3. Naïve Bayes classification
library(e1071)
model <- naiveBayes(Survived ~ ., data = caseTita)
predict(model, caseTita[sample(1:2201,10,replace=FALSE),])
# [1] No  No  No  No  No  No  Yes No  Yes No 
# Levels: No Yes
predict(model, caseTita[sample(1:2201,10,replace=FALSE),],type="raw")
# No       Yes
# [1,] 0.7247820 0.2752180
# [2,] 0.6960593 0.3039407
# [3,] 0.8466171 0.1533829
# [4,] 0.3679509 0.6320491
# [5,] 0.8466171 0.1533829
# [6,] 0.7247820 0.2752180
# [7,] 0.8466171 0.1533829
# [8,] 0.3523184 0.6476816
# [9,] 0.8552217 0.1447783
# [10,] 0.8466171 0.1533829
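
Note that the two predict() calls above each draw a fresh random sample, so the class labels and the raw posteriors do not refer to the same passengers. A small sketch to make them comparable (the seed value is arbitrary):
# Fix the sample so the class labels and the posteriors describe the same rows
set.seed(1)                                    # arbitrary seed, for reproducibility only
idx <- sample(1:2201, 10, replace = FALSE)
predict(model, caseTita[idx, ])                # predicted classes
predict(model, caseTita[idx, ], type = "raw")  # matching posterior probabilities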

m <- naiveBayes(Survived ~ ., data = Titanic)
m

# Naive Bayes Classifier for Discrete Predictors
# 
# Call:
#   naiveBayes.formula(formula = Survived ~ ., data = Titanic)
# 
# A-priori probabilities:
#   Survived
# No      Yes 
# 0.676965 0.323035 
# 
# Conditional probabilities:
#   Class
# Survived        1st        2nd        3rd       Crew
# No  0.08187919 0.11208054 0.35436242 0.45167785
# Yes 0.28551336 0.16596343 0.25035162 0.29817159
# 
# Sex
# Survived       Male     Female
# No  0.91543624 0.08456376
# Yes 0.51617440 0.48382560
# 
# Age
# Survived      Child      Adult
# No  0.03489933 0.96510067
# Yes 0.08016878 0.91983122
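
To see how the classifier combines these tables, a posterior can be assembled by hand: multiply the a-priori probability of each class by the corresponding conditional probabilities and normalise. A minimal worked sketch for a first-class adult female, using the numbers printed above:
# Posterior for Class = 1st, Sex = Female, Age = Adult, computed by hand
p_yes <- 0.323035 * 0.28551336 * 0.48382560 * 0.91983122  # prior(Yes) * conditionals
p_no  <- 0.676965 * 0.08187919 * 0.08456376 * 0.96510067  # prior(No)  * conditionals
p_yes / (p_yes + p_no)   # about 0.90: survival is very likely for this profile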
  4. split the data into the predictor data frame and outcome vector
library(caret)
x <- caseTita[, -4]
y <- caseTita$Survived

model1 <- train(x, y, 'nb', trControl = trainControl(method = 'cv', number = 10))
model1
# Naive Bayes 
# 
# 2201 samples
# 3 predictor
# 2 classes: 'No', 'Yes' 
# 
# No pre-processing
# Resampling: Cross-Validated (10 fold) 
# Summary of sample sizes: 1981, 1981, 1981, 1981, 1981, 1981, ... 
# Resampling results across tuning parameters:
#   
#   usekernel  Accuracy   Kappa    
# FALSE      0.7791814  0.4474594
# TRUE      0.7791814  0.4474594
# 
# Tuning parameter 'fL' was held constant at a value of 0
# Tuning parameter 'adjust' was held
# constant at a value of 1
# Accuracy was used to select the optimal model using the largest value.
# The final values used for the model were fL = 0, usekernel = FALSE and adjust = 1.
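
Both usekernel values give identical accuracy here because all three predictors are categorical, so kernel density estimation never comes into play. As an optional sketch (caret's 'nb' method relies on the klaR package, which must be installed, and the grid values are arbitrary), the Laplace smoothing parameter fL could be tuned explicitly instead of being held at 0:
# Optional sketch: tune Laplace smoothing (fL) instead of holding it constant
grid <- expand.grid(fL = c(0, 0.5, 1), usekernel = FALSE, adjust = 1)
model2 <- train(x, y, 'nb',
                trControl = trainControl(method = 'cv', number = 10),
                tuneGrid  = grid)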
  5. predict the outcome
predict(model1$finalModel,caseTita[sample(1:2201,10,replace=FALSE),])$class
# 27.63 12.225  30.44 12.630  11.24  15.38   9.76  31.15 10.150  10.53 
# No     No    Yes     No     No    Yes     No    Yes     No     No 
# Levels: No Yes
table(predict(model1$finalModel,x)$class,y)
#        y
#      No  Yes
# No  1364  362
# Yes  126  349
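
The confusion table translates into an overall accuracy on the training data (a resubstitution estimate, unlike the cross-validated figure above); caret's confusionMatrix() reports the same table together with accuracy, kappa, sensitivity and other summaries:
# Overall accuracy from the table above: correct predictions / all cases
(1364 + 349) / 2201
# [1] 0.7782826
# Or let caret summarise the same predictions
confusionMatrix(predict(model1$finalModel, x)$class, y)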

Reference:
Zhang Zhongheng. Naïve Bayes classification in R.
