1. 使用包绘制ROC曲线图
1.1 基本用法
# 安装并加载所需的R包
# install.packages("ROCR")
library(ROCR)
# 加载数据
data(ROCR.hiv)
# predictions 包含预测的向量、矩阵、列表或数据帧。
predictions <- ROCR.hiv$hiv.svm$predictions
# labels 包含真实类别标签的向量、矩阵、列表或数据框。必须与“预测”具有相同的维度
labels <- ROCR.hiv$hiv.svm$labels
# 使用prediction()函数构建prediction对象
pred <- prediction(predictions, labels)
pred
# 计算ROC值并绘制ROC曲线
## computing a simple ROC curve (x-axis: fpr, y-axis: tpr)
## Precision/recall graphs (x-axis: rec, y-axis: prec)
## Sensitivity/specificity plots (x-axis: spec, y-axis: sens)
## Lift charts (x-axis: rpp, y-axis: lift)
perf <- performance(pred, "tpr", "fpr")
perf
plot(perf,
avg = "threshold",
colorize=TRUE,
lwd= 3, # 曲线的粗细
box.lty = 7, box.lwd = 2, box.col = "gray", # 图例框的类型,粗细和颜色
spread.estimate = "boxplot",
main = "ROCR fingerpainting toolkit", # 标题
xlab = "Mary's axis", ylab="") # x和y轴标签
# 计算曲线下的面积即AUC值
auc <- performance(pred, "auc")
auc
auc_area <- slot(auc, "y.values")[[1]]
# 保留4位小数
auc_area <- round(auc_area, 4)
#添加文本注释
text_auc <- paste("AUC = ", auc_area, sep="")
text(0.25, 0.9, text_auc)
1.2 Precision/recall, Sensitivity/specificity,Lift charts等
perf <- performance(pred, "prec", "rec")
plot(perf,
avg= "threshold",
colorize = TRUE,
lwd= 3,
main = "Precision/Recall graphs")
plot(perf,
lty = 3,
col = "grey78",
add = TRUE)
perf <- performance(pred, "sens", "spec")
plot(perf,
avg= "threshold",
colorize=TRUE,
lwd = 3,
main = "Sensitivity/Specificity plots")
plot(perf,
lty = 3,
col = "grey78",
add = TRUE)
perf <- performance(pred, "lift", "rpp")
plot(perf,
avg = "threshold",
colorize = TRUE,
lwd = 3,
main = "Lift charts")
plot(perf,
lty = 3,
col = "grey78",
add = TRUE)
1.3 同时分析多批预测
data(ROCR.xval)
predictions <- ROCR.xval$predictions
labels <- ROCR.xval$labels
length(predictions)
pred <- prediction(predictions, labels)
perf <- performance(pred,'tpr','fpr')
plot(perf,
colorize = TRUE,
lwd = 2,
main = 'ROC curves from 10-fold cross-validation')
plot(perf,
avg = 'vertical',
spread.estimate = 'stderror',
lwd = 3,main = 'Vertical averaging + 1 standard error',
col = 'blue')
plot(perf,
avg = 'horizontal',
spread.estimate = 'boxplot',
lwd = 3,
main = 'Horizontal averaging + boxplots',
col = 'blue')
plot(perf,
avg = 'threshold',
spread.estimate = 'stddev',
lwd = 2,
main = 'Threshold averaging + 1 standard deviation',
colorize = TRUE)
2. 使用包绘制ROC曲线图
2.1 基本用法
# 安装并加载所需的R包
# install.packages("pROC")
library(pROC)
# 加载数据
data("aSAH")
head(aSAH)
## gos6 outcome gender age wfns s100b ndka
## 29 5 Good Female 42 1 0.13 3.01
## 30 5 Good Female 37 1 0.14 8.54
## 31 5 Good Female 42 1 0.10 8.09
## 32 5 Good Female 27 1 0.04 10.42
## 33 1 Poor Female 42 3 0.13 17.40
## 34 1 Poor Male 48 2 0.10 12.75
# 计算ROC值并绘制ROC曲线,response在前,predictor在后
roc(outcome ~ s100b, aSAH) # 或:roc(aSAH$outcome, aSAH$s100b)
# Smoothing
# roc(outcome ~ s100b, aSAH, smooth=TRUE)
roc1 <- roc(aSAH$outcome, aSAH$s100b, percent = TRUE,
partial.auc = c(100, 90), # 计算选定范围的AUC值
partial.auc.correct = TRUE, # 对AUC进行校正,以获得最大AUC为1.0和非歧视性AUC0.5
partial.auc.focus = "sens", # 计算partial.auc根据"sens",即敏感性;也可为"sp",准确性
# ci的参数
ci = TRUE, boot.n = 100, # bootstrap重复数(样本数据的放回抽样)
ci.alpha = 0.9, # ci置信区间
stratified = FALSE, # bootstrap是否分层
# 绘图
plot = TRUE, auc.polygon = TRUE, # 将area显示为多边形
max.auc.polygon = TRUE, # 将最大可能的区域显示为多边形
grid = TRUE, # 添加背景网格
print.auc = TRUE, # AUC的数值打印在图上
show.thres = TRUE)
# 将其他ROC曲线将被添加到现有的plot中
roc2 <- roc(aSAH$outcome, aSAH$wfns,
plot = TRUE, add = TRUE, percent = roc1$percent)
2.2 绘制置信区间,第二个参数为要找的坐标,“all“: ROC曲线上的所有点。
coords(roc1, "best", # "best": 用一个方法确定最佳阈值
ret = c("threshold", "specificity", "1-npv"))
## threshold specificity 1-npv
## threshold 0.075 22.22222 20
coords(roc2, "local maximas", # "local maximas": ROC曲线的局部极大值
ret = c("threshold", "sens", "spec", "ppv", "npv"))
## threshold sensitivity specificity ppv npv
## 1 -Inf 100.00000 0.00000 36.28319 NaN
## 2 1.5 95.12195 51.38889 52.70270 94.87179
## 3 2.5 65.85366 79.16667 64.28571 80.28169
## 4 3.5 63.41463 83.33333 68.42105 80.00000
## 5 4.5 43.90244 94.44444 81.81818 74.72527
## 6 Inf 0.00000 100.00000 NaN 63.71681
# 计算置信区间
ci(roc2)
# 绘制置信区间
sens.ci <- ci.se(roc1, specificities = seq(0, 100, 5)) # ci.se,在特定情况下计算灵敏度的置信区间
plot(sens.ci, type = "shape", col = "lightblue") # #type,plot的形状
plot(sens.ci, type = "bars")
# 加入ROC曲线
plot(roc2, add=TRUE) # 加入roc2
plot(ci.thresholds(roc2)) # 计算特异性和敏感性CI阈值
2.3 比较两条ROC曲线
# Two ROC curves
power.roc.test(roc1, roc2, reuse.auc = FALSE)
##
## Two ROC curves power calculation
##
## ncases = 41 # ncases,观察样本数
## ncontrols = 72 # ncontrols,对照样本数
## auc1 = 0.7313686
## auc2 = 0.8236789
## sig.level = 0.05 # sig.level,期望的显著性水平(第一类错误的probability)
## power = 0.3683204 # power,测试的期望power(第二类错误的1 -probability)
## alternative = two.sided
power.roc.test(roc1, roc2, power=0.9, reuse.auc=FALSE)
##
## Two ROC curves power calculation
##
## ncases = 123.0494
## ncontrols = 216.0868
## auc1 = 0.7313686
## auc2 = 0.8236789
## sig.level = 0.05
## power = 0.9
## alternative = two.sided
# One ROC curve
power.roc.test(auc=0.8, ncases=41, ncontrols=72)
##
## One ROC curve power calculation
##
## ncases = 41
## ncontrols = 72
## auc = 0.8
## sig.level = 0.05
## power = 0.999951
power.roc.test(auc=0.8, power=0.9)
##
## One ROC curve power calculation
##
## ncases = 16.6192
## ncontrols = 16.6192
## auc = 0.8
## sig.level = 0.05
## power = 0.9
3. 使用包绘制ROC曲线图
3.1 基本用法
# 安装并加载所需的R包
# install.packages("plotROC")
# remotes::install_github("sachsmc/plotROC")
library(plotROC)
# 创建数据集
set.seed(2529)
D.ex <- rbinom(200, size = 1, prob = .5)
M1 <- rnorm(200, mean = D.ex, sd = .65)
M2 <- rnorm(200, mean = D.ex, sd = 1.5)
test <- data.frame(D = D.ex, D.str = c("Healthy", "Ill")[D.ex + 1],
M1 = M1, M2 = M2, stringsAsFactors = FALSE)
# geom_roc添加ROC曲线图层的功能
basicplot_1 <- ggplot(test, aes(d = D, m = M1)) + geom_roc()
# style_roc可以添加ggplot 的函数到包含ROC曲线层的
basicplot_2 <- ggplot(test, aes(d = D, m = M1)) +
geom_roc(n.cuts = 5, # 定义要沿曲线显示的切割点数。可以使用n.cuts = 0和labels = FALSE,不显示标签
labelsize = 5, #标签的大小
labelround = 3 # 有效数字的数量
) +
style_roc(theme = theme_grey, # 会添加一条对角线,设置轴标签,并调整主要和次要网格线
xlab = "1 - Specificity")
# direct_label函数对 ggplot 对象进行操作,为绘图添加直接标签。并尝试智能地为标签选择合适的位置(也可使用nudge_x, nudge_y和label.angle调整位置)
basicplot_3 <- direct_label(basicplot_1, labels = "Biomarker", nudge_y = -.1) + style_roc()
library(patchwork)
basicplot_1 + basicplot_2 + basicplot_3 + plot_layout(ncol = 3)
3.2 设置置信区域
styledplot_1 <- basicplot_1 + style_roc()
# geom_rocci()函数使用Clopper和Pearson精确方法计算ROC曲线上点的置信区域,使用sig.level选项设置显着性水平(设置为 0.05)
styledplot_2 <- styledplot_1 + geom_rocci(sig.level = .01)
# 默认情况下,会选择沿曲线的一组 3 个均匀分布的点来显示置信区域。以通过将范围内的值向量传递m给ci.at参数来选择点
styledplot_3 <- ggplot(test, aes(d = D, m = M1)) + geom_roc(n.cuts = 0) +
geom_rocci(ci.at = quantile(M1, c(.1, .4, .5, .6, .9))) + style_roc()
styledplot_1 + styledplot_2 + styledplot_3 + plot_layout(ncol = 3)
3.3 多条 ROC 曲线
# ggplot需要长格式,将标记结果在一列中。melt_roc()执行此转换的功能
longtest <- melt_roc(test, "D", c("M1", "M2"))
head(longtest)
pairplot_1 <- ggplot(longtest, aes(d = D, m = M, color = name)) + geom_roc(n.cuts = 0) + style_roc()
pairplot_2 <- ggplot(longtest, aes(d = D, m = M)) + geom_roc() + facet_wrap(~ name) + style_roc()
pairplot_3 <- ggplot(longtest, aes(d = D, m = M, linetype = name)) + geom_roc() + geom_rocci()
pairplot <- ggplot(longtest, aes(d = D, m = M, color = name)) +
geom_roc(show.legend = FALSE) + style_roc()
pairplot_4 <- direct_label(pairplot)
pairplot_5 <- pairplot + geom_rocci(linetype = 1)
pairplot_1 + pairplot_2 + pairplot_3 + pairplot_4 + pairplot_5 + plot_layout(ncol = 2)
3.4 主题和注释
# calc_auc()计算ROC曲线下面积
calc_auc(basicplot_1)
# plotROC 使用ggplot2包创建要绘制的对象。因此,主题和注释可以通过通常的 ggplot2 方式添加,或者使用外部库,如ggthemes.
basicplot_1 +
style_roc(theme = theme_grey) +
theme(axis.text = element_text(colour = "blue")) +
ggtitle("Themes and annotations") +
theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold", margin = unit(c(0.5,0.5,0.5,0.5), "cm"))) +
annotate("text", x = .75, y = .25,
label = paste("AUC =", round(calc_auc(basicplot_1)$AUC, 2))) +
scale_x_continuous("1 - Specificity", breaks = seq(0, 1, by = .1))
4. 使用包绘制ROC曲线图
4.1 基本用法
# 安装并加载所需的R包
#install.packages("survivalROC")
library(survivalROC)
# 加载内置数据集
data(mayo)
head(mayo)
## time censor mayoscore5 mayoscore4
## 1 41 1 11.251850 10.629450
## 2 179 1 10.136070 10.185220
## 3 334 1 10.095740 9.422995
## 4 400 1 10.189150 9.567799
## 5 130 1 9.770148 9.039419
## 6 223 1 9.226429 9.033388
# 计算数据的行数
nobs <- NROW(mayo)
# 自定义阈值
cutoff <- 365
# 设置图形的边界
par(mar= c(5,5,3,3),cex.lab = 1.2, cex.axis = 1.2)
# 使用MAYOSCORE 4作为marker, 并用NNE(Nearest Neighbor Estimation)法计算ROC值
Mayo4.1 = survivalROC(Stime=mayo$time,
status=mayo$censor,
marker = mayo$mayoscore4,
predict.time = cutoff,
span = 0.25*nobs^(-0.20))
Mayo4.1
# 绘制ROC曲线
plot(Mayo4.1$FP, Mayo4.1$TP, type="l",
xlim=c(0,1), ylim=c(0,1), col="red",
xlab=paste( "FP", "\n", "AUC = ",round(Mayo4.1$AUC,3)),
ylab="TP",main="Mayoscore 4, Method = NNE \n Year = 1")
# 添加对角线
abline(0,1)
# 使用KM(Kaplan-Meier)法计算ROC值
## MAYOSCORE 4, METHOD = KM
Mayo4.2= survivalROC(Stime=mayo$time,
status=mayo$censor,
marker = mayo$mayoscore4,
predict.time = cutoff,
method="KM")
Mayo4.2
plot(Mayo4.2$FP, Mayo4.2$TP, type="l",
xlim=c(0,1), ylim=c(0,1), col="blue",
xlab=paste( "FP", "\n", "AUC = ",round(Mayo4.2$AUC,3)),
ylab="TP",main="Mayoscore 4, Method = KM \n Year = 1")
abline(0,1,lty=2,col="gray")
4.2 将两种方法的结果绘制到同一个图里
## 绘制NNE法计算的ROC曲线
plot(Mayo4.1$FP, Mayo4.1$TP,
type="l",col="red",
xlim=c(0,1), ylim=c(0,1),
xlab="FP",
ylab="TP",
main="Time dependent ROC")
# 添加对角线
abline(0,1,col="gray",lty=2)
## 添加KM法计算的ROC曲线
lines(Mayo4.2$FP, Mayo4.2$TP,
type="l",col="blue",
xlim=c(0,1), ylim=c(0,1))
# 添加图例
legend("bottomright",legend = c(paste("AUC of NNE =", round(Mayo4.1$AUC,3)),
paste("AUC of KM =", round(Mayo4.2$AUC,3))),
col=c("red","blue"),
lty= 1 ,lwd= 2)
4.3 计算最佳截点
# 1年的最佳截点
roc1 <- survivalROC(Stime = mayo$time,
status = mayo$censor,
marker = mayo$mayoscore4,
method = "KM",
predict.time = 365 # 时间选1年(365天)
)
Youden <- roc1$TP - roc1$FP # 约登指数 = TP - FP = 灵敏度+特异度-1
print(max(Youden)) # 查看最大的约登指数值
## [1] 0.8545455
print(which.max(Youden)) # 查看最大的约登指数值对应的向量位置
## [1] 263
roc1$cut.values[263] #根据向量位置(此处为263),取出位置263对应的mayoscore4表达值,即为最佳分界值7.504279
## [1] 7.504279
5. 使用包绘制ROC曲线图
5.1 基本用法
# 安装并加载所需的R包
#install.packages("timeROC")
library(timeROC)
library(survival)
# 加载内置数据集
data(pbc)
head(pbc)
pbc <- pbc[!is.na(pbc$trt),] # 只选择随机化的受试者
pbc$status <- as.numeric(pbc$status==2) # 创建事件指示器:1 代表死亡,0 代表被审查
ROC <- timeROC(T = pbc$time,
delta = pbc$status,
marker = pbc$bili,
cause = 1,
weighting = "marginal",
times = c(365, 730, 1095),
iid = TRUE)
plot(ROC,
time = 365, col = "red", lwd=2, title = "") # time是时间点,col是线条颜色
plot(ROC,
time = 730, col = "blue", add = TRUE, lwd = 2) # add指是否添加在上一张图中
plot(ROC,
time = 1095, col = "orange", add = TRUE, lwd = 2)
#添加标签信息
legend("bottomright",
c(paste0("AUC at 1 year: ",round(ROC[["AUC"]][1],2)),
paste0("AUC at 2 year: ",round(ROC[["AUC"]][2],2)),
paste0("AUC at 3 year: ",round(ROC[["AUC"]][3],2))),
col = c("red", "blue", "orange"),
lty = 1, lwd = 2, bty = "n")
参考: