前面的程序接上次从训练集到测试集到总数那篇文章,这篇加个FOR循环语句,自己找达到条件的模型
for(k in 80:10000){set.seed(k)
trainsam<-sample(rownames(sigcoxdata),length(rownames(sigcoxdata))/2)
trainsam1<-rownames(sigcoxdata)%in%trainsam
coxtrain<-sigcoxdata[trainsam1,]
coxtest<-sigcoxdata[!trainsam1,]
###单因素回归分析
univar4_out<-data.frame(matrix(NA,(ncol(coxtrain)-2),5))
rownames(univar4_out)<-colnames(coxtrain)[-c(1:2)]
colnames(univar4_out)<-c("Coeffcient","HR","lower.95","upper.95","P-Value")
for (i in colnames(coxtrain)[-c(1:2)]) {
cox<-coxph(Surv(life_span,status)~coxtrain[,i],data = coxtrain)
cox4_summary<-summary(cox)
univar4_out[i,1]<-cox4_summary$coefficients[,1]
univar4_out[i,2]<-cox4_summary$coefficients[,2]
univar4_out[i,3]<-cox4_summary$conf.int[,3]
univar4_out[i,4]<-cox4_summary$conf.int[,4]
univar4_out[i,5]<-cox4_summary$coefficients[,5]
}
uni001<-univar4_out[univar4_out$`P-Value`<0.01,]
uni005<-univar4_out[univar4_out$`P-Value`<0.05,]
###多因素回归并建模
siggenename<-rownames(uni005)
coxmultidata<-cbind(coxtrain[,c(1,2)],coxtrain[,siggenename])
coxmulti<-coxph(Surv(life_span,status)~.,data = coxmultidata)
cox_model<-step(coxmulti,direction = "both")
cox_model
cox_sum<-summary(cox_model)
multivar4_out<-data.frame(matrix(NA,length(rownames(cox_sum$coefficients)),5))
rownames(multivar4_out)<-rownames(cox_sum$coefficients)
colnames(multivar4_out)<-c("Coeffcient","HR","lower.95","upper.95","P-Value")
multivar4_out[,1]<-cox_sum$coefficients[,1]
multivar4_out[,2]<-cox_sum$coefficients[,2]
multivar4_out[,3]<-cox_sum$conf.int[,3]
multivar4_out[,4]<-cox_sum$conf.int[,4]
multivar4_out[,5]<-cox_sum$coefficients[,5]
coxgenename<-rownames(multivar4_out)
#######根据模型将数据分为high,low.并制作生存曲线
#coxgenename<-c("ENSG00000225194","ENSG00000214145","ENSG00000231324","ENSG00000234996",
# "ENSG00000236908","ENSG00000245685","ENSG00000256039","ENSG00000256417",
# "ENSG00000266970","ENSG00000270547","ENSG00000272449","ENSG00000273472")
###添加基因,"ENSG00000228918","ENSG00000230454","ENSG00000249917",
###,"ENSG00000265933","ENSG00000270547","ENSG00000272449","ENSG00000272512","ENSG00000272953","ENSG00000267260")
coxtraindata<-cbind(coxtrain[,c(1,2)],coxtrain[,coxgenename])
coxscore<-predict(cox_model,type = "risk",newdata = coxtraindata)
coxrisk<-as.vector(ifelse(coxscore>median(coxscore),"high","low"))
trainriskdata<-cbind(coxtrain[,c(1,2)],coxscore,coxrisk)
colnames(trainriskdata)<-c("status","life_span","riskscore","risk")
###做多因素森林图
#library(survminer)
#ggforest(cox_model,main = "Hazard ratio",cpositions = c(0.02,0.22, 0.4), fontsize = 0.7, refLabel = "reference", noDigits = 2)
##生存分析
diff_train<-survdiff(Surv(life_span,status)~risk,data = trainriskdata)
p_valuetrain<-signif((1 - pchisq(diff_train$chisq,df=1)),3)
p_valuetrain
fit_curve_train<-survfit(Surv(life_span,status)~risk,data = trainriskdata)##生存曲线做图
plot(fit_curve_train,col=c("red","blue"),xlab = "time(years)",ylab = "survival rate",
main="survival curve of train set",mark.time=T)
#legend(5,.4,paste("High risk (n=",nrow(allriskdata[allriskdata[,4]=="high",]),")",sep = ""),lty = NULL,text.col = "red",bty = "n")
#legend(5,.3,paste("Low risk (n=",nrow(allriskdata[allriskdata[,4]=="low",]),")",sep = ""),lty = NULL,text.col = "blue",bty = "n")
legend(15,.2,paste("P value =",p_valuetrain,sep = ""),lty = NULL,col = "black",bty = "n")
##绘制ROC曲线
roctrain<-survivalROC(Stime = trainriskdata$life_span,status = trainriskdata$status,marker = trainriskdata$riskscore,
predict.time = 5,method = "KM")
plot(roctrain$FP,roctrain$TP,type = "l",xlim = c(0,1),ylim = c(0,1),col="green")
roctrain$AUC
###训练集检测结果
coxtestdata<-cbind(coxtest[,1:2],coxtest[,coxgenename])
coxtestscore<-predict(cox_model,type = "risk",newdata = coxtestdata)
coxtestrisk<-as.vector(ifelse(coxtestscore>median(coxscore),"high","low"))
testriskdata<-cbind(coxtestdata[,c(1,2)],coxscore,coxrisk)
colnames(testriskdata)<-c("status","life_span","riskscore","risk")
##训练集生存分析和ROC曲线
diff_test<-survdiff(Surv(life_span,status)~risk,data = testriskdata)
p_valuetest<-signif((1 - pchisq(diff_test$chisq,df=1)),3)
p_valuetest
fit_curve_test<-survfit(Surv(life_span,status)~risk,data = testriskdata)##生存曲线做图
plot(fit_curve_test,col=c("red","blue"),xlab = "time(years)",ylab = "survival rate",
main="survival curve of test set",mark.time=T)
#legend(5,.4,paste("High risk (n=",nrow(allriskdata[allriskdata[,4]=="high",]),")",sep = ""),lty = NULL,text.col = "red",bty = "n")
#legend(5,.3,paste("Low risk (n=",nrow(allriskdata[allriskdata[,4]=="low",]),")",sep = ""),lty = NULL,text.col = "blue",bty = "n")
legend(15,.2,paste("P value =",p_valuetest,sep = ""),lty = NULL,col = "black",bty = "n")
roctest<-survivalROC(Stime = testriskdata$life_span,status = testriskdata$status,marker = testriskdata$riskscore,
predict.time = 5,method = "KM")
plot(roctest$FP,roctest$TP,type = "l",xlim = c(0,1),ylim = c(0,1),col="green")
roctest$AUC
print(k)
print(p_valuetest)
print(roctest$AUC)
if(p_valuetest<0.05&roctest$AUC>0.7){bresk;}
}