## R实现
library(survival)
library(survminer) #for plot
setwd("C:/Users/zhaox/Desktop/R_study_day01/cox/单因素cox接多因素cox回归_表达分组生存曲线")
all<-read.table("For_survival_01A.xls",header=T)
测试数据格式如下,来自TCGA数据整理:
rownames(all)<-all[,1]
start=4 # 需要对应数据进行修改,第4列开始的列号!
############################################
Pvalue_surv<-c(rep('NA',ncol(all)-start+1))
Hazard_Ratio_surv<-c(rep('NA',ncol(all)-start+1))
for(j in start:ncol(all)){
#j=4
i=j-start+1
gene_data<-all[,c(1:(start-1),j)]
colnames(gene_data)[start]<-"GENE"
model<-coxph(Surv(OS,EVENT)~GENE,data=gene_data)
## gene split two group
gdata<-data.frame(OS=gene_data$OS,event=gene_data$EVENT)
gdata$label=ifelse(gene_data$GENE>median(gene_data$GENE),'high','low')
model_g_p<-survfit(Surv(OS,event)~label,data=gdata) #画图用
model_g<-survdiff(Surv(OS,event)~label,data=gdata) #有分组
Pvalue_surv[i]<-1 - pchisq(model_g$chisq, length(model_g$n) -1)
Hazard_Ratio_surv[i] = (model_g$obs[1]/model_g$exp[1])/(model_g$obs[2]/model_g$exp[2])
#for picture #画图,速度较慢,默认关闭
name <- paste(colnames(all)[j],"_cox_one_to_one.tiff",sep = "",collapse = "")
tiff(file=name,res=300,width=4400,height=2000,compression = "lzw",units="px")
p<-ggsurvplot(model_g_p, conf.int=F,data = gdata, pval=T,risk.table=T,ggtheme = theme_minimal())
print(p)
dev.off()
}
## gene_2group_surv
name <- paste("pvalue_gene_groups_surv.csv",sep = "",collapse = "")
Pvalue_cox_results_g<-cbind(GENE=colnames(all)[start:ncol(all)],Pvalue_surv=Pvalue_surv,Hazard_Ratio_surv=Hazard_Ratio_surv)
write.csv(Pvalue_cox_results_g,file=name,row.names=FALSE)
name1 <- paste("pvalue_gene_groups_surv_filtered.csv",sep = "",collapse = "")
Pvalue_cox_results_filtered_g<-Pvalue_cox_results_g[as.numeric(Pvalue_cox_results_g[,2])<0.05,]
write.csv(Pvalue_cox_results_filtered_g,file=name1,row.names=FALSE)
## 结果表
HR 值>1 ,表示对应基因的高表达是一种危险因素。
##生存曲线