#多个数据集差异基因整合#

image.png

加载包,设置差异倍数logFC和P值

#=======================================================

#=======================================================
setwd('D:\\SCIwork\\F34TNBC\\s5deg')
library(dplyr)
library(tidyr)
library(tibble)
#清除环境变量
rm(list=ls()) 
padj=0.05
logFC=0.5

读取各个数据集的差异基因

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE18864')
a <- read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE58812')
b <- read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE76124')
c <-read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE76250')
d <- read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE83937')
e <- read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE95700')
f <- read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE106977')
g  <- read.csv("allDiff.csv", header = T, row.names = 1)

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\TCGA')
h  <- read.csv("allDiff.csv", header = T, row.names = 1)
setwd('D:\\SCIwork\\F34TNBC\\s5deg')

读取各个数据集的差异基因

image.png

读取这些差异基因的数据框,其必须含有的列是logfc和p值两类

image.png

保存符合我们设置的差异基因标准基因

# --------------------------------------------------------
# 
# --------------------------------------------------------

a <- a %>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(a, file = "allDiff1.csv", row.names = F)

b <- b %>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(b, file = "allDiff2.csv", row.names = F)

c <- c %>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(c, file = "allDiff3.csv", row.names = F)


d <- d%>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(d, file = "allDiff4.csv", row.names = F)


e <- e%>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(e, file = "allDiff5.csv", row.names = F)



f <- f%>%
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(f, file = "allDiff6.csv", row.names = F)


g <- g%>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(g, file = "allDiff7.csv", row.names = F)


h <- h%>% 
  rownames_to_column(var='gene')%>% 
  dplyr::filter((logFC > 0.5 & P.Value < 0.05) | (logFC < -0.5 & P.Value < 0.05))
  write.csv(h, file = "allDiff8.csv", row.names = F)

得到合并后的差异基因

# --------------------------------------------------------
# 
# --------------------------------------------------------
#读取差异基因分析结果,其必须包含两列:基因名和差异倍数#
files=c("allDiff1.csv","allDiff2.csv",
        "allDiff3.csv","allDiff4.csv",
        "allDiff5.csv","allDiff6.csv",
        "allDiff7.csv", "allDiff8.csv")
upList=list()
downList=list()
allFCList=list()

for(i in 1:length(files)){
  inputFile=files[i]
  rt=read.csv(inputFile,header=T,sep = ',', row.names = 1) # 注意文件读取
  rt$Gene <- rownames(rt)
  rt <- rt %>%
    distinct(Gene, .keep_all = T)%>%
    arrange(logFC)%>%
    dplyr::select(Gene, logFC)
  header=unlist(strsplit(inputFile,"_"))
  downList[[header[1]]]=as.vector(rt[,1])
  upList[[header[1]]]=rev(as.vector(rt[,1]))
  fcCol=rt[,1:2]
  colnames(fcCol)=c("Gene",header[[1]])
  allFCList[[header[1]]]=fcCol}

#合并文件
mergeLe=function(x,y){
  merge(x,y,by="Gene",all=T)}
newTab=Reduce(mergeLe,allFCList)
rownames(newTab)=newTab[,1]
newTab=newTab[,2:ncol(newTab)]
newTab[is.na(newTab)]=0

#计算共同上调基因
library(RobustRankAggreg)
upMatrix = rankMatrix(upList)
upAR = aggregateRanks(rmat=upMatrix)
colnames(upAR)=c("Name","Pvalue")
upAdj=p.adjust(upAR$Pvalue,method="bonferroni")
upXls=cbind(upAR,adjPvalue=upAdj)
upFC=newTab[as.vector(upXls[,1]),]
upXls=cbind(upXls,logFC=rowMeans(upFC))
write.table(upXls,file="up.xls",sep="\t",quote=F,row.names=F)
upSig=upXls[(upXls$Pvalue<padj & upXls$logFC>logFC),]
write.table(upSig,file="upSig.xls",sep="\t",quote=F,row.names=F)

#计算共同下调基因
downMatrix = rankMatrix(downList)
downAR = aggregateRanks(rmat=downMatrix)
colnames(downAR)=c("Name","Pvalue")
downAdj=p.adjust(downAR$Pvalue,method="bonferroni")
downXls=cbind(downAR,adjPvalue=downAdj)
downFC=newTab[as.vector(downXls[,1]),]
downXls=cbind(downXls,logFC=rowMeans(downFC))
write.table(downXls,file="down.xls",sep="\t",quote=F,row.names=F)
downSig=downXls[(downXls$Pvalue<padj & downXls$logFC< -logFC),]
write.table(downSig,file="downSig.xls",sep="\t",quote=F,row.names=F)

#合并上调与下调基因
allSig = rbind(upSig,downSig)
colnames(allSig)
write.csv(allSig,file = 'allSign.csv')

绘图准备:仅仅保留那些在所有数据存在的基因,以及删除TCGA和GEO趋势不一致的基因

setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE18864')
a <- read.csv("allDiff.csv", header = T, row.names = 1) %>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE58812')
b <- read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE76124')
c <-read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE76250')
d <- read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE83937')
e <- read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE95700')
f <- read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\GSE106977')
g  <- read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg\\TCGA')
h  <- read.csv("allDiff.csv", header = T, row.names = 1)%>% rownames_to_column(var='gene')
setwd('D:\\SCIwork\\F34TNBC\\s5deg')
target <- allSig$Name
length(target)
a <- subset(a, select =c("gene",   "logFC" ))
b <- subset(b, select =c("gene",   "logFC" ))
c <- subset(c, select =c("gene",   "logFC" ))
d <- subset(d, select =c("gene",   "logFC" ))
e <- subset(e, select =c("gene",   "logFC" ))
f <- subset(f, select =c("gene",   "logFC" ))
g <- subset(g, select =c("gene",   "logFC" ))
h <- subset(h, select =c("gene",   "logFC" ))
gene <- intersect(a$gene, b$gene)
gene <- intersect(c$gene, gene)
gene <- intersect(d$gene, gene)
gene <- intersect(e$gene, gene)
gene <- intersect(f$gene, gene)
gene <- intersect(g$gene, gene)
gene <- intersect(h$gene, gene)
gene <- intersect(allSig$Name, gene)
dat <-  data.frame()
for (i in 1:length(gene)) {   
  dat[i,1] <- a[which(a$gene %in% gene[i]),'logFC']
  dat[i,2] <- b[which(b$gene %in% gene[i]),'logFC']
  dat[i,3] <- c[which(c$gene %in% gene[i]),'logFC']
  dat[i,4] <- d[which(d$gene %in% gene[i]),'logFC']
  dat[i,5] <- e[which(e$gene %in% gene[i]),'logFC']
  dat[i,6] <- f[which(f$gene %in% gene[i]),'logFC']
  dat[i,7] <- g[which(g$gene %in% gene[i]),'logFC']
  dat[i,8] <- h[which(g$gene %in% gene[i]),'logFC']
}
colnames(dat) <- c("GSE18864",  "GSE58812",
                   "GSE76124",  "GSE76250",
                   "GSE83937",  "GSE95700",
                   "GSE106977",'TCGA'
)
rownames(dat) <- gene
dat <- dat[which(rownames(dat) %in% rownames(allSig)),]
hminput <- dat
for (i in 1:dim(hminput)[1]) {  
  hminput$re[i] <- hminput[i,1]*hminput[i,2]*hminput[i,3]*hminput[i,4]*hminput[i,5]*hminput[i,6]*hminput[i,7]*hminput[i,8]
  hminput$re1[i] <- hminput[i,1]*hminput[i,2]*hminput[i,3]*hminput[i,4]*hminput[i,5]*hminput[i,6]*hminput[i,7]  
}
table(hminput$re )
hminput <- subset(hminput, hminput$re != 0)
hminput$re <- NULL
hminput$re1 <- hminput$re1*hminput$TCGA
hminput <- subset(hminput, hminput$re1 >0)
hminput$re1 <- NULL
for (x in 1:nrow(hminput )) {  
  for ( y in 1:ncol(hminput )) {    
    if(hminput[x,y] > 2){
      hminput[x,y] <- 2}
    if(hminput[x,y] < -2){
      hminput[x,y] <- -2}   
    
  }
  
}
bk = unique(c(seq(-2,2, length=100)))
setwd('D:\\SCIwork\\F34TNBC\\s5deg')
library(pheatmap)
pdf(file="logFC.pdf",width = 4,height = 4)
pheatmap(hminput,display_numbers = F,
         cluster_cols = T,
         cluster_row = T,
         border_color  = NA,
         breaks = bk,
         show_colnames     = T,
         show_rownames     = T,
         color = colorRampPalette(c("#20B6E2",
                                    "#020303",
                                    "#F5EB17"))(100), 
         fontsize_row=1,
         fontsize_col=5 )
dev.off()

代码数据私信我

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
禁止转载,如需转载请通过简信或评论联系作者。
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 213,099评论 6 492
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 90,828评论 3 387
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 158,540评论 0 348
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 56,848评论 1 285
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 65,971评论 6 385
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 50,132评论 1 291
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 39,193评论 3 412
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 37,934评论 0 268
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 44,376评论 1 303
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 36,687评论 2 327
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 38,846评论 1 341
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 34,537评论 4 335
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 40,175评论 3 317
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 30,887评论 0 21
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 32,134评论 1 267
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 46,674评论 2 362
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 43,741评论 2 351

推荐阅读更多精彩内容