需要‘All_hg19gene_len.txt’文件者私信留言
#counts值转化为TPM,FPKM
#读入length
len=read.table('All_hg19gene_len.txt',header = T,sep = '\t')
colnames(len)[1] = 'SYMBOL'
# read expression Matter
exp=read.csv('exp-ID-counts.csv')
head(exp)
#链接
library(dplyr)
merge<-left_join(exp,len,by="SYMBOL")#根据基因那列进行合并
merge <- na.omit(merge)#删除错误值行
write.csv(merge,file = "merge.csv",sep = "\t")#读出文件,直接往下运行也许
#计算TPM
head(merge)
merge1 <- merge[!duplicated(merge$SYMBOL),]
rownames(merge1)<-merge1$SYMBOL
merge1<-merge1[,-1]
head(merge2)#最后一列Length是基因长度
merge2=merge1[,-c(95,96)]
kb <- merge2$Length/1000
kb
countdata <- merge2[,1:94]
rpk <- countdata / kb
rpk
tpm <- t(t(rpk)/colSums(rpk) * 1000000)
head(tpm)
write.table(tpm,file="tpm.xls",sep="\t",quote=F)
##计算FPKM
fpkm <- t(t(rpk)/colSums(countdata) * 10^6)
head(fpkm)
write.table(fpkm,file="fpkm.xls",sep="\t",quote=F)