读取read counts数据
> options(stringsAsFactors = F)
> expMatrix <- read.csv("easy_input.csv",
row.names = 1, header = TRUE, as.is = T)
#查看前三个基因的read count
> expMatrix[1:3,]
> for(i in 1:4){
colnames(expMatrix)[i]=gsub("\\.","-",colnames(expMatrix)[i])
} ### 非特殊字符需要转换
载入GTF文件 (和之前的前期处理的gtf文件要一致) 读取注释文件基因的长度----------------------------------------------------------------------
> BiocManager::install("GenomicFeatures")
> library(GenomicFeatures)
> txdb <- makeTxDbFromGFF("F:/mto1 mito seq/GTF/Danio_rerio_Ensemble_97.gtf",format="gtf")```
----------
通过exonsBy获取每个gene上的所有外显子的起始位点和终止位点,然后用reduce去除重叠部分
> exons_gene <- exonsBy(txdb, by = "gene")
> exons_gene_lens <- lapply(exons_gene,function(x){sum(width(reduce(x)))})
> exons_gene_2_lens=data.frame(t(data.frame(exons_gene_lens)))
##变成二维数据结构
> names(exons_gene_2_lens)="length"
> head(exons_gene_2_lens)[1]
> write.csv(exons_gene_2_lens,"eff2_length.csv",row.names = T)
注释文件不变,获得的csv表可长期使用
eff_length2 <-read.csv("eff2_length.csv")
head(eff_length2)
##读取read count 表
count2 <-read.csv("mto1_mito_RNA_seq_rawreads_with genename.csv")
head(count2)
diff_name<-merge(eff_length2,count2,by="ensembl_gene_id")
head(diff_name)
write.csv(diff_name,"length.csv",row.names = T)
count <-read.csv("length.csv")
head(count)
mycounts<-count[,-1] #去掉第一列
rownames(mycounts)<-mycounts[,1]
mycounts<-mycounts[,-1]#去掉第一列
head(mycounts)
kb <- mycounts$length / 1000 #计算kb
kb
countdata <- mycounts[,2:7] ##获得2-7列所有Reas 数目
head(countdata)
rpk <- countdata / kb ## rpk 计算
head(rpk)
#write.csv(rpk,"rpk2.csv",row.names = T)
计算 FPKM
fpkm <- t(t(rpk)/colSums(countdata) * 10^6)
head(fpkm)
write.csv(fpkm,"fpkm.csv",row.names = T)
计算TPM
###tpm
###tpm <- t(t(rpk)/colSums(rpk) * 1000000)
###write.csv(tpm,"fpkm.csv",row.names = T)
将gene_id 转为 gene name
重新读入FPKM校正后的数据
mycounts<-read.csv("fpkm.csv")
head(mycounts)
rownames(mycounts)<-mycounts[,1]
###mycounts<-mycounts[,-1] ##去掉第1列
head(mycounts)
使用biomart包
library("biomaRt")
###看物种的数据库
#my_mart <-useMart("ensembl")
#datasets<-listDatasets(my_mart)
#View(datasets)
zebrafish<- useMart("ensembl", dataset = "drerio_gene_ensembl")
###看看有哪些物种属性
listAttributes(zebrafish)
###############
更换名字
my_ensembl_gene_id<-row.names(mycounts)
mms_symbols<- getBM(attributes=c('ensembl_gene_id','external_gene_name',"description",'start_position','end_position'),
filters = 'ensembl_gene_id', values = my_ensembl_gene_id, mart = zebrafish)
head(mms_symbols)
head(mycounts)
MTO1_FPKM<-merge(mycounts,mms_symbols,by="ensembl_gene_id")
### 两个表根据共同列合并
head(MTO1_FPKM)
write.csv(MTO1_FPKM, "MTO1_FPKM_START_END.csv",row.names=FALSE)