1.导入文件
library("phyloseq")
help(package="phyloseq")
library("ggplot2")
library("ggpubr")#用于事后检验标记
qiimedata <- import_qiime(otufilename = "otu_table.tsv",
mapfilename = "metadata.txt",
treefilename = "tree.nwk",
refseqfilename ="aligned-dna-sequences.fasta")
2. 查看所有样本总序列数
sample_sums(qiimedata)
#SRR10097305 SRR10097306 SRR10097307 SRR10097308 SRR10097309 SRR10097310 SRR10097311 SRR10097312
# 14093 14084 10680 12389 17451 6992 15308 12294
#SRR10097313 SRR10097314 SRR10097315 SRR10097316 SRR10097317 SRR10097318 SRR10097319 SRR10097320
# 12272 18391 17311 15918 16400 13078 13108 10339
#SRR10097321 SRR10097323 SRR10097324 SRR10097325 SRR10097326 SRR10097327 SRR10097328 SRR10097329
# 12959 7874 8725 9194 11842 10178 9095 10673
3.抽平
set.seed(1234)#设置一个随机种子便于重复
rare.data <- rarefy_even_depth(sub_qiimedata,replace = TRUE)
ps.rarefied <- rarefy_even_depth(qiimedata, rngseed=1, sample.size=0.95*min(sample_sums(qiimedata)), replace=F)
#...
#114OTUs were removed because they are no longer
#present in any sample after random subsampling
#...
其结果是按照序列数最少的样品重采样,
sample_sums(rare.data)
#SRR10097305 SRR10097306 SRR10097307 SRR10097308 SRR10097309
# 6977 6977 6977 6977 6977
#SRR10097310 SRR10097311 SRR10097312 SRR10097313 SRR10097314
# 6977 6977 6977 6977 6977
#SRR10097315 SRR10097316 SRR10097317 SRR10097318 SRR10097319
# 6977 6977 6977 6977 6977
#SRR10097320 SRR10097321 SRR10097323 SRR10097324 SRR10097325
# 6977 6977 6977 6977 6977
#SRR10097326 SRR10097327 SRR10097328 SRR10097329 SRR10097330
# 6977 6977 6977 6977 6977
114个OTU由于重采样丰度较低而被删除。
a=qiimedata@otu_table[setdiff(rownames(qiimedata@otu_table),rownames(rare.otu)),]
#OTU Table: [114 taxa and 25 samples]
# taxa are rows
# SRR10097305 SRR10097306 SRR10097307 SRR10097308 SRR10097309
#fc1b43bf2d8af4dd093b78f20fe5b133 0 0 0 0 0
#74cb8df8fd0e39cfd2a0e1fdb5fc7f86 0 0 0 0 0
#e387a97e6247e0fff825ad9f1adf83ae 0 0 0 0 0
max(rowSums(a))
#[1] 13
即把序列数少于等于13的otu删除。
作图
plot_bar(rare.data, fill="Phylum")
转化OTUcount数为相对丰度
GPr = transform_sample_counts(rare.data, function(x) x / sum(x) *100)
plot_bar(GPr, fill="Phylum")+
facet_wrap(~group,scales = "free")