导读
数据:多个样本,每个样本两个不同部位(gut breast)采样品做宏基因组数据分析,得到两个细菌丰度表。
目的1:计算每个样本两个部位的共有细菌数,绘制散点图。
目的2:计算每个样本两个部位的共有细菌总丰度和特有细菌总丰度,绘制堆叠图。
一、输入数据
head(gut_pick[,c(1,2,3,4)])
sample_1 sample_2 sample_3 sample_4
Veillonellaceae_Megamonas 0.600034324 0.048869197 0.000000000 0.0000000000
Enterococcaceae_Enterococcus 0.000000000 0.000000000 0.009826243 0.0000000000
Bacteroidaceae_Bacteroides 0.156519501 0.064343341 0.200399441 0.4834385139
Prevotellaceae_Prevotella 0.001072639 0.344630340 0.000000000 0.0001426177
Lachnospiraceae_Roseburia 0.000000000 0.015408015 0.000000000 0.0144400471
Lachnospiraceae_Ruminococcus 0.009224696 0.004166116 0.100978630 0.0054551289
head(breast_pick[,c(1,2,3,4)])
sample_1 sample_2 sample_3 sample_4
Prevotellaceae_Prevotella 0.010835880 0.096867862 1.541043e-01 0.056627412
Staphylococcaceae_Staphylococcus 0.105389767 0.005536413 3.527276e-03 0.096613213
Pseudomonadaceae_Pseudomonas 0.001300306 0.012156878 1.010239e-02 0.013091957
Oxalobacteraceae_Ralstonia 0.022083523 0.133028766 9.044211e-02 0.151298756
Corynebacteriaceae_Corynebacterium 0.004746115 0.016454373 1.161262e-01 0.001378101
Propionibacteriaceae_Propionibacterium 0.773443426 0.006426885 6.849081e-05 0.002630920
二、计算共有细菌数,共有、特有细菌丰度
# 交集
union_value = c()
breast_share = c()
breast_only = c()
for(i in 1:length(gut_pick[1, ]))
{
# 每个样品单独建两个部位细菌分度数据框
gut_tmp = data.frame(name = rownames(gut_pick), gut = gut_pick[, i])
breast_tmp = data.frame(name = rownames(breast_pick), breast = breast_pick[, i])
all_tmp = merge(gut_tmp, breast_tmp, by="name", all=T)
all_tmp[is.na(all_tmp)] <- 0 # 0代替NA
union = all_tmp[all_tmp[,2] > 0 & all_tmp[,3] > 0, ] # 收集共有细菌
breast_only_df = all_tmp[all_tmp[, 2] == 0 & all_tmp[, 3] > 0, ] # 收集breast特有细菌
union_value = c(union_value, nrow(union)) # 共有细菌数量
breast_share = c(breast_share, sum(union[, 3])) # 共有细菌在breast种的丰度和
breast_only = c(breast_only, sum(breast_only_df[, 3])) # breast特有细菌丰度和
}
三、准备绘图文件
# 对样品按照序号排序
li = strsplit(colnames(gut_pick), split="_")
name_id = c()
for(i in 1:length(li))
{
name_id = c(name_id, as.numeric(unlist(li[i])[2]))
}
# 构建绘图输入文件
data = data.frame(name = colnames(gut_pick), name_id, union_value, breast_share, breast_only)
data = data[order(data[, 2], decreasing = F),]
head(data)
name name_id union_value breast_share breast_only
1 sample_1 1 8 0.01543029 0.9636348
2 sample_2 2 24 0.15807813 0.5722250
3 sample_3 3 14 0.15561111 0.5703914
4 sample_4 4 15 0.10611376 0.6331955
5 sample_5 5 11 0.14202702 0.5492496
6 sample_6 6 12 0.02919120 0.6147524
四、绘制共有细菌数散点图
# 可视化
result = ggplot(data, aes(x=name, y=union_value, color = name)) +
geom_point(size=5) +
theme_classic() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 15, face = "bold")) +
theme(axis.text.y = element_text(size = 12, face = "bold")) +
labs(x="", y="Shared Genera") +
theme(axis.title.y = element_text(size = 28, face = "bold")) +
# theme(axis.title.x = element_text(face = "bold")) +
scale_x_discrete(limits = factor(data$name)) +
theme(legend.position = "none") +
theme(axis.line = element_line(size = 1)) +
theme(axis.ticks = element_line(size = 1))
ggsave(result, filename = "../source_scatter.pdf", width = 14, height = 7)
五、绘制共有、特有细菌丰度堆叠图
library(reshape2)
stack = data[, c(1, 4, 5)]
melt = melt(stack, id="name")
head(stack)
name breast_share breast_only
1 sample_1 0.01543029 0.9636348
2 sample_2 0.15807813 0.5722250
3 sample_3 0.15561111 0.5703914
4 sample_4 0.10611376 0.6331955
5 sample_5 0.14202702 0.5492496
6 sample_6 0.02919120 0.6147524
result = ggplot(melt, aes(x=name, y=value, fill = variable))+
geom_col(position='fill', width = 0.8) +
theme_classic() +
labs(x="", y="Relative Abundance") +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 15, face = "bold")) +
theme(axis.text.y = element_text(size = 12, face = "bold")) +
theme(axis.title.y = element_text(size = 28, face = "bold")) +
theme(legend.title = element_blank(), legend.position = "bottom") +
scale_fill_manual(
values = c("breast_share" = "#ADD8E6", "breast_only" = "#90EE90"),
labels = c("Shared with Gut", "Breast Only")) +
# values labels必须在一个manual里使用,否则报错
theme(legend.text = element_text(face = "bold")) +
theme(axis.line = element_line(size = 1)) +
theme(axis.ticks = element_line(size = 1)) +
scale_x_discrete(limits = factor(data$name)) +
scale_y_continuous(expand=c(0, 0))
ggsave(result, filename="source_stack.png", width=14, height=7)
六、配对样品中细菌共现分析
分析思路
1 merge获得总细菌信息,结合样品数信息,制作细菌样品矩阵/表,0填充
2 利用循环统计每对配对样品的细菌呈现情况(是否贡献),丰度都大于0的细菌记为1,否则仍是0
3 获得总细菌样品呈现表,统计每个细菌的总共现次数
4 对前10绘制柱形图
# 常见细菌
# 获取总细菌名称,制作细菌共现初始表
i = 1
gut_tmp = data.frame(name = rownames(gut_pick), gut = gut_pick[, i])
breast_tmp = data.frame(name = rownames(breast_pick), breast = breast_pick[, i])
all_tmp = merge(gut_tmp, breast_tmp, by="name", all=T)
all_tmp[is.na(all_tmp)] <- 0 # 0代替NA
row_num = nrow(all_tmp)
col_num = ncol(gut_pick)
count_freq = data.frame(matrix(0, row_num, col_num))
rownames(count_freq) = all_tmp[,1]
colnames(count_freq) = colnames(gut_pick)
count_freq = data.frame(count_freq)
# 计算每对样品中每个细菌是否共现,共现记为1
for(i in 1:length(gut_pick[1, ]))
{
# 每个样品单独建两个部位细菌分度数据框
gut_tmp = data.frame(name = rownames(gut_pick), gut = gut_pick[, i])
breast_tmp = data.frame(name = rownames(breast_pick), breast = breast_pick[, i])
all_tmp = merge(gut_tmp, breast_tmp, by="name", all=T)
all_tmp[is.na(all_tmp)] <- 0 # 0代替NA
for(j in 1:row_num)
{
if(all_tmp[j, 2] > 0 & all_tmp[j, 3] > 0)
{
count_freq[j, i] = 1
}
}
}
count_freq$sum = apply(count_freq[, 1:50], 1, FUN=sum)
count_freq = count_freq[order(count_freq$sum, decreasing = T),]
write.csv(count_freq, file="genus_frequency.csv", row.names=T)
七、共现细菌top10柱形图
绘图文件
# 绘柱形图
input = data.frame(name = rownames(count_freq), count = count_freq$sum)[1:10, ]
input
name count
1 Prevotellaceae_Prevotella 35
2 Streptococcaceae_Streptococcus 32
3 Bacteroidaceae_Bacteroides 31
4 Ruminococcaceae_Oscillospira 30
5 Bifidobacteriaceae_Bifidobacterium 27
6 Ruminococcaceae_Ruminococcus 22
7 Clostridiaceae_Clostridium 21
8 Lachnospiraceae_Ruminococcus 21
9 Lactobacillaceae_Lactobacillus 17
10 Lachnospiraceae_Blautia 16
绘图
result = ggplot(input, aes(x=name, y=count)) +
geom_bar(stat = "identity", color="black", fill="#6CA6CD") +
labs(x="", y="Number of Gut-Breast Pairs (n = 50)") +
scale_x_discrete(limits=factor(input[,1])) +
theme(panel.grid=element_blank(), panel.background=element_rect(color='black', fill='transparent')) +
theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
geom_text(aes(label = count, y = count/2), size = 7) +
# ylim(0, 50) +
scale_y_continuous(expand = c(0, 0.5)) +
theme(axis.text.x = element_text(size = 12, face = "bold")) + # X轴文本
theme(axis.text.y = element_text(size = 12, face = "bold")) + # Y轴文本
theme(axis.title.y = element_text(size = 16))
ggsave(result, filename="genus_frequency.pdf", width=10)
input2 = input[order(input$count, decreasing=F), ]
result2 = ggplot(input2, aes(x=name, y=count)) +
geom_bar(stat = "identity", color="black", fill="#6CA6CD") +
coord_flip() +
labs(x="", y="Number of Gut-Breast Pairs (n = 50)") +
scale_x_discrete(limits=factor(input2[,1])) +
geom_text(aes(label = count, y = count/2), size = 5) +
theme(panel.grid=element_blank(), panel.background=element_rect(color='black', fill='transparent')) +
scale_y_continuous(expand = c(0, 0.5)) +
theme(axis.text.x = element_text(size = 12, face = "bold")) + # X轴文本
theme(axis.text.y = element_text(size = 12, face = "bold")) + # Y轴文本
theme(axis.title.x = element_text(size = 16))
ggsave(result2, filename="genus_frequency2.pdf", width=7)
参考:
Discordant transmission of bacteria and viruses from mothers to babies at birth. Microbiome 2019