导读

数据：多个样本，每个样本两个不同部位(gut breast)采样品做宏基因组数据分析，得到两个细菌丰度表。
目的1：计算每个样本两个部位的共有细菌数，绘制散点图。
目的2：计算每个样本两个部位的共有细菌总丰度和特有细菌总丰度，绘制堆叠图。

一、输入数据

head(gut_pick[,c(1,2,3,4)])
                                sample_1    sample_2    sample_3     sample_4
Veillonellaceae_Megamonas    0.600034324 0.048869197 0.000000000 0.0000000000
Enterococcaceae_Enterococcus 0.000000000 0.000000000 0.009826243 0.0000000000
Bacteroidaceae_Bacteroides   0.156519501 0.064343341 0.200399441 0.4834385139
Prevotellaceae_Prevotella    0.001072639 0.344630340 0.000000000 0.0001426177
Lachnospiraceae_Roseburia    0.000000000 0.015408015 0.000000000 0.0144400471
Lachnospiraceae_Ruminococcus 0.009224696 0.004166116 0.100978630 0.0054551289

head(breast_pick[,c(1,2,3,4)])
                                          sample_1    sample_2     sample_3    sample_4
Prevotellaceae_Prevotella              0.010835880 0.096867862 1.541043e-01 0.056627412
Staphylococcaceae_Staphylococcus       0.105389767 0.005536413 3.527276e-03 0.096613213
Pseudomonadaceae_Pseudomonas           0.001300306 0.012156878 1.010239e-02 0.013091957
Oxalobacteraceae_Ralstonia             0.022083523 0.133028766 9.044211e-02 0.151298756
Corynebacteriaceae_Corynebacterium     0.004746115 0.016454373 1.161262e-01 0.001378101
Propionibacteriaceae_Propionibacterium 0.773443426 0.006426885 6.849081e-05 0.002630920

二、计算共有细菌数，共有、特有细菌丰度

# 交集
union_value = c()
breast_share = c()
breast_only = c()

for(i in 1:length(gut_pick[1, ]))
{
    # 每个样品单独建两个部位细菌分度数据框
    gut_tmp = data.frame(name = rownames(gut_pick), gut = gut_pick[, i])
    breast_tmp = data.frame(name = rownames(breast_pick), breast = breast_pick[, i])
    all_tmp = merge(gut_tmp, breast_tmp, by="name", all=T)
    all_tmp[is.na(all_tmp)] <- 0  # 0代替NA
    
    union = all_tmp[all_tmp[,2] > 0 & all_tmp[,3] > 0, ]  # 收集共有细菌
    breast_only_df = all_tmp[all_tmp[, 2] == 0 & all_tmp[, 3] > 0, ]  # 收集breast特有细菌

    union_value = c(union_value, nrow(union))  # 共有细菌数量
    breast_share = c(breast_share, sum(union[, 3]))  # 共有细菌在breast种的丰度和
    breast_only = c(breast_only, sum(breast_only_df[, 3]))  # breast特有细菌丰度和
}

三、准备绘图文件

# 对样品按照序号排序
li = strsplit(colnames(gut_pick), split="_")
name_id = c()
for(i in 1:length(li))
{
    name_id = c(name_id, as.numeric(unlist(li[i])[2]))
}

# 构建绘图输入文件
data = data.frame(name = colnames(gut_pick), name_id, union_value, breast_share, breast_only)
data = data[order(data[, 2], decreasing = F),]

head(data)
      name name_id union_value breast_share breast_only
1 sample_1       1           8   0.01543029   0.9636348
2 sample_2       2          24   0.15807813   0.5722250
3 sample_3       3          14   0.15561111   0.5703914
4 sample_4       4          15   0.10611376   0.6331955
5 sample_5       5          11   0.14202702   0.5492496
6 sample_6       6          12   0.02919120   0.6147524

四、绘制共有细菌数散点图

# 可视化
result = ggplot(data, aes(x=name, y=union_value, color = name)) +
  geom_point(size=5) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 15, face = "bold")) +
  theme(axis.text.y = element_text(size = 12, face = "bold")) +
  labs(x="", y="Shared Genera") +
  theme(axis.title.y = element_text(size = 28, face = "bold")) +
  # theme(axis.title.x = element_text(face = "bold")) +
  scale_x_discrete(limits = factor(data$name)) +
  theme(legend.position = "none") +
  theme(axis.line = element_line(size = 1)) +
  theme(axis.ticks = element_line(size = 1))
  
ggsave(result, filename = "../source_scatter.pdf", width = 14, height = 7)

五、绘制共有、特有细菌丰度堆叠图

library(reshape2)
stack = data[, c(1, 4, 5)]
melt = melt(stack, id="name")
head(stack)
      name breast_share breast_only
1 sample_1   0.01543029   0.9636348
2 sample_2   0.15807813   0.5722250
3 sample_3   0.15561111   0.5703914
4 sample_4   0.10611376   0.6331955
5 sample_5   0.14202702   0.5492496
6 sample_6   0.02919120   0.6147524

result = ggplot(melt, aes(x=name, y=value, fill = variable))+
  geom_col(position='fill', width = 0.8) +
  theme_classic() +
  labs(x="", y="Relative Abundance") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 15, face = "bold")) +
  theme(axis.text.y = element_text(size = 12, face = "bold")) +
  theme(axis.title.y = element_text(size = 28, face = "bold")) +
  theme(legend.title = element_blank(), legend.position = "bottom") +
  scale_fill_manual(
      values = c("breast_share" = "#ADD8E6", "breast_only" = "#90EE90"), 
      labels = c("Shared with Gut", "Breast Only")) +
  # values labels必须在一个manual里使用，否则报错
  theme(legend.text = element_text(face = "bold")) +
  theme(axis.line = element_line(size = 1)) +
  theme(axis.ticks = element_line(size = 1)) +
  scale_x_discrete(limits = factor(data$name)) +
  scale_y_continuous(expand=c(0, 0))

ggsave(result, filename="source_stack.png", width=14, height=7)

六、配对样品中细菌共现分析

分析思路
1 merge获得总细菌信息，结合样品数信息，制作细菌样品矩阵/表，0填充
2 利用循环统计每对配对样品的细菌呈现情况（是否贡献），丰度都大于0的细菌记为1，否则仍是0
3 获得总细菌样品呈现表，统计每个细菌的总共现次数
4 对前10绘制柱形图

# 常见细菌
# 获取总细菌名称，制作细菌共现初始表
i = 1
gut_tmp = data.frame(name = rownames(gut_pick), gut = gut_pick[, i])
breast_tmp = data.frame(name = rownames(breast_pick), breast = breast_pick[, i])
all_tmp = merge(gut_tmp, breast_tmp, by="name", all=T)
all_tmp[is.na(all_tmp)] <- 0  # 0代替NA

row_num = nrow(all_tmp)
col_num = ncol(gut_pick)
count_freq = data.frame(matrix(0, row_num, col_num))
rownames(count_freq) = all_tmp[,1]
colnames(count_freq) = colnames(gut_pick)
count_freq = data.frame(count_freq)

# 计算每对样品中每个细菌是否共现，共现记为1
for(i in 1:length(gut_pick[1, ]))
{
    # 每个样品单独建两个部位细菌分度数据框
    gut_tmp = data.frame(name = rownames(gut_pick), gut = gut_pick[, i])
    breast_tmp = data.frame(name = rownames(breast_pick), breast = breast_pick[, i])
    all_tmp = merge(gut_tmp, breast_tmp, by="name", all=T)
    all_tmp[is.na(all_tmp)] <- 0  # 0代替NA
    
    for(j in 1:row_num)
    {
        if(all_tmp[j, 2] > 0 & all_tmp[j, 3] > 0)
        {
            count_freq[j, i] = 1
        }
    }
}

count_freq$sum = apply(count_freq[, 1:50], 1, FUN=sum)
count_freq = count_freq[order(count_freq$sum, decreasing = T),]

write.csv(count_freq, file="genus_frequency.csv", row.names=T)

七、共现细菌top10柱形图

绘图文件

# 绘柱形图
input = data.frame(name = rownames(count_freq), count = count_freq$sum)[1:10, ]
input
                                 name count
1           Prevotellaceae_Prevotella    35
2      Streptococcaceae_Streptococcus    32
3          Bacteroidaceae_Bacteroides    31
4        Ruminococcaceae_Oscillospira    30
5  Bifidobacteriaceae_Bifidobacterium    27
6        Ruminococcaceae_Ruminococcus    22
7          Clostridiaceae_Clostridium    21
8        Lachnospiraceae_Ruminococcus    21
9      Lactobacillaceae_Lactobacillus    17
10            Lachnospiraceae_Blautia    16

绘图

result = ggplot(input, aes(x=name, y=count)) +
  geom_bar(stat = "identity", color="black", fill="#6CA6CD") +
  labs(x="", y="Number of Gut-Breast Pairs (n = 50)") +
  scale_x_discrete(limits=factor(input[,1])) +
  theme(panel.grid=element_blank(), panel.background=element_rect(color='black', fill='transparent')) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  geom_text(aes(label = count, y = count/2), size = 7) +
  # ylim(0, 50) +
  scale_y_continuous(expand = c(0, 0.5)) +
  theme(axis.text.x = element_text(size = 12, face = "bold")) +  # X轴文本
  theme(axis.text.y = element_text(size = 12, face = "bold")) + # Y轴文本
  theme(axis.title.y = element_text(size = 16)) 

ggsave(result, filename="genus_frequency.pdf", width=10)

input2 = input[order(input$count, decreasing=F), ]
result2 = ggplot(input2, aes(x=name, y=count)) +
  geom_bar(stat = "identity", color="black", fill="#6CA6CD") +
  coord_flip() +
  labs(x="", y="Number of Gut-Breast Pairs (n = 50)") +
  scale_x_discrete(limits=factor(input2[,1])) +
  geom_text(aes(label = count, y = count/2), size = 5) +
  theme(panel.grid=element_blank(), panel.background=element_rect(color='black', fill='transparent')) +
  scale_y_continuous(expand = c(0, 0.5)) +
  theme(axis.text.x = element_text(size = 12, face = "bold")) +  # X轴文本
  theme(axis.text.y = element_text(size = 12, face = "bold")) + # Y轴文本
  theme(axis.title.x = element_text(size = 16)) 

ggsave(result2, filename="genus_frequency2.pdf", width=7)

参考：
Discordant transmission of bacteria and viruses from mothers to babies at birth. Microbiome 2019

R语言：配对样品共有细菌分析 [散点图、堆叠图、柱形图]