数据准备ASV_table和tax_table,即asv丰度表和asv的注释表
其次需要安装Hmisc和igraph这两个包
输出结果:
①network.node_list.tsv ; network.edge_list.tsv;network_parameter.tsv;这三个文件主要可以用于后续在chiplot上进行可视化操作;
②network.graphml;该文件在Gephi中可识别,包含了①中三个文件的信息,可以方便在Gephi中可视化分析结果。
library(Hmisc)
library(igraph)
# ASV表:行为样本,列为ASV
ASV_table = read.table("/Users/xjm/Documents/sampling/resampling_data_analysis/16S_test/dada2_single_end_output/seq_table_no_chloro_mito.tsv", fill = T, check.names = F, row.names = 1, header=T, sep="\t")
# 物种分类表
tax_table = read.table("/Users/xjm/Documents/sampling/resampling_data_analysis/16S_test/dada2_single_end_output/ASV_taxon_no_chloro_mito.tsv", fill = T, check.names = F, row.names = 1, header=T, sep="\t")
# ASV过滤阈值参考文献:https://doi.org/10.1038/s41396-020-0621-7
# 取流行度比例大于20%的ASV
prev_prop_df = apply(X = ASV_table,
MARGIN = 2,
FUN = function(x){sum(x > 0)/dim(ASV_table)[1]})
total_relative_abundance = colSums(ASV_table)/sum(colSums(ASV_table))
is.above.prev = prev_prop_df > 0.2
table(is.above.prev)
# 取总相对丰度大于0.01%的ASV
is.above.abundance = total_relative_abundance > 0.0001
table(is.above.abundance)
table(is.above.prev & is.above.abundance)
target_ASV = colnames(ASV_table)[is.above.prev & is.above.abundance]
# 转换成相对丰度
ASV_table.prop = as.data.frame(t(apply(ASV_table, 1, function(ASV) ASV/sum(ASV))))
target_ASV_table.prop = ASV_table.prop[, target_ASV]
print(rowSums(target_ASV_table.prop))
# Calculate pairwise correlation (Spearman correlation)
correlation_matrix <- rcorr(as.matrix(target_ASV_table.prop), type="spearman")
# 相关性过滤阈值参考文献:https://doi.org/10.1186/s40168-023-01708-6
# Thresholding: Retain only significant correlations
correlation_threshold <- 0.7 # Adjust as needed
p_threshold <- 0.001 # Adjust as needed 0.05 有点太大
CorrDF <- function(cormat, pmat) {
ut <- upper.tri(cormat) # 由于相关性矩阵是对称的,取上三角矩阵计算即可
data.frame(
source = rownames(cormat)[col(cormat)[ut]],
target = rownames(cormat)[row(cormat)[ut]],
corr = (cormat)[ut],
weight = abs((cormat)[ut]), # 相关性系数绝对值作为权重
p = pmat[ut],
p.adjust = p.adjust(pmat[ut], method="BH"), #Multiple testing correction using Benjamini-Hochberg standard false discovery rate correction ("FDR-BH"), to minimize false-positive signals before network construction
cor_type = ifelse((cormat)[ut]> 0, "Positive", "Negative")
)
}
cor_df <- CorrDF(correlation_matrix$r , correlation_matrix$P) # 整理ASV之间的连接关系
significant_cor_df <- cor_df[which(cor_df$weight >= correlation_threshold),] # 保留spearman相关性绝对值>0.7的边
significant_cor_df <- cor_df[which(cor_df$p.adjust < p_threshold),] # 保留q-value < 0.001的边
# 非加权无向
g = graph_from_data_frame(significant_cor_df, directed=F)
# plot_network(g, ps.prop_above_prev_abund, type = 'taxa', color = "Phylum")
significant_tax = tax_table[V(g)$name,]
V(g)$Kingdom = significant_tax$Kingdom #界
V(g)$Phylum = significant_tax$Phylum #门
V(g)$Class = significant_tax$Class #纲
V(g)$Order = significant_tax$Order #目
V(g)$Family = significant_tax$Family #科
V(g)$Genus = significant_tax$Genus #属
V(g)$Species = significant_tax$Species #种
# 创建节点列表
node_list = data.frame(node_id = names(V(g)),significant_tax)
# 创建边列表
edge_list = data.frame(edge_id = paste0("edge_", c(1:length(E(g)))), significant_cor_df)
table(edge_list$cor_type)
# Network analysis
nodes_num = length(V(g)) #节点数
nodes_num
edges_num = length(E(g)) #边数
edges_num
positive.corr_num = sum(E(g)$corr>0) #正相关的数量
positive.corr_num
positive.corr_prop = positive.corr_num/edges_num #正相关的比例
positive.corr_prop
negative.corr_num = sum(E(g)$corr<0) #负相关的数量
negative.corr_num
negative.corr_prop = negative.corr_num/edges_num #负相关的比例
negative.corr_prop
average_degree = mean(degree(g)) #平均度
average_degree
average_shortest_path_length = mean_distance(g, directed = FALSE) #平均最短路径长度
average_shortest_path_length
network_diameter = diameter(g, directed = FALSE) #网络直径
network_diameter
network_density = edge_density(g) #网络密度
network_density
modularity = modularity(g, membership(cluster_louvain(g)), directed = F) # 模块度
modularity
# 传递性:Transitivity measures the probability that the adjacent vertices of a vertex are connected. This is sometimes also called the clustering coefficient.
# 聚类系数
clustering_coefficient <- transitivity(g, type="global")
clustering_coefficient
network_parameter = data.frame(nodes_num,
edges_num,
positive.corr_num,
positive.corr_prop,
negative.corr_num,
negative.corr_prop,
average_degree,
average_shortest_path_length,
network_diameter,
network_density,
clustering_coefficient,
modularity
)
output_dir = "/Users/xjm/Documents/sampling/resampling_data_analysis/16S_test/summary_output/network"
write.table(node_list, file = file.path(output_dir, "network.node_list.tsv"), sep = "\t", quote = F, row.names = F)
write.table(edge_list, file = file.path(output_dir, "network.edge_list.tsv"), sep = "\t", quote = F, row.names = F)
write.table(network_parameter, file = file.path(output_dir, "network_parameter.tsv"), sep = "\t", quote = F, row.names = F)
write_graph(g, file.path(output_dir, 'network.graphml'), format = 'graphml') #后续在Gephi中可视化