1. 代谢组数据示例(test_meta.txt)
SampleID mws1401
A13 0.00
A16 0.00
A7 0.00
A17 0.00
A11 0.00
A3 0.00
A12 0.00
A20 0.00
A19 0.00
A15 5.98
A14 5.32
A9 0.00
A8 6.01
A5 6.04
2.基因表达谱数据示例(test_gene.txt)
GeneID A13 A16 A7 A17 A11 A3 A12 A20 A19 A15 A14 A9 A8 A5
Gene1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 5.98 5.32 0.00 6.01 6.04
Gene2 5.59 5.71 5.74 5.49 5.62 5.79 5.70 6.02 5.94 5.52 5.72 5.43 5.87 5.95
Gene3 5.26 5.96 6.49 5.58 5.97 6.58 6.10 6.72 6.51 6.52 6.37 5.87 6.50 6.92
Gene4 5.62 5.51 4.36 5.91 5.27 5.89 4.94 6.62 5.38 5.95 5.82 5.44 5.91 6.64
Gene5 4.35 4.35 4.85 4.02 4.49 5.02 4.50 4.87 4.71 5.13 5.28 5.20 4.88 5.27
脚本:
# 读取数据
meta_file <- "test_meta.txt"
gene_exp <- "test_gene.txt"
output <- "test_result.txt"
# 读取数据并进行转置
meta_data <- as.data.frame(read.table(file = meta_file, header = TRUE, row.names = 1, sep = "\t"))
gene_data <- as.data.frame(read.table(file = gene_exp, header = TRUE, row.names = 1))
gene_data <- t(gene_data)
# 获取列名
meta.names <- colnames(meta_data)[1:length(colnames(meta_data))]
gene.names <- colnames(gene_data)[1:length(colnames(gene_data))]
# 初始化空数据框 out
out <- data.frame(matrix(ncol = 4, nrow = 0))
colnames(out) <- c("meta", "gene", "cor", "p")
# 数据标准化
meta_data_scaled <- as.data.frame(scale(meta_data))
gene_data_scaled <- as.data.frame(scale(gene_data))
# 循环计算相关性
for (gene in gene.names) {
dd <- data.frame(gene1 = gene_data_scaled[, gene], meta_data_scaled)
for (meta in meta.names) {
form <- paste(meta, "~ gene1", sep = " ")
form <- as.formula(form)
lm.result <- lm(form, dd)
cor <- as.numeric(summary(lm.result)$coefficients[, 'Estimate'][2])
p <- as.numeric(summary(lm.result)$coefficients[, 'Pr(>|t|)'][2])
tmp <- data.frame(meta = meta, gene = gene, cor = cor, p = p)
out <- rbind(out, tmp)
}
}
# 写入结果
write.table(out, file = output, quote = FALSE, sep = "\t")
还可以添加群体结构(PCA结果作为其他自变量/协变量)。下面脚本中没做数据标准化处理,表头可能也有问题。
meta_file <- "test_meta.txt"
gene_exp <- "test_gene.txt"
pca_cor = read.table("pca_sort.txt",header=T,sep=" ")
output <- "test_result.txt"
meta_data=as.data.frame(read.table(file=meta_file,header=T,row.names=1,sep="\t"))
gene_data=as.data.frame(read.table(file=gene_exp,header=T,row.names=1))
meta.names=colnames(meta_data)[1:length(colnames(meta_data))]
gene.names=colnames(gene_data)[1:length(colnames(gene_data))]
out=c("meta","Gene","cor","p")
for (gene in gene.names) {
dd=data.frame(gene1 = gene_data[,gene], pc1 = pca_cor$PC1, pc2 = pca_cor$PC2, pc3 = pca_cor$PC3, meta_data[,1:length(colnames(meta_data))])
for(meta in meta.names) {
form <- paste(meta,"~ gene1+pc1+pc2+pc3",sep=" ")
form <- as.formula(form)
print(form)
lm.result <- lm(form,dd)
cor=as.numeric(summary(lm.result)$coefficients[,'Estimate'][2])
p=as.numeric(summary(lm.result)$coefficients[,'Pr(>|t|)'][2])
tmp=cbind(meta,gene,cor,p)
out=rbind(out,tmp)
}
}
write.table(out,file=output,quote=FALSE,sep="\t")