SMR(Summary data-based Mendelian Randomization)是一个针对汇总数据的孟德尔随机化方法,可以基于GWAS汇总数据进行因果推断
1. SMR安装和准备
# 下载
$ wget https://yanglab.westlake.edu.cn/software/smr/download/smr_v1.3.1_src.tar.gz
$ tar -zxvf smr_v1.3.1_src.tar.gz
$ cd smr_v1.3.1_src
# 安装依赖项
$ sudo apt update
$ sudo apt install build-essential g++ zlib1g-dev
$ sudo apt install libeigen3-dev
$ sudo apt-get install libomp-dev
# 编译 SMR 工具
$ make
# 测试安装
$ ./smr --version
## *******************************************************************
## * Summary-data-based Mendelian Randomization (SMR)
## * Version 1.3.1
## * Build at Dec 5 2024 11:33:26, by GCC 9.4
## * (C) 2015 Futao Zhang, Zhihong Zhu and Jian Yang
## * The University of Queensland
## * MIT License
## *******************************************************************
## flags include:
## --bfile,--gwas-summary,--beqtl-summary,--maf,
## --keep,--remove,--extract-snp,--exclude-snp,
## --extract-probe,--extract-outcome-probe,--extract-exposure-probe,--exclude-probe,
## --exclude-outcome-probe,--exclude-exposure-probe,--eqtl-summary,--ld-upper-limit,
## --peqtl-heidi,--heidi-min-m,--make-besd,--out,
## --peqtl-smr,--smr,--cis-wind,--peqtl-trans,
## --peqtl-cis,--peqtl-other,--efile,--query,
## --heidi-off,--target-snp,--extract-trait,--thread-num,
## --besd-flist,--trans-wind,--plot,--eqtl-flist,
## --smr-format,--merlin-fastassoc-format,--plink-qassoc-format,--gemma-format,
## --update-freq,--genes,--smr-wind,--smr-file,
## --recode,--probe-var,--chr,--probe-chr,
## --snp-chr,--snp,--from-snp,--to-snp,
## --probe,--from-probe,--to-probe,--snp-wind,
## --probe-wind,--gene,--from-snp-kb,--to-snp-kb,
## --from-probe-kb,--to-probe-kb,--extract-single-exposure-probe,--extract-single-outcome-probe,
## --exclude-single-exposure-probe,--exclude-single-outcome-probe,--gene-list,--set-list,
## --set-wind,--smr-multi,--qfile,--make-besd-dense,
## --bolt-assoc-format,--geno-uni,--psmr,--beqtl-qc,
## --z-thresh,--heidi-mtd,--phet,--descriptive-trans,
## --descriptive-cis,--meta,--trans,--extract-cis,
## --rm-technical,--p-technical,--ld-lower-limit,--heidi-max-m,
## --extract-snp-p,--exclude-snp-p,--matrix-eqtl-format,--fastqtl-nominal-format,
## --fastqtl-permu-format,--add-n,--show-n,--update-epi,
## --update-esi,--cis-to-all,--mecs,--pmecs,
## --mmecs,--sample-overlap,--ld-multi-snp,--extract-target-snp-probe,
## --extract-snp-probe,--disable-freq-ck,--diff-freq,--diff-freq-prop,
## --r,--r2,--ld-wind,--bld,
## --make-bld,--snp-rm,--qtltools-nominal-format,--qtltools-permu-format,
## --nmecs,--outcome-wind,
2. 数据准备
1)eQTL数据(SMR官方网站提供了很多符合SMR输入格式的数据,这里以GTEx中“Minor_Salivary_Gland“为例)
SMR-1
$ wget https://yanglab.westlake.edu.cn/data/SMR/GTEx_V8_cis_eqtl_summary/Minor_Salivary_Gland.zip
$ unzip Minor_Salivary_Gland.zip
$ ll Minor_Salivary_Gland
## total 2954168
## drwxrwxr-x 2 shumin shumin 4096 10月 27 2021 ./
## drwxrwxr-x 3 shumin shumin 4096 12月 19 11:07 ../
## -rw-r----- 1 shumin shumin 2712829664 10月 26 2021 Minor_Salivary_Gland.besd
## -rw-r----- 1 shumin shumin 957307 10月 26 2021 Minor_Salivary_Gland.epi
## -rw-r--r-- 1 shumin shumin 311264607 10月 26 2021 Minor_Salivary_Gland.esi
2)GWAS数据(这里以Finngen中“Minor_Salivary_Gland“为例)
SMR-2
library('data.table')
library(stringr)
Finngen_1 <- read.table("/data/GWAS/Finngen/summary_stats_release_finngen_R12_M13_SJOGREN.txt", sep = "\t" , header = T)
# 过滤掉包含多个rsids号的行(以逗号分隔的多rsids)(不然后续smr,运行报错)
Finngen_1_filtered <- Finngen_1_filtered %>%
filter(str_count(as.character(rsids), ",") == 0)
# 筛选强相关的变量(p值小于 5e-8 的SNPs)
Finngen_1_sig <- subset(Finngen_1_filtered, pval < 5e-8)
# 创建符合SMR格式的GWAS数据框
gwasdata <- data.frame(Finngen_1_sig$rsids,
Finngen_1_sig$alt,
Finngen_1_sig$ref,
Finngen_1_sig$af_alt,
Finngen_1_sig$beta,
Finngen_1_sig$sebeta,
Finngen_1_sig$pval)
colnames(gwasdata)<-c("snp","A1","A2","freq","b","se","p")
gwasdata$n <- 487569
write.table(gwasdata, "gwas.txt", quote = FALSE, row.names = FALSE, sep = "\t")
★ “A1”需要是效应等位基因,“A2”是其他等位基因,“freq”需要是“A1”的频率
3)LD数据(这里以1000 Genomes为例)
$ wget http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
$ tar -zxvf 1kg.v3.tgz
$ ls
## AFR.bed AFR.fam AMR.bim EAS.bed EAS.fam EUR.bim SAS.bed SAS.fam
## AFR.bim AMR.bed AMR.fam EAS.bim EUR.bed EUR.fam SAS.bim
3. SMR运行
$ ./smr --bfile /data/1000_geomes/1kg.v3/EUR --gwas-summary /data/software/smr_v1.3.1_src/gwas_finngen_R12_M13_SJOGREN.txt --beqtl-summary /data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland --out finngen_R12_M13_SJOGREN_eQTl --thread-num 10
## GWAS summary data of 10442 SNPs to be included from [/data/software/smr_v1.3.1_src/gwas_finngen_R12_M13_SJOGREN.txt].
## Reading eQTL SNP information from [/data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland.esi].
## WARNING: frequency is "NA" in one or more rows.
## 9830315 SNPs to be included from [/data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland.esi].
## Reading PLINK FAM file from [/data/data/1000_geomes/1kg.v3/EUR.fam].
## 503 individuals to be included from [/data/data/1000_geomes/1kg.v3/EUR.fam].
## Reading PLINK BIM file from [/data/data/1000_geomes/1kg.v3/EUR.bim].
## 8550156 SNPs to be included from [/data/data/1000_geomes/1kg.v3/EUR.bim].
## Checking the consistency of the alleles of each SNP between pairwise data sets (including the GWAS summary data, the eQTL summary data and the LD reference data).
## 8971 SNPs are included after allele checking.
## Reading PLINK BED file from [/data/data/1000_geomes/1kg.v3/EUR.bed] in SNP-major format ...
## Genotype data for 503 individuals and 8971 SNPs to be included from [/data/data/1000_geomes/1kg.v3/EUR.bed].
## Calculating allele frequencies ...
## Checking the consistency of allele frequency of each SNP between pairwise data sets (including the GWAS summary data, the eQTL summary data and the LD reference data).
## 3 SNPs (0.03% <= 5.00%) with allele frequency differences > 0.20 between any pair of the data sets are excluded from the analysis.
## Reading eQTL summary data...
$ less -S finngen_R12_M13_SJOGREN_eQTl.smr |head
## probeID ProbeChr Gene Probe_bp topSNP topSNP_chr topSNP_bp A1 A2 Freq b_GWAS se_GWAS p_GWAS b_eQTL se_eQTL p_eQTL b_SMR se_SMR p_SMR p_HEIDI nsnp_HEIDI
## ENSG00000186470 6 BTN3A2 26365387 rs72841536 6 26378288 A T 0.111332 0.283773 0.0403837 2.111540e-12 -1.23465 0.139335 7.927148e-19 -0.229841 0.0417451 3.674589e-08 5.280583e-04 20
## ENSG00000235109 6 ZSCAN31 28292470 rs213240 6 28315875 C T 0.380716 0.180854 0.0259715 3.317420e-12 0.591824 0.0966545 9.177786e-10 0.305587 0.066457 4.260118e-06 5.401103e-02 20
## ENSG00000187987 6 ZSCAN23 28399707 rs2531806 6 28417152 A C 0.45825 0.165434 0.0251116 4.459640e-11 0.7062 0.0786298 2.675247e-19 0.234259 0.0440993 1.083735e-07 6.682091e-04 20
## ENSG00000204644 6 ZFP57 29640169 rs2747429 6 29648377 C T 0.188867 0.318981 0.0331253 6.000670e-22 1.18989 0.0957176 1.768441e-35 0.268076 0.0352142 2.683977e-14 1.310642e-03 20
## ENSG00000239257 6 RPL23AP1 29694446 rs733839 6 29718094 A G 0.429423 0.158626 0.0255717 5.532860e-10 0.642641 0.1059551.317469e-09 0.246835 0.0569173 1.446211e-05 4.885868e-03 20
## ENSG00000235821 6 IFITM4P 29718506 rs915668 6 29798459 G C 0.459245 0.179372 0.0251195 9.283250e-13 -0.610153 0.101024 1.544047e-09 -0.293979 0.0637503 3.999485e-06 5.221384e-03 20
## ENSG00000176998 6 HCG4 29758816 rs1619379 6 29785235 T C 0.366799 0.22137 0.0263859 4.871920e-17 -0.839654 0.10965 1.894743e-14 -0.263644 0.0466142 1.550505e-08 2.564319e-02 20
## ENSG00000230521 6 HCG4P7 29855071 rs2263288 6 29931564 A G 0.124254 -0.200798 0.0314909 1.813130e-10 -0.917605 0.128638 9.803455e-13 0.218828 0.0460311 1.995040e-06 2.498160e-03 16
## ENSG00000227262 6 HCG4B 29893760 rs2247504 6 29819077 A G 0.392644 0.162039 0.025794 3.341640e-10 -0.650043 0.0974901 2.596925e-11 -0.249274 0.0545176 4.822501e-06 2.008251e-10 20
4. SMR 轨迹图
# 下载 R 脚本
$ wget https://yanglab.westlake.edu.cn/software/smr/download/plot.zip
$ unzip plot.zip
# SMR 命令行生成用于绘图的数据文件
$ ./smr --bfile /data/1000_geomes/1kg.v3/EUR --gwas-summary /data/software/smr_v1.3.1_src/gwas_finngen_R12_M13_SJOGREN.txt --beqtl-summary /data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland --out BTN3A2 --thread-num 10 --plot --probe ENSG00000186470 --probe-wind 500 --gene-list glist-hg19
# 绘制图表
source("/data/software/smr_v1.3.1_src/plot/plot_SMR.r")
SMRData = ReadSMRData("/data/software/smr_v1.3.1_src/plot/BTN3A2.ENSG00000186470.txt")
## Read 3 items
## Read 2 items
## Read 2 items
## Read 2 items
## Read 2 items
## Read 0 items
SMRLocusPlot(data = SMRData, smr_thresh = 8.4e-6, heidi_thresh = 0.05, plotWindow = 1000, max_anno_probe = 16)
## Warning messages:
## 1: In xtfrm.data.frame(x) : cannot xtfrm data frames
## 2: In xtfrm.data.frame(x) : cannot xtfrm data frames
SMR-3
SMREffectPlot(data = SMRData, trait_name="Sjögren's_syndrome")
SMR-4