孟德尔随机化分析2: SMR

SMR(Summary data-based Mendelian Randomization)是一个针对汇总数据的孟德尔随机化方法,可以基于GWAS汇总数据进行因果推断

1. SMR安装和准备

# 下载
$ wget https://yanglab.westlake.edu.cn/software/smr/download/smr_v1.3.1_src.tar.gz
$ tar -zxvf smr_v1.3.1_src.tar.gz
$ cd smr_v1.3.1_src

# 安装依赖项
$ sudo apt update
$ sudo apt install build-essential g++ zlib1g-dev
$ sudo apt install libeigen3-dev
$ sudo apt-get install libomp-dev

# 编译 SMR 工具
$ make

# 测试安装
$ ./smr --version
## *******************************************************************
## * Summary-data-based Mendelian Randomization (SMR)
## * Version 1.3.1
## * Build at Dec  5 2024 11:33:26, by GCC 9.4
## * (C) 2015 Futao Zhang, Zhihong Zhu and Jian Yang
## * The University of Queensland
## * MIT License
## *******************************************************************
## flags include:
## --bfile,--gwas-summary,--beqtl-summary,--maf,
## --keep,--remove,--extract-snp,--exclude-snp,
## --extract-probe,--extract-outcome-probe,--extract-exposure-probe,--exclude-probe,
## --exclude-outcome-probe,--exclude-exposure-probe,--eqtl-summary,--ld-upper-limit,
## --peqtl-heidi,--heidi-min-m,--make-besd,--out,
## --peqtl-smr,--smr,--cis-wind,--peqtl-trans,
## --peqtl-cis,--peqtl-other,--efile,--query,
## --heidi-off,--target-snp,--extract-trait,--thread-num,
## --besd-flist,--trans-wind,--plot,--eqtl-flist,
## --smr-format,--merlin-fastassoc-format,--plink-qassoc-format,--gemma-format,
## --update-freq,--genes,--smr-wind,--smr-file,
## --recode,--probe-var,--chr,--probe-chr,
## --snp-chr,--snp,--from-snp,--to-snp,
## --probe,--from-probe,--to-probe,--snp-wind,
## --probe-wind,--gene,--from-snp-kb,--to-snp-kb,
## --from-probe-kb,--to-probe-kb,--extract-single-exposure-probe,--extract-single-outcome-probe,
## --exclude-single-exposure-probe,--exclude-single-outcome-probe,--gene-list,--set-list,
## --set-wind,--smr-multi,--qfile,--make-besd-dense,
## --bolt-assoc-format,--geno-uni,--psmr,--beqtl-qc,
## --z-thresh,--heidi-mtd,--phet,--descriptive-trans,
## --descriptive-cis,--meta,--trans,--extract-cis,
## --rm-technical,--p-technical,--ld-lower-limit,--heidi-max-m,
## --extract-snp-p,--exclude-snp-p,--matrix-eqtl-format,--fastqtl-nominal-format,
## --fastqtl-permu-format,--add-n,--show-n,--update-epi,
## --update-esi,--cis-to-all,--mecs,--pmecs,
## --mmecs,--sample-overlap,--ld-multi-snp,--extract-target-snp-probe,
## --extract-snp-probe,--disable-freq-ck,--diff-freq,--diff-freq-prop,
## --r,--r2,--ld-wind,--bld,
## --make-bld,--snp-rm,--qtltools-nominal-format,--qtltools-permu-format,
## --nmecs,--outcome-wind,

2. 数据准备

1)eQTL数据(SMR官方网站提供了很多符合SMR输入格式的数据,这里以GTEx中“Minor_Salivary_Gland“为例)


SMR-1
$ wget https://yanglab.westlake.edu.cn/data/SMR/GTEx_V8_cis_eqtl_summary/Minor_Salivary_Gland.zip
$ unzip Minor_Salivary_Gland.zip

$ ll Minor_Salivary_Gland
## total 2954168
## drwxrwxr-x 2 shumin shumin       4096 10月 27  2021 ./
## drwxrwxr-x 3 shumin shumin       4096 12月 19 11:07 ../
## -rw-r----- 1 shumin shumin 2712829664 10月 26  2021 Minor_Salivary_Gland.besd
## -rw-r----- 1 shumin shumin     957307 10月 26  2021 Minor_Salivary_Gland.epi
## -rw-r--r-- 1 shumin shumin  311264607 10月 26  2021 Minor_Salivary_Gland.esi

2)GWAS数据(这里以Finngen中“Minor_Salivary_Gland“为例)


SMR-2
library('data.table')
library(stringr)

Finngen_1 <- read.table("/data/GWAS/Finngen/summary_stats_release_finngen_R12_M13_SJOGREN.txt", sep = "\t" , header = T)

# 过滤掉包含多个rsids号的行(以逗号分隔的多rsids)(不然后续smr,运行报错)
Finngen_1_filtered <- Finngen_1_filtered %>%
  filter(str_count(as.character(rsids), ",") == 0)

# 筛选强相关的变量(p值小于 5e-8 的SNPs)
Finngen_1_sig <- subset(Finngen_1_filtered, pval < 5e-8)

# 创建符合SMR格式的GWAS数据框
gwasdata <- data.frame(Finngen_1_sig$rsids,
                       Finngen_1_sig$alt,
                       Finngen_1_sig$ref,
                       Finngen_1_sig$af_alt,
                       Finngen_1_sig$beta,
                       Finngen_1_sig$sebeta,
                       Finngen_1_sig$pval)

colnames(gwasdata)<-c("snp","A1","A2","freq","b","se","p")
gwasdata$n <- 487569

write.table(gwasdata, "gwas.txt", quote = FALSE, row.names = FALSE, sep = "\t")

★ “A1”需要是效应等位基因,“A2”是其他等位基因,“freq”需要是“A1”的频率

3)LD数据(这里以1000 Genomes为例)

$ wget http://fileserve.mrcieu.ac.uk/ld/1kg.v3.tgz
$ tar -zxvf 1kg.v3.tgz

$ ls
## AFR.bed  AFR.fam  AMR.bim  EAS.bed  EAS.fam  EUR.bim  SAS.bed  SAS.fam
## AFR.bim  AMR.bed  AMR.fam  EAS.bim  EUR.bed  EUR.fam  SAS.bim

3. SMR运行

$ ./smr --bfile /data/1000_geomes/1kg.v3/EUR --gwas-summary /data/software/smr_v1.3.1_src/gwas_finngen_R12_M13_SJOGREN.txt --beqtl-summary /data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland --out finngen_R12_M13_SJOGREN_eQTl --thread-num 10
## GWAS summary data of 10442 SNPs to be included from [/data/software/smr_v1.3.1_src/gwas_finngen_R12_M13_SJOGREN.txt].
## Reading eQTL SNP information from [/data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland.esi].
## WARNING: frequency is "NA" in one or more rows.
## 9830315 SNPs to be included from [/data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland.esi].
## Reading PLINK FAM file from [/data/data/1000_geomes/1kg.v3/EUR.fam].
## 503 individuals to be included from [/data/data/1000_geomes/1kg.v3/EUR.fam].
## Reading PLINK BIM file from [/data/data/1000_geomes/1kg.v3/EUR.bim].
## 8550156 SNPs to be included from [/data/data/1000_geomes/1kg.v3/EUR.bim].
## Checking the consistency of the alleles of each SNP between pairwise data sets (including the GWAS summary data, the eQTL summary data and the LD reference data).
## 8971 SNPs are included after allele checking.
## Reading PLINK BED file from [/data/data/1000_geomes/1kg.v3/EUR.bed] in SNP-major format ...
## Genotype data for 503 individuals and 8971 SNPs to be included from [/data/data/1000_geomes/1kg.v3/EUR.bed].
## Calculating allele frequencies ...
## Checking the consistency of allele frequency of each SNP between pairwise data sets (including the GWAS summary data, the eQTL summary data and the LD reference data).
## 3 SNPs (0.03% <= 5.00%) with allele frequency differences > 0.20 between any pair of the data sets are excluded from the analysis.
## Reading eQTL summary data...

$ less -S finngen_R12_M13_SJOGREN_eQTl.smr |head
## probeID  ProbeChr    Gene    Probe_bp    topSNP  topSNP_chr  topSNP_bp   A1  A2  Freq    b_GWAS  se_GWAS p_GWAS  b_eQTL  se_eQTL p_eQTL  b_SMR   se_SMR  p_SMR   p_HEIDI nsnp_HEIDI
## ENSG00000186470  6   BTN3A2  26365387    rs72841536  6   26378288    A   T   0.111332    0.283773    0.0403837   2.111540e-12    -1.23465    0.139335    7.927148e-19    -0.229841   0.0417451   3.674589e-08    5.280583e-04    20
## ENSG00000235109  6   ZSCAN31 28292470    rs213240    6   28315875    C   T   0.380716    0.180854    0.0259715   3.317420e-12    0.591824    0.0966545   9.177786e-10    0.305587    0.066457    4.260118e-06    5.401103e-02    20
## ENSG00000187987  6   ZSCAN23 28399707    rs2531806   6   28417152    A   C   0.45825 0.165434    0.0251116   4.459640e-11    0.7062  0.0786298   2.675247e-19    0.234259    0.0440993   1.083735e-07    6.682091e-04    20
## ENSG00000204644  6   ZFP57   29640169    rs2747429   6   29648377    C   T   0.188867    0.318981    0.0331253   6.000670e-22    1.18989 0.0957176   1.768441e-35    0.268076    0.0352142   2.683977e-14    1.310642e-03    20
## ENSG00000239257  6   RPL23AP1    29694446    rs733839    6   29718094    A   G   0.429423    0.158626    0.0255717   5.532860e-10    0.642641    0.1059551.317469e-09    0.246835    0.0569173   1.446211e-05    4.885868e-03    20
## ENSG00000235821  6   IFITM4P 29718506    rs915668    6   29798459    G   C   0.459245    0.179372    0.0251195   9.283250e-13    -0.610153   0.101024    1.544047e-09    -0.293979   0.0637503   3.999485e-06    5.221384e-03    20
## ENSG00000176998  6   HCG4    29758816    rs1619379   6   29785235    T   C   0.366799    0.22137 0.0263859   4.871920e-17    -0.839654   0.10965 1.894743e-14    -0.263644   0.0466142   1.550505e-08    2.564319e-02    20
## ENSG00000230521  6   HCG4P7  29855071    rs2263288   6   29931564    A   G   0.124254    -0.200798   0.0314909   1.813130e-10    -0.917605   0.128638    9.803455e-13    0.218828    0.0460311   1.995040e-06    2.498160e-03    16
## ENSG00000227262  6   HCG4B   29893760    rs2247504   6   29819077    A   G   0.392644    0.162039    0.025794    3.341640e-10    -0.650043   0.0974901   2.596925e-11    -0.249274   0.0545176   4.822501e-06    2.008251e-10    20

4. SMR 轨迹图

# 下载 R 脚本
$ wget https://yanglab.westlake.edu.cn/software/smr/download/plot.zip
$ unzip plot.zip

# SMR 命令行生成用于绘图的数据文件
$ ./smr --bfile /data/1000_geomes/1kg.v3/EUR --gwas-summary /data/software/smr_v1.3.1_src/gwas_finngen_R12_M13_SJOGREN.txt --beqtl-summary /data/GWAS/GTEx/SMR/Minor_Salivary_Gland/Minor_Salivary_Gland --out BTN3A2 --thread-num 10  --plot --probe ENSG00000186470 --probe-wind 500 --gene-list glist-hg19 
# 绘制图表
source("/data/software/smr_v1.3.1_src/plot/plot_SMR.r") 
SMRData = ReadSMRData("/data/software/smr_v1.3.1_src/plot/BTN3A2.ENSG00000186470.txt")
## Read 3 items
## Read 2 items
## Read 2 items
## Read 2 items
## Read 2 items
## Read 0 items

SMRLocusPlot(data = SMRData, smr_thresh = 8.4e-6, heidi_thresh = 0.05, plotWindow = 1000, max_anno_probe = 16)
## Warning messages:
## 1: In xtfrm.data.frame(x) : cannot xtfrm data frames
## 2: In xtfrm.data.frame(x) : cannot xtfrm data frames
SMR-3
SMREffectPlot(data = SMRData, trait_name="Sjögren's_syndrome")
SMR-4
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容