### 计算平均Ho和He,以及标准偏差
vcftools --vcf native_M2.vcf --minQ 20 --maf 0.05 --hwe 0.001 --recode --recode-INFO-all --out native_M2_min_maf_hwe
cd part_analysis
mv ../native_M2_min_maf_hwe.recode.vcf ./
plink --vcf native_M2_min_maf_hwe.recode.vcf --make-bed --out native_out1 --allow-extra-chr
wc -l native_out1.bim > snp_number1.txt
sed -e "s/NC_064243.1/1/g" -e "s/NC_064244.1/2/g" -e "s/NC_064245.1/3/g" -e "s/NC_064246.1/4/g" -e "s/NC_064247.1/5/g" -e "s/NC_064248.1/6/g" -e "s/NC_064249.1/7/g" -e "s/NC_064250.1/8/g" -e "s/NC_064251.1/9/g" -e "s/NC_064252.1/10/g" -e "s/NC_064253.1/11/g" -e "s/NC_064254.1/12/g" -e "s/NC_064255.1/13/g" -e "s/NC_064256.1/14/g" -e "s/NC_064257.1/15/g" -e "s/NC_064258.1/16/g" -e "s/NC_064259.1/17/g" -e "s/NC_064260.1/18/g" -e "s/NC_064261.1/19/g" -e "s/NC_064262.1/20/g" -e "s/NC_064263.1/21/g" -e "s/NC_064264.1/22/g" -e "s/NC_064265.1/23/g" -e "s/NC_064266.1/24/g" -e "s/NC_064267.1/25/g" -e "s/NC_064268.1/26/g" -e "s/NC_064269.1/27/g" -e "s/NC_064270.1/28/g" -e "s/NC_064271.1/29/g" -e "s/NC_064272.1/30/g" -e "s/NC_064273.1/31/g" -e "s/NC_064274.1/32/g" -e "s/NC_064275.1/33/g" -e "s/NC_064276.1/34/g" -e "s/NC_064277.1/35/g" -e "s/NC_064278.1/36/g" -e "s/NC_064279.1/37/g" -e "s/NC_064280.1/38/g" -i native_out1.bim
awk '{print $1 ,"\t", $2="snp"NR, "\t", $3 , "\t", $4 , "\t", $5 ,"\t", $6}' native_out1.bim > native_out1.txt
rm -rf native_out1.bim | cp native_out1.txt native_out1.bim
rm -rf native_out1.txt
awk '{ if($1 >=1 && $1 <= 38) print $2}' native_out1.bim > snp_1_38.txt
plink --bfile native_out1 --extract snp_1_38.txt --make-bed --out native_out2 --allow-extra-chr --chr-set 38
rm -rf native_out1*
wc -l native_out2.bim > snp_number2.txt
plink --bfile native_out2 --recode --out native_out2 --allow-extra-chr --chr-set 38
plink --file native_out2 --hardy --out native_het_test --allow-extra-chr --chr-set 38
awk '{print $7 "\t" $8}' native_het_test.hwe | sed '1d' > count_het.txt
awk '{print $1}' count_het.txt | datamash mean 1 sstdev 1 > native_Ho.txt
awk '{print $2}' count_het.txt | datamash mean 1 sstdev 1 > native_Ho.txt
##for i in `head -n 1 count_het.txt | awk '{print NF}' | xargs seq `; do awk -v a=$i 'BEGIN{sum = 0} {sum += $a} END {print sum/NR}' count_het.txt >> mean.txt; done
### 计算snp密度
#!/bin/bash
plink --bfile native_out2 --export vcf --out native_1_38 --chr-set 38
cd ../part2_sd_analysis
mv ../part1_Ho_analysis/native_1_38.vcf ./
vcftools --vcf native_1_38.vcf --SNPdensity 100000 --out native_SNPdensity
awk '{print $4}' native_SNPdensity.snpden | sed '1d' > native_SNPdensity.txt
awk '{print $1}' native_SNPdensity.txt | datamash mean 1 sstdev 1 > snpdensity.txt
笔记:计算Ho、He、SNP密度的流程
©著作权归作者所有,转载或内容合作请联系作者
- 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
- 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
- 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
推荐阅读更多精彩内容
- 转载自:https://www.cnblogs.com/triple-y/p/10331449.html 收集vc...
- 参考 收集vcftools所有用法 命令 100000 是指定窗口长度--out 是输出文件的前缀 使用R语言中的...
- 个人进行SNP分析用的软件是snippy[https://github.com/tseemann/snippy],...