输入数据格式为:
利用该脚本,可以获得标记的等位信息、杂合率、MAF、缺失率及PIC值。
perl stat_info_marker.pl example.txt
#!/usr/bin/perl
use warnings;
use strict;
open IN,"$ARGV[0]";
open OUT,">marker-info-$ARGV[0]";
print OUT "Name\tAlleles\tHeterogeneity\tMAF\tMissing\tPIC\n";
<IN>;
while (<IN>) {
chomp;
my @a = split/\s+/;
print OUT "$a[0]\t",stat_alleles(@a[1..$#a]),"\t",stat_het(@a[1..$#a]),"\t",stat_maf(@a[1..$#a]),"\t",stat_missing(@a[1..$#a]),"\t",stat_pic(@a[1..$#a]),"\n";
}
close IN;
close OUT;
sub stat_alleles {
my %alleles = ();
foreach (@_) {
s/://;
/(\w)(\w)/;
next if $1 =~ /N/;
$alleles{$1}++;
$alleles{$2}++;
}
return join("/",sort keys %alleles);
}
sub stat_missing {
my $a = 0;
my $b = 0;
foreach (@_) {
s/://;
/(\w)(\w)/;
if ($1 =~ /N/) {
$a++;
}
$b++;
}
if ($b > 0) {
return $a/$b;
}else{
return "NA";
}
}
sub stat_maf {
my $t = 0;
my %maf = ();
foreach (@_) {
s/://;
/(\w)(\w)/;
if ($1 =~ /N/) {
next;
} else{
$maf{$1}++;
$maf{$2}++;
$t += 2;
}
}
my @return = sort{$maf{$b}<=>$maf{$a}}keys %maf;
if ($#return >= 1 && $t > 0) {
return $maf{$return[1]}/$t;
}else{
return "NA";
}
}
sub stat_pic {
my $a = 0;
my $b = 0;
my %pic = ();
foreach (@_) {
s/://;
/(\w)(\w)/;
if ($1 =~ /N/) {
next;
} else{
$pic{$1}++;
$pic{$2}++;
$a += 2;
}
}
if ($a > 0) {
foreach (values %pic) {
$b += (($_/$a)*($_/$a));
}
return 1-$b;
} else{
return "NA";
}
}
sub stat_het {
my $a = 0;
my $b = 0;
foreach (@_) {
s/://;
/(\w)(\w)/;
if ($1 =~ /N/) {
next;
} elsif ($1 ne $2) {
$a++;
} else {
$b++;
}
}
if ($a+$b > 0) {
return $a/($a+$b);
}else{
return "NA";
}
}