# 查看 GFF 文件第9列(属性列)中出现过哪些注释字段名(每个字段名只列出一次):
# List every distinct attribute key found in column 9 of XXX.gff, one per
# line, in order of first appearance.
awk -F '\t' '
{
  # Column 9 holds ";"-separated key=value items.
  n = split($9, attrs, ";")
  # Walk the items by index: "for (i in attrs)" visits them in an
  # unspecified order, which would make the output order unpredictable.
  for (i = 1; i <= n; i++) {
    # The key is the text before the first "=" (the whole item if none).
    eq = index(attrs[i], "=")
    key = eq ? substr(attrs[i], 1, eq - 1) : attrs[i]
    # Skip the empty item left by a trailing ";" and keys already printed.
    if (key != "" && !(key in seen)) {
      seen[key] = 1
      print key
    }
  }
}' XXX.gff
# Page through column 9 of XXX.gff with every input line numbered, to inspect
# the raw attribute strings. The "%6d<TAB>" prefix is the layout cat -n uses.
awk -F '\t' '{ printf "%6d\t%s\n", NR, $9 }' XXX.gff | less
# 下面使用 awk 从第9列提取注释信息(以提取 QTL_ID、Name、VTO_name、breed 等字段为例),每行输入对应一行以制表符分隔的输出:
# Extract selected column-9 attributes from 1.gff as a tab-separated table.
# One output row is written per input line; an attribute that is absent on a
# line yields an empty cell. Values are kept verbatim, including any "=" they
# contain.
awk '
BEGIN {
  FS = OFS = "\t"
  # Output columns, in order.
  keys = "QTL_ID Name VTO_name Abbrev PUBMED_ID trait_ID Trait Coord_src"
  keys = keys " breed CMO_name gene_ID gene_IDsrc Test_Base Additive_Effect"
  keys = keys " Variance PTO_name F-Stat Dominance_Effect Likelihood_Ratio"
  keys = keys " LOD-score LS-means Bayes-value Model Map_Type Significance"
  keys = keys " P-value"
  ncol = split(keys, col, " ")
}
{
  # Forget the attributes of the previous line.
  split("", val)
  # Only column 9 carries attributes: ";"-separated key=value items.
  n = split($9, item, ";")
  for (i = 1; i <= n; i++) {
    # Split at the FIRST "=" only, so the value may itself contain "=".
    eq = index(item[i], "=")
    if (eq) val[substr(item[i], 1, eq - 1)] = substr(item[i], eq + 1)
  }
  row = val[col[1]]
  for (j = 2; j <= ncol; j++) row = row OFS val[col[j]]
  print row
}' 1.gff
# Variant of the attribute extraction for values that themselves contain "=":
# everything after the first "=" of an item is kept as the value, and nothing
# is appended to values that have no inner "=".
# Output: one tab-separated row per input line of 1.gff; an attribute that is
# absent on a line yields an empty cell.
awk '
BEGIN {
  FS = OFS = "\t"
  # Output columns, in order.
  keys = "QTL_ID Name VTO_name Abbrev PUBMED_ID trait_ID Trait Coord_src"
  keys = keys " breed CMO_name gene_ID gene_IDsrc Test_Base Additive_Effect"
  keys = keys " Variance PTO_name F-Stat Dominance_Effect Likelihood_Ratio"
  keys = keys " LOD-score LS-means Bayes-value Model Map_Type Significance"
  keys = keys " P-value"
  ncol = split(keys, col, " ")
}
{
  # Forget the attributes of the previous line.
  split("", val)
  # Only column 9 carries attributes: ";"-separated key=value items.
  n = split($9, item, ";")
  for (i = 1; i <= n; i++) {
    # Key = text before the first "="; value = everything after it.
    eq = index(item[i], "=")
    if (eq) val[substr(item[i], 1, eq - 1)] = substr(item[i], eq + 1)
  }
  row = val[col[1]]
  for (j = 2; j <= ncol; j++) row = row OFS val[col[j]]
  print row
}' 1.gff
# Print columns 1-8 of 1.gff followed by the selected column-9 attributes,
# as one tab-separated row per input line (8 + 26 = 34 cells). Cells are
# joined by exactly one tab; an attribute that is absent on a line yields an
# empty cell. Values are kept verbatim, including any "=" they contain.
awk '
BEGIN {
  FS = OFS = "\t"
  # Attribute columns, in output order (they follow input columns 1-8).
  keys = "QTL_ID Name VTO_name Abbrev PUBMED_ID trait_ID Trait Coord_src"
  keys = keys " breed CMO_name gene_ID gene_IDsrc Test_Base Additive_Effect"
  keys = keys " Variance PTO_name F-Stat Dominance_Effect Likelihood_Ratio"
  keys = keys " LOD-score LS-means Bayes-value Model Map_Type Significance"
  keys = keys " P-value"
  ncol = split(keys, col, " ")
}
{
  # Forget the attributes of the previous line.
  split("", val)
  # Only column 9 carries attributes: ";"-separated key=value items.
  n = split($9, item, ";")
  for (i = 1; i <= n; i++) {
    # Split at the FIRST "=" only, so the value may itself contain "=".
    eq = index(item[i], "=")
    if (eq) val[substr(item[i], 1, eq - 1)] = substr(item[i], eq + 1)
  }
  # Build the row by hand so that OFS is the only separator between cells.
  row = $1
  for (j = 2; j <= 8; j++) row = row OFS $j
  for (j = 1; j <= ncol; j++) row = row OFS val[col[j]]
  print row
}' 1.gff
# 注:下面的命令只输出 Coord_src 字段的值(即 rs 列的信息),每行输入对应一行输出:
# Print only the Coord_src attribute of each line of 1.gff, one output line
# per input line (empty when the attribute is absent). The value is everything
# after the first "=" of the item, so an "=" inside the value is preserved and
# nothing is appended to values without one.
awk -F '\t' '
{
  # Forget the attributes of the previous line.
  split("", val)
  # Only column 9 carries attributes: ";"-separated key=value items.
  n = split($9, item, ";")
  for (i = 1; i <= n; i++) {
    # Key = text before the first "="; value = everything after it.
    eq = index(item[i], "=")
    if (eq) val[substr(item[i], 1, eq - 1)] = substr(item[i], eq + 1)
  }
  print val["Coord_src"]
}' 1.gff