#!/usr/bin/perl -w
use strict;
# Usage: perl fasta2oneline_findpmotif.pl input.fasta oneline.fasta Pmotif.fasta
#   input.fasta   - multi-line protein FASTA file to read
#   oneline.fasta - output: same records, each sequence joined onto one line
#   Pmotif.fasta  - output: motif search result, written by the second step
# Extra arguments beyond the first three are ignored.
@ARGV >= 3
    or die "Usage: perl $0 input.fasta oneline.fasta Pmotif.fasta\n";
my $content  = "";            # residues accumulated for the current record
my $infile   = shift @ARGV;
my $out1file = shift @ARGV;
my $out2file = shift @ARGV;

### Step 1: join each record's multi-line amino-acid sequence into one line.
# Three-argument open with lexical handles, so a file name that starts with
# '>' or '|' (or ends with '|') can never be interpreted as an open mode.
open(my $fasta_fh, '<', $infile)
    or die "cannot open the file '$infile': $!";
open(my $oneline_out_fh, '>', $out1file)
    or die "cannot open the file '$out1file': $!";
while (my $line = <$fasta_fh>) {
    # Strip LF as well as CRLF line endings; a plain chomp would leave "\r"
    # characters embedded in the middle of the joined sequence.
    $line =~ s/\r?\n?\z//;
    if ($line =~ /^>/) {
        # A new header ends the previous record: flush its sequence first.
        if ($content ne "") {
            print {$oneline_out_fh} "$content\n";
            $content = "";
        }
        print {$oneline_out_fh} "$line\n";
    }
    else {
        $content .= $line;
    }
}
# Flush the last record. Guarded like the in-loop flush so that an empty
# input (or a trailing header without residues) does not yield a blank line.
print {$oneline_out_fh} "$content\n" if $content ne "";
close($fasta_fh);
# Buffered write errors only surface at close, so check it on the output.
close($oneline_out_fh)
    or die "cannot close the file '$out1file': $!";
## Step 2: scan the one-line FASTA for phosphorylation motifs.
# Output has one line per input line: a header becomes ">ACCESSION" and a
# sequence line becomes its first matching motif (or a "no motif" notice).
open(my $oneline_in_fh, '<', $out1file)
    or die "cannot open the file '$out1file': $!";
open(my $motif_fh, '>', $out2file)
    or die "cannot open the file '$out2file': $!";
while (my $record = <$oneline_in_fh>) {
    chomp $record;
    if ($record =~ /^>/) {
        # Any line starting with '>' is a header and is never motif-scanned.
        # Expected header form is ">sp|ACCESSION|..." (or ">tr|..."); the
        # accession is taken up to the next '|' so that both 6- and
        # 10-character UniProt accessions are kept whole.
        if ($record =~ /^>(?:sp|tr)\|([^|\s]+)\|/) {
            print {$motif_fh} ">$1\n";
        }
        else {
            # Header in some other format: keep it unchanged.
            print {$motif_fh} "$record\n";
        }
    }
    else {
        # Case-insensitive search for SD.[DE], L.R..S or R..S.
        # NOTE(review): only the first (leftmost) motif of each sequence is
        # reported, as before; confirm whether all occurrences are wanted.
        if ($record =~ /(SD.[DE]|L.R..S|R..S)/i) {
            print {$motif_fh} "$1\n";
        }
        else {
            print {$motif_fh} "没有匹配到磷酸化模体\n";
        }
    }
}
close($oneline_in_fh);
# Buffered write errors only surface at close, so check it on the output.
close($motif_fh)
    or die "cannot close the file '$out2file': $!";