端粒存在于真核生物染色体末端,可能由AAACCCT、AACCCT、AACCAC等短序列串联重复多次形成。其中部分真菌的端粒长度在100-1000 bp之间,而动植物的端粒长度通常在几kb到几十kb之间。
1.下载tidk
conda install -c bioconda tidk
2.检测端粒重复序列
tidk explore -f A.fasta -o output --dir ./ --length 0 --maximum 12 --minimum 5 -t 5


tidk更新后,上述explore参数可能无法使用,解决方案如下:
可以先尝试使用以下命令:
tidk explore --length 0 --maximum 12 --minimum 5 -t 5 A.fasta
如果仍然无法运行,安装旧版本(0.2.0)即可:
conda install -c bioconda tidk=0.2.0
附件.脚本预测端粒

#!/usr/bin/perl -w
# Summarise telomere presence per chromosome for a list of assemblies.
#
# Usage: perl <script> <list> <out>
#
#   <list>  Whitespace-separated text file, one sample per line:
#               <sample> <telomere_table>
#           The assembly FASTA is looked up next to the telomere table as
#           "<dir>/<sample>.*fa" (first glob match is used).
#   <out>   Tab-separated report. For every expected chromosome it holds a
#           presence flag (+/-) and a telomere state (complete/left/right/NA),
#           followed by two summary columns "count;complete;part;No" for the
#           normal chromosomes and for the B chromosomes.
#
# Each data line of the telomere table is read as four whitespace-separated
# columns: sequence name, start, end, sequence length.  A hit starting within
# $EDGE_WINDOW bp of the sequence start marks the left end; a hit ending
# within $EDGE_WINDOW bp of the sequence end marks the right end.
use strict;
use warnings;
use File::Basename qw(dirname);
use Scalar::Util qw(looks_like_number);

my ($list, $out) = @ARGV;
if (@ARGV != 2) {
    # Wrong invocation is an error: report on STDERR with a non-zero status.
    print STDERR "perl $0 <list> <out>\n";
    exit 1;
}

# Maximum distance (bp) from a sequence end for a hit to count as telomeric.
my $EDGE_WINDOW = 5000;

# Expected chromosome names, in report column order.
my @normal_chr = map { "Chr$_" } 1 .. 13;
my @b_chr      = map { "ChrB$_" } 1 .. 4;
my @chr        = (@normal_chr, @b_chr);

open my $out_fh, '>', $out or die "Cannot write $out: $!\n";
print {$out_fh} "Sample";
print {$out_fh} "\t$_\t$_ telomere" for @chr;
print {$out_fh} "\tNormal;complete;part;No\tB;complete;part;No\n";

open my $list_fh, '<', $list or die "Cannot open $list: $!\n";
while (my $entry = <$list_fh>) {
    chomp $entry;
    next unless $entry =~ /\S/;    # tolerate blank / trailing empty lines

    my ($sample, $tel) = split ' ', $entry;
    die "Malformed line in $list (expected '<sample> <telomere_table>'): $entry\n"
        unless defined $tel;

    # dirname() yields "." for a bare file name, so the FASTA is searched in
    # the current directory rather than in the filesystem root.
    my $dir = dirname($tel);
    my ($fa) = glob("$dir/$sample.*fa");
    die "Error! $sample fasta not exist!\n" unless defined $fa && -f $fa;

    # Pass 1: collect the sequence names present in the assembly.
    my ($normal, $chrb) = (0, 0);
    my %chromosome;
    open my $fa_fh, '<', $fa or die "Cannot open $fa: $!\n";
    while (my $line = <$fa_fh>) {
        next unless $line =~ /^>/;
        my $name = (split ' ', $line)[0];
        $name =~ s/>//;
        $chromosome{$name} = 1;
        if    ($name =~ /ChrB/) { $chrb++ }
        elsif ($name =~ /tig/)  { }           # unplaced contigs are not counted
        else                    { $normal++ }
    }
    close $fa_fh;

    # Pass 2: mark which sequence ends carry a telomere hit.
    my (%left, %right);
    open my $tel_fh, '<', $tel or die "Cannot open $tel: $!\n";
    while (my $line = <$tel_fh>) {
        chomp $line;
        next unless $line =~ /\S/;
        my ($name, $start, $end, $len) = split ' ', $line;
        unless (looks_like_number($start)
            && looks_like_number($end)
            && looks_like_number($len))
        {
            # Header or truncated rows must not be treated as coordinate 0.
            warn "Skipping malformed line in $tel: $line\n";
            next;
        }
        $left{$name}  = 1 if $start < $EDGE_WINDOW;
        $right{$name} = 1 if $len - $end < $EDGE_WINDOW;
    }
    close $tel_fh;

    # Classify every present chromosome and tally the two groups.
    my %type;
    my ($ncom, $npart, $nno)
        = tally_group(\@normal_chr, \%chromosome, \%left, \%right, \%type);
    my ($bcom, $bpart, $bno)
        = tally_group(\@b_chr, \%chromosome, \%left, \%right, \%type);

    print {$out_fh} "$sample";
    for my $name (@chr) {
        if ($chromosome{$name}) { print {$out_fh} "\t+\t$type{$name}" }
        else                    { print {$out_fh} "\t-\t-" }
    }
    print {$out_fh} "\t$normal;$ncom;$npart;$nno\t$chrb;$bcom;$bpart;$bno\n";
}
close $list_fh;
# Buffered write errors only surface at close, so check it.
close $out_fh or die "Cannot finish writing $out: $!\n";

# Classify the chromosomes in @$names that are present in %$present.
# Records "complete", "left", "right" or "NA" in %$type for each one and
# returns the counts (complete, partial, none) for the group.
sub tally_group {
    my ($names, $present, $left, $right, $type) = @_;
    my ($complete, $partial, $none) = (0, 0, 0);
    for my $name (@$names) {
        next unless $present->{$name};
        my $has_left  = $left->{$name}  ? 1 : 0;
        my $has_right = $right->{$name} ? 1 : 0;
        my $ends      = $has_left + $has_right;
        if ($ends == 2) {
            $complete++;
            $type->{$name} = "complete";
        }
        elsif ($ends == 1) {
            $partial++;
            $type->{$name} = $has_left ? "left" : "right";
        }
        else {
            $none++;
            $type->{$name} = "NA";
        }
    }
    return ($complete, $partial, $none);
}