# Chromosome length file
# Keep only the annotation records whose sequence name starts with "Chr".
# The filter goes through a temporary file: redirecting the output straight
# back onto Cbre.chr.gff3 truncates the file before it has been read, which
# leaves it empty. If grep matches nothing, the original file is left as is.
# NOTE(review): presumably the input was meant to be the unfiltered annotation
# under a different name - confirm and point grep at that file if so.
chr_gff_tmp=$(mktemp Cbre.chr.gff3.XXXXXX) \
  && grep '^Chr' Cbre.chr.gff3 > "$chr_gff_tmp" \
  && mv -- "$chr_gff_tmp" Cbre.chr.gff3
# Unique sequence names (column 1). awk's default field splitting is used
# because GFF3 columns are tab-separated; `cut -d ' '` would return whole lines.
awk '{print $1}' Cbre.chr.gff3 | sort -u > id
# Pull the listed sequences out of the genome assembly.
seqkit grep -f id ../genome.toplevel.fa > Cbre.chr.fna
# Swap the two fastalength columns and insert a constant 1 between them:
# <second column> <TAB> 1 <TAB> <first column>
# (presumably fastalength prints "<length> <id>" - confirm).
fastalength Cbre.chr.fna | awk 'BEGIN { OFS = "\t" } { print $2, 1, $1 }' > Chrlength.txt
# GC content
# Generate the window file; window size is 50 kb.
# NOTE(review): genome.len is not created anywhere in this file; bedtools
# expects a two-column <name><TAB><length> genome file - confirm its origin.
bedtools makewindows -w 50000 -g genome.len > genome.window.bed
# Compute the average GC content of every window.
seqtk subseq Cbre.chr.fna genome.window.bed > genome.window.fasta
# seqtk comp columns used here: $1 = window name, $3..$6 = base counts
# (presumably A, C, G, T in that order, so $4+$5 is G+C - confirm).
# The window name is expected to end in ":<start>-<end>"; only that suffix is
# split off, so sequence names containing ':' or '-' stay intact.
# Windows without any counted base (e.g. all N) get 0 instead of aborting awk
# with a division by zero.
seqtk comp genome.window.fasta \
  | awk 'BEGIN { OFS = "\t" }
    {
      name = $1
      if (!match(name, /:[0-9]+-[0-9]+$/)) next
      chrom = substr(name, 1, RSTART - 1)
      split(substr(name, RSTART + 1), span, "-")
      acgt = $3 + $4 + $5 + $6
      gc = (acgt > 0) ? ($4 + $5) / acgt : 0
      print chrom, span[1], span[2], gc
    }' > sind_gc.txt
# Count the number of genes in each window
# First convert the protein-annotation GFF file to BED; the conversion is run
# inside the py_36 conda environment.
conda activate py_36
# Only `gene` records are converted: feeding the whole GFF3 would also count
# the mRNA, exon and CDS rows of each gene in the per-window totals.
awk '$3 == "gene"' Cbre.chr.gff3 | convert2bed -i gff > Cbre.bed
# -c appends the number of overlapping Cbre.bed records to every window;
# -F 0.1 requires the overlap to cover at least 10% of the Cbre.bed record.
bedtools intersect -a genome.window.bed -b Cbre.bed -c -F 0.1 > sind_genecount.txt
# Track 1: strand (plus/minus) of the genes along the chromosomes
# Write sequence name, start, end and a numeric strand value for every record:
# "+" becomes 0.5 and "-" becomes -0.5; any other strand value is kept as is.
# The mapping is applied to the strand column only, so a '+' or '-' occurring
# in a sequence name is never rewritten.
awk 'BEGIN { OFS = "\t" }
  {
    strand = $7
    if (strand == "+") strand = "0.5"
    else if (strand == "-") strand = "-0.5"
    print $1, $4, $5, strand
  }' Cbre.chr.gff3 > track.txt
# Positions of the genes to highlight
# Build the label track for the IDs listed in SUS.id (one ID per line).
# -F -w matches each ID literally and as a whole word, so an ID that is a
# prefix of another ID does not pull in the longer one.
# Records are then restricted to feature type `gene` (column 3) rather than to
# any line that merely contains the text "gene".
grep -F -w -f SUS.id Cbre.chr.gff3 \
  | awk '$3 == "gene"' \
  | sort \
  | awk 'BEGIN { OFS = "\t" } { print $1, $4, $5, "SUS" }' > label.txt
# Repeat sequence content
# Convert the EDTA "intact" .out table to GFF3 with RepeatMasker's helper script.
# The result is appended (>>) to Cbre.repeat.gff, so existing content is kept.
/home/lx_sky6/software/RepeatMasker/util/rmOutToGFF3.pl \
  /home/lx_sky6/yt/0729_Carex/7-repeat_toplevel/2-EDTA/genome.Carex_breviculmis.toplevel.fa.mod.EDTA.final/genome.Carex_breviculmis.toplevel.fa.mod.EDTA.intact.fa.out \
  >> Cbre.repeat.gff
# Per-window repeat coverage: keep the three window columns plus column 7
# (presumably the covered fraction for a three-column window file - confirm).
bedtools coverage -a genome.window.bed -b Cbre.repeat.gff \
  | awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $7 }' > Cbre_repeat.txt
# Link track (sind_link.txt)
# Prepare the two JCVI inputs (Cbre1 / Cbre2), both built from the same
# annotation; every input needs a <name>.bed and a matching <name>.cds.
conda activate jcvi
python3 -m jcvi.formats.gff bed Cbre.chr.gff3 -o Cbre1.bed
python3 -m jcvi.formats.gff bed Cbre.chr.gff3 -o Cbre2.bed
# NOTE(review): `bed uniq` presumably writes its result to a separate
# *.uniq.bed file that nothing below reads - confirm whether it is needed.
python3 -m jcvi.formats.bed uniq Cbre1.bed
python3 -m jcvi.formats.bed uniq Cbre2.bed
# Column 4 of the BED files holds IDs without the ".t1" suffix carried by the
# CDS records, so the suffix is appended before looking the sequences up.
# Each input gets its own CDS file, built from its own BED file; previously
# the first extraction read Cbre2.bed and only printed to the terminal, so
# Cbre1.cds was never created.
awk '{print $4".t1"}' Cbre1.bed | seqkit grep -f - ../21-Collinearity/Cbre.chr.cds > Cbre1.cds
awk '{print $4".t1"}' Cbre2.bed | seqkit grep -f - ../21-Collinearity/Cbre.chr.cds > Cbre2.cds
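## The IDs taken from the gff3 for the BED file do not carry the ".t1" suffix, whereas the cds and pep IDs do, so ".t1" has to be appended before the sequences can be extracted.
## Example records from the gff3:
# Chr1 EVM gene 34110 34304 . - . ID=evm00001;Name=evm00001
# Chr1 EVM mRNA 34110 34304 . - . ID=evm00001.t1;Parent=evm00001
# Chr1 EVM exon 34110 34304 . - . ID=evm00001.t1.exon1;Parent=evm00001.t1
# Chr1 EVM CDS 34110 34304 . - 0 ID=evm00001.t1.CDS1;Parent=evm00001.t1
## To stay consistent with the generated BED files, the ".t1" suffix must be removed again from the IDs of the extracted cds.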
# Remove the ".t1" suffix from the sequence ID of every FASTA header so the
# CDS IDs match the IDs in the BED files.
# The edit is limited to header lines and to a literal ".t1" at the end of the
# ID token; an unescaped '.' with a global substitution would also delete any
# "<char>t1" elsewhere in the file, sequence lines included.
for cds_file in Cbre1.cds Cbre2.cds; do
  sed -i -E '/^>/ s/^(>[^[:space:]]+)\.t1([[:space:]]|$)/\1\2/' "$cds_file"
done
# Pairwise synteny search between the two inputs, keeping sequence names as is.
python -m jcvi.compara.catalog ortholog \
  --no_strip_names \
  Cbre1 Cbre2
# Screen the anchors with a minimum span of 30 and request the .simple output.
python -m jcvi.compara.synteny screen \
  --minspan=30 --simple \
  Cbre1.Cbre2.anchors Cbre1.Cbre2.anchors.new
# Turn the .simple blocks into a link table, then give it its final name.
python /home/lx_sky6/software/miniconda3/envs/jcvi/simple2links.py \
  Cbre1.Cbre2.anchors.simple
mv Cbre1.Cbre2.anchors.simple_link.txt sind_link.txt