# Project root. "~" is NOT expanded inside double quotes, so build the path
# from ${HOME}; otherwise every path below points at a literal "~" directory
# created relative to the current working directory.
projPath="${HOME}/new_test"
# Sample sheet consumed by the per-sample loops in this script: one
# whitespace-separated row per sample (sample name, read-1 fastq, read-2 fastq).
config1="${projPath}/config1"

# Create the output tree up front (mkdir -p also creates the parents).
mkdir -p \
  "${projPath}/fastqc" \
  "${projPath}/unalign" \
  "${projPath}/alignment/sam/bowtie2_summary" \
  "${projPath}/alignment/sam/fragmentLen" \
  "${projPath}/alignment/bam/picard_summary" \
  "${projPath}/alignment/bed/binLen" \
  "${projPath}/alignment/bigwig" \
  "${projPath}/macs2"
# unalign/ keeps the reads that failed to align, so they can be inspected
# later to see what kind of contamination they come from.

# Bowtie2 index basename (handed to `bowtie2 -x`); same tilde caveat as above.
ref="${HOME}/INDEX/bowtie2Index/mouse/mm10"
# Note carried over from the original script (it used to be a bare line that
# the shell tried to execute as a command):
#   fastqc 0.12.1 [#Nextera Transposase Sequence & PolyG]
# NOTE(review): presumably the FastQC version used and the adapter/overrepresented
# sequences it reported -- confirm with the author.

# Run FastQC on both mates of every sample listed in ${config1}.
# Row format: <sample> <fq1> <fq2>; only the two fastq paths are needed here.
echo "started at $(date)"
# `read -r` keeps backslashes intact; the `|| [[ -n ... ]]` clause still
# processes a final row that has no trailing newline.
while read -r _ fq1 fq2 _ || [[ -n "${fq1}" ]]; do
  # Skip blank or incomplete rows instead of calling fastqc with empty arguments.
  if [[ -z "${fq1}" || -z "${fq2}" ]]; then
    continue
  fi
  fastqc -o "${projPath}/fastqc" -f fastq "${fq1}"
  fastqc -o "${projPath}/fastqc" -f fastq "${fq2}"
done < "${config1}"
echo "finished at $(date)"
# Adapter trimming: run ktrim (Nextera kit) on each read pair listed in
# ${config1} (row format: <sample> <fq1> <fq2>).
# Output goes to the prefix ${projPath}/<sample>.k; the alignment step of this
# script expects <prefix>.read1.fq and <prefix>.read2.fq to result from it.
# `read -r` keeps backslashes intact; the `|| [[ -n ... ]]` clause still
# processes a final row that has no trailing newline.
while read -r sample fq1 fq2 _ || [[ -n "${sample}" ]]; do
  # Skip blank or incomplete rows instead of calling ktrim with empty arguments.
  if [[ -z "${sample}" || -z "${fq1}" || -z "${fq2}" ]]; then
    continue
  fi
  ktrim -k Nextera -1 "${fq1}" -2 "${fq2}" -o "${projPath}/${sample}.k"
done < "${config1}"
# Alignment: map the trimmed read pairs of every sample in ${config1} with
# bowtie2, then tabulate the fragment-length distribution of the mapped reads.
module load tools/bowtie2-2.3.5.1
# Only the first column (sample name) of each row is needed here.
while read -r sample _ || [[ -n "${sample}" ]]; do
  # Skip blank rows.
  if [[ -z "${sample}" ]]; then
    continue
  fi

  # Inputs are the trimmed fastq files written with the prefix
  # ${projPath}/<sample>.k, so they are addressed under ${projPath} rather than
  # relative to whatever directory the script was started from.
  # Unaligned pairs go to unalign/ (gzip); bowtie2's stderr summary is kept
  # per sample under bowtie2_summary/.
  bowtie2 --local --very-sensitive --no-mixed --no-discordant --phred33 -I 10 -X 700 \
    -p 32 -x "${ref}" \
    -1 "${projPath}/${sample}.k.read1.fq" -2 "${projPath}/${sample}.k.read2.fq" \
    --un-conc-gz "${projPath}/unalign/${sample}.un.fq.gz" \
    -S "${projPath}/alignment/sam/${sample}.sam" \
    2> "${projPath}/alignment/sam/bowtie2_summary/${sample}.txt"

  ## Extract the 9th column from the alignment sam file which is the fragment length
  # -F 0x04 drops unmapped reads. Output columns: |length| <TAB> count/2.
  # NOTE(review): the division by 2 presumably compensates for both mates of a
  # pair reporting the same absolute length -- confirm.
  samtools view -F 0x04 "${projPath}/alignment/sam/${sample}.sam" \
    | awk -F'\t' 'function abs(x){return ((x < 0.0) ? -x : x)} {print abs($9)}' \
    | sort \
    | uniq -c \
    | awk -v OFS="\t" '{print $2, $1/2}' \
    > "${projPath}/alignment/sam/fragmentLen/${sample}.fragmentLen.txt"
done < "${config1}"
# Picard launcher. "~" is not expanded inside a quoted string (nor by java), so
# the jar path is built from ${HOME}. picardCMD keeps its original
# "java -jar <jar>" string form for any other code that expands it.
picardJar="${HOME}/.conda/envs/cuttagg/share/picard-2.18.29-0/picard.jar"
picardCMD="java -jar ${picardJar}"

# Post-alignment filtering and duplicate removal for every sample in ${config1}.
# Only the first column (sample name) of each row is needed here.
while read -r sample _ || [[ -n "${sample}" ]]; do
  # Skip blank rows.
  if [[ -z "${sample}" ]]; then
    continue
  fi
  bamDir="${projPath}/alignment/bam"

  ## Filter and keep the mapped read pairs
  # -q 30 keeps MAPQ >= 30; -F 1804 drops reads flagged as unmapped (4),
  # mate unmapped (8), secondary (256), QC-fail (512) or duplicate (1024).
  samtools view -q 30 -bS -F 1804 "${projPath}/alignment/sam/${sample}.sam" \
    > "${bamDir}/${sample}.mapped.bam"

  ## remove duplicates
  # Sort first, then let MarkDuplicates drop the duplicates and write its
  # metrics file under picard_summary/.
  samtools sort -o "${bamDir}/${sample}.sorted.bam" "${bamDir}/${sample}.mapped.bam"
  java -jar "${picardJar}" MarkDuplicates REMOVE_DUPLICATES=true \
    I="${bamDir}/${sample}.sorted.bam" \
    O="${bamDir}/${sample}.sorted.rmDup.bam" \
    M="${bamDir}/picard_summary/${sample}.sorted.rmDup.txt"
done < "${config1}"