SRA转换成fastq
$ conda activate rna
(rna) Mar23 11:28:37 ~
$ pwd
/trainee2/Mar23
(rna) Mar23 11:52:58 ~
$ cd Data/
(rna) Mar23 11:53:07 ~/Data
$ cd rawdata/
(rna) Mar23 11:53:16 ~/Data/rawdata
$ ls
fq SRR1039510_1.fastq.gz SRR1039511_2.fastq.gz
qc SRR1039510_2.fastq.gz SRR1039512_1.fastq.gz
sra SRR1039511_1.fastq.gz SRR1039512_2.fastq.gz
(rna) Mar23 11:53:19 ~/Data/rawdata
$ cd sra/
(rna) Mar23 11:53:23 ~/Data/rawdata/sra
$ ls
CHECK SRR1039510
filereport_read_run_PRJNA229998_tsv.txt SRR1039510_1.fastq.gz
md5.txt SRR1039510_2.fastq.gz
raw_md5.txt SRR1039511
sample.ID SRR1039512
sra.url
(rna) Mar23 11:54:29 ~/Data/rawdata/sra
$ cat filereport_read_run_PRJNA229998_tsv.txt |head -n 1 | tr '\t' '\n'
study_accession
sample_accession
experiment_accession
run_accession
tax_id
scientific_name
fastq_md5
fastq_aspera
submitted_ftp
sra_bytes
sra_md5
sra_ftp
sra_aspera
(rna) Mar23 11:54:57 ~/Data/rawdata/sra
$ cat filereport_read_run_PRJNA229998_tsv.txt |head -n 1 | tr '\t' '\n'| cat -n
1 study_accession
2 sample_accession
3 experiment_accession
4 run_accession
5 tax_id
6 scientific_name
7 fastq_md5
8 fastq_aspera
9 submitted_ftp
10 sra_bytes
11 sra_md5
12 sra_ftp
13 sra_aspera
(rna) Mar23 11:55:36 ~/Data/rawdata/sra
$ cut -f 8 filereport_read_run_PRJNA229998_tsv.txt
fastq_aspera
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_2.fastq.gz
(rna) Mar23 11:55:53 ~/Data/rawdata/sra
$ cut -f 8 filereport_read_run_PRJNA229998_tsv.txt | tr ';' '\n'
fastq_aspera
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_2.fastq.gz
(rna) Mar23 12:01:28 ~/Data/rawdata/sra
$ cut -f 8 filereport_read_run_PRJNA229998_tsv.txt | tr ';' '\n' | less -S
(rna) Mar23 12:09:46 ~/Data/rawdata/sra
$ cut -f 8 filereport_read_run_PRJNA229998_tsv.txt | tr ';' '\n' | grep -e '_1' -e '_2'
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_2.fastq.gz
(rna) Mar23 12:04:34 ~/Data/rawdata/sra
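$ cut -f 8 filereport_read_run_PRJNA229998_tsv.txt | tr ';' '\n' | grep -e '_1' -e '_2'>fq.url
(rna) Mar23 12:06:24 ~/Data/rawdata/sra
$ cat fq.url
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039508/SRR1039508_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039509/SRR1039509_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039510/SRR1039510_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039511/SRR1039511_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039512/SRR1039512_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039513/SRR1039513_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/004/SRR1039514/SRR1039514_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/005/SRR1039515/SRR1039515_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/006/SRR1039516/SRR1039516_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/007/SRR1039517/SRR1039517_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/008/SRR1039518/SRR1039518_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/009/SRR1039519/SRR1039519_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/000/SRR1039520/SRR1039520_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/001/SRR1039521/SRR1039521_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/002/SRR1039522/SRR1039522_2.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_1.fastq.gz
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR103/003/SRR1039523/SRR1039523_2.fastq.gz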
(rna) Mar23 15:34:35 ~/Data/rawdata/sra
$ cd ~/Data/cleandata/fastp
(rna) Mar23 15:49:51 ~/Data/cleandata/fastp
$ cleandata=/trainee2/Mar23/Data/cleandata/fastp
(rna) Mar23 15:50:45 ~/Data/cleandata/fastp
$ rawdata=/trainee2/Mar23/Data/rawdata/
(rna) Mar23 15:50:51 ~/Data/cleandata/fastp
$ fastp -i $rawdata/SRR1039510_1.fastq.gz \
> -I $rawdata/SRR1039510_2.fastq.gz \
> -o $cleandata/SRR1039510_1.fastp.fq.gz \
> -O $cleandata/SRR1039510_2.fastp.fq.gz \
> -h $cleandata/SRR1039510.fastp.html \
> -j $cleandata/SRR1039510.fastp.json \
> -l 36 -q 30 --compression=6 -R $cleandata/SRR1039510
Read1 before filtering:
total reads: 25000
total bases: 1575000
Q20 bases: 1554922(98.7252%)
Q30 bases: 1528821(97.068%)
Read2 before filtering:
total reads: 25000
total bases: 1575000
Q20 bases: 1541170(97.8521%)
Q30 bases: 1514774(96.1761%)
Read1 after filtering:
total reads: 23772
total bases: 1497088
Q20 bases: 1492762(99.711%)
Q30 bases: 1475413(98.5522%)
Read2 aftering filtering:
total reads: 23772
total bases: 1497088
Q20 bases: 1492258(99.6774%)
Q30 bases: 1475816(98.5791%)
Filtering result:
reads passed filter: 47544
reads failed due to low quality: 2306
reads failed due to too many N: 136
reads failed due to too short: 14
reads with adapter trimmed: 116
bases trimmed due to adapters: 1610
Duplication rate: 0.189013%
Insert size peak (evaluated by paired-end reads): 95
JSON report: /trainee2/Mar23/Data/cleandata/fastp/SRR1039510.fastp.json
HTML report: /trainee2/Mar23/Data/cleandata/fastp/SRR1039510.fastp.html
fastp -i /trainee2/Mar23/Data/rawdata//SRR1039510_1.fastq.gz -I /trainee2/Mar23/Data/rawdata//SRR1039510_2.fastq.gz -o /trainee2/Mar23/Data/cleandata/fastp/SRR1039510_1.fastp.fq.gz -O /trainee2/Mar23/Data/cleandata/fastp/SRR1039510_2.fastp.fq.gz -h /trainee2/Mar23/Data/cleandata/fastp/SRR1039510.fastp.html -j /trainee2/Mar23/Data/cleandata/fastp/SRR1039510.fastp.json -l 36 -q 30 --compression=6 -R /trainee2/Mar23/Data/cleandata/fastp/SRR1039510
fastp v0.20.1, time used: 1 seconds
(rna) Mar23 16:03:05 ~/Data/cleandata/fastp
$ cat ~/Data/rawdata/fq/sample.ID | while read id
> do
> echo "fastp -i ${rawdata}/${id}_1.fastq.gz -I ${rawdata}/${id}_2.fastq.gz -o ${cleandata}/${id}_1.fastp.fq.gz -O ${cleandata}/${id}_2.fastp.fq.gz -l 36 -q 20 --compression=6 -R ${cleandata}/${id} -h ${cleandata}/${id}.fastp.html -j ${cleandata}/${id}.fastp.json 1>$cleandata/${id}.fastp.log 2>&1"
> done >fastp.sh
(rna) Mar23 16:03:31 ~/Data/cleandata/fastp
$ ls
fastp.sh SRR1039510_2.fastp.fq.gz SRR1039510.fastp.json
SRR1039510_1.fastp.fq.gz SRR1039510.fastp.html
(rna) Mar23 16:10:23 ~/Data/cleandata/fastp
$ less -S fastp.sh #不换行显示
fastp -i /trainee2/Mar23/Data/rawdata//SRR1039510_1.fastq.gz -I /trainee2/Ma...
fastp -i /trainee2/Mar23/Data/rawdata//SRR1039511_1.fastq.gz -I /trainee2/Ma...
fastp -i /trainee2/Mar23/Data/rawdata//SRR1039512_1.fastq.gz -I /trainee2/Ma...
(rna) Mar23 16:37:46 ~/Data/cleandata/fastp
$ nohup sh fastp.sh >fastp.log &
[1] 17669
(rna) Mar23 16:38:22 ~/Data/cleandata/fastp
$ nohup: ignoring input and redirecting stderr to stdout
ls
fastp.log SRR1039511.fastp.html
fastp.sh SRR1039511.fastp.json
SRR1039510_1.fastp.fq.gz SRR1039511.fastp.log
SRR1039510_2.fastp.fq.gz SRR1039512_1.fastp.fq.gz
SRR1039510.fastp.html SRR1039512_2.fastp.fq.gz
SRR1039510.fastp.json SRR1039512.fastp.html
SRR1039510.fastp.log SRR1039512.fastp.json
SRR1039511_1.fastp.fq.gz SRR1039512.fastp.log
SRR1039511_2.fastp.fq.gz
[1]+ Done nohup sh fastp.sh > fastp.log
(rna) Mar23 16:39:53 ~/Data/cleandata/fastp
$ ll
total 9868
drwxrwxr-x 2 Mar23 Mar23 4096 Apr 16 16:38 ./
drwxrwxr-x 4 Mar23 Mar23 4096 Apr 16 12:21 ../
-rw-rw-r-- 1 Mar23 Mar23 0 Apr 16 16:38 fastp.log
-rw-rw-r-- 1 Mar23 Mar23 1542 Apr 16 16:03 fastp.sh
-rw-rw-r-- 1 Mar23 Mar23 1396763 Apr 16 16:38 SRR1039510_1.fastp.fq.gz
-rw-rw-r-- 1 Mar23 Mar23 1399580 Apr 16 16:38 SRR1039510_2.fastp.fq.gz
-rw-rw-r-- 1 Mar23 Mar23 434756 Apr 16 16:38 SRR1039510.fastp.html
-rw-rw-r-- 1 Mar23 Mar23 92326 Apr 16 16:38 SRR1039510.fastp.json
-rw-rw-r-- 1 Mar23 Mar23 1428 Apr 16 16:38 SRR1039510.fastp.log
-rw-rw-r-- 1 Mar23 Mar23 1412004 Apr 16 16:38 SRR1039511_1.fastp.fq.gz
-rw-rw-r-- 1 Mar23 Mar23 1390176 Apr 16 16:38 SRR1039511_2.fastp.fq.gz
-rw-rw-r-- 1 Mar23 Mar23 437298 Apr 16 16:38 SRR1039511.fastp.html
-rw-rw-r-- 1 Mar23 Mar23 93042 Apr 16 16:38 SRR1039511.fastp.json
-rw-rw-r-- 1 Mar23 Mar23 1428 Apr 16 16:38 SRR1039511.fastp.log
-rw-rw-r-- 1 Mar23 Mar23 1424240 Apr 16 16:38 SRR1039512_1.fastp.fq.gz
-rw-rw-r-- 1 Mar23 Mar23 1448573 Apr 16 16:38 SRR1039512_2.fastp.fq.gz
-rw-rw-r-- 1 Mar23 Mar23 435781 Apr 16 16:38 SRR1039512.fastp.html
-rw-rw-r-- 1 Mar23 Mar23 92283 Apr 16 16:38 SRR1039512.fastp.json
-rw-rw-r-- 1 Mar23 Mar23 1422 Apr 16 16:38 SRR1039512.fastp.log
查看read1有接头的序列
(rna) Mar23 19:22:32 ~/Data/cleandata/fastp
$ zless ../../rawdata/fq/SRR1039510_1.fastq.gz | grep 'CAAGAAAACATCAGATCGGAAGAGCACACGTC'
CAGCACAGCCTCTCCTGCGGGCCAGCGTCATCAAGAAAACATCAGATCGGAAGAGCACACGTC
查看read2有接头的序列
(rna) Mar23 22:51:29 ~/Data/cleandata/fastp
$ zless ../../rawdata/fq/SRR1039510_2.fastq.gz | grep 'CTGGCAGGTCCCAGATCGGAAGAGCGT'
TTGGTTGGCAAGAAGGAGCTAACCACAAAAACGGTGCTGGCAGGTCCCAGATCGGAAGAGCGT
查看文件
(rna) Mar23 23:28:04 ~/Data/cleandata/compare
$ zcat ../trim_galore/SRR1039510_1_val_1.fq.gz | less -S
@SRR1039510.1 HWI-ST177:290:C0TECACXX:1:1101:1373:2104 length=63
TGGGAGGCTGAGGCAGGAGAATCACTTAAACCTGGGAGGCAGAGGTTACAGTGAGCCGAGATT
+
HJJJIJJJJJJJJIJJJGHHIJIIIIIIJJEHGGIJGIJIJJIJHHHGGFFDFFFDEDDDBDC
@SRR1039510.2 HWI-ST177:290:C0TECACXX:1:1101:1340:2124 length=63
AAAGAAGGCGACAGTGAGAAGGAGTCCGAGAAGAGTGATGGAGACCCAATAGTCGATCCTGAG
+
HJJJJJJJJJJJIJIIGIJJJJGJHJJJHHDFFFE@CEEEDDDDDDDDDDDDDDDBDDDDDDD
@SRR1039510.3 HWI-ST177:290:C0TECACXX:1:1101:1273:2183 length=63
CTGCTGGGCCCCAAGGTCCTCCTGGTCCCAGTGGTGAAGAAGGAAAGAGAGGCCCTAATGGGG
+
HJJJJJJJJJJJJJJJGIIIJJJJJHIJJJJHIJFHGIJJJJJJJHHHHHFFFDDDEDDDDDD
@SRR1039510.4 HWI-ST177:290:C0TECACXX:1:1101:1562:2147 length=63
CTTGGCTGCAGCCATCCCGCTTAGCCTGCCTCACCCACACCCGTGTGGTACCTTCAGCCCTGG
+
HJJJJJJJJJJJJJJJJJIJJJJJJJJJJJJIJHJJIJJJJJHHFFFFEEEEEEEDDDDDDDB
@SRR1039510.5 HWI-ST177:290:C0TECACXX:1:1101:1577:2181 length=63
TGAGACAGGTAATTCAGTATAGTAGATTAATATTTTTAATATATATTTTCCCTTAAGATTTCC
+
HIJJJJJJJEHJIJJJJIIIJJIIJJJJJJJJJJJJJJJJJJJJJJJJJEHJGI>FFCBGGGI
@SRR1039510.6 HWI-ST177:290:C0TECACXX:1:1101:1650:2181 length=63
ATTTCTCAGTGTAGAAATCATGTCTTCTTAATTGCTGAACCTTACTGCAAAAACTTGTGATGT
+
HJJJJJJJJJJJHIJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJIJJJJJJJJHHIJJD
:
4行变1行
(rna) Mar23 23:34:14 ~/Data/cleandata/compare
$ zcat ../trim_galore/SRR1039510_1_val_1.fq.gz |paste - - - - | less -S
写入trim.txt
(rna) Mar23 23:35:51 ~/Data/cleandata/compare
$ zcat ../trim_galore/SRR1039510_1_val_1.fq.gz |paste - - - - >trim.txt
(rna) Mar23 23:37:03 ~/Data/cleandata/compare
$ ls
trim.txt
查看序列长度小于50的reads信息（awk默认按空白分隔，首行的ID、仪器信息、length=各占一个字段，所以$4是序列）
(rna) Mar23 23:37:06 ~/Data/cleandata/compare
$ awk '(length($4)<50)' trim.txt | less -S
长度小于50的id
(rna) Mar23 23:47:22 ~/Data/cleandata/compare
$ awk '(length($4)<50) {print$1}' trim.txt | less -S
前10个id
(rna) Mar23 23:49:45 ~/Data/cleandata/compare
$ awk '(length($4)<50) {print$1}' trim.txt | head -n 10
@SRR1039510.751
@SRR1039510.809
@SRR1039510.1129
@SRR1039510.1490
@SRR1039510.1693
@SRR1039510.1821
@SRR1039510.1987
@SRR1039510.1999
@SRR1039510.2032
@SRR1039510.2096
(rna) Mar23 23:49:57 ~/Data/cleandata/compare
$ awk '(length($4)<50) {print$1}' trim.txt | head -n 10 >trim.id
(rna) Mar23 23:51:35 ~/Data/cleandata/compare
$ cat trim.id
@SRR1039510.751
@SRR1039510.809
@SRR1039510.1129
@SRR1039510.1490
@SRR1039510.1693
@SRR1039510.1821
@SRR1039510.1987
@SRR1039510.1999
@SRR1039510.2032
@SRR1039510.2096
把原始数据（过滤前）同样4行变1行，存为raw.txt
(rna) Mar23 23:52:48 ~/Data/cleandata/compare
$ zcat ../../rawdata/fq/SRR1039510_1.fastq.gz |paste - - - - >raw.txt
(rna) Mar23 23:55:31 ~/Data/cleandata/compare
$ less -S raw.txt
按trim.id中的id，分别从过滤之前(raw.txt)和过滤之后(trim.txt)的数据中提取对应的reads，存为raw.sm和trim.sm
(rna) Mar23 23:57:01 ~/Data/cleandata/compare
$ grep -w -f trim.id raw.txt >raw.sm
(rna) Mar23 00:01:56 ~/Data/cleandata/compare
$ grep -w -f trim.id trim.txt >trim.sm
(rna) Mar23 00:02:13 ~/Data/cleandata/compare
$ less -S raw.sm
(rna) Mar23 00:02:51 ~/Data/cleandata/compare
$ less -S trim.sm
进行比对
(rna) Mar23 00:03:17 ~/Data/cleandata/compare
$ paste raw.sm trim.sm |less -S
只保留过滤前后的序列（按tab分隔后的第2列和第6列），上下两行排列进行对比
(rna) Mar23 00:13:02 ~/Data/cleandata/compare
$ paste raw.sm trim.sm | awk -F'\t' '{print$2"\t"$6}' | tr '\t' '\n' | less -S