批量转换SRA文件为FASTQ

无聊之作，水一水

话不多说，上代码

import argparse
import os
import subprocess
import sys

if __name__ == "__main__":
    # SRA2FASTQ batch-converts SRA files to FASTQ. This startup banner is
    # printed (author, what the script does, how to invoke it) whenever the
    # file is run as a script.
    print(
        *(
            "========================== SRA2FASTQ tool is running... ==========================",
            "Author: Yuntao Zhu",
            "This script will search for all SRA files in the specified directory.",
            "The output FASTQ files will be stored in the same directory as the SRA files.",
            "Usage: python sra2fastq.py <directory>",
            "Example: python sra2fastq.py /path/to/sra_files",
            "===================================================================================",
        ),
        sep="\n",
    )

def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``directory``: the directory to
    search for SRA files.
    """
    cli = argparse.ArgumentParser(
        description="Convert SRA files to FASTQ format using fasterq-dump.",
    )
    cli.add_argument(
        "directory",
        type=str,
        help="The directory to search for SRA files.",
    )
    namespace = cli.parse_args()
    return namespace

def convert_sra_to_fastq(directory, threads=8):
    """Convert every ``.sra`` file under *directory* to FASTQ with fasterq-dump.

    The tree rooted at *directory* is walked recursively, and files that sit
    directly in *directory* are included as well as those in subdirectories.
    Each conversion writes its output into the directory that holds the
    source SRA file.

    Args:
        directory: Root directory to search for ``.sra`` files.
        threads: Value passed to ``fasterq-dump --threads``.

    Returns:
        A list of the SRA file paths for which fasterq-dump exited with
        status 0, in processing order. Failed conversions are reported on
        stderr and skipped. If the ``fasterq-dump`` executable cannot be
        found, an error is printed to stderr and the paths converted so far
        are returned.
    """
    converted = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes the traversal order deterministic.
        dirs.sort()
        for file_name in sorted(files):
            if not file_name.endswith(".sra"):
                continue
            sra_file_path = os.path.join(root, file_name)

            # Pass an argument list without a shell so that paths containing
            # spaces or shell metacharacters reach fasterq-dump unchanged.
            cmd = [
                "fasterq-dump",
                "--threads", str(threads),
                "--outdir", root,
                "--split-3",
                sra_file_path,
            ]
            try:
                completed = subprocess.run(cmd, check=False)
            except FileNotFoundError:
                # Without the executable every remaining file would fail the
                # same way, so stop here instead of repeating the error.
                print(
                    "Error: fasterq-dump was not found on PATH; "
                    "is the SRA Toolkit installed?",
                    file=sys.stderr,
                )
                return converted

            if completed.returncode != 0:
                print(
                    f"Failed to convert {sra_file_path} "
                    f"(fasterq-dump exit code {completed.returncode})",
                    file=sys.stderr,
                )
                continue

            # Only report success when the tool actually succeeded.
            print(f"Converted {sra_file_path} to FASTQ in {root}")
            converted.append(sra_file_path)
    return converted

if __name__ == "__main__":
    # Read the target directory from the command line and run the conversion
    # with the thread count fixed at 8.
    convert_sra_to_fastq(directory=parse_args().directory, threads=8)

默认线程数为 8，比较保守。用法可以直接看代码，也可以参考运行时打印的提示信息。

如需调整线程数，把最后一行调用里的 `threads=8` 改成想要的数值即可。个人认为这个参数不需要每次都指定，所以没有把它做成命令行参数。

批量转换SRA文件为FASTQ

推荐阅读更多精彩内容