1. Symptoms:
The first time I did a metagenomic analysis I based it on kraken2, followed by assembly, quantification, conversion to biom format and so on; it is quite a multi-step pipeline. Each approach has its own strengths, and the exact differences between kraken and MetaPhlAn are worth looking up separately.
Today I mainly want to record the MetaPhlAn workflow and the code that generates the merged_abundance_table.txt file. I spent quite a while figuring out how to import merged_abundance_table.txt into phyloseq for nicer visualization (so far the only reference I found online is 新技能Get!宏基因组分析结果导入qiime2分析和可视化_qiime2怎么读取biom数据_zd200572的博客-CSDN博客),
but I did not get that part to run, so I will write it up in the next post. I also referred to raw.githubusercontent.com/fconstancias/DivComAnalyses/master/R/phyloseq_biobakery_functions.R. Since the test data only contains two samples, I will leave the visualization for next time; the pipeline itself ran successfully.
2. Treatment plan
import os
import time
import datetime
import threading
import argparse
import shutil
import json
import subprocess
import pandas as pd
method_path = 'plot_scripts'
refer_path = 'DB'  # directory that holds the MetaPhlAn v3 database (metaphlan_databasesV3, see below)
# 1: check function
def metaphlan_check(data_path: str = None,   ## String: path to the kneaddata output
                    input_type: str = None,  ## default: fastq, {fastq,fasta,bowtie2out,sam}
                    min_mapq_val=5,          ## default 5 (or 6,7,8): minimum mapping quality value
                    tax_lev: str = None,     ## default 'a' (or 'g','s'): taxonomic level of the relative abundance output
                    stat: str = None,        ## default 'tavg_g': statistical approach for converting marker abundances into clade
                                             ## abundances; one of 'avg_g','avg_l','tavg_g','tavg_l','wavg_g','wavg_l','med'
                    out_path: str = None     ## String: output path for the metaphlan step
                    ):
    ## check that the knead step has been run
    check_file = f'{data_path}/knead_call_file.txt'
    if not os.path.exists(check_file):
        raise SystemExit('No knead file! Please run the knead module first!')
    knead_param = pd.read_csv(check_file, header=None)
    cleandata_path = knead_param.at[0, 0]
    sampleinfo = knead_param.at[1, 0]
    inputtype = f'{input_type}'
    min_mapq_val = f'{min_mapq_val}'
    tax_lev = f'{tax_lev}'
    stat = f'{stat}'
    ## output directory
    os.makedirs(out_path, exist_ok=True)
    out_path = os.path.abspath(out_path)
    ## MetaPhlAn v3 database
    metaphlan_db = refer_path + '/metaphlan_databasesV3'
    metaphlan_path = os.path.abspath(metaphlan_db)
    ## write all parameters to a check file, one value per line,
    ## so that metaphlan_call can pick them up later
    check_file_path = f'{out_path}/metaphlan_check_file.txt'
    check_file = pd.DataFrame([
        cleandata_path,
        sampleinfo,
        inputtype,
        min_mapq_val,
        tax_lev,
        stat,
        out_path,
        metaphlan_path
    ])
    check_file.to_csv(check_file_path,
                      header=None, index=None, sep='\t', mode='w')
    return 0
## example invocation with the default parameters
data_path = 'kneaddout2'
input_type = 'fastq'     ## default: fastq, {fastq,fasta,bowtie2out,sam}
min_mapq_val = 5         ## default 5 (or 6,7,8): minimum mapping quality value
tax_lev = 'a'            ## default 'a' (or 'g','s'): taxonomic level of the relative abundance output
stat = 'tavg_g'          ## default 'tavg_g': statistical approach for converting marker abundances into clade abundances
out_path = 'metaphlanout'
metaphlan_check(data_path, input_type, min_mapq_val, tax_lev, stat, out_path)
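## --------------------------------------------------------------------------
## My own toy sketch (not part of the pipeline, all paths are made up):
## metaphlan_check() expects {data_path}/knead_call_file.txt from the knead
## step, with the cleaned-data directory on line 1 and the sample-sheet path
## on line 2; the sample sheet is a whitespace-separated table whose header
## starts with "sample" and whose first column is the sample ID used to find
## {cleandata_path}/{sample}/{sample}.fastq. The helper below just writes
## such toy files into a scratch directory to document the expected layout.
def _write_toy_knead_inputs(scratch_dir='toy_knead_inputs'):
    os.makedirs(scratch_dir, exist_ok=True)
    sample_sheet = os.path.join(scratch_dir, 'sample_info.txt')
    with open(sample_sheet, 'w') as fh:
        fh.write('sample\tgroup\n')      # header line, skipped by metaphlan_call()
        fh.write('S1\tcontrol\n')
        fh.write('S2\ttreat\n')
    with open(os.path.join(scratch_dir, 'knead_call_file.txt'), 'w') as fh:
        fh.write(os.path.abspath(scratch_dir) + '\n')   # line 1: cleaned-reads directory
        fh.write(os.path.abspath(sample_sheet) + '\n')  # line 2: sample sheet
# _write_toy_knead_inputs()  # uncomment for a quick look at the expected files
## --------------------------------------------------------------------------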
def execCmd(cmd):
    ## run a shell command and log its start and end time
    try:
        print("Command {} started at {}".format(cmd, datetime.datetime.now()))
        time.sleep(5)
        os.system(cmd)
        print("Command {} finished at {}".format(cmd, datetime.datetime.now()))
    except Exception:
        print("{}\tfailed to run".format(cmd))
def metaphlan_call(
        out_path: str = None  ## output directory of the metaphlan step
        ):
    ## check that metaphlan_check has been run
    metaphlan_check_path = f'{out_path}/metaphlan_check_file.txt'
    if not os.path.exists(metaphlan_check_path):
        raise SystemExit('No metaphlan_check_file! Please run metaphlan_check first!')
    ## read the parameters written by metaphlan_check, one per line
    check_params = pd.read_csv(metaphlan_check_path,
                               header=None, delimiter='\t')
    cleandata_path = check_params.at[0, 0]
    sampleinfo = check_params.at[1, 0]
    inputtype = check_params.at[2, 0]
    min_mapq_val = check_params.at[3, 0]
    tax_lev = check_params.at[4, 0]
    stat = check_params.at[5, 0]
    out_path = check_params.at[6, 0]
    metaphlan_path = check_params.at[7, 0]
    ## build one metaphlan command per sample listed in the sample sheet
    sample_info_dic = {}
    cmds = []
    with open(sampleinfo) as fh:
        for line in fh:
            if not line.startswith("sample"):   # skip the header line
                line = line.strip().split()
                sample_info_dic[line[0]] = line[1]
                cmd = f'''/root/miniconda3/envs/Rdoc/bin/metaphlan --input_type {inputtype} \
                    {cleandata_path}/{line[0]}/{line[0]}.fastq \
                    --bowtie2db {metaphlan_path} \
                    --output_file {out_path}/{line[0]}.tsv \
                    --tmp_dir {out_path} --min_mapq_val {min_mapq_val} \
                    --tax_lev {tax_lev} --stat {stat} --nproc 8 \
                    --read_min_len 70 --no_map '''
                cmds.append(cmd)
    ## start one thread per sample, then wait for all of them to finish
    threads = []
    for cmd in cmds:
        th = threading.Thread(target=execCmd, args=(cmd,))
        th.start()
        threads.append(th)
        time.sleep(2)
    for th in threads:
        th.join()
    time.sleep(10)
    ## merge the per-sample profiles into one abundance table
    merge_cmd = f'/root/miniconda3/envs/Rdoc/bin/merge_metaphlan_tables.py {out_path}/*.tsv ' \
                f'> {out_path}/merged_abundance_table.txt'
    execCmd(merge_cmd)
    ## record the path of the merged table for downstream modules
    metaphlan_outfile = out_path + '/merged_abundance_table.txt'
    metaphlan_outfile_path = os.path.abspath(metaphlan_outfile)
    call_file_path = f'{out_path}/metaphlan_call_file.txt'
    call_file = pd.DataFrame([metaphlan_outfile_path])
    call_file.to_csv(call_file_path,
                     header=None, index=None, sep='\t', mode='w')
    return 0
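Once metaphlan_call(out_path) has been run, a quick sanity check of the merged table is easy to do with pandas. This is my own small sketch (not part of the pipeline, and the exact header layout of the merge_metaphlan_tables.py output can differ slightly between MetaPhlAn versions): it loads merged_abundance_table.txt and keeps only the species-level clades.
import pandas as pd

## load the merged profile; merge_metaphlan_tables.py may prepend '#' comment lines
merged = pd.read_csv('metaphlanout/merged_abundance_table.txt', sep='\t', comment='#')
clade_col = merged.columns[0]  # usually 'clade_name', followed by one column per sample
## species-level rows contain 's__' but no strain-level 't__' suffix
species = merged[merged[clade_col].str.contains('s__')
                 & ~merged[clade_col].str.contains('t__')]
print(species.head())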
3. Additional notes
In the end the output folder will contain the files described above (the per-sample .tsv profiles, merged_abundance_table.txt, and the check/call files). The test data has already been packaged and can be deployed with a single docker command. Stay tuned, I will keep updating this.