首先构建一个dataframe,包含相关的metadata
读取内容包括转录组、TCR和Loom文件,注意看看TCR和转录组的barcode是否一致
注意:velocyto生成的loom文件的文件名中包含随机生成的字符串,因此应按后缀(*.loom)在输出目录中查找对应的文件
import os
import glob
# Load each sample's gene-expression matrix, velocyto loom file and TCR contig
# annotations, merge them into a single AnnData object and tag it with the
# sample metadata taken from `sample_info`.
#
# Results are exposed in two ways:
#   * one module-level variable named `<sample>_adata` per sample
#   * `sample_name_list`, holding those variable names in processing order
sample_name_list = []

# Extract the metadata columns once instead of rebuilding each list on every
# iteration.
sample_names = sample_info['name'].to_list()
time_points = sample_info['time_point'].to_list()
outcomes = sample_info['outcome'].to_list()

# NOTE(review): only the first 11 rows are processed, mirroring the original
# hard-coded `range(0, 11)` -- presumably the number of samples; confirm, and
# drop the slices if every row of `sample_info` should be loaded.
for name, time_point, outcome in zip(sample_names[:11], time_points[:11], outcomes[:11]):
    gex_outs = f"/home/user/myh/raw_data/LYU_CART/GEX/{name}_count_out/outs"

    # matrix
    matrix = sc.read_10x_mtx(
        os.path.join(gex_outs, "filtered_feature_bc_matrix"),  # the directory with the `.mtx` file
        var_names='gene_symbols',  # use gene symbols for the variable names (variables-axis index)
        cache=True,
    )

    # loom
    # The loom file name contains a random string, so it is located by its
    # suffix. Exactly one match is required: joining zero or several matches
    # into one string would silently produce an invalid path.
    loom_file_dir_list = glob.glob(os.path.join(gex_outs, "*.loom"))
    if len(loom_file_dir_list) != 1:
        raise FileNotFoundError(
            f"expected exactly one .loom file in {gex_outs}, "
            f"found {len(loom_file_dir_list)}: {loom_file_dir_list}"
        )
    loom_file_dir = loom_file_dir_list[0]
    loom_file = scv.read(loom_file_dir, cache=True)

    # TCR
    TCR_path = f"/home/user/myh/raw_data/LYU_CART/TCR/{name}_TCR_vdj_out/outs/filtered_contig_annotations.csv"
    TCR_file = ir.io.read_10x_vdj(TCR_path)
    # Drop the last two characters of every TCR barcode so they line up with
    # the merged object's obs names -- presumably the "-1" suffix; confirm the
    # barcodes actually match before relying on the merged TCR annotations.
    TCR_file.obs.index = TCR_file.obs.index.map(lambda barcode: str(barcode)[:-2])

    # merge
    adata = scv.utils.merge(matrix, loom_file)
    ir.pp.merge_with_ir(adata, TCR_file)

    # metadata
    adata.obs['Sample_name'] = f"{name}"
    patient = name.split("_")[0]  # text before the first underscore
    adata.obs['Patient'] = f"{patient}"
    adata.obs['Time_point'] = time_point
    adata.obs['Outcome'] = outcome

    # output
    # Bind the result to a module-level variable `<name>_adata`. Assigning via
    # globals() creates the same variable the former exec() call did, without
    # executing a string built from data.
    globals()[f'{name}_adata'] = adata
    sample_name_list.append(f'{name}_adata')

print(sample_name_list)