代码库11-人鼠基因名转换

人转鼠

#' Convert human gene symbols to their mouse homologs using homologene.
#'
#' Swap `inTaxID` and `outTaxID` to convert in the opposite direction.
#'
#' @param genes.in Character vector of gene symbols from the input species.
#' @param inTaxID Taxonomy ID of the input species (default 9606, human).
#' @param outTaxID Taxonomy ID of the output species (default 10090, mouse).
#' @return Character vector of homologous gene symbols in the output species.
#'   Only rows that survive de-duplication on both sides are kept, so the
#'   result can be shorter than `genes.in` and is not aligned to it.
transformHomoloGene <- function(genes.in, inTaxID = 9606, outTaxID = 10090) {
  # Namespace-qualified call: requires the package to be installed but does
  # not attach it to the caller's search path.
  res.home <- homologene::homologene(
    genes.in,
    inTax = inTaxID,
    outTax = outTaxID
  )

  # Keep the first row per input symbol, then the first row per output symbol,
  # so every symbol occurs at most once on each side of the mapping.
  res.home <- res.home[!duplicated(res.home[[1]]), , drop = FALSE]
  res.home <- res.home[!duplicated(res.home[[2]]), , drop = FALSE]

  # Column 1 echoes the input-species symbol; column 2 holds the homolog in
  # the output species, which is the actual conversion result.
  res.home[[2]]
}

鼠转人

#' Convert mouse gene symbols to their human homologs using homologene.
#'
#' Swap `inTaxID` and `outTaxID` to convert in the opposite direction.
#'
#' @param genes.in Character vector of gene symbols from the input species.
#' @param inTaxID Taxonomy ID of the input species (default 10090, mouse).
#' @param outTaxID Taxonomy ID of the output species (default 9606, human).
#' @return Character vector of homologous gene symbols in the output species.
#'   Only rows that survive de-duplication on both sides are kept, so the
#'   result can be shorter than `genes.in` and is not aligned to it.
transformHomoloGene <- function(genes.in, inTaxID = 10090, outTaxID = 9606) {
  # Namespace-qualified call: requires the package to be installed but does
  # not attach it to the caller's search path.
  res.home <- homologene::homologene(
    genes.in,
    inTax = inTaxID,
    outTax = outTaxID
  )

  # Keep the first row per input symbol, then the first row per output symbol,
  # so every symbol occurs at most once on each side of the mapping.
  res.home <- res.home[!duplicated(res.home[[1]]), , drop = FALSE]
  res.home <- res.home[!duplicated(res.home[[2]]), , drop = FALSE]

  # Column 1 echoes the input-species symbol; column 2 holds the homolog in
  # the output species, which is the actual conversion result.
  res.home[[2]]
}

使用 biomaRt 将人基因名转换为小鼠基因名

# Load the required package.
library(biomaRt)

# Connect to the human and mouse Ensembl gene datasets. Both use the same
# archive host (December 2021).
human_dataset <- useMart(
  "ensembl",
  dataset = "hsapiens_gene_ensembl",
  host = "https://dec2021.archive.ensembl.org/"
)
mouse_dataset <- useMart(
  "ensembl",
  dataset = "mmusculus_gene_ensembl",
  host = "https://dec2021.archive.ensembl.org/"
)

# Read the list of human gene names from an Excel file.
# `excel_file_path` must be defined before this line runs.
# NOTE(review): read.xlsx() is not provided by biomaRt; presumably
# openxlsx::read.xlsx() is intended -- attach that package first (TODO confirm).
gene_df <- read.xlsx(excel_file_path)

# Extract the human gene name column. The column is assumed to be called
# "Gene"; adjust this to match the actual spreadsheet.
human_genes <- gene_df$Gene
if (is.null(human_genes)) {
  stop("Column \"Gene\" was not found in the Excel sheet.", call. = FALSE)
}

# Convert each human gene name to its mouse counterpart(s), one query per
# gene. The result has the same length and order as `human_genes`.
mouse_genes <- vapply(
  human_genes,
  function(gene) {
    converted_genes <- getLDS(
      attributes = "external_gene_name",
      filters = "external_gene_name",
      values = gene,
      mart = human_dataset,
      attributesL = "external_gene_name",
      martL = mouse_dataset
    )

    # getLDS() returns a data frame with the primary-mart (human) attribute
    # first and the linked-mart (mouse) attribute second. Take the mouse
    # column directly instead of pasting the whole data frame and stripping
    # the human names out of the text afterwards.
    hits <- unique(converted_genes[[2]])
    hits <- hits[!is.na(hits) & nzchar(hits)]

    # Several matches are joined into one comma-separated string; a gene with
    # no mouse match yields an empty string.
    paste(hits, collapse = ", ")
  },
  character(1),
  USE.NAMES = FALSE
)

# Build the output data frame with the human gene column and the matching
# mouse gene column.
output_gene_df <- data.frame(
  Gene_Human = human_genes,
  Gene_Mouse = mouse_genes
)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容