2025-07-17

1. Annotate the protein set with InterProScan (GO terms included)
# Run InterProScan 5.59-91.0 on tdom.pep.fa and write the result to output.tsv.
#   -i         input sequence file (presumably protein FASTA, going by the name)
#   -f tsv     tab-separated output format
#   -goterms   include GO term annotations
#   -iprlookup include the corresponding InterPro entry annotations
#   -pa        include pathway annotations
#   -cpu 16    number of CPU cores to use
#   -o         output file name
/pub/software/interproscan-5.59-91.0/interproscan.sh -i tdom.pep.fa -f tsv -goterms -iprlookup -pa  -cpu 16  -o output.tsv
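2. Keep only the rows that carry GO terms and convert them into the fSym / fChr / fGO tables (script 01.py is listed at the end)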
# Keep only the InterProScan rows that mention a GO term.
grep "GO:" output.tsv > output.tsv.tmp
# Convert the filtered rows into the fSym, fChr and fGO tables.
python3 01.py output.tsv.tmp
3. (in R) Build a custom OrgDb annotation package from the three tables
library(clusterProfiler)
library(dplyr)
library(stringr)
library(AnnotationForge)

# Read one of the tab-separated tables written by 01.py.
#
# Quote and comment handling are switched off on purpose: the description
# column is free text taken from InterProScan, so an apostrophe (as in
# "3'-5' exonuclease") or a '#' would otherwise be treated as a quote or
# comment character and corrupt or abort the parse.
#
# path      : file to read
# col_names : column names to assign, in file order
# returns   : a data.frame with one row per input line
read_annotation_table <- function(path, col_names) {
  read.table(
    path,
    sep = "\t",
    quote = "",
    comment.char = "",
    col.names = col_names,
    stringsAsFactors = FALSE
  )
}

my_fChr <- read_annotation_table("fChr", c("GID", "CHROMOSOME"))
my_fGO <- read_annotation_table("fGO", c("GID", "GO", "EVIDENCE"))
my_fSym <- read_annotation_table("fSym", c("GID", "SYMBOL", "GENENAME"))

# Build the OrgDb source package in the current directory. All three tables
# share the GID column as their key; goTable names the argument ("go") that
# holds the GO annotations.
makeOrgPackage(
  gene_info = my_fSym,
  chromosome = my_fChr,
  go = my_fGO,
  maintainer = "anxuan <so@someplace.org>",
  author = "anxuan <so@someplace.org>",
  tax_id = "19962",
  genus = "an",
  species = "xuan",
  goTable = "go",
  version = "0.1",
  outputDir = "./"
)
4. (in R) Install the generated package and run the GO enrichment
# Install the generated OrgDb package from its local source directory.
# repos = NULL tells install.packages() that the first argument is a path;
# the documented type value for a source install is "source".
install.packages(
  "/home/ug2092/anxuan/10.tdom.genome.cluster/01.interproscan/org.Axuan.eg.db",
  repos = NULL,
  type = "source"
)
library(org.Axuan.eg.db)

GO_database <- "org.Axuan.eg.db"

# One gene identifier per line, no header; the first column is used.
gset1 <- read.table(
  "../02.rihc/clade2-1.tdom.gene.list",
  header = FALSE,
  stringsAsFactors = FALSE
)
# Force character so the IDs are matched as text even if they look numeric.
g1 <- as.character(gset1$V1)

# GO over-representation test across all three ontologies.
# keyType = "SYMBOL": the IDs in the gene list are looked up in the SYMBOL
# column of the OrgDb, so they must use the same naming as that column.
# minGSSize = 1 keeps even GO terms annotated to a single gene.
g1_go <- enrichGO(
  g1,
  OrgDb = GO_database,
  keyType = "SYMBOL",
  ont = "ALL",
  pvalueCutoff = 0.05,
  minGSSize = 1,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05
)
5. (in R) Export the enrichment table and draw the plots
# Write the enrichment table as tab-separated text.
# NOTE(review): the @result slot is written as-is; it may also hold terms that
# did not pass the cutoffs - use as.data.frame(g1_go) if only the significant
# terms are wanted (confirm which one is intended).
write.table(
  g1_go@result,
  file = "../02.rihc/g1_go_results.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# Bar plot with one panel per ontology. facet_grid() is namespace-qualified
# because ggplot2 is not attached by any library() call in this script, and
# the argument is spelled out as `scales` instead of relying on partial
# matching of `scale`.
barplot(g1_go, split = "ONTOLOGY") +
  ggplot2::facet_grid(ONTOLOGY ~ ., scales = "free")

# Gene-concept network and gene/term heat map of the enriched terms.
enrichplot::cnetplot(g1_go, circular = FALSE, colorEdge = TRUE)
enrichplot::heatplot(g1_go, showCategory = 50)

01.py (the helper script called in step 2)

"""Convert GO-bearing InterProScan TSV rows into three tab-separated tables.

Usage: python3 01.py <interproscan.tsv>

Written to the current directory (overwritten on every run):
  fSym  numeric id, sequence id (column 1), description (column 6)
  fChr  numeric id, constant chromosome placeholder "1"
  fGO   numeric id, GO accession, evidence code "IEA"
"""
import re
import sys

# A bare GO accession. Extracting by pattern drops anything glued to the
# accession (for example a "(InterPro)" source suffix) and ignores the "|"
# separators between several accessions in one field.
GO_ID_PATTERN = re.compile(r"GO:\d{7}")


def collect_annotations(rows):
    """Group GO accessions by sequence id.

    rows: iterable of tab-separated text lines.
    Returns a dict keyed by the first column, in first-seen order. Each value
    holds "gid" (running number as a string, starting at "0"), "des" (column 6
    of the first row seen for that id) and "GO" (a set of accessions).
    Lines with fewer than six columns, including blank lines, are skipped.
    """
    genes = {}
    for raw in rows:
        fields = raw.strip().split("\t")
        if len(fields) < 6:
            continue
        seq_id = fields[0]
        record = genes.get(seq_id)
        if record is None:
            # len(genes) is the next unused running number.
            record = {"gid": str(len(genes)), "des": fields[5], "GO": set()}
            genes[seq_id] = record
        for field in fields:
            record["GO"].update(GO_ID_PATTERN.findall(field))
    return genes


def write_tables(genes):
    """Write fSym, fChr and fGO for the mapping built by collect_annotations.

    The files are opened once in write mode, so a re-run replaces the previous
    output instead of appending rows whose numeric ids would collide.
    GO accessions are written in sorted order to keep the output reproducible.
    """
    with open("fSym", "w") as sym_out, \
            open("fChr", "w") as chr_out, \
            open("fGO", "w") as go_out:
        for seq_id, record in genes.items():
            gid = record["gid"]
            sym_out.write(f'{gid}\t{seq_id}\t{record["des"]}\n')
            chr_out.write(f'{gid}\t1\n')
            for accession in sorted(record["GO"]):
                go_out.write(f'{gid}\t{accession}\tIEA\n')


def main(argv=None):
    """Script entry point; returns the process exit status (0 ok, 2 usage)."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "01.py"
        print(f"usage: {prog} <interproscan.tsv>", file=sys.stderr)
        return 2
    with open(argv[1], "r") as handle:
        genes = collect_annotations(handle)
    write_tables(genes)
    return 0


if __name__ == "__main__":
    sys.exit(main())

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容

  • 话说天下营销,分久必合,合久必分。自古以来,对营销这个玩意儿,世人的理解可谓是五花八门,各有各的道理,也各有各的糊...
    孤岛囚徒阅读 42评论 0 1
  • 第2种是与上级失联走投无路之下转换的阵营,这些人没有出卖,也不可能出卖任何人,他们的职业生涯等于重新开始,就跟现在...
    我辈之礼阅读 721评论 1 4
  • 为什么总被……?
    花生瓜子牛肉干阅读 26评论 0 1
  • 万般努力只为出人头地 低头弯腰只为爬的更高, 好好努力,好好搞钱
    如雪_5261阅读 18评论 0 0
  • 我的发小 我们三出生在一个四面环水的小岛,小学毕业各奔东西,小孟去了姑姑家镇上的初...
    伍月廿一阅读 13评论 0 1