1.
# Run InterProScan on the protein FASTA and write a TSV report (16 CPUs).
/pub/software/interproscan-5.59-91.0/interproscan.sh \
  -i tdom.pep.fa \
  -f tsv \
  -goterms \
  -iprlookup \
  -pa \
  -cpu 16 \
  -o output.tsv
2.
# Keep only the report rows that mention a GO term, then let 01.py turn them
# into the fSym / fChr / fGO tables used by the R steps below.
grep "GO:" output.tsv > output.tsv.tmp
python3 01.py output.tsv.tmp
3. (in R)
library(clusterProfiler)
library(dplyr)
library(stringr)
library(AnnotationForge)
# Load the three tab-delimited tables written by 01.py.
#
# Quote and comment handling are switched off for fSym: its third column is
# free text, and read.table()'s defaults (quote = "\"'", comment.char = "#")
# would merge or truncate rows whose description contains an apostrophe
# (e.g. "3'-5' exonuclease") or a "#".
my_fChr <- read.table(
  "fChr",
  col.names = c("GID", "CHROMOSOME"),
  sep = "\t"
)
my_fGO <- read.table(
  "fGO",
  col.names = c("GID", "GO", "EVIDENCE"),
  sep = "\t"
)
my_fSym <- read.table(
  "fSym",
  col.names = c("GID", "SYMBOL", "GENENAME"),
  sep = "\t",
  quote = "",
  comment.char = ""
)

# Drop exact duplicate rows up front.
# NOTE(review): makeOrgPackage() is expected to reject data frames that
# contain duplicated rows -- confirm against the installed AnnotationForge.
my_fChr <- unique(my_fChr)
my_fGO <- unique(my_fGO)
my_fSym <- unique(my_fSym)

# Build the custom OrgDb package in the current directory. With
# genus = "an" and species = "xuan" the package is named org.Axuan.eg.db,
# which is the name installed and loaded in step 4.
makeOrgPackage(
  gene_info = my_fSym,
  chromosome = my_fChr,
  go = my_fGO,
  maintainer = "anxuan <so@someplace.org>",
  author = "anxuan <so@someplace.org>",
  tax_id = "19962",
  genus = "an",
  species = "xuan",
  goTable = "go",
  version = "0.1",
  outputDir = "./"
)
4. (in R)
# Install the locally built OrgDb package from its source directory.
# `type` must be "source"; "sources" is not a documented value.
install.packages(
  "/home/ug2092/anxuan/10.tdom.genome.cluster/01.interproscan/org.Axuan.eg.db",
  repos = NULL,
  type = "source"
)
library(org.Axuan.eg.db)
GO_database <- "org.Axuan.eg.db"

# Read the gene list (one identifier per line, no header) as a plain vector.
gset1 <- read.table("../02.rihc/clade2-1.tdom.gene.list", header = FALSE)
g1 <- as.vector(gset1$V1)

# GO over-representation test against the custom OrgDb. The gene list is
# matched on the SYMBOL column, i.e. the identifiers stored in fSym.
g1_go <- enrichGO(
  g1,
  OrgDb = GO_database,
  keyType = "SYMBOL",
  ont = "ALL",
  pvalueCutoff = 0.05,
  minGSSize = 1,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.05
)
5. (in R)
# Save the enrichment table as a tab-separated file.
write.table(
  g1_go@result,
  file = "../02.rihc/g1_go_results.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# Bar plot with one facet per ontology. facet_grid() is namespace-qualified
# because ggplot2 is not attached by any library() call in these notes, and
# `scales` is spelled out in full instead of relying on partial matching.
barplot(g1_go, split = "ONTOLOGY") +
  ggplot2::facet_grid(ONTOLOGY ~ ., scales = "free")

# Gene-concept network and gene/term heat map.
# NOTE(review): `colorEdge` may be deprecated in newer enrichplot releases --
# confirm against the installed version.
enrichplot::cnetplot(g1_go, circular = FALSE, colorEdge = TRUE)
enrichplot::heatplot(g1_go, showCategory = 50)
01.py (helper script called in step 2)
import sys
def _parse_annotations(lines):
    """Group tab-separated annotation rows by the identifier in column 1.

    Returns a dict keyed by that identifier, in first-seen order. Each value
    holds a sequential string id ("gid", starting at "0"), a constant
    chromosome label ("chr"), the sixth column of the first row seen for the
    identifier ("des"; presumably the signature description -- confirm
    against the InterProScan TSV layout) and every "|"-separated entry of any
    field containing "GO:" ("GO", possibly with repeats).
    """
    records = {}
    for raw in lines:
        fields = raw.strip().split("\t")
        if fields == [""]:
            # A blank line carries no data; skip it rather than crash.
            continue
        key = fields[0]
        if key not in records:
            records[key] = {
                "GO": [],
                "chr": "1",
                # Ids follow first-seen order: 0, 1, 2, ...
                "gid": str(len(records)),
                "des": fields[5],
            }
        for field in fields:
            if "GO:" in field:
                records[key]["GO"].extend(field.split("|"))
    return records


def _write_tables(records):
    """Write fSym, fChr and fGO in the current directory.

    The files are opened once, in write mode, so running the script again
    replaces the tables instead of appending a second copy of every row.
    """
    with open("fSym", "w") as sym_out, \
            open("fChr", "w") as chr_out, \
            open("fGO", "w") as go_out:
        for key, rec in records.items():
            sym_out.write(f'{rec["gid"]}\t{key}\t{rec["des"]}\n')
            chr_out.write(f'{rec["gid"]}\t{rec["chr"]}\n')
            # Deduplicate, and sort so the output is stable between runs.
            for term in sorted(set(rec["GO"])):
                go_out.write(f'{rec["gid"]}\t{term}\tIEA\n')


def main(argv):
    """Convert the GO-filtered report named in argv[1] into the three tables."""
    if len(argv) < 2:
        sys.exit("usage: python3 01.py <go-filtered interproscan tsv>")
    with open(argv[1], "r") as handle:
        records = _parse_annotations(handle)
    _write_tables(records)


if __name__ == "__main__":
    main(sys.argv)