一共30个样本,要出弦图,先在jupyter里批量运行了PlotFancyVJUsage:
def circos():
import os
for i in (1,7,35,189,217):
for j in (1003,1501,1503,2001,2501,2503):
x = "day" + str(i) + "-" + str(j)
print(x)
cmd_string = "java -jar ./vdjtools-1.2.1.jar PlotFancyVJUsage "+x+".txt "+ x
print('x:{}'.format(cmd_string))
print(os.popen(cmd_string).read())
circos()
之前是在产生的错误文件(vj_pairing_plot.r)之里修改内容后一个个样本文件进行运行,想到这样太麻烦,就想写成一个函数,变化的是输入文件的名字
输入的文件名称的规律:
代码里输入文件的地方在:
args <- c("day1-2501.fancyvj.wt.txt", "day1-2501.fancyvj.wt.pdf")
temp <- read.table("day1-2501.fancyvj.wt.txt", sep="\t", comment="")
本来这个循环我是写成这样的:
但是要么报这个错
要么报这个错
因为手动输入文件名是可以运行的,所以觉得200G这个不存在……一直在找文件名哪里输的不对
然后改成只运行一个循环,就成功了
可能真的是需要那么多运行内存吧...
circle_plot <- function(){
for(i in c(1,7,35,189,217)){
x = paste('day',i,'-2503',sep = "")
y = '.fancyvj.wt.txt'
z = '.fancyvj.wt.pdf'
a = paste(x,y,sep = "")
b = paste(x,z,sep = "")
args <- c(a,b)
#args<-commandArgs(TRUE)
file_in <- args[1]
file_out <- args[2]
require(circlize); require(RColorBrewer)
# load data and preproc to fit formats
temp <- read.table(a, sep="\t", comment="")
n <- nrow(temp)
m <- ncol(temp)
rn = as.character(temp[2:n,1])
cn = apply(temp[1,2:m], 2 , as.character)
mat <- matrix(apply(temp[2:n, 2:m], 1:2, as.numeric), n - 1, m-1) * 100
n <- nrow(temp)
m <- ncol(temp)
# Here columns and rows correspond to V and J segments respectively
# Also replace possible duplicates (undef, '.', ...)
duplicates <- intersect(rn, cn)
rownames(mat) <- replace(rn, rn==duplicates, paste("V", duplicates, sep=""))
colnames(mat) <- replace(cn, cn==duplicates, paste("J", duplicates, sep=""))
# sort
col_sum = apply(mat, 2, sum)
row_sum = apply(mat, 1, sum)
mat <- mat[order(row_sum), order(col_sum)]
# equal number of characters for visualizaiton
rn <- rownames(mat)
cn <- colnames(mat)
maxrn <- max(nchar(rn))
maxcn <- max(nchar(cn))
for(i in seq_len(length(rn))) {
rn[i] <- paste(rn[i], paste(rep(" ", maxrn - nchar(rn[i])), collapse = ''))
}
for(i in seq_len(length(cn))) {
cn[i] <- paste(cn[i], paste(rep(" ", maxcn - nchar(cn[i])), collapse = ''))
}
rownames(mat) <- rn
colnames(mat) <- cn
# viz using circlize
if (grepl("\\.pdf$",file_out)){
pdf(file_out)
} else if (grepl("\\.png$",file_out)) {
png(file_out, width = 3.25,
height = 3.25,
units = "in",
res = 1200,
pointsize = 4)
} else {
stop('Unknown plotting format')
}
circos.par(gap.degree = c(rep(1, nrow(mat)-1), 10, rep(1, ncol(mat)-1), 15), start.degree = 5)
rcols <- rep(brewer.pal(12, "Paired"), nrow(mat)/12 + 1)[1:nrow(mat)]
ccols <- rep(brewer.pal(12, "Paired"), ncol(mat)/12 + 1)[1:ncol(mat)]
names(rcols) <- sort(rownames(mat))
names(ccols) <- sort(colnames(mat))
chordDiagram(mat, annotationTrack = "grid",
reduce = 0,
grid.col = c(rcols, ccols),
preAllocateTracks = list(track.height = 0.2), transparency = 0.5)
circos.trackPlotRegion(track.index = 1, bg.border = NA,
panel.fun = function(x, y) {
sector.name = get.cell.meta.data("sector.index")
xlim = get.cell.meta.data("xlim")
ylim = get.cell.meta.data("ylim")
circos.text(mean(xlim), ylim[1], cex = 0.5, sector.name, facing = "clockwise", adj = c(0, 0.5))
}
)
circos.clear()
dev.off()
}
}
circle_plot()