无意中发现了一个免疫组库分析论坛:B-T.CR,内容很丰富。
我们都知道免疫组库的数据较多,很多时候不好描述,更不用说建模了,所以在分析的时候不免有种无处下手的感觉。B-T.CR(https://b-t.cr/)提供了开放的交流平台,涵盖了公开的免疫组库方法、数据、软件等。
是值得常常逛逛的站点。
从他们列出的文章也不难看出,确实积累了很多的经验,提炼了不少新的想法:
The promise and challenge of high-throughput sequencing of the antibody repertoire.
Practical guidelines for B-cell receptor repertoire sequencing analysis.
Sequencing the functional antibody repertoire--diagnostic and therapeutic discovery.
The Diversity and Molecular Evolution of B-Cell Receptors during Infection
Studying the antibody repertoire after vaccination: practical applications.
High-throughput sequencing of the T-cell receptor repertoire: pitfalls and opportunities.
Sequence analysis of T-cell repertoires in health and disease.
The Past, Present, and Future of Immune Repertoire Biology – The Rise of Next-Generation Repertoire Analysis
。。。
# Packages this script depends on.
x <- c("tidyverse", "alakazam", "ggthemes")
# Attach with library() rather than require(): library() stops with an error
# as soon as a package is missing, whereas require() only returns FALSE and
# lets the script fail later at the first call into the missing package.
invisible(lapply(x, library, character.only = TRUE))

# Directory holding the example data. The trailing separator is part of the
# string, so the file name can be appended with paste0().
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
path <- "C:\\Users\\86158\\Documents\\GZ04_immune\\Changeo_Example\\Changeo_Example\\"

# Load the tab-delimited database into `out` and print it.
# readChangeoDb() is presumably supplied by alakazam; confirm that the
# installed version still exports it.
out <- readChangeoDb(paste0(path, "S43_db-pass_parse-select.tab"))
out
我们正常看到的VDJ的数据:
out
# A tibble: 1,374 x 54
SEQUENCE_ID SEQUENCE_INPUT FUNCTIONAL IN_FRAME STOP MUTATED_INVARIA~ INDELS V_CALL D_CALL J_CALL SEQUENCE_VDJ SEQUENCE_IMGT V_SEQ_START V_SEQ_LENGTH V_GERM_START_VDJ
<chr> <chr> <lgl> <lgl> <lgl> <chr> <lgl> <chr> <chr> <chr> <chr> <chr> <int> <int> <int>
1 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV2~ IGHD1~ IGHJ3~ ACTGACCTGCA~ ............~ 21 245 57
2 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV3~ IGHD1~ IGHJ6~ GGGGGAGGCGT~ ............~ 20 275 22
3 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV6~ IGHD3~ IGHJ6~ CTGTGCCATCT~ ............~ 20 242 63
4 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA TRUE IGHV4~ IGHD1~ IGHJ4~ CCCTGTCCCTC~ ............~ 21 244 50
5 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV7~ IGHD3~ IGHJ6~ AAGAAGCCTGG~ ............~ 21 263 34
6 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV1~ IGHD3~ IGHJ2~ AAGCCTGGGGC~ ............~ 21 259 37
7 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV7~ IGHD3~ IGHJ4~ AAGAAGCCTGG~ ............~ 21 262 34
8 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV1~ IGHD1~ IGHJ3~ GTCTCCTGCAA~ ............~ 21 239 58
9 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV2~ IGHD1~ IGHJ4~ CTGACGTGTTC~ ............~ 22 243 58
10 SRR765688.~ NNNNNNNNNNNNN~ TRUE TRUE FALSE NA FALSE IGHV1~ IGHD6~ IGHJ6~ AAGCCTGGGTC~ ............~ 21 259 37
# ... with 1,364 more rows, and 39 more variables: V_GERM_LENGTH_VDJ <int>, V_GERM_START_IMGT <int>, V_GERM_LENGTH_IMGT <int>, NP1_LENGTH <int>, D_SEQ_START <int>, D_SEQ_LENGTH <int>,
# D_GERM_START <int>, D_GERM_LENGTH <int>, NP2_LENGTH <int>, J_SEQ_START <int>, J_SEQ_LENGTH <int>, J_GERM_START <int>, J_GERM_LENGTH <int>, JUNCTION_LENGTH <int>, JUNCTION <chr>,
# FWR1_IMGT <chr>, FWR2_IMGT <chr>, FWR3_IMGT <chr>, FWR4_IMGT <chr>, CDR1_IMGT <chr>, CDR2_IMGT <chr>, CDR3_IMGT <chr>, V_SCORE <dbl>, V_IDENTITY <dbl>, V_EVALUE <dbl>,
# V_BTOP <chr>, J_SCORE <dbl>, J_IDENTITY <dbl>, J_EVALUE <dbl>, J_BTOP <chr>, SEQORIENT <chr>, MID <chr>, VPRIMER <chr>, CPRIMER <chr>, DUPCOUNT <int>, V_GENE <chr>, D_GENE <chr>,
# J_GENE <chr>, ISOTYPE <fct>
# Reduce a vector of *_CALL strings to a gene label: keep everything before
# the first "*" and cap the result at 20 characters.
# vapply() returns a plain character vector directly instead of relying on
# substr() to coerce the list produced by lapply().
gene_from_call <- function(calls) {
  first_piece <- vapply(
    strsplit(as.character(calls), "\\*"),
    function(pieces) pieces[1],
    character(1)
  )
  substr(first_piece, 1, 20)
}

out$V_GENE <- gene_from_call(out$V_CALL)
out$D_GENE <- gene_from_call(out$D_CALL)
out$J_GENE <- gene_from_call(out$J_CALL)

# Isotype label: CPRIMER with the "-PCR" text removed, stored as a factor with
# a fixed level order. Values outside the listed levels become NA.
out$ISOTYPE <- out$CPRIMER %>%
  gsub("-PCR", "", .) %>%
  factor(levels = c("IgD", "IgM", "IgA", "IgG", "IgE"))

# Sum DUPCOUNT for every ISOTYPE / MID / V / D / J combination.
# ungroup() drops the grouping that summarise() leaves behind, so later
# verbs applied to `a` do not silently operate per group.
a <- out %>%
  group_by(ISOTYPE, MID, V_GENE, D_GENE, J_GENE) %>%
  summarise(Frequency = sum(DUPCOUNT)) %>%
  ungroup()
# Bubble plot of V gene (x) against D gene (y), one panel per MID x ISOTYPE.
# Each J gene gets its own point layer, nudged to a fixed offset around the
# V/D grid position so that the six J genes do not overlap.
j_offsets <- list(
  list(gene = "IGHJ1", x = -.2, y = .1),
  list(gene = "IGHJ2", x = 0, y = .1),
  list(gene = "IGHJ3", x = .2, y = .1),
  list(gene = "IGHJ4", x = -.2, y = -.1),
  list(gene = "IGHJ5", x = 0, y = -.1),
  list(gene = "IGHJ6", x = .2, y = -.1)
)

# One semi-transparent point layer per J gene, in the order listed above.
j_layers <- lapply(j_offsets, function(spec) {
  geom_point(
    data = a %>% filter(J_GENE == spec$gene),
    position = position_nudge(x = spec$x, y = spec$y),
    alpha = .5,
    shape = 16
  )
})

# The base plot is built on an empty slice of `a`; all points come from the
# per-gene layers. The unassigned expression prints the plot at top level.
ggplot(
  a[0, ],
  aes(V_GENE, D_GENE, group = J_GENE, color = J_GENE, size = Frequency)
) +
  theme_few(8) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  j_layers +
  facet_grid(MID ~ ISOTYPE)
# Lookup table with one row per J gene: the x/y offset applied to its points
# and the fixed colour used to draw them.
# NOTE(review): the six hex colours look like ggplot2's default hue palette
# for six levels, which would keep them in sync with the legend; confirm.
J.nudge <- data.frame(
  J_GENE = paste0("IGHJ", 1:6),
  J.x = rep(c(-.2, 0, .2), 2),
  J.y = rep(c(.1, -.1), each = 3),
  color = c("#F8766D", "#B79F00", "#00BA38", "#00BFC4", "#619CFF", "#F564E3"),
  stringsAsFactors = FALSE
)

# Base plot that further layers are added to.
# - The geom_point layer with alpha = 0 draws nothing visible but carries the
#   J_GENE colour mapping; override.aes then forces the legend keys to
#   alpha = 1 so they are shown opaque.
# - The size legend is switched off with "none"; logical values such as F are
#   deprecated for guides() and F itself can be reassigned.
VDJplot <- a[0, ] %>%
  ggplot(aes(V_GENE, D_GENE, group = J_GENE, color = J_GENE)) +
  theme_few(8) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  geom_point(data = a, alpha = 0) +
  facet_grid(MID ~ ISOTYPE) +
  guides(
    size = "none",
    color = guide_legend(override.aes = list(alpha = 1))
  ) +
  scale_size(range = c(0, 10)) # scale_size adjusts the size of the points
# Add the visible points to VDJplot. For every J gene, a stack of layers is
# drawn at the same nudged position: the layer with multiplier k maps size to
# Frequency * k and uses alpha = 1 / k, for k = 30 down to 1. The outermost
# ring is therefore the largest and faintest, and the innermost point
# (k = 1) is fully opaque.
halo_steps <- 30

for (i in J.nudge$J_GENE) {
  # Look up everything that depends only on the J gene once per gene rather
  # than once per layer.
  gene_row <- J.nudge$J_GENE == i
  gene_data <- a %>% filter(J_GENE == i)
  nudge_x <- J.nudge$J.x[gene_row]
  nudge_y <- J.nudge$J.y[gene_row]
  gene_color <- J.nudge$color[gene_row]

  for (k in as.numeric(seq(halo_steps, 1))) {
    VDJplot <- VDJplot +
      geom_point(
        data = gene_data,
        # aes() is evaluated lazily when the plot is rendered, so the current
        # value of k is injected with !! instead of being looked up after
        # the loop has finished.
        aes(size = Frequency * !!k),
        position = position_nudge(x = nudge_x, y = nudge_y),
        alpha = 1 / k,
        shape = 16,
        color = gene_color
      )
  }
}

print(VDJplot)
··