B-T.CR|| 一个免疫组库分析论坛:数据/软件与方法

无意中发现了一个免疫组库分析论坛:B-T.CR,内容很丰富。

我们都知道免疫组库的数据较多,很多时候不好描述,更不用说建模了,所以在分析的时候不免有种无从下手的感觉。B-T.CR(https://b-t.cr/)提供了开放的交流平台,涵盖了公开的免疫组库方法/数据/软件等。

这是一个值得经常逛逛的站点。

从他们列出的文章也不难看出,确实积累了很多的经验,提炼了不少新的想法:

+The Past, Present, and Future of Immune Repertoire Biology – The Rise of Next-Generation Repertoire Analysis

。。。

# Packages the script relies on: tidyverse (pipe, dplyr verbs, ggplot2),
# alakazam (readChangeoDb) and ggthemes (theme_few).
x <- c("tidyverse", "alakazam", "ggthemes")

# Attach with library() so a missing package stops the script right here;
# require() would only return FALSE and let the failure surface later as an
# unrelated "could not find function" error.
for (pkg in x) {
  library(pkg, character.only = TRUE)
}

# Folder containing the Change-O example files. The value ends with a path
# separator because the file name is appended with paste0() below.
# NOTE(review): machine-specific absolute path -- adjust before running.
path <- "C:\\Users\\86158\\Documents\\GZ04_immune\\Changeo_Example\\Changeo_Example\\"

# Load the Change-O database file for sample S43.
out <- readChangeoDb(paste0(path, "S43_db-pass_parse-select.tab"))

# Print the loaded table.
out

我们正常看到的VDJ的数据:

out
# A tibble: 1,374 x 54
   SEQUENCE_ID SEQUENCE_INPUT FUNCTIONAL IN_FRAME STOP  MUTATED_INVARIA~ INDELS V_CALL D_CALL J_CALL SEQUENCE_VDJ SEQUENCE_IMGT V_SEQ_START V_SEQ_LENGTH V_GERM_START_VDJ
   <chr>       <chr>          <lgl>      <lgl>    <lgl> <chr>            <lgl>  <chr>  <chr>  <chr>  <chr>        <chr>               <int>        <int>            <int>
 1 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV2~ IGHD1~ IGHJ3~ ACTGACCTGCA~ ............~          21          245               57
 2 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV3~ IGHD1~ IGHJ6~ GGGGGAGGCGT~ ............~          20          275               22
 3 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV6~ IGHD3~ IGHJ6~ CTGTGCCATCT~ ............~          20          242               63
 4 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               TRUE   IGHV4~ IGHD1~ IGHJ4~ CCCTGTCCCTC~ ............~          21          244               50
 5 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV7~ IGHD3~ IGHJ6~ AAGAAGCCTGG~ ............~          21          263               34
 6 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV1~ IGHD3~ IGHJ2~ AAGCCTGGGGC~ ............~          21          259               37
 7 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV7~ IGHD3~ IGHJ4~ AAGAAGCCTGG~ ............~          21          262               34
 8 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV1~ IGHD1~ IGHJ3~ GTCTCCTGCAA~ ............~          21          239               58
 9 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV2~ IGHD1~ IGHJ4~ CTGACGTGTTC~ ............~          22          243               58
10 SRR765688.~ NNNNNNNNNNNNN~ TRUE       TRUE     FALSE NA               FALSE  IGHV1~ IGHD6~ IGHJ6~ AAGCCTGGGTC~ ............~          21          259               37
# ... with 1,364 more rows, and 39 more variables: V_GERM_LENGTH_VDJ <int>, V_GERM_START_IMGT <int>, V_GERM_LENGTH_IMGT <int>, NP1_LENGTH <int>, D_SEQ_START <int>, D_SEQ_LENGTH <int>,
#   D_GERM_START <int>, D_GERM_LENGTH <int>, NP2_LENGTH <int>, J_SEQ_START <int>, J_SEQ_LENGTH <int>, J_GERM_START <int>, J_GERM_LENGTH <int>, JUNCTION_LENGTH <int>, JUNCTION <chr>,
#   FWR1_IMGT <chr>, FWR2_IMGT <chr>, FWR3_IMGT <chr>, FWR4_IMGT <chr>, CDR1_IMGT <chr>, CDR2_IMGT <chr>, CDR3_IMGT <chr>, V_SCORE <dbl>, V_IDENTITY <dbl>, V_EVALUE <dbl>,
#   V_BTOP <chr>, J_SCORE <dbl>, J_IDENTITY <dbl>, J_EVALUE <dbl>, J_BTOP <chr>, SEQORIENT <chr>, MID <chr>, VPRIMER <chr>, CPRIMER <chr>, DUPCOUNT <int>, V_GENE <chr>, D_GENE <chr>,
#   J_GENE <chr>, ISOTYPE <fct>

# Return, for every call string, the text in front of the first "*",
# truncated to at most 20 characters.
# NOTE(review): presumably this strips the allele suffix (e.g. "*01") from
# the gene call -- confirm against the Change-O column definitions.
call_prefix <- function(calls) {
  pieces <- strsplit(as.character(calls), "\\*")
  first_piece <- vapply(pieces, function(p) p[1], character(1))
  substr(first_piece, 1, 20)
}

# Gene-level columns derived from the V/D/J call columns.
out$V_GENE <- call_prefix(out$V_CALL)
out$D_GENE <- call_prefix(out$D_CALL)
out$J_GENE <- call_prefix(out$J_CALL)

# Isotype = C-primer name with every "-PCR" removed, stored as a factor with a
# fixed level order; values outside the listed levels become NA.
out$ISOTYPE <- factor(
  gsub("-PCR", "", out$CPRIMER),
  levels = c("IgD", "IgM", "IgA", "IgG", "IgE")
)


# Summed DUPCOUNT for every ISOTYPE / MID / V gene / D gene / J gene
# combination; the total is stored in `Frequency`.
a <- out %>%
  group_by(ISOTYPE, MID, V_GENE, D_GENE, J_GENE) %>%
  summarise(Frequency = sum(DUPCOUNT)) %>%
  # summarise() only removes the last grouping level; drop the remaining ones
  # so later verbs on `a` do not silently run per group.
  ungroup()

# V gene vs. D gene scatter, faceted by MID (rows) and ISOTYPE (columns).
# The plot starts from a zero-row slice of `a`, so only the aesthetics are
# inherited; each J gene then contributes its own point layer, shifted to a
# fixed offset inside the V/D cell (3 columns x 2 rows) so the six genes do
# not overplot. Point size encodes Frequency, colour encodes the J gene.
a[0, ] %>%
  ggplot(aes(V_GENE, D_GENE, group = J_GENE, color = J_GENE, size = Frequency)) +
  theme_few(8) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  # One layer per J gene, in the order IGHJ1 ... IGHJ6; adding a list of
  # layers to a ggplot appends them one after another.
  Map(
    function(j_name, dx, dy) {
      geom_point(
        data = filter(a, J_GENE == j_name),
        position = position_nudge(x = dx, y = dy),
        alpha = .5,
        shape = 16
      )
    },
    paste0("IGHJ", 1:6),
    rep(c(-.2, 0, .2), times = 2),
    rep(c(.1, -.1), each = 3)
  ) +
  facet_grid(MID ~ ISOTYPE)


# Per-J-gene drawing parameters, one row for each of IGHJ1 ... IGHJ6:
#   J_GENE : gene name used to filter the data
#   J.x/J.y: offset of that gene's points inside a V/D cell
#            (3 columns x 2 rows: IGHJ1-3 on top, IGHJ4-6 below)
#   color  : fixed hex colour used for that gene's points
J.nudge <- data.frame(
  J_GENE = paste0("IGHJ", 1:6),
  J.x = rep(c(-.2, 0, .2), 2),
  J.y = rep(c(.1, -.1), each = 3),
  color = c("#F8766D", "#B79F00", "#00BA38", "#00BFC4", "#619CFF", "#F564E3"),
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned.
  # Kept explicit so the columns stay character on R < 4.0 as well.
  stringsAsFactors = FALSE
)




# Base plot for the V/D/J figure: V gene on x, D gene on y, colour by J gene,
# faceted by MID (rows) and ISOTYPE (columns). It starts from a zero-row slice
# of `a`, so it carries aesthetics and theme only; point layers are added
# afterwards.
VDJplot <- a[0, ] %>%
  ggplot(aes(V_GENE, D_GENE, group = J_GENE, color = J_GENE)) +
  theme_few(8) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5),
    strip.text = element_text(size = 12, face = "bold")
  ) +
  # Fully transparent layer over all of `a`; presumably there so the axes and
  # the colour legend are built from the complete data set.
  geom_point(data = a, alpha = 0) +
  facet_grid(MID ~ ISOTYPE) +
  # Hide the size legend and draw the colour legend keys fully opaque.
  # "none" replaces the logical `F`, which is reassignable and deprecated as a
  # guide value in current ggplot2.
  guides(size = "none", color = guide_legend(override.aes = list(alpha = 1))) +
  scale_size(range = c(0, 10)) # scale_size adjusts the size of the points



# Add the points for each J gene as a stack of 30 layers. Layer k (k = 30
# down to 1) maps size to Frequency * k and is drawn with alpha 1 / k, so
# every point gets a faint, wide halo that becomes smaller and more opaque
# until the last layer (k = 1) draws the actual Frequency-sized, opaque dot.
for (i in J.nudge$J_GENE) {
  # Loop-invariant per-gene inputs: its rows of `a`, its offset and its colour.
  gene_rows <- a %>% filter(J_GENE == i)
  gene_spec <- J.nudge[J.nudge$J_GENE == i, ]

  for (k in 30:1) {
    # The multiplier is written into a data column instead of being used
    # inside aes(): aes() is evaluated lazily when the plot is built, so
    # `Frequency * k` there would see only the final value of `k` for every
    # layer.
    layer_rows <- gene_rows
    layer_rows$halo_size <- layer_rows$Frequency * k

    VDJplot <- VDJplot +
      geom_point(
        data = layer_rows,
        aes(size = halo_size),
        position = position_nudge(x = gene_spec$J.x, y = gene_spec$J.y),
        alpha = 1 / k,
        shape = 16,
        color = gene_spec$color
      )
  }
}
print(VDJplot)

··

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。