简介

本文记录了：
对各个细胞亚群在各个样本中所占比例进行秩和检验，并保存为tiff图像。

示例数据：PBMC

library(dplyr)
library(tidyr)
library(ggpubr)
library(coin)
library(ggrepel)

data <- readRDS("pbmcrenamed.rds")

数据有3个样本，9类细胞亚群。

unique(data$orig.ident)
[1] "C57" "AS1" "P3" 

unique(data@active.ident)
[1] VSMC          T cell        Fibro         EC            Myeloid cells B cell       
[7] Mono          Macro         Neut         
Levels: VSMC T cell Macro B cell Fibro Myeloid cells Mono Neut EC

取出细胞亚群和样本。

count_df <- data.frame(
  "cell_type" = data@active.ident, # cell type
  "subject" = data$orig.ident  # sample
)
head(count_df)
                         cell_type subject
AAAGGGCTCGCAGAGA-1_1          VSMC     C57
AACACACAGCATCCCG-1_1        T cell     C57
AACAGGGTCAACACCA-1_1         Fibro     C57
AACCCAAAGGCCTTGC-1_1            EC     C57
AACCCAAAGTGGACGT-1_1 Myeloid cells     C57
AACCCAAGTCCGAAGA-1_1        T cell     C57

计算细胞频数。

count_df <- data.frame(
  table(count_df$cell_type, count_df$subject)
)
head(count_df)
           Var1 Var2 Freq
1          VSMC  AS1    8
2        T cell  AS1   45
3         Macro  AS1   25
4        B cell  AS1   46
5         Fibro  AS1   16
6 Myeloid cells  AS1   52

因为计算频数后列名改变了，这里再改回来。

count_df <- rename(count_df, cell_type = Var1, subject = Var2)
head(count_df)
      cell_type subject Freq
1          VSMC     AS1    8
2        T cell     AS1   45
3         Macro     AS1   25
4        B cell     AS1   46
5         Fibro     AS1   16
6 Myeloid cells     AS1   52

为了计算各个样本中各个细胞亚群的所占比例，转化为宽数据。

wide_data <- pivot_wider(
  count_df,
  names_from = "subject",
  values_from = "Freq"
) %>% as.data.frame()

wide_data
      cell_type AS1 C57 P3
1          VSMC   8  63 29
2        T cell  45  32 23
3         Macro  25   3 72
4        B cell  46  39 15
5         Fibro  16  48 36
6 Myeloid cells  52  44  4
7          Mono  49  21 30
8          Neut  28   0 72
9            EC  18  59 23

得到频数后计算频率。

cell_prop <- data.frame(
  cell_type = wide_data$cell_type,
  prop.table(as.matrix(wide_data[, -1]), margin = 2)
)

cell_prop
      cell_type        AS1         C57         P3
1          VSMC 0.02787456 0.203883495 0.09539474
2        T cell 0.15679443 0.103559871 0.07565789
3         Macro 0.08710801 0.009708738 0.23684211
4        B cell 0.16027875 0.126213592 0.04934211
5         Fibro 0.05574913 0.155339806 0.11842105
6 Myeloid cells 0.18118467 0.142394822 0.01315789
7          Mono 0.17073171 0.067961165 0.09868421
8          Neut 0.09756098 0.000000000 0.23684211
9            EC 0.06271777 0.190938511 0.07565789

最后再转化为长数据，后续用于进行秩和检验。

cell_prop <- pivot_longer(
  cell_prop,
  cols = -cell_type,
  names_to = "subject",
  values_to = "percent"
) %>% as.data.frame()

cell_prop
  cell_type subject    percent
1      VSMC     AS1 0.02787456
2      VSMC     C57 0.20388350
3      VSMC      P3 0.09539474
4    T cell     AS1 0.15679443
5    T cell     C57 0.10355987
6    T cell      P3 0.07565789

假设“AS1”和“C57”为“group1”，“P3”为“group2”，后续用秩和检验，检验在两组间存在的差异细胞亚群。

cell_prop$pheno <- ifelse(cell_prop$subject %in% c("AS1", "C57"), "group1", "group2") %>%
  factor()

head(cell_prop)
  cell_type subject    percent  pheno
1      VSMC     AS1 0.02787456 group1
2      VSMC     C57 0.20388350 group1
3      VSMC      P3 0.09539474 group2
4    T cell     AS1 0.15679443 group1
5    T cell     C57 0.10355987 group1
6    T cell      P3 0.07565789 group2

进行秩和检验，并画图保存为tiff图像，图如下所示。

subject <- unique(cell_prop$cell_type)

for (i in 1:length(subject)) {
  subject_i <- cell_prop[cell_prop$cell_type == subject[i], ]
  
  print(as.character(unique(subject_i$cell_type)))
  
  wt <- wilcox_test(percent ~ pheno, data = subject_i, distribution = "exact")
  
  out_name <- paste0("No", i, "_", subject[i], ".boxplot.tiff")
  
  subject_i$percent <- 100 * subject_i$percent
  
  p_value_text <- paste0(as.character(unique(subject_i$cell_type)), ": P = ", round(pvalue(wt), 4))
  
  print(p_value_text)
  
  tiff(file = out_name, width = 820, height = 1000, units = "px", res = 300)
  
  # Add jitter points and errors (mean_se)
  
  print(
    ggboxplot(subject_i,
              x = "pheno", y = "percent", ylab = "(%) of T cell",
              label.pos = "out", lab.nb.digits = 1, lab.size = 3,
              add = c("jitter"), width = 0.5, color = "pheno",
              palette = c("#00AFBB", "#E7B800"), legend = NULL, shape = "pheno"
    ) +
      labs(title = p_value_text) +
      theme(plot.title = element_text(size = 9, face = "bold", hjust = 0.5)) +
      theme(axis.title.x = element_blank()) # hide x axis name
  )
  
  dev.off()
}

附录

library(dplyr)
library(tidyr)
library(ggpubr)
library(coin)
library(ggrepel)

data <- readRDS("pbmcrenamed.rds")

unique(data$orig.ident)
unique(data@active.ident)

count_df <- data.frame(
  "cell_type" = data@active.ident, # cell type
  "subject" = data$orig.ident  # sample
)
head(count_df)

count_df <- data.frame(
  table(count_df$cell_type, count_df$subject)
)
head(count_df)

count_df <- rename(count_df, cell_type = Var1, subject = Var2)
head(count_df)

wide_data <- pivot_wider(
  count_df,
  names_from = "subject",
  values_from = "Freq"
) %>% as.data.frame()
wide_data

cell_prop <- data.frame(
  cell_type = wide_data$cell_type,
  prop.table(as.matrix(wide_data[, -1]), margin = 2)
)
cell_prop

cell_prop <- pivot_longer(
  cell_prop,
  cols = -cell_type,
  names_to = "subject",
  values_to = "percent"
) %>% as.data.frame()
head(cell_prop)
sum(cell_prop$percent)

cell_prop$pheno <- ifelse(cell_prop$subject %in% c("AS1", "C57"), "group1", "group2") %>%
  factor()
head(cell_prop)

subject <- unique(cell_prop$cell_type)

for (i in 1:length(subject)) {
  subject_i <- cell_prop[cell_prop$cell_type == subject[i], ]
  
  print(as.character(unique(subject_i$cell_type)))
  
  wt <- wilcox_test(percent ~ pheno, data = subject_i, distribution = "exact")
  
  out_name <- paste0("No", i, "_", subject[i], ".boxplot.tiff")
  
  subject_i$percent <- 100 * subject_i$percent
  
  p_value_text <- paste0(as.character(unique(subject_i$cell_type)), ": P = ", round(pvalue(wt), 4))
  
  print(p_value_text)
  
  tiff(file = out_name, width = 820, height = 1000, units = "px", res = 300)
  
  # Add jitter points and errors (mean_se)
  
  print(
    ggboxplot(subject_i,
              x = "pheno", y = "percent", ylab = "(%) of T cell",
              label.pos = "out", lab.nb.digits = 1, lab.size = 3,
              add = c("jitter"), width = 0.5, color = "pheno",
              palette = c("#00AFBB", "#E7B800"), legend = NULL, shape = "pheno"
    ) +
      labs(title = p_value_text) +
      theme(plot.title = element_text(size = 9, face = "bold", hjust = 0.5)) +
      theme(axis.title.x = element_blank()) # hide x axis name
  )
  
  dev.off()
}

对各个细胞亚群在各个样本中所占比例进行秩和检验

简介

附录

推荐阅读更多精彩内容