简介
本文记录了:
对各个细胞亚群在各个样本中所占比例进行秩和检验,并保存为tiff图像。
示例数据:PBMC
library(dplyr)
library(tidyr)
library(ggpubr)
library(coin)
library(ggrepel)
data <- readRDS("pbmcrenamed.rds")
数据有3个样本,9类细胞亚群。
unique(data$orig.ident)
[1] "C57" "AS1" "P3"
unique(data@active.ident)
[1] VSMC T cell Fibro EC Myeloid cells B cell
[7] Mono Macro Neut
Levels: VSMC T cell Macro B cell Fibro Myeloid cells Mono Neut EC
取出细胞亚群和样本。
count_df <- data.frame(
"cell_type" = data@active.ident, # cell type
"subject" = data$orig.ident # sample
)
head(count_df)
cell_type subject
AAAGGGCTCGCAGAGA-1_1 VSMC C57
AACACACAGCATCCCG-1_1 T cell C57
AACAGGGTCAACACCA-1_1 Fibro C57
AACCCAAAGGCCTTGC-1_1 EC C57
AACCCAAAGTGGACGT-1_1 Myeloid cells C57
AACCCAAGTCCGAAGA-1_1 T cell C57
计算细胞频数。
count_df <- data.frame(
table(count_df$cell_type, count_df$subject)
)
head(count_df)
Var1 Var2 Freq
1 VSMC AS1 8
2 T cell AS1 45
3 Macro AS1 25
4 B cell AS1 46
5 Fibro AS1 16
6 Myeloid cells AS1 52
因为计算频数后列名改变了,这里再改回来。
count_df <- rename(count_df, cell_type = Var1, subject = Var2)
head(count_df)
cell_type subject Freq
1 VSMC AS1 8
2 T cell AS1 45
3 Macro AS1 25
4 B cell AS1 46
5 Fibro AS1 16
6 Myeloid cells AS1 52
为了计算各个样本中各个细胞亚群的所占比例,转化为宽数据。
wide_data <- pivot_wider(
count_df,
names_from = "subject",
values_from = "Freq"
) %>% as.data.frame()
wide_data
cell_type AS1 C57 P3
1 VSMC 8 63 29
2 T cell 45 32 23
3 Macro 25 3 72
4 B cell 46 39 15
5 Fibro 16 48 36
6 Myeloid cells 52 44 4
7 Mono 49 21 30
8 Neut 28 0 72
9 EC 18 59 23
得到频数后计算频率。
cell_prop <- data.frame(
cell_type = wide_data$cell_type,
prop.table(as.matrix(wide_data[, -1]), margin = 2)
)
cell_prop
cell_type AS1 C57 P3
1 VSMC 0.02787456 0.203883495 0.09539474
2 T cell 0.15679443 0.103559871 0.07565789
3 Macro 0.08710801 0.009708738 0.23684211
4 B cell 0.16027875 0.126213592 0.04934211
5 Fibro 0.05574913 0.155339806 0.11842105
6 Myeloid cells 0.18118467 0.142394822 0.01315789
7 Mono 0.17073171 0.067961165 0.09868421
8 Neut 0.09756098 0.000000000 0.23684211
9 EC 0.06271777 0.190938511 0.07565789
最后再转化为长数据,后续用于进行秩和检验。
cell_prop <- pivot_longer(
cell_prop,
cols = -cell_type,
names_to = "subject",
values_to = "percent"
) %>% as.data.frame()
cell_prop
cell_type subject percent
1 VSMC AS1 0.02787456
2 VSMC C57 0.20388350
3 VSMC P3 0.09539474
4 T cell AS1 0.15679443
5 T cell C57 0.10355987
6 T cell P3 0.07565789
假设“AS1”和“C57”为“group1”,“P3”为“group2”,后续用秩和检验,检验在两组间存在的差异细胞亚群。
cell_prop$pheno <- ifelse(cell_prop$subject %in% c("AS1", "C57"), "group1", "group2") %>%
factor()
head(cell_prop)
cell_type subject percent pheno
1 VSMC AS1 0.02787456 group1
2 VSMC C57 0.20388350 group1
3 VSMC P3 0.09539474 group2
4 T cell AS1 0.15679443 group1
5 T cell C57 0.10355987 group1
6 T cell P3 0.07565789 group2
进行秩和检验,并画图保存为tiff图像,图如下所示。
subject <- unique(cell_prop$cell_type)
for (i in 1:length(subject)) {
subject_i <- cell_prop[cell_prop$cell_type == subject[i], ]
print(as.character(unique(subject_i$cell_type)))
wt <- wilcox_test(percent ~ pheno, data = subject_i, distribution = "exact")
out_name <- paste0("No", i, "_", subject[i], ".boxplot.tiff")
subject_i$percent <- 100 * subject_i$percent
p_value_text <- paste0(as.character(unique(subject_i$cell_type)), ": P = ", round(pvalue(wt), 4))
print(p_value_text)
tiff(file = out_name, width = 820, height = 1000, units = "px", res = 300)
# Add jitter points and errors (mean_se)
print(
ggboxplot(subject_i,
x = "pheno", y = "percent", ylab = "(%) of T cell",
label.pos = "out", lab.nb.digits = 1, lab.size = 3,
add = c("jitter"), width = 0.5, color = "pheno",
palette = c("#00AFBB", "#E7B800"), legend = NULL, shape = "pheno"
) +
labs(title = p_value_text) +
theme(plot.title = element_text(size = 9, face = "bold", hjust = 0.5)) +
theme(axis.title.x = element_blank()) # hide x axis name
)
dev.off()
}
附录
library(dplyr)
library(tidyr)
library(ggpubr)
library(coin)
library(ggrepel)
data <- readRDS("pbmcrenamed.rds")
unique(data$orig.ident)
unique(data@active.ident)
count_df <- data.frame(
"cell_type" = data@active.ident, # cell type
"subject" = data$orig.ident # sample
)
head(count_df)
count_df <- data.frame(
table(count_df$cell_type, count_df$subject)
)
head(count_df)
count_df <- rename(count_df, cell_type = Var1, subject = Var2)
head(count_df)
wide_data <- pivot_wider(
count_df,
names_from = "subject",
values_from = "Freq"
) %>% as.data.frame()
wide_data
cell_prop <- data.frame(
cell_type = wide_data$cell_type,
prop.table(as.matrix(wide_data[, -1]), margin = 2)
)
cell_prop
cell_prop <- pivot_longer(
cell_prop,
cols = -cell_type,
names_to = "subject",
values_to = "percent"
) %>% as.data.frame()
head(cell_prop)
sum(cell_prop$percent)
cell_prop$pheno <- ifelse(cell_prop$subject %in% c("AS1", "C57"), "group1", "group2") %>%
factor()
head(cell_prop)
subject <- unique(cell_prop$cell_type)
for (i in 1:length(subject)) {
subject_i <- cell_prop[cell_prop$cell_type == subject[i], ]
print(as.character(unique(subject_i$cell_type)))
wt <- wilcox_test(percent ~ pheno, data = subject_i, distribution = "exact")
out_name <- paste0("No", i, "_", subject[i], ".boxplot.tiff")
subject_i$percent <- 100 * subject_i$percent
p_value_text <- paste0(as.character(unique(subject_i$cell_type)), ": P = ", round(pvalue(wt), 4))
print(p_value_text)
tiff(file = out_name, width = 820, height = 1000, units = "px", res = 300)
# Add jitter points and errors (mean_se)
print(
ggboxplot(subject_i,
x = "pheno", y = "percent", ylab = "(%) of T cell",
label.pos = "out", lab.nb.digits = 1, lab.size = 3,
add = c("jitter"), width = 0.5, color = "pheno",
palette = c("#00AFBB", "#E7B800"), legend = NULL, shape = "pheno"
) +
labs(title = p_value_text) +
theme(plot.title = element_text(size = 9, face = "bold", hjust = 0.5)) +
theme(axis.title.x = element_blank()) # hide x axis name
)
dev.off()
}