1. 简介
为了观察特征基因在训练集和验证集中是否具有一致的表达趋势,在训练集和验证集所有样本中,通过 Wilcoxon秩和检验比较AAV组样本和对照组样本中特征基因的表达水平
2. 数据信息
第一步数据处理的训练集和验证集,和上一步筛选出来的特征基因名
训练集:* GSE104954-GPL22945_series_matrix.txt.gz(储存表达量文件)
- GPL22945.soft.gz (储存样本信息文件)
验证集:* GSE108113_series_matrix.txt.gz(储存表达量文件)
- GPL19983.soft.gz (储存样本信息文件)
3. 思路
- 对训练集进行 Wilcoxon 秩和检验比较,记录具有显著性差异的特征基因,并可视化。
- 对验证集进行 Wilcoxon 秩和检验比较,记录具有显著性差异的特征基因,并可视化。
- 对比两个结果是否一致,并分析。
4. 代码
library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggpubr)
##---- 1.数据准备 ----
hub_train_genes <- train_genes[rownames(train_genes) %in% keygene_names, ]
valid_genes <- read.csv("01.dat.GSE108113.csv", row.names = 1)
#valid_genes <- rownames_to_column(as.data.frame(exp_verify),"probe_id")
#valid_genes <- merge(valid_genes, hub_probe, by = "probe_id", all.y = T )
#hub_valid_genes <- column_to_rownames(subset(valid_genes, select = -probe_id),"Symbol")
hub_valid_genes <- valid_genes[rownames(valid_genes) %in% keygene_names, ]
valid_group <- read.csv("02.group.GSE108113.csv")
colnames(valid_group) <- c("Sample","label")
##---- 2. 定义一个函数进行Wilcoxon检验并筛选关键基因 ----
key_value_pair <- function(data, group) {
data1 = rownames_to_column(data,"Gene")
data_long <- data1 %>%
pivot_longer(
cols = -Gene,
names_to = "Sample",
values_to = "Value"
)
data_merge <- merge(data_long, group, by.x = "Sample", by.y = "Sample")
return(data_merge)
}
train_merge <- key_value_pair(hub_train_genes, train_group)
valid_merge <- key_value_pair(hub_valid_genes, valid_group)
# 对每个基因在两个组中进行Wilcoxon秩和检验
find_key_genes <- function(data_merge) {
result <- data.frame(Gene = character(), p_value = numeric())
for (gene in unique(data_merge$Gene)) {
df <- data_merge[data_merge$Gene == gene, ]
#p_value <- round(wilcox.test(df$Value ~ df$label,exact = F,correct = TRUE,conf.level = 0.95)$p.value, 4)
p_value <- round(wilcox.test(df$Value ~ df$label,var.equal = TRUE)$p.value, 4)
result <- rbind(result, data.frame(Gene = gene, p_value = p_value))
}
significant_Gene <- result %>% filter(p_value < 0.05)
return(list(res = result, sign_gene = significant_Gene))
}
train_result = find_key_genes(train_merge)
valid_result = find_key_genes(valid_merge)
# 添加显著性标记
select_sign_gene <- function(result,data_merge) {
significant_Gene <- result$sign_gene %>%
mutate(signif_mark = ifelse(p_value < 0.01, "**", "*"))
data_merge_new <- merge(data_merge, significant_Gene, by = "Gene", all.x = FALSE) # 将原数据表只留下显著的数据
return(list(significant_Gene = significant_Gene, data_merge_new = data_merge_new))
}
train_select_sign_gene <- select_sign_gene(train_result,train_merge)
valid_select_sign_gene <- select_sign_gene(valid_result,valid_merge)
train_select_sign_gene$data_merge_new <- train_select_sign_gene$data_merge_new %>%
mutate(label = recode(label, "C" = "Control"))
valid_select_sign_gene$data_merge_new <- valid_select_sign_gene$data_merge_new %>%
mutate(label = recode(label, "C" = "Control"))
write.csv(train_select_sign_gene$significant_Gene, file = "train_significant_Gene.csv", row.names = FALSE)
write.csv(valid_select_sign_gene$significant_Gene, file = "valid_significant_Gene.csv", row.names = FALSE)
# 绘制箱型图
plot_box <- function(data_merge_new) {
p <- ggplot(data_merge_new, aes(x = Gene, y = Value, fill = label)) +
geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.7) +
theme_minimal() +
labs(title = "Significant Genes Expression",
x = "Gene",
y = "Expression Level") +
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
stat_compare_means(aes(group = label),
method = "wilcox.test",
label = "p.signif",
label.y = max(data_merge_new$Value) * 1.1)
return(p)
}
train_boxplot <- plot_box(train_select_sign_gene$data_merge_new)
valid_boxplot <- plot_box(valid_select_sign_gene$data_merge_new)
# 显示图形
print(train_boxplot)
print(valid_boxplot)
ggsave( "train_Wilcoxon.png", train_boxplot,dpi=500, width=8, height=5, device="png")
ggsave( "valid_Wilcoxon.png", valid_boxplot,dpi=500, width=8, height=5, device="png")
ggsave( "train_Wilcoxon.pdf", train_boxplot,width=10, height=10, device="pdf")
ggsave( "valid_Wilcoxon.pdf", valid_boxplot,width=10, height=10, device="pdf")
5. 结果展示
train_Wilcoxon.png
valid_Wilcoxon.png