【Rna-seq 分析流程】05. Wilcoxon秩和检验比较

1. 简介

为了观察特征基因在训练集和验证集中是否具有一致的表达趋势,在训练集和验证集所有样本中,通过 Wilcoxon秩和检验比较AAV组样本和对照组样本中特征基因的表达水平


2. 数据信息

第一步数据处理的训练集和验证集,和上一步筛选出来的特征基因名

训练集:* GSE104954-GPL22945_series_matrix.txt.gz(储存表达量文件)

  • GPL22945.soft.gz (储存样本信息文件)

验证集:* GSE108113_series_matrix.txt.gz(储存表达量文件)

  • GPL19983.soft.gz (储存样本信息文件)

3. 思路

  1. 对训练集进行 Wilcoxon 秩和检验比较,记录具有显著性差异的特征基因,并可视化。
  2. 对验证集进行 Wilcoxon 秩和检验比较,记录具有显著性差异的特征基因,并可视化。
  3. 对比两个结果是否一致,并分析。

4. 代码

library(ggplot2)
library(dplyr)
library(tidyverse)
library(ggpubr)


##---- 1.数据准备 ----
hub_train_genes <- train_genes[rownames(train_genes) %in% keygene_names, ]

valid_genes <- read.csv("01.dat.GSE108113.csv", row.names = 1)
#valid_genes <- rownames_to_column(as.data.frame(exp_verify),"probe_id")
#valid_genes <- merge(valid_genes, hub_probe, by = "probe_id", all.y = T )
#hub_valid_genes <- column_to_rownames(subset(valid_genes, select = -probe_id),"Symbol")
hub_valid_genes <- valid_genes[rownames(valid_genes) %in% keygene_names, ]
valid_group <- read.csv("02.group.GSE108113.csv")
colnames(valid_group) <- c("Sample","label")


##---- 2. 定义一个函数进行Wilcoxon检验并筛选关键基因 ----
key_value_pair <- function(data, group) {
  data1 = rownames_to_column(data,"Gene")
  data_long <- data1 %>%
    pivot_longer(
      cols = -Gene,
      names_to = "Sample",
      values_to = "Value"
    )  
  
  data_merge <- merge(data_long, group, by.x = "Sample", by.y = "Sample")
  return(data_merge)
}


train_merge <- key_value_pair(hub_train_genes, train_group)
valid_merge <- key_value_pair(hub_valid_genes, valid_group)



# 对每个基因在两个组中进行Wilcoxon秩和检验
find_key_genes <- function(data_merge) {
  result <- data.frame(Gene = character(), p_value = numeric()) 
  for (gene in unique(data_merge$Gene)) {
    df <- data_merge[data_merge$Gene == gene, ]
    #p_value <- round(wilcox.test(df$Value ~ df$label,exact = F,correct = TRUE,conf.level = 0.95)$p.value, 4)
    p_value <- round(wilcox.test(df$Value ~ df$label,var.equal = TRUE)$p.value, 4)
    result <- rbind(result, data.frame(Gene = gene, p_value = p_value)) 
  }
  significant_Gene <- result %>% filter(p_value < 0.05) 
  return(list(res = result, sign_gene = significant_Gene))
}


train_result = find_key_genes(train_merge)
valid_result = find_key_genes(valid_merge)  



# 添加显著性标记
select_sign_gene <- function(result,data_merge) {
  significant_Gene <- result$sign_gene %>%
    mutate(signif_mark = ifelse(p_value < 0.01, "**", "*"))
  data_merge_new <- merge(data_merge, significant_Gene, by = "Gene", all.x = FALSE)  # 将原数据表只留下显著的数据
  return(list(significant_Gene = significant_Gene, data_merge_new = data_merge_new))
}

train_select_sign_gene <- select_sign_gene(train_result,train_merge)
valid_select_sign_gene <- select_sign_gene(valid_result,valid_merge)
train_select_sign_gene$data_merge_new <- train_select_sign_gene$data_merge_new %>%
  mutate(label = recode(label, "C" = "Control"))
valid_select_sign_gene$data_merge_new <- valid_select_sign_gene$data_merge_new %>%
  mutate(label = recode(label, "C" = "Control"))
write.csv(train_select_sign_gene$significant_Gene, file = "train_significant_Gene.csv", row.names = FALSE)
write.csv(valid_select_sign_gene$significant_Gene, file = "valid_significant_Gene.csv", row.names = FALSE)



# 绘制箱型图
plot_box <- function(data_merge_new) {
  p <- ggplot(data_merge_new, aes(x = Gene, y = Value, fill = label)) +
    geom_boxplot(position = position_dodge(width = 0.8), alpha = 0.7) +
    theme_minimal() +
    labs(title = "Significant Genes Expression",
         x = "Gene",
         y = "Expression Level") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    stat_compare_means(aes(group = label),    
                       method = "wilcox.test", 
                       label = "p.signif", 
                       label.y = max(data_merge_new$Value) * 1.1)
  return(p)
}

train_boxplot <- plot_box(train_select_sign_gene$data_merge_new)
valid_boxplot <- plot_box(valid_select_sign_gene$data_merge_new)


# 显示图形
print(train_boxplot)
print(valid_boxplot)
ggsave( "train_Wilcoxon.png", train_boxplot,dpi=500, width=8, height=5, device="png")
ggsave( "valid_Wilcoxon.png", valid_boxplot,dpi=500, width=8, height=5, device="png")
ggsave( "train_Wilcoxon.pdf", train_boxplot,width=10, height=10, device="pdf")
ggsave( "valid_Wilcoxon.pdf", valid_boxplot,width=10, height=10, device="pdf")


5. 结果展示

train_Wilcoxon.png

valid_Wilcoxon.png
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容