[R数据] 合并 sample，计算标准差（sd）、平均值、变异系数（CV）


# Load the per-sample table and the sample-to-group table, then join them on
# the shared "sample" column. Both files are tab-separated with a header row.
merged_data <- read.delim(
  "merged_with_filenames.tsv",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
# print() so the preview also shows when the script is run through source().
print(head(merged_data))

group <- read.delim(
  "group.tsv",
  header = TRUE, sep = "\t", stringsAsFactors = FALSE
)
print(head(group))

# Fail early with a readable message if the join key is missing.
if (!"sample" %in% names(merged_data) || !"sample" %in% names(group)) {
  stop(
    "Both merged_with_filenames.tsv and group.tsv need a 'sample' column.",
    call. = FALSE
  )
}

# merge() performs an inner join: samples without a group entry disappear
# silently, and a sample listed twice in group.tsv duplicates its rows.
# Both cases would skew the downstream statistics, so surface them.
if (!all(merged_data[["sample"]] %in% group[["sample"]])) {
  warning(
    "Some samples in merged_with_filenames.tsv have no entry in group.tsv ",
    "and are dropped by the merge.",
    call. = FALSE
  )
}
if (anyDuplicated(group[["sample"]]) > 0) {
  warning(
    "group.tsv lists some samples more than once; their rows are duplicated ",
    "by the merge.",
    call. = FALSE
  )
}

merged_result <- merge(merged_data, group, by = "sample")
print(head(merged_result))
  
# Install dplyr on first use, then attach it.
# requireNamespace() only checks availability: unlike require() it neither
# attaches the package nor warns when it is missing, so library() below is the
# single place where dplyr is attached (and where a failed install surfaces
# as an error).
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)



# Per (k5, gene) summary of `num`: mean, standard deviation and coefficient
# of variation (CV = sd / mean), each rounded to 2 decimals.
#
# The CV is derived from the unrounded mean and sd and rounded once at the
# end; dividing the already-rounded values distorts the ratio for small
# numbers (e.g. 0.006 / 0.014 would become 0.01 / 0.01 = 1).
# Note: a group with a single row has sd = NA, and a mean of 0 gives a
# non-finite CV; such rows are kept as-is.
result <- merged_result %>%
  group_by(k5, gene) %>%
  summarize(
    mean_raw = mean(num),
    sd_raw = sd(num)
  ) %>%
  # summarize() leaves the table grouped by k5; drop that so later verbs
  # operate on the whole table.
  ungroup() %>%
  mutate(
    avg_count = round(mean_raw, 2),
    sd_count = round(sd_raw, 2),
    CV = round(sd_raw / mean_raw, 2)
  ) %>%
  # Keep the original output columns: k5, gene, avg_count, sd_count, CV.
  select(-mean_raw, -sd_raw)

print(result)
write.csv(result, file = "results.csv", row.names = FALSE)
merged_with_filenames.tsv
每个基因的结果

绘图代码

library(ggplot2)

# Box plot of the per-gene CV for every k5 group, filled by group.
# Wrapped in print() so the figure is also drawn when the script is run via
# source(), where top-level values are not auto-printed.
print(
  ggplot(result, aes(x = k5, y = CV, fill = k5)) +
    geom_boxplot() +
    labs(x = "k5", y = "CV") +
    theme_minimal() +
    # One colour per k5 category: adjust the number of colours and their
    # values to the categories actually present in k5. scale_fill_manual()
    # fails if k5 has more categories than colours listed here.
    scale_fill_manual(values = c("red", "blue", "green", "orange", "pink"))
)


# stat_compare_means() is provided by ggpubr; attach it here, otherwise the
# plot construction below fails with "could not find function".
library(ggpubr)

# Keep every row whose k5 group is not "Admix".
filtered_data <- result %>% filter(k5 != "Admix")

# Pairs of groups to compare against each other.
comparison <- list(c("Xian", "Geng"))

# Box plot of CV per k5 group with a significance annotation for each pair
# in `comparison`. Only "Xian" and "Geng" have an explicit fill colour.
# NOTE(review): if k5 holds groups other than Xian/Geng/Admix they remain in
# filtered_data without an assigned colour - confirm against the data.
p <- ggplot(filtered_data, aes(x = k5, y = CV, fill = k5)) +
  geom_boxplot() +
  labs(x = "k5", y = "CV") +
  theme_minimal() +
  scale_fill_manual(values = c("Xian" = "red", "Geng" = "blue")) +
  stat_compare_means(comparisons = comparison)

print(p)
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容