# Read the per-sample table and the sample-to-group table (both tab-separated
# with a header row), previewing the first rows of each.
merged_data <- read.delim(
  file = "merged_with_filenames.tsv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
head(merged_data)

group <- read.delim(
  file = "group.tsv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
head(group)

# Join the two tables on their shared "sample" column. With merge() defaults
# only samples present in both tables are kept.
merged_result <- merge(x = merged_data, y = group, by = "sample")
head(merged_result)
# Install dplyr on first use, then attach it.
# requireNamespace() only checks availability (it does not attach the package),
# so the single library() call below is what loads dplyr and it stops with an
# error if the installation did not succeed.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
# Per (k5, gene) summary of `num`: mean, standard deviation and coefficient of
# variation (CV = sd / mean), each rounded to 2 decimals for reporting.
#
# CV is derived from the unrounded mean and sd and rounded only once at the
# end. Dividing the already-rounded values compounds the rounding error and
# yields Inf/NaN whenever a small mean rounds to 0.
# sd() returns NA for a group with a single observation, so sd_count and CV
# are NA for such groups.
result <- merged_result %>%
  group_by(k5, gene) %>%
  summarize(
    avg_count = round(mean(num), 2),
    sd_count = round(sd(num), 2),
    CV = round(sd(num) / mean(num), 2),
    # Return an ungrouped table so later verbs are not silently applied per k5.
    .groups = "drop"
  )
print(result)

# Persist the summary table next to the script.
write.csv(result, file = "results.csv", row.names = FALSE)
# Input file: merged_with_filenames.tsv
# Plotting code
library(ggplot2)
# Boxplot of per-gene CV for every k5 group, one fill colour per group.
# The colour vector must provide at least as many values as there are distinct
# k5 categories; adjust the number of colours and the colours themselves to
# match the categories actually present in the data.
p_all <- ggplot(result, aes(x = k5, y = CV, fill = k5)) +
  geom_boxplot() +
  labs(x = "k5", y = "CV") +
  theme_minimal() +
  scale_fill_manual(values = c("red", "blue", "green", "orange", "pink"))
# Print explicitly so the plot is also drawn when the script is run through
# source(), where top-level values are not auto-printed.
print(p_all)
# Keep only the rows whose k5 group is not "Admix".
# NOTE(review): any k5 group other than "Xian" and "Geng" that survives this
# filter has no entry in the manual fill scale below - confirm that only these
# two groups remain, or filter with k5 %in% c("Xian", "Geng").
filtered_data <- result %>% filter(k5 != "Admix")

# Pairs of groups to compare.
comparison <- list(c("Xian", "Geng"))

# Boxplot of CV per group with a significance annotation for each pair in
# `comparison`.
# stat_compare_means() comes from ggpubr, which this script never attaches, so
# it is called with an explicit namespace to avoid a
# "could not find function" error.
p <- ggplot(filtered_data, aes(x = k5, y = CV, fill = k5)) +
  geom_boxplot() +
  labs(x = "k5", y = "CV") +
  theme_minimal() +
  scale_fill_manual(values = c("Xian" = "red", "Geng" = "blue")) +
  ggpubr::stat_compare_means(comparisons = comparison)
print(p)