title: "pared t-test 0h vs. 24h"
author: "hejubu"
date: "2023-11-09"
output: html_document
knitr::opts_chunk$set(echo = TRUE)
导入数据
rm(list=ls())
library(readxl)
library(ggplot2)
library(stringr)
library(tidyverse)
library(dplyr)
library(rstatix)
library(ggpubr)
chushai=list()
for (i in 1:2){
data = read_excel(paste("Cellclusterfrequencies_C", i, ".xlsx", sep = ""))
data = data[,-1]
chushai[[i]]= data
remove(data)
}
#进一步处理原始表达,使用stingr包
chu=list()
for (i in 1:length(chushai)){
df=chushai[[i]]
time=str_split(df$sample_id,pattern = "_",simplify = T)
class(time)
time=as.data.frame(time)
time=time[,-3]
df=cbind(df,time)
colnames(df)[4]="patientid"
colnames(df)[5]="timepoint"
chu[[i]]=df
}
两个for循环完成计算
Pairedtest=list()
for (i in 1:length(chu)){
chu_a = chu[[i]]
dt = split(chu_a, chu_a$cluster_id)
#创建空的外部列表用于存储数据
paired_test <- list()
#for循环完成对c1管不同细胞群的计算
for (j in 1:length(dt)){
#创建空的子列表
sublist <- list()
df = dt[[j]]
df_0h = split(df, df$timepoint)[[1]]
df_24h = split(df, df$timepoint)[[2]]
colnames(df_0h)[3]="0h"
colnames(df_24h)[3]="24h"
df_0h<-df_0h[,c(3,4)]
df_24h<-df_24h[,c(3,4)]
df <- merge(df_0h,df_24h, by = "patientid")
# Transform into long data:
# gather the before and after values in the same column
df_long <- df %>%
gather(key = "timepoint", value = "Freq", `0h`, `24h`) #这步是必须的,规定了链接“key"可以看作是一个加权
head(df_long, 3)
# Summary statistics
df_long %>%
group_by(timepoint) %>%
get_summary_stats(Freq, type = "mean_sd")
# Visualization
bxp <- ggpaired(df_long, x = "timepoint", y = "Freq",
order = c("0h", "24h"),
ylab = "Frequencies", xlab = "Time")
bxp
# Assumptions and preleminary tests
df <- df %>% mutate(differences=`24h`-`0h` );df
df
#identify outliers
df %>% identify_outliers(differences)
#Check normality assumption
df %>% shapiro_test(differences)
ggqqplot(df, "differences")
#Computation
stat.test <- df_long %>%
t_test(Freq ~ timepoint, paired = TRUE) %>%
add_significance()
stat.test
#To compute one tailed paired t-test, you can specify the option alternative as follow.
#if you want to test whether the average weight before treatment is greater than the average weight after treatment, type this:
df_long %>%
t_test(Freq ~ timepoint, alternative = "greater")
# Effect size
df_long %>% cohens_d(Freq ~ timepoint, paired = TRUE)
#存储本次迭代产生的数据
sublist[["outliers"]] <- df %>% identify_outliers(differences)
sublist[["effectsize"]] <- df_long %>% cohens_d(Freq ~ timepoint, paired = TRUE)
sublist[["stat.test"]] <- stat.test
#将子列表添加到外部列表中
paired_test[[j]]<-sublist
}
#输出数据
Pairedtest[[i]]=paired_test
}
Note that the echo = FALSE
parameter was added to the code chunk to prevent printing of the R code that generated the plot.