##########################################################
###第3题 方差分析
##示例如下: 上次作业
setwd("c:/Users/du/Desktop/K/")
data<-read.table("MPI_enzyme_activity.txt",sep = "",header = T)
data
library(dplyr)
## %>% 赋值给后面
group_by(data,Sex)%>%
summarise(count=n(),mean = mean(Activity,na.rm = TRUE),
sd = sd(Activity, na.rm = TRUE))
se_female<-0.856/sqrt(24)
se_female
se_male<-0.851/sqrt(12)
se_male
#2.1 boxplot 并做散点图,改变散点颜色,改变分组标签位置在图右边
library(ggpubr)
##ggpubr 是对ggplot的补充
ggboxplot(data,x="Genotype",y="Activity",color = "Sex",order = c("ff","fs","ss"))+
theme(legend.position = "right")+
geom_jitter(shape=16,position = position_jitter(0.2),aes(color=Genotype))
#2.2 interation
library("ggpubr")
ggline(data,x="Genotype",y="Activity",color = "Sex",add=c("mean_se","dotplot"), palette = c("#00AFBB", "#E7B800"))
#two way anova
aov<-aov(Activity~Genotype+Sex,data = data)
summary(aov)
#Two-way ANOVA with interaction effect
##此次第三次作业
data_3<-read.table("final3/lung_cancer.txt",sep = "\t",header=T)
library(ggpubr)
group_by(data_3,gender)%>%
summarise(count=n(),mean=mean(exp_A,na.rm = T),
sd=sd(exp_A,na.rm=T))
group_by(data_3,hospital)%>%
summarise(count=n(),mean=mean(exp_A,na.rm=T),sd=sd(exp_A,na.rm=T))
##做boxplot图:
ggboxplot(data=data_3,y="exp_A",x="gender",color = c("green","red"))
ggboxplot(data_3,y="exp_A",x="hospital")
##方差分析 two way anova
aov<-aov(exp_A~gender*hospital,data = data_3)
summary(aov)
##交互作用
library(HH)
interaction2wt(exp_A~gender*hospital,data=data_3)
##肺癌与蛋白maker是否具有强烈的相关性
##基本统计量分析
group_by(data_3,status)%>%
summarise(count=n(),mean=mean(exp_A,na.rm = T),
sd=sd(exp_A,na.rm=T))
ggboxplot(data=data_3,y="exp_A",x="status",color = c("green","red"))
##先关性分析--由于数据为0/1,可用logistic回归分析
glm_data_3<-glm(status~exp_A,data=data_3,family = binomial())
summary(glm_data_3)
glm_data_3_2<-glm(survival_status~exp_A+exp_actin+hospital+gender+smoke+drink+age,data=data_3,family = binomial())
summary(glm_data_3_2)
glm_data_3_3<-glm(survival_status~exp_A,data=data_3)
##检验合理性 Chisq中C大写
anova(glm_data_3_2,glm_data_3_3,test = "Chisq")