#01.
# Simulation experiment.
# Fix the random-number seed so the simulated counts are reproducible.
set.seed(18200116)
# Monte Carlo check of a two-sided interval for the mean against the value 0.
#
# Draws `reps` independent samples of size `n` from a normal distribution with
# mean `m` and standard deviation `sd`. For each sample it builds the interval
#   mean(s) +/- qnorm(1 - a / 2) * sd(s) / sqrt(n)
# and counts how many intervals strictly contain 0.
#
# Args:
#   n:    sample size of each simulated sample (at least 2, so that the sample
#         standard deviation is defined).
#   m:    mean of the simulated normal distribution.
#   sd:   standard deviation of the simulated normal distribution.
#   a:    two-sided significance level used for the critical value.
#   reps: number of simulated samples (default 1000, the original fixed value).
#
# Returns:
#   The number of simulated intervals that do NOT contain 0, i.e.
#   `reps` minus the number of intervals that do.
#
# NOTE(review): the critical value is a normal quantile even though the
# standard deviation is estimated from the sample; for small `n` a t quantile
# (qt(1 - a / 2, df = n - 1)) may be what is intended -- confirm before changing.
test <- function(n, m, sd, a, reps = 1000) {
  stopifnot(n >= 2, reps >= 0)

  # The critical value does not depend on the sample, so compute it once.
  crit <- qnorm(1 - a / 2)

  covered <- 0
  for (i in seq_len(reps)) {
    s <- rnorm(n, mean = m, sd = sd)
    centre <- mean(s)
    # `sd` is also the name of a numeric argument here; the namespace prefix
    # makes it explicit that the function is being called.
    half_width <- stats::sd(s) * crit / sqrt(n)
    if (centre - half_width < 0 && 0 < centre + half_width) {
      covered <- covered + 1
    }
  }

  reps - covered
}
# Number of simulated intervals that miss 0, for sample sizes 10, 20 and 50
# at significance levels 0.05, 0.1 and 0.2 (population mean 0, sd 5).
test(n = 10, m = 0, sd = 5, a = 0.05)
test(n = 10, m = 0, sd = 5, a = 0.1)
test(n = 10, m = 0, sd = 5, a = 0.2)
test(n = 20, m = 0, sd = 5, a = 0.05)
test(n = 20, m = 0, sd = 5, a = 0.1)
test(n = 20, m = 0, sd = 5, a = 0.2)
test(n = 50, m = 0, sd = 5, a = 0.05)
test(n = 50, m = 0, sd = 5, a = 0.1)
test(n = 50, m = 0, sd = 5, a = 0.2)
#02.
# Import the data set into the data frame `data`.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists -- consider a relative path.
data <- read.csv("C:/Users/Administrator/Desktop/2018级生物统计班R语言平时测试试题/test_data.csv")
# (1) Distribution of the age variable: histogram with a kernel density curve.
#
# The histogram is drawn on the density scale (freq = FALSE) and the curve is
# added with lines(), so both layers share one coordinate system. Overlaying a
# second plot() via par(new = TRUE) would instead draw a new, unrelated set of
# axes on top of the bars.
# Missing ages are dropped for the density estimate (density() stops on NA).
p2 <- density(data$age, na.rm = TRUE)
p1 <- hist(data$age, plot = FALSE)
plot(
  p1,
  freq = FALSE,
  # Leave room for whichever layer is taller.
  ylim = c(0, max(p1$density, p2$y)),
  main = "Distribution of age",
  xlab = "age"
)
lines(p2)
# (2) Custom summary function, used to report body-mass index (bmi) and
# smoking intensity (packyr).
#
# Formats a vector as "median(Q1-Q3)": the 50th percentile followed by the
# 25th and 75th percentiles in parentheses. Missing values are ignored.
#
# Args:
#   x: vector accepted by quantile().
#
# Returns:
#   A single character string of the form "median(Q1-Q3)".
function1 <- function(x) {
  qs <- quantile(x, probs = c(0.5, 0.25, 0.75), na.rm = TRUE)
  paste0(qs[[1]], "(", qs[[2]], "-", qs[[3]], ")")
}
# Print the "median(Q1-Q3)" summary for the bmi and packyr columns.
function1(data[["bmi"]])
function1(data[["packyr"]])
# (3) Which of age, education, body-mass index and smoking differ
# significantly between the sexes?
# The conclusions noted after each test are the original author's reading of
# the output for their data set.

# Age by sex: two-sample t-test.
t.test(age ~ sex, data = data)
# p < 0.05: age differs between the sexes.

# Education by sex: Wilcoxon rank-sum test.
wilcox.test(education ~ sex, data = data)
# p > 0.05: education does not differ between the sexes.

# Body-mass index by sex: two-sample t-test.
t.test(bmi ~ sex, data = data)
# p > 0.05: body-mass index does not differ between the sexes.

# Smoking by sex: chi-squared test on the sex-by-smoke contingency table.
mytable1 <- xtabs(~ sex + smoke, data = data)
chisq.test(mytable1)
# p < 0.05: smoking differs between the sexes.
# (4) Use a suitable generalized linear model to assess how strongly lung
# cancer risk is associated with the baseline factors.
summary(data)

# Logistic regression (binomial family) of lung_ca on all baseline factors.
# NOTE(review): every predictor enters the model as read from the CSV; if any
# of them is a numerically coded category it may need factor() -- confirm.
full <- glm(
  lung_ca ~ age + sex + education + bmi + family_ca + smoke + packyr +
    respdis + secsmoke + exposure + drink + exercise,
  data = data,
  family = binomial()
)
summary(full)

# summary() reports coefficients on the log-odds scale. Exponentiate them to
# express association strength as odds ratios with Wald 95% confidence limits.
exp(cbind(OR = coef(full), confint.default(full)))