实验一
#练习一
# 1.1 产生一个等差数列(1,3,5,7,……,99)赋值给向量x,尝试不同的方式。
#1.1.1
x <- seq(from=1,to=99,by=2)
#1.1.2
x<-1:100
index = rep(c(TRUE, FALSE), 100)
x<-x[index]
x<-x[!is.na(x)]
#1.1.3
x<-NULL
num<-1
for(i in 1:50){
x[i]<-num
num<-num+2
}
# 1.2 产生一个内容重复的数列(1,2,3,4,5……,1,2,3,4,5),重复次数为10,并将其赋值给向量y。
y<-rep(1:5,10)
# 1.3 向量z由x和y组成。请判断x、y、z的属性是否为向量。
z <- c(x,y)
is.vector(x)
is.vector(y)
is.vector(z)
# 1.4 请删除x中第个1数值。
x <- x[-1]
# 1.5 请删除y中所有的取值1,有几种方法?
#1.5.1
y <- y[-which(y==1)]
#1.5.2
y<-y[!(y==1)]
# 1.6 请选择出x中第2个数值,以及>90的数值,结果存放在同一个向量中。
vector<-c(x[2],x[which(x>90)])
#可以使用c()函数创建多个值的向量
# 1.7 请选择出x中分别处于偶数位置和奇数位置的数值。
x_odd<- x[seq(from=1,to=length(x),by=2)]
x_even<-x[seq(from=2,to=length(x),by=2)]
#练习二
# 2.1 将自己的姓名、学号,性别,生日,以字符形式存放在向量m中。
m<-c("Tony","2020110","Male","1998.8.1")
# 2.2 为我们班30名学员产生一个编号,编号是NJU16-1, NJU16-2,……, NJU16-30,赋值给向量n。
n<-paste("NJU16",1:30,sep="-")
#seq: 连接的字符串
#paste (…, sep = " ", collapse = NULL)
#paste0()
# 2.3 仅保留30名学员编号中含”8”的编号。
n[grep("8",n)]
#grep函数和正则表达式一起进行筛选查询
#具体见“正则表达式.txt”
# 2.4 根据讲授的内容,你有几种方法实现以下的内容:
# 2.5 请将”I Like You”的动词取出。
statement<-"I Like You"
verb<-substr(statement,3,6)
# 2.6 请将”I Like You”替换为”I Love You”。
substr(statement,3,6) <- "Love"
statement
# 2.7 请将个人信息”Jack,Male,35,worker”分别取出。
strsplit("Jack,Male,35,worker",split=",")
#strsplit:拆分字符向量的元素
# 2.8 请将”+86-021-88681188Ext1”中的主机号取出。
substr("+86-021-88681188Ext1",5,16)
#练习三
# 3.1 计算mtcars数据中mpg、cyl、hp、drat的最小值、最大值、均数、标准差、中位数、25%分位数、75%分位数。
mpg<- data.frame(min(mtcars$mpg),max(mtcars$mpg),mean(mtcars$mpg),sd(mtcars$mpg),median(mtcars$mpg),quantile(mtcars$mpg,0.25),quantile(mtcars$mpg,0.75))
# 3.2 能否找到hp最大、最小的个体,请给出其个体编号。
hp_max <- which.max(mtcars$hp)
hp_max <- which(mtcars$hp==max(mtcars$hp))
hp_min<- which.min(mtcars$hp)
hp_min<- which(mtcars$hp==min(mtcars$hp))
# 3.3 请判断cyl中的取值有哪几个?能否给出每个取值的人数。
unique(mtcars$cyl)
table(mtcars$cyl)
# 3.4 请将hp大于均数的样本取出来。
sample <- mtcars[which(mtcars$hp>mean(mtcars$hp)),]
#练习四
#将下列使用excel创建下列数据,并分别保存为.csv格式,将
#导入R(提示:read.csv),转存为.Rdata
setwd("")
data <- read.csv("data.csv",header = TRUE)
save(read.data,file="read.Rdata")
实验二
# R语言练习
# 1.11.将成绩单按照姓名进行排序
2.将学生的各科考试成绩组合为单一的成绩衡量指标
3.基于相对名次(四等分)给出从A到D的评分(因子型)
Student <- c("John Davis", "Angela Williams", "Bullwinkle Moose","David Jones", "Janice Markhammer", "Cheryl Cushing","Reuven Ytzrhak", "Greg Knox", "Joel England","Mary Rayburn")
Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
Science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
English <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
roster <- data.frame(Student, Math, Science, English,stringsAsFactors=FALSE)
roster <- roster[order(roster$Student),]
# 1
Z_new <- scale(roster[,c("Math", "Science", "English")],center = T,scale = T)
roster$score <- apply(Z_new,1,mean)
#apply(X, MARGIN, FUN, …)
#X表示一个数组[一个矩阵]
#MARGIN: 1表示行,2表示列
#FUN表示一种遍历准则,比如连接函数等等
#Apply函数的介绍见:https://www.zhihu.com/question/39843392
Y_Cut <- quantile(roster$score,c(0,0.25,0.50,0.75,1))
roster$Rank <- cut(roster$score,breaks = Y_Cut ,include.lowest=T,labels = c('D','C','B','A'))
#cut:将数字转换成因子;include.lowest表示是否包括最低
# table(roster$Rank)
# 2.1分别使用for/while 求n!
For_ <- function(n){
q <- 1
for(i in 1:n){
q<- q*i
}
return(q)
}
# For_(2)
while_ <- function(n){
q<-1
i = 1
while(n>=i){
q<- q*i
i <- i+1
}
return(q)
}
# while_(3)
# 2.2编写程序计算 h(x,n)=1+x+x^2+……+x^n.
HF <- function(x,n){
q <-1
for(i in 1:n){
q <- q +x^n
}
return(q)
}
# HF(1,1) # HF(1,2)
# 2.3编写程序,求斐波那契数列第n项
fbo <- function(N){
if(N==1|N==2){
return(1)
}else{
return(fbo(N-1) + fbo(N-2))
}
}
# for(i in 1:10) {
# print(fbo(i))
# }
# 31.编写函数进行体操评分,输入10名评委所评分数,去除一个最高分,一个最低分,在计算出平均分作为选手得分。
2.使用备注代码生成数据,计算运动员得分。
set.seed(123465)
my_data <- data.frame(matrix(sample.int(100,1000,replace = T),100,10))
names(my_data) <- paste0('评委',1:10)
my_data$ID <- 1:100
Score <- function(x){
max_ <- which.max(x)[1]
min_ <- which.min(x)[1]
x <- x[-c(max_,min_)]
return(mean(x,na.rm = T))
}
my_data$score <- apply(my_data[,paste0('评委',1:10)],1,Score)
#
my_data$score <- 0
for(i in 1:nrow(my_data)){
my_data$score[i] <- Score(as.numeric(my_data[i,paste0('评委',1:10)]))
}
################################################
set.seed(123465)
my_data <- data.frame(matrix(sample.int(100,100000,replace = T),10000,10))
names(my_data) <- paste0('评委',1:10)
my_data$ID <- 1:100
a <- Sys.time()
my_data$score <- apply(my_data[,paste0('评委',1:10)],1,Score)
(b <- Sys.time() -a)
a <- Sys.time()
my_data$score <- 0
for(i in 1:nrow(my_data)){
my_data$score[i] <- Score(as.numeric(my_data[i,paste0('评委',1:10)]))
}
(b <- Sys.time() -a)
###########################################################
library(parallel)
cores <- detectCores()
cl <- makeCluster(cores-2)
Res <- parApply(cl = cl, my_data[,paste0('评委',1:10)],1,Score)
stopCluster(cl)
实验三
•根据mtcars数据,做出右图
mean<- aggregate(mtcars,list(mtcars$cyl),mean)
boxplot(mpg ~ cyl, data=mtcars,main="Car Mileage Data",
xlab="Number of Cylinders",ylab="Miles Per Gallon",
col = c('red','blue','green'))
points(2,c(mean$mpg[2]+5),col='red',pch = 8)
points(1:3,mean$mpg,col = grey(0.5),lwd=2,pch = 14)
lines(1:3,mean$mpg,col = grey(0.5),lwd=2,lty = 6)
•使用备注数据,做左图
x <- c(1:10)
y <- x
z <- 10/x
### 练习二
x <- c(1:10)
y <- x
z <- 10/x
plot(x, y, type="b",pch=21, col="red",
yaxt="n", lty=3, ann=FALSE)
lines(x, z, type="b", pch=22, col="blue", lty=2)
axis(2, at=x, labels=x, col.axis="red", las=2)
axis(4, at=z, labels=round(z, digits=2),
col.axis="blue", cex.axis=0.7, tck=-.01)
mtext("y=10/x", side=4, line=3, cex.lab=1, col="blue")
title("An Example of Creative Axes",
xlab="X values",ylab="Y=X")
###练习三
test3<- par(no.readonly=TRUE)
par(mfrow=c(2,2),cex.lab = 1.5,mar = c(6,6,2,2))
plot(mtcars$wt,mtcars$mpg,xlab = 'wt',ylab = 'mpg')
boxplot(mpg~cyl,data=mtcars)
plot(factor(mtcars$cyl),factor(mtcars$gear),
xlab = 'gear',ylab = 'cyl')
count<- table(mtcars$cyl)
pie(count, labels = paste0("cyl_",names(count)))
par(test3)
### 练习四
test4 <- par(no.readonly=TRUE)
par(fig=c(0, 0.8, 0, 0.8))
plot(mtcars$wt, mtcars$mpg,
xlab="Miles Per Gallon",ylab="Car Weight")
par(fig=c(0, 0.8, 0.45, 1), new=TRUE)
boxplot(mtcars$wt, horizontal=TRUE, axes=FALSE)
par(fig=c(0.65, 1, 0, 0.8), new=TRUE)
boxplot(mtcars$mpg, axes=FALSE)
实验四
###练习一:根据mtcars数据,做出右图
library(ggplot2)
mtcars$cyl <- factor(mtcars$cyl)
ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_boxplot( color="black",
notch=TRUE,
fill = c('red','blue','green'))+
labs(x ="Number of Cylinders" ,y="Miles Per Gallon")+
geom_point(position="jitter", color="blue", alpha=.5)
###练习二:根据mtcars数据,做出右图
ggplot(mtcars, aes(x=wt, y=mpg, size=disp)) +
geom_point(shape=21, color="black", fill="cornsilk") +
labs(x="Weight", y="Miles Per Gallon", size="Engine\nDisplacement")
###练习三:根据mtcars数据,做出右图
library(gridExtra)
p1 <- ggplot(data =mtcars ) + geom_point(aes(x=wt,y=mpg))
p2 <- ggplot(data =mtcars ) + geom_bar(aes(x= am,fill = cyl),position = 'fill')
grid.arrange(p1,p2, ncol=2)
##4
library(ggplot2)
mytheme <- theme(plot.title=element_text(face="bold.italic",
size="14", color="brown"),
axis.title=element_text(face="bold.italic",
size=10, color="brown"),
axis.text=element_text(face="bold", size=9,
color="darkblue"),
panel.background=element_rect(fill="white",
color="darkblue"),
panel.grid.major.y=element_line(color="grey",
linetype=1),
panel.grid.minor.y=element_line(color="grey",
linetype=2),
panel.grid.minor.x=element_blank(),
legend.position="top")
library(ggplot2)
mtcars$cyl <- factor(mtcars$cyl)
ggplot(mtcars, aes(x=cyl, y=mpg)) +
geom_boxplot( color="black",
notch=TRUE,
fill = c('red','blue','green'))+
labs(x ="Number of Cylinders" ,y="Miles Per Gallon")+
geom_point(position="jitter", color="blue", alpha=.5) + mytheme
ggplot(mtcars, aes(x=wt, y=mpg, size=disp)) +
geom_point(shape=21, color="black", fill="cornsilk") +
labs(x="Weight", y="Miles Per Gallon", size="Engine\nDisplacement")+ mytheme
###KM曲线
seq<-read.table("tcga-01-seq.txt",header=T)
data<-read.table("tcga-clinical data.txt",header=T)
data$patient.bcr_patient_barcode<-toupper(data$patient.bcr_patient_barcode)
data2<-merge(data,seq,by="patient.bcr_patient_barcode")
library(survival)
fit <- survfit(Surv(survival_time, patient.vital_status) ~ SAV1_exp, data = data2) #璁$畻鐢熷瓨鐜?
#plot(fit, lty = 2:1, col=1:2,ylab="Estimated survival function",xlab="Survival Time (months)",xlim=c(0,90))
plot(fit, col=c("green","red"),ylab="Estimated survival function",xlab="Survival Time (days)") #棰滆壊鎸夌収 exp=c(1,2)
##plot(1:26,pch=letters,col=1:26)
#axis(1, seq(0,100,by=10))
text(2150,0.75,"log-rank P<0.0001")
legend(500, .95, c("SAV1_high", "SAV1_low"),lty = 1:1, col=c("red","green"))
library("survminer")
b=ggsurvplot(fit,surv.median.line = "hv" , conf.int = TRUE, cumcensor = TRUE,pval = TRUE, xlab="OS (months)",ylab="Survival probability",palette=c("#E7B800", "#2E9FDF"),legend.labs =c("SAV1_low","SAV1_high") )
b
实验五
# 5.1:问该店每月的顾客数量是否服从均匀分布?
data1 <- c(27, 18, 15, 24, 36, 30)
ks.test(data1,y = 'runif')
# 5.2:问该地区学生的体重是否服从正态分布?
data2 <- c(36,36,37,38,40,42,43,43,44,45,48,48,50,50,51,
52,53,54,54,56,57,57,57,58,58,58,58,58,59,60,
61,61,61,62,62,63,63,65,66,68,68,70,73,73,75)
shapiro.test(data2)
#5.3:问不同种族与支持持政党之间是否存在独立性?
data3 <- data.frame(nrow = c(1,1,1,2,2,2),
ncol = c(1,2,3,1,2,3),
freq = c(341,405,105,103,11,15))
mid_tab = xtabs(freq~nrow+ncol,data = data3)
mid_table<-matrix(c(341,405,105,103,11,15),nrow=2,ncol=3)
chisq.test(mid_tab)
chisq.test(mid_table)
# 5.4:以淀粉为原料生产葡萄的过程中, 残留许多糖蜜, 可作为生产酱色的原料. 在生产酱色的过程之前应尽可能彻彻底底除杂, 以保证酱色质量.为此对除杂方法进行选择. 在实验中选用5种不同的除杂方法, 每种方法做4次试验, 即重复4次, 结果见表
X<-c(25.6, 22.2, 28.0, 29.8, 24.4, 30.0, 29.0, 27.5, 25.0, 27.7,
23.0, 32.2, 28.8, 28.0, 31.5, 25.9, 20.6, 21.2, 22.0, 21.2)
A<-factor(rep(1:5, each=4))
data4<-data.frame(X, A)
aov.mis<-aov(X~A, data=data4)
summary(aov.mis)
plot(data4$X~data4$A)
# # summary(fit)
# library(gplots)
# plotmeans(X ~ A, data=data4)
#
plot(TukeyHSD(aov.mis))
pairwise.t.test(X, A, p.adjust.method="none")
pairwise.t.test(X, A, p.adjust.method="bonferroni")
# p.adjust(p, method = p.adjust.methods, n = length(p))
# 5.5:为研究A、B、C三种饲料对猪的催肥效果,用每种饲料喂养8头猪一段时间,测得每头猪的初始重量(X)和增重(Y),数据见表,试分析三种饲料对猪的催肥效果是否相同?
group_<-rep(c("A","B","C"),each=8)
Weight_A <- c(15,13,11,12,12,16,14,17,17,16,
18,18,21,22,19,18,22,24,20,23,
25,27,30,32)
Y <-c(85,83,65,76,80,91,84,90,97,90,
100,95,103,106,99,94,89,91,83,
95,100,102,105,110)
data5<-data.frame(group_,Weight_A,Y)
res_5 <- aov(Y ~ Weight_A+group_ , data=data5)
summary(res_5)
###########################
my_own <- function(x){
res <- mean(x)
return(res)
}
my_own(c(1:100))
my_own(c(1:100,NA,NA,4,5))
my_own2 <- function(x,...){
res <- mean(x,...)
return(res)
}
my_own2(c(1:100,NA,NA,4,5),na.rm =T)