1.常用可视化R包
- 作图
- base
- ggplot2
- ggpubr
- 拼图
- par里mfrow
- grid.arrange
- cowplot
- customLayout
- patchwork
- 导出
- pdf()等三段论
- ggsave
- eoffice
- topptx
2.基础包-绘图函数
高级绘图函数
plot() #绘制散点图等多种图形,根据数据的类型,调用相应的函数绘图
hist() #频率直方图
boxplot() #箱线图
stripchart() #点图
barplot() #柱状图
dotchart() #点图(克利夫兰点图)
pie() #饼图
matplot() #将矩阵的各列绘制在同一张图中
低级绘图函数
lines() #添加线
curve() #添加曲线
abline() #添加给定斜率的线
points() #添加点
segments() #线段
arrows() #箭头
axis() #坐标轴
box() #外框
title() #标题
text() #文字
mtext() #图边文字
绘图参数
#参数用在函数内部,在没有设定值时使用默认值。
font=字体
lty=线类型
lwd=线宽度
pch=点的类型
xlab=横坐标
ylab=纵坐标
xlim = 横坐标范围
ylim=纵坐标范围
也可以对整个要绘制图形的各种参数进行设定
参见par()
3.ggplot2语法
1.入门级绘图模板
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(mapping = aes(<MAPPINGS>))
ggplot(data=iris)+
geom_point(mapping=aes(x=Sepal.Length,
y=Petal.Length))
2.映射-颜色、大小、透明度、形状
属性 | 参数 |
---|---|
x轴 | x |
y轴 | y |
颜色 | color |
大小 | size |
形状 | shape |
透明度 | alpha |
填充颜色 | fill |
- 图中加粗为可手动设置的参数
- 手动设置需要设为有意义的值
- 颜色:字符串,blue,red等
- 大小:单位mm
- 形状:数字编号表示
- 空心形状 0-14 color边框
- 实心形状 15-20 color填充
- 填充形状 21-24 color边框,和fill填充
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy), color = "blue")
-
映射vs手动设置
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy,color = class))
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy),color = "blue")
3.分面
ggplot(data = iris) +
geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) +
facet_wrap(~ Species)
-
双分面
test = iris #test在此之前未定义;iris共150行,与下面sample()的150一致
test$group = sample(letters[1:5],150,replace = T)
ggplot(data = test) +
  geom_point(mapping = aes(x = Sepal.Length, y = Petal.Length)) +
  facet_grid(group ~ Species)
4.几何对象
-
理解分组
ggplot(data = test) + geom_smooth(aes(x = Sepal.Length, y = Petal.Length,group = Species))
ggplot(data = test) +
geom_smooth(aes(x = Sepal.Length,
y = Petal.Length,color = Species))
-
几何对象可以叠加
#局部映射
ggplot(data = test) +
  geom_smooth(mapping = aes(x = Sepal.Length, y = Petal.Length))+
  geom_point (mapping = aes(x = Sepal.Length, y = Petal.Length))
#全局映射
ggplot(data = test, mapping = aes(x = Sepal.Length, y = Petal.Length)) +
  geom_smooth()+
  geom_point ()
- 映射
- 局部映射
- 仅对当前图层有效
- 全局映射
- 对所有图层有效
- 当局部映射和全局映射冲突时,以局部为准
- 局部映射
5.统计变换
ggplot(data = diamonds) +
geom_bar(mapping = aes(x = cut))
ggplot(data = diamonds) +
stat_count(mapping = aes(x = cut))
-
使用场景1:使用表中数据直接做图,而不是统计
ggplot(data = fre) + geom_bar(mapping = aes(x = Var1, y = Freq), stat = "identity")
-
使用场景2:不统计count,统计prop(比例)
ggplot(data = diamonds) + geom_bar(mapping = aes(x = cut, y = ..prop.., group = 1))
6.位置调整
-
位置关系
- geom_point()
- geom_jitter()
-
堆叠柱状图
ggplot(data = diamonds) + geom_bar(mapping = aes(x =cut,fill=clarity))
-
并列柱状图
ggplot(data = diamonds) + geom_bar(mapping = aes(x = cut, fill =clarity), position = "dodge")
7.坐标系
-
翻转coord_flip()
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) + geom_boxplot() + coord_flip()
-
极坐标系coord_polar()
bar <- ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)
bar + coord_flip()
bar + coord_polar()
bar + theme_classic()
bar + theme_dark()
8.完整绘图模板
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(
mapping = aes(<MAPPINGS>),
stat = <STAT>,
position = <POSITION>
) +
<COORDINATE_FUNCTION> +
<FACET_FUNCTION>
4.ggpubr
ggscatter(iris,x="Sepal.Length",y="Petal.Length",color="Species")
p <- ggboxplot(iris, x = "Species",
y = "Sepal.Length",
color = "Species",
shape = "Species",
add = "jitter") #ggpubr去掉了映射和图层的概念
p
my_comparisons <- list( c("setosa", "versicolor"), c("setosa", "virginica"),
c("versicolor", "virginica") )
p + stat_compare_means(comparisons = my_comparisons)+
stat_compare_means(label.y = 9)
5.图片保存
-
ggplot2系列:
ggsave("iris_box_ggpubr.png")
ggsave(p,filename = "iris_box_ggpubr2.png")
-
通用:三段论
# 保存的格式及文件名
pdf("test.pdf")
# 作图代码
# ..........
# .........
# 画完了,关闭画板
dev.off()
-
神奇eoffice
library(eoffice)
topptx(p,"iris_box_ggpubr.pptx")
6.拼图
- R包patchwork
- 语法简单,完美兼容ggplot2
- 拼图比例设置简单
- (1)支持直接p1+p2拼图,比任何一个包都简单
- (2)复杂的布局代码易读性更强
- (3)可以给子图添加标记(例如ABCD, I II III IV 这样)
- (4)可以统一修改所有子图
- (5)可以将子图的图例移到一起,整体性特别好
library(patchwork)
p1 = ggscatter(iris,x="Sepal.Length",
y="Petal.Length",
color="Species")
p2 <- ggboxplot(iris, x = "Species",
y = "Sepal.Length",
color = "Species",
shape = "Species",
add = "jitter")
p3 = ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
geom_boxplot()
p4 = ggplot(data = diamonds) +
geom_bar(
mapping = aes(x = cut, fill = cut),
show.legend = FALSE,
width = 1
)
p1 + p2 + p3 + p4 + plot_annotation(tag_levels = "A")
p1/p2
-
代码可运行却不出图——因为画板被占用
dev.off() #表示关闭画板
#多次运行dev.off(),到null device为止,再运行出图代码或dev.new()
7.进阶
1.tidyr 核心函数
-
tidyr的扁和长
### 原始数据
test <- data.frame(geneid = paste0("gene",1:4),
                   sample1 = c(1,4,7,10),
                   sample2 = c(2,5,0.8,11),
                   sample3 = c(0.3,6,9,12))
test
### 扁变长
test_gather <- gather(data = test,
                      key = sample_nm,
                      value = exp,
                      - geneid)
head(test_gather)
### 长变扁
test_re <- spread(data = test_gather,
                  key = sample_nm,
                  value = exp)
head(test_re)
-
tidyr的分与合
### 原始数据
test <- data.frame(x = c( "a,b", "a,d", "b,c"));test
### 分割
test_seprate <- separate(test,x, c("X", "Y"),sep = ",");test_seprate
### 合并
test_re <- unite(test_seprate,"x",X,Y,sep = ",")
-
处理NA
### 原始数据
X<-data.frame(X1 = LETTERS[1:5],X2 = 1:5)
X[2,2] <- NA
X[4,1] <- NA
### 1.去掉含有NA的行,可以选择只根据某一列来去除
drop_na(X)
drop_na(X,X1)
drop_na(X,X2)
### 2.替换NA
replace_na(X$X2,0)
### 3.用上一行的值填充NA
X
fill(X,X2)
2.dplyr
1.mutate(),新增列
test <- iris[c(1:2,51:52,101:102),]
rownames(test) =NULL
mutate(test, new = Sepal.Length * Sepal.Width)
2.select(),按列筛选
####(1)按列号筛选
select(test,1)
select(test,c(1,5))
####(2)按列名筛选
select(test,Sepal.Length)
select(test, Petal.Length, Petal.Width)
vars <- c("Petal.Length", "Petal.Width")
select(test, one_of(vars))
#####一组来自tidyselect的有用函数
select(test, starts_with("Petal"))
select(test, ends_with("Width"))
select(test, contains("etal"))
select(test, matches(".t."))
select(test, everything())
select(test, last_col())
select(test, last_col(offset = 1))
####(4)利用everything(),列名可以重排序
select(test,Species,everything())
3.filter()筛选行
filter(test, Species == "setosa")
filter(test, Species == "setosa"&Sepal.Length > 5 )
filter(test, Species %in% c("setosa","versicolor"))
4.arrange(),按某一列对整个表格进行排序
arrange(test, Sepal.Length)#默认从小到大排序
arrange(test, desc(Sepal.Length))#用desc从大到小
arrange(test, desc(Sepal.Width),Sepal.Length)
5.summarise():汇总
#对数据进行汇总操作,结合group_by使用实用性强
summarise(test, mean(Sepal.Length), sd(Sepal.Length))# 计算Sepal.Length的平均值和标准差:
# 先按照Species分组,计算每组Sepal.Length的平均值和标准差
group_by(test, Species)
tmp = summarise(group_by(test, Species),mean(Sepal.Length), sd(Sepal.Length))
6.两个实用技能
- 1:管道操作 %>% (cmd/ctrl + shift + M)
library(dplyr)
x1 = filter(iris,Sepal.Width>3)
x2 = select(x1,c("Sepal.Length","Sepal.Width" ))
x3 = arrange(x2,Sepal.Length)
colnames(iris)
iris %>%
filter(Sepal.Width>3) %>%
select(c("Sepal.Length","Sepal.Width" ))%>%
arrange(Sepal.Length)
- 2:count统计某列的unique值
count(test,Species)
##处理关系数据:即将2个表进行连接,注意:不要引入factor
options(stringsAsFactors = F)
test1 <- data.frame(name = c('jimmy','nicker','doodle'),
blood_type = c("A","B","O"))
test1
test2 <- data.frame(name = c('doodle','jimmy','nicker','tony'),
group = c("group1","group1","group2","group2"),
vision = c(4.2,4.3,4.9,4.5))
test2
test3 <- data.frame(NAME = c('doodle','jimmy','lucy','nicker'),
weight = c(140,145,110,138))
merge(test1,test2,by="name")
merge(test1,test3,by.x = "name",by.y = "NAME")
###1.內连inner_join,取交集
inner_join(test1, test2, by = "name")
inner_join(test1,test3,by = c("name"="NAME"))
###2.左连left_join
left_join(test1, test2, by = 'name')
left_join(test2, test1, by = 'name')
###3.全连full_join
full_join(test1, test2, by = 'name')
###4.半连接:返回能够与y表匹配的x表所有记录semi_join
semi_join(x = test1, y = test2, by = 'name')
###5.反连接:返回无法与y表匹配的x表的所有记录anti_join
anti_join(x = test2, y = test1, by = 'name')
3.stringr
1.检测字符串长度
library(stringr)
x <- "The birch canoe slid on the smooth planks."
x
length(x)
str_length(x)
2.字符串拆分与组合
str_split(x," ")
x2 = str_split(x," ")[[1]]
str_c(x2,collapse = " ")
str_c(x2,1234,sep = "+")
3.提取字符串的一部分
str_sub(x,5,9)
4.大小写转换
str_to_upper(x2)
str_to_lower(x2)
str_to_title(x2)
5.字符串排序
str_sort(x2)
6.字符检测
str_detect(x2,"h")
str_starts(x2,"T")
str_ends(x2,"e")
###与sum和mean连用,可以统计匹配的个数和比例
sum(str_detect(x2,"h"))
mean(str_detect(x2,"h"))
7.提取匹配到的字符串
str_subset(x2,"h")
8.字符计数
str_count(x," ")
str_count(x2,"o")
9.字符串替换
str_replace(x2,"o","A")
str_replace_all(x2,"o","A")
8.条件语句和循环语句
一.条件语句
###1.if(){ }
#### (1)只有if没有else,那么条件是FALSE时就什么都不做
i = -1
if (i<0) print('up')
if (i>0) print('up')
#理解下面代码
if(!require(tidyr)) install.packages('tidyr')
#### (2)有else
i =1
if (i>0){
cat('+')
} else {
print("-")
}
ifelse(i>0,"+","-")
x=rnorm(10)
y=ifelse(x>0,"+","-")
y
#### (3)多个条件
i = 0
if (i>0){
print('+')
} else if (i==0) {
print('0')
} else if (i< 0){
print('-')
}
ifelse(i>0,"+",ifelse((i<0),"-","0"))
### 2.switch()
cd = 3
foo <- switch(EXPR = cd,
#EXPR = "aa",
aa=c(3.4,1),
bb=matrix(1:4,2,2),
cc=matrix(c(T,T,F,T,F,F),3,2),
dd="string here",
ee=matrix(c("red","green","blue","yellow")))
foo
-
ifelse函数
- 三个参数
- ifelse(x,yes,no)
- x:逻辑值
- yes:逻辑值为TRUE时的返回值
- no:逻辑值为FALSE时的返回值
二、循环语句
### 1.for循环
#**顺便看一下next和break**
x <- c(5,6,0,3)
s=0
for (i in x){
s=s+i
#if(i == 0) next
#if (i == 0) break
print(c(which(x==i),i,1/i,s))
}
x <- c(5,6,0,3)
s = 0
for (i in 1:length(x)){
s=s+x[[i]]
#if(i == 3) next
#if (i == 3) break
print(c(i,x[[i]],1/i,s))
}
#如何将结果存下来?
s = 0
result = list()
for(i in 1:length(x)){
s=s+x[[i]]
result[[i]] = c(i,x[[i]],1/i,s)
}
do.call(cbind,result)
### 2.while 循环
i = 0
while (i < 5){
print(c(i,i^2))
i = i+1
}
### 3.repeat 语句
#注意:必须有break
i=0L
s=0L
repeat{
i = i + 1
s = s + i
print(c(i,s))
if(i==50) break
}
三、长脚本管理方式
- 1.分成多个脚本,每个脚本最后保存Rdata,下一个脚本开头清空再加载。
- 2.if(F){…},则{ }里的脚本被跳过;if(T){…},则{ }里的脚本被运行。凡是带有{ }的代码,均可以被折叠。
四、apply函数
apply(X, MARGIN, FUN, …)
test <- iris[1:6, 1:4] #apply会先把数据框转成矩阵,这里只保留数值列
apply(test, 2, mean)
apply(test, 1, sum)
#其中X是数据框/矩阵名;MARGIN为1表示取行,为2表示取列,FUN是函数
#对X的每一行/列进行FUN这个函数
五、R语言遍历、创建、删除文件夹
dir()
file.create(...)
file.exists(...)
file.remove(...)
file.rename(from, to)
file.append(file1, file2)
file.copy(from, to, overwrite = recursive, recursive = FALSE,
copy.mode = TRUE, copy.date = FALSE)
file.symlink(from, to)
file.link(from, to)
dir.create("doudou")
unlink("doudou",recursive = T)