- 前期回顾
1.1 tidyverse
1.2 dplyr
2.1 管道符号
2.2 stringr
3.1 条件语句和循环语句:if
3.2 长脚本管理方式
3.3 for 循环
3.4 while循环
3.5 apply
3.6 敲黑板
- 前期回顾
写代码的时候,图层按书写顺序依次叠加:哪个图的代码写在最前面,其画出来的图即在最底下;
当前后命令有冲突时,以后一个命令为准;
1.1 tidyverse
1.2 dplyr:mutate() 新增列;select() 按列筛选;filter() 按行筛选;arrange() 排序;
count() 统计重复值的个数
image.png
2.1 管道符号
image.png
2.2 stringr
image.png
> x <- "The birch canoe slid on the smooth planks."
> x
[1] "The birch canoe slid on the smooth planks."
> ###1.检测字符串长度
> length(x)
[1] 1
> str_length(x)
[1] 42
> ###2.字符串拆分与组合
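> str_split(x," ")
[[1]]
[1] "The" "birch" "canoe" "slid" "on" "the" "smooth"
[8] "planks."
> x2 = str_split(x," ")[[1]]
> str_c(x2,collapse = " ")
[1] "The birch canoe slid on the smooth planks."
> str_c(x2,1234,sep = "+")
[1] "The+1234" "birch+1234" "canoe+1234" "slid+1234" "on+1234"
[6] "the+1234" "smooth+1234" "planks.+1234"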
> ###3.提取字符串的一部分
> str_sub(x,5,9)
[1] "birch"
> ###4.大小写转换
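> str_to_upper(x2)
[1] "THE" "BIRCH" "CANOE" "SLID" "ON" "THE" "SMOOTH"
[8] "PLANKS."
> str_to_lower(x2)
[1] "the" "birch" "canoe" "slid" "on" "the" "smooth"
[8] "planks."
> str_to_title(x2)
[1] "The" "Birch" "Canoe" "Slid" "On" "The" "Smooth"
[8] "Planks."
> ###5.字符串排序
> str_sort(x2)
[1] "birch" "canoe" "on" "planks." "slid" "smooth" "the"
[8] "The"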
> ###6.字符检测
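> str_detect(x2,"h")
[1] TRUE TRUE FALSE FALSE FALSE TRUE TRUE FALSE
> str_starts(x2,"T")
[1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
> str_ends(x2,"e")
[1] TRUE FALSE TRUE FALSE FALSE TRUE FALSE FALSE
> ###与sum和mean连用,可以统计匹配的个数和比例
> sum(str_detect(x2,"h"))
[1] 4
> mean(str_detect(x2,"h"))
[1] 0.5
> ###7.提取匹配到的字符串
> str_subset(x2,"h")
[1] "The" "birch" "the" "smooth"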
> ###8.字符计数
> str_count(x," ")
[1] 7
> str_count(x2,"o")
[1] 0 0 1 0 1 0 2 0
> ###9.字符串替换
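> str_replace(x2,"o","A")
[1] "The" "birch" "canAe" "slid" "An" "the" "smAoth"
[8] "planks."
> str_replace_all(x2,"o","A")
[1] "The" "birch" "canAe" "slid" "An" "the" "smAAth"
[8] "planks."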
> tmp <-"Bioinformatics is a new subject of genetic data collection,analysis and dissemination to the research community."
> tmp2 = tmp %>%
+ str_replace(","," ") %>%
+ str_remove("[.]") %>%
+ str_split(" ")
> tmp2
[[1]]
[1] "Bioinformatics" "is" "a" "new" "subject"
[6] "of" "genetic" "data" "collection" "analysis"
[11] "and" "dissemination" "to" "the" "research"
[16] "community"
image.png
思路:先用 str_replace() 把中间的逗号替换成空格,再用 str_remove("[.]") 去掉末尾的句号,最后用 str_split() 按空格拆分成单词
3.1 条件语句和循环语句
ifelse
image.png
ifelse()永远只有三个参数:ifelse(条件, 条件为TRUE时的值, 条件为FALSE时的值);需要判断多个条件时,把另一个ifelse()嵌套在第三个参数里
3.2 长脚本管理方式
image.png
3.3 for 循环
包括两种:对向量的元素循环(for (i in x))/对向量的下标循环(for (i in 1:length(x)))
image.png
如何将结果存下来?
image.png
# A quick look at `next` and `break`, looping over the ELEMENTS of x.
# Each iteration prints: position(s) of the element in x, the element itself,
# its reciprocal, and the running sum so far.
x <- c(5, 6, 0, 3)
s <- 0
for (i in x) {
  s <- s + i
  # Uncomment ONE of the two lines below to see its effect on the zero element:
  # if (i == 0) next    # skip the print for this element and continue
  # if (i == 0) break   # stop the whole loop at this element
  position <- which(x == i)
  print(c(position, i, 1 / i, s))
}
# Same walk-through, this time looping over the INDICES of x.
# Each iteration prints: index, element, reciprocal of the element, running sum.
x <- c(5, 6, 0, 3)
s <- 0
for (i in seq_along(x)) {  # seq_along() is safe even when x is empty
  s <- s + x[[i]]
  # Uncomment ONE of the two lines below; index 3 is the zero element:
  # if (i == 3) next
  # if (i == 3) break
  # Reciprocal of the ELEMENT (not of the index), matching the element loop.
  print(c(i, x[[i]], 1 / x[[i]], s))
}

# Same computation, but storing each iteration's result instead of printing it.
s <- 0
result <- vector("list", length(x))  # preallocate one slot per element
for (i in seq_along(x)) {
  s <- s + x[[i]]
  result[[i]] <- c(i, x[[i]], 1 / x[[i]], s)
}
# Bind the per-iteration vectors side by side: one column per element of x.
do.call(cbind, result)
3.4 while循环(很容易死循环,容易烧电脑)
3.5 apply
image.png
3.6 敲黑板
image.png
练习题
# Exercise 2 ----
# Sentence used throughout this exercise:
# Bioinformatics is a new subject of genetic data collection,analysis and dissemination to the research community.
library(stringr)

# 1. Store the sentence above as one long string in tmp.
tmp <- "Bioinformatics is a new subject of genetic data collection,analysis and dissemination to the research community."

# 2. Split it into a vector of words, stored in tmp2 (mind the punctuation):
#    turn the comma into a space, drop the period, then split on spaces.
cleaned <- str_remove(str_replace(tmp, ",", " "), "[.]")
tmp2 <- str_split(cleaned, " ")[[1]]  # str_split() returns a list; take its only element

# 3. How many words does the sentence contain?
length(tmp2)

# 4. How many letters does each word have?
str_length(tmp2)

# 5. How many words in tmp2 contain the letter "e"?
#    The TRUE count in the table is the answer.
table(str_detect(tmp2, "e"))
# 1. Use a loop to draw a scatter plot (plot) for each of columns 1 to 4 of iris.
# Arrange the four plots in a 2 x 2 grid; par() returns the previous settings,
# which are restored afterwards so later base plots are not affected.
old_par <- par(mfrow = c(2, 2))
for (i in 1:4) {
  # Points are coloured by column 5 of iris; the y axis is labelled with the
  # column name so the four panels can be told apart.
  plot(iris[, i], col = iris[, 5], ylab = colnames(iris)[i])
}
par(old_par)
# 2. Build a 10 x 6 matrix of random numbers (rnorm) with columns
#    sample1..sample6 and rows gene1..gene10. sample1-3 belong to group A,
#    sample4-6 to group B. Draw a ggplot2 boxplot for every gene and combine
#    the 10 plots into one figure.
library(dplyr)      # mutate()
library(ggplot2)
library(patchwork)  # wrap_plots()

# NOTE: the name `exp` shadows base::exp() for the rest of the script.
exp <- matrix(rnorm(60), nrow = 10)
colnames(exp) <- paste0("sample", 1:6)
rownames(exp) <- paste0("gene", 1:10)
exp[1:4, 1:4]

# Transpose so that samples are rows and genes are columns, then add the group.
# (cbind(t(exp), group = ...) is avoided: binding a character vector to a
# numeric matrix would coerce every value to character.)
dat <- data.frame(t(exp))
dat <- mutate(dat, group = rep(c("A", "B"), each = 3))

# One boxplot per gene column. aes_string() is deprecated, so the column is
# selected with the .data pronoun instead. Building each plot inside a function
# gives every plot its own `gene` binding, which matters because aes() is
# evaluated lazily when the plot is rendered.
gene_cols <- setdiff(colnames(dat), "group")
p <- lapply(gene_cols, function(gene) {
  ggplot(data = dat, aes(x = group, y = .data[[gene]])) +
    geom_boxplot(aes(color = group)) +
    geom_jitter(aes(color = group)) +
    labs(y = gene) +
    theme_bw()
})

# Combine the plots in two rows with a single shared legend.
wrap_plots(p, nrow = 2, guides = "collect")
# Exercise 3 ----
# 1. Use a loop to inspect the data types of "a", TRUE and 3.
# The values are kept in a list so that each element keeps its own type.
x <- list("a", TRUE, 3)
for (i in seq_along(x)) {  # seq_along() also behaves correctly for an empty list
  print(class(x[[i]]))
}
# 2. Draw 10 random numbers and derive a new vector from them:
#    values above the median map to "A", all other values map to "B".
y <- rnorm(10)
cutoff <- median(y)
ifelse(y > cutoff, "A", "B")
# 3. From tmp2 of the previous exercise, build a new vector:
#    words containing "e" map to "A", words without "e" map to "B".
# tmp2 is rebuilt from tmp here: comma -> space, drop the period, split on spaces.
cleaned <- str_remove(str_replace(tmp, ",", " "), "[.]")
tmp2 <- str_split(cleaned, " ")[[1]]
ifelse(str_detect(tmp2, "e"), "A", "B")
# 4. Load deg.Rdata and build vector x from columns a and b:
#      a < 1 and b < 0.05  -> "down"
#      a > 1 and b < 0.05  -> "up"
#      anything else       -> "no"
# The file is expected to provide a data frame named deg (it is used right after loading).
load("deg.Rdata")
sig <- deg$b < 0.05        # shared part of both conditions
k1 <- deg$a < 1 & sig      # rows labelled "down"
k2 <- deg$a > 1 & sig      # rows labelled "up"
x <- ifelse(k1, "down", ifelse(k2, "up", "no"))

# 5. Count how many times each value of x occurs.
table(x)

# 6. Add x to the deg data frame as a new column.
# Option 1: base R assignment.
deg$x <- x

# Option 2: dplyr::mutate(). deg is reloaded first, so this starts again from
# the data frame as stored in the file, without the column added above.
library(dplyr)
load("deg.Rdata")
deg <- mutate(deg, x = x)