R，笔记03

基本运算

> x = 3 ** 2
> x
[1] 9
> 
> x = 3 ^ 2
> x
[1] 9
> 
> x = sqrt(64)
> x
[1] 8
> 
> x = -7
> y = abs(x)
> y
[1] 7
>

对数指数

> x = exp(1) # exp()是指自然常数e的x次方
> x
[1] 2.718282
> 
> x = exp(3) # e的3次方
> x
[1] 20.08554
> 
> x = exp(0.5) # e的0.5次方
> x
[1] 1.648721
> 
> 
> # exp()和log()互为反函数
> # log()以e为底的对数
> # 一般基底的对数，语法是log(x,m),其中m是底。如果底是10，也可这么写log10()
> x = log(2) # 计算以e为底的对数
> x
[1] 0.6931472
> 
> x = log(2, 10) # 计算以10为底的对数
> x 
[1] 0.30103
> 
> x = log10(2) # 计算以10为底的对数
> x 
[1] 0.30103
> 
> x = 1.2 * 10^4
> x
[1] 12000
> 
> x = 1.2e4
> x
[1] 12000
>

近似数

> round(98.345, digits = 2) 
[1] 98.34
> round(98.345, 2)
[1] 98.34
> 
> signif(1234567.896543, digits = 7) #第二个位置是有效数字的个数
[1] 1234568
> signif(1234567.896543, digits = 8)
[1] 1234568
> signif(1234567.896543, digits = 6)
[1] 1234570
> signif(1234567.896543, digits = 5)
[1] 1234600
> signif(1234567.896543, digits = 1)
[1] 1e+06
> 
> # 近似函数floor(), ceiling(), trunc()可直接取整数
>

设置重复

> # rep(x, times=重复次数，each=每次每个元素的重复次数，length.out=向量长度)
>

简单统计

> # sum(), max(), min(), mean()
> 
> # prod() 计算所有元素的积
> 
> # cumsum()计算所有元素的累计和
> 
> # cumprod()计算所有元素的累计积
> 
> # cummax()可返回各元素从向量起点到该元素位置间所有元素的最大值
> 
> # cummin()可返回各元素从向量起点到该元素位置间所有元素的最小值
> 
> # diff()返回相邻元素之差（后一元素减前一元素）
> 
> # sort(x, decreasing = FALSE)排序
> 
> # rank()返回将元素从小到大排序后的位置编号
> 
> # rev()将向量对象颠倒排列
> 
> # length()计算向量对象长度，即向量对象元素个数
> 
> # sd()标准差
> 
> # var()样本变异数
>

查询确认/转换数据类型

> # is.integer()
> # is.numeric()
> # is.double()
> # is.character()
> # is.matrix()
> # is.array()
> 
> # as.character()
> # as.numeric()
> 
> 
> # str()探索对象结构，了解数据类型/长度/内容
> # class()对向量对象而言，可使用它了解对象元素的数据类型
>

逻辑运算

> # x & y 如果x和y均为T，则传回T
> # x | y 如果x或y为T，则传回T
> # !x 传回非x
> # xor (x, y) 相当于XOR运算，如果x和y不同，传回T

which函数

> # which()所使用的参数是一个比较表达式，可以返回符合条件的索引值
> x <- c(6, 5, 3, 4, 5)
> which( x > 4 )
[1] 1 2 5
> 
> # which.max():给出最大值的第一个索引值。注意，一个向量中，最大值可能出现多次。
> # which.min():给出最小值的第一个索引值。

NA的去除

> # 关于NA。可先用is.na()判断向量中是否含有NA，然后用!is.na()即可删除NA
> x <- c(9, 1, NA, 8, 6)
> x[x>5 & !is.na(x)]
[1] 9 8 6
> x
[1]  9  1 NA  8  6

any函数

> # any(),给与比较条件，只要参数向量对象有1个元素是T，则返回T
> 
> # 向量对象元素的命名obj <- c (name1 = data1, name2 = data2, ……)

names等函数

> # names()可查询向量对象元素名称，也可更改向量对象元素名称。如想删除向量对象的元素的名称，将其设为NULL即可。
> 
> # matrix(data, nrow=?, ncol=?, byrow=logical, dimnames=NULL)
> x <- matrix(3:11, nrow = 3, byrow=T, dimnames=list(rownames,colnames))
Error in matrix(3:11, nrow = 3, byrow = T, dimnames = list(rownames, colnames)) : 
  length of 'dimnames' [1] not equal to array extent
> # 出错原因：此时尚未定义rownames和colnames这两个向量，list(rownames, colnames)取到的是R的同名函数；应先执行下面两行赋值，再调用matrix()。
> colnames <- c("col1", "col2", "col3")
> rownames <- c("row1","row2","row3")
> x # 上面的matrix()调用出错，x未被重新赋值，所以显示的仍是之前的向量
[1]  9  1 NA  8  6

矩阵

> # ncol()可得到矩阵列数
> # nrow()
> # dim()获得矩阵的行和列
> # length()也可用于矩阵和数组对象的元素个数

> # 将向量组成矩阵，用cbind()或rbind()
> # 如何取得元素值?
> # 如何修改元素值?
> # 如何取得和修改矩阵对象的行名和列名？也可以用dimnames()来获得
> # 在矩阵中可用行名和列名代替索引取得元素值

> # rowSums()
> # colSums()
> # rowMeans()
> # colMeans()

> # t()转置

factor

> # 使用factor()或as.factor()函数建立因子。参数当中，x向量：是要转为因子的向量；levels：原x内元素的可能值
> 
> yes.or.no <- c("yes", "no", "no", "yes", "yes")
> first.factor <- factor(yes.or.no)
> first.factor
[1] yes no  no  yes yes
Levels: no yes
> 
> 
> yes.or.no <- c("yes", "no", "no", "yes", "yes")
> second.factor <- factor(yes.or.no, levels = c("yes", "no")) # 指定顺序
> second.factor
[1] yes no  no  yes yes
Levels: yes no
> 
> 
> # 指定缺失的levels值
> directions <- c("east", "west", "north", "east", "west")
> a.factor <- factor(directions)
> a.factor # 缺少一个level（south），可补上
[1] east  west  north east  west 
Levels: east north west
> 
> b.factor <- factor(a.factor, levels = c("east", "west", "south", "north")) #补上了
> b.factor
[1] east  west  north east  west 
Levels: east west south north
> 
> # 因子带标签
> c.factor <- factor(b.factor, levels = c("east", "west", "south", "north"), labels = c("E", "W", "S", "N"))
> c.factor
[1] E W N E W
Levels: E W S N
> 
> # 因子的level参数。用nlevels()可以传回levels的数量，length()是传回因子元素的数量。
> 
> # 数值型因子在转换时常见的错误
> temperature <- factor(c(28, 32, 30, 34, 32, 34))
> str(temperature) #level有4个值28/30/32/34，分别对应1/2/3/4.注意下面的情况：
 Factor w/ 4 levels "28","30","32",..: 1 3 2 4 3 4
> as.numeric(temperature) # 解决方法如下：
[1] 1 3 2 4 3 4
> as.numeric(as.character(temperature))
[1] 28 32 30 34 32 34
> 
> #有序因子
> str1 <- c("A", "B", "A", "C", "D", "B", "D")
> str1.order <- factor(str1, levels = c("D", "C", "B", "A"), ordered = T)
> str1.order
[1] A B A C D B D
Levels: D < C < B < A
> 
> #table()统计在因子的所有元素中，levels中各值出现的次数。即level可能取得的值的出现的次数。
> #state.name向量集收集了美国50个州，state.region是因子，记录每个州属于美国哪个区。
> state.region
 [1] South         West          West          South         West          West          Northeast     South         South         South         West         
[12] West          North Central North Central North Central North Central South         South         Northeast     South         Northeast     North Central
[23] North Central South         North Central West          North Central West          Northeast     Northeast     West          Northeast     South        
[34] North Central North Central South         West          Northeast     Northeast     South         North Central South         South         West         
[45] Northeast     South         West          South         North Central West         
Levels: Northeast South North Central West
> table(state.region)
state.region
    Northeast         South North Central          West 
            9            16            12            13

数据框

> # 数据框
> name <- c("Kevin", "Peter", "Frank", "Maggie")
> gender <- c("M", "M", "M", "F")
> height <- c(170, 175, 165, 168)
> info <- data.frame(name, gender, height)
> info
    name gender height
1  Kevin      M    170
2  Peter      M    175
3  Frank      M    165
4 Maggie      F    168
> 
> #分别用names()和colnames()查询info数据框的列名
> names(info)
[1] "name"   "gender" "height"
> colnames(info)
[1] "name"   "gender" "height"
> 
> #查询行名
> row.names(info)
[1] "1" "2" "3" "4"
> 
> #用names()给第一列改名
> names(info)[1] <- "n.name"
> info
  n.name gender height
1  Kevin      M    170
2  Peter      M    175
3  Frank      M    165
4 Maggie      F    168
> 
> # 认识数据框结构
> str(info) #发现字符串变成因子了。这是R 4.0.0之前的默认行为（自R 4.0.0起默认stringsAsFactors = FALSE）。如不想转为因子，则在data.frame()中设置stringsAsFactors = FALSE
'data.frame':   4 obs. of  3 variables:
 $ n.name: Factor w/ 4 levels "Frank","Kevin",..: 2 4 1 3
 $ gender: Factor w/ 2 levels "F","M": 2 2 2 1
 $ height: num  170 175 165 168
> 
> # 取数据框内容
> info[, "n.name"]
[1] Kevin  Peter  Frank  Maggie
Levels: Frank Kevin Maggie Peter
> info[2, ]
  n.name gender height
2  Peter      M    175
> info$n.name
[1] Kevin  Peter  Frank  Maggie
Levels: Frank Kevin Maggie Peter
> info[, 1] #返回的是向量
[1] Kevin  Peter  Frank  Maggie
Levels: Frank Kevin Maggie Peter
> info[1] #返回的是数据框
  n.name
1  Kevin
2  Peter
3  Frank
4 Maggie
> 
> # 用$为数据框添加列数据
> weight <- c (65, 71, 58, 55)
> info$weight <- weight
> info
  n.name gender height weight
1  Kevin      M    170     65
2  Peter      M    175     71
3  Frank      M    165     58
4 Maggie      F    168     55
> 
> rm(list = ls())
> name <- c("Kevin", "Peter", "Frank", "Maggie")
> gender <- c("M", "M", "M", "F")
> height <- c(170, 175, 165, 168)
> info <- data.frame(name, gender, height)
> age <- c(19, 20, 20, 19)
> score <- c(88, 91, 75, 80)
> addinfo <- data.frame(age, score)
> addinfo
  age score
1  19    88
2  20    91
3  20    75
4  19    80
> newinfo <- cbind(info, addinfo)
> newinfo
    name gender height age score
1  Kevin      M    170  19    88
2  Peter      M    175  20    91
3  Frank      M    165  20    75
4 Maggie      F    168  19    80
> 
> #注意，数据框由一系列列向量组成，如果把矩阵转为数据框，则用到data.frame()函数。
>

list

> # list
> 
> x <- c(7, 8, 6, 11, 9, 12, 12, 8, 9, 15, 7, 12)
> colnames <- c("1st", "2nd", "3rd", "4th", "5th", "6th")
> rownames <- c("lin","ge")
> team.cal <- matrix(x, 2, byrow = T, dimnames=list(rownames,colnames))
> baskets.cal <- list("zhang", "2018-12", team.cal) # 注意这里的team.cal不要加引号
> baskets.cal
[[1]]
[1] "zhang"

[[2]]
[1] "2018-12"

[[3]]
    1st 2nd 3rd 4th 5th 6th
lin   7   8   6  11   9  12
ge   12   8   9  15   7  12

> 
> # 给刚才的list里的对象命名
> n.baskets.cal <- list(teamname = "zhang", season = "2018-12", score.info = team.cal)  # 注意这里的team.cal不要加引号
> n.baskets.cal
$`teamname`
[1] "zhang"

$season
[1] "2018-12"

$score.info
    1st 2nd 3rd 4th 5th 6th
lin   7   8   6  11   9  12
ge   12   8   9  15   7  12

> 
> #names()函数可以获得及修改list里对象的名称
> names(n.baskets.cal)
[1] "teamname"   "season"     "score.info"
> names(n.baskets.cal)[1] <- "great"
> n.baskets.cal
$`great`
[1] "zhang"

$season
[1] "2018-12"

$score.info
    1st 2nd 3rd 4th 5th 6th
lin   7   8   6  11   9  12
ge   12   8   9  15   7  12

> 
> # 获得list里的元素
> n.baskets.cal$great
[1] "zhang"
> n.baskets.cal$score.info[2, 4]
[1] 15
> n.baskets.cal[[3]][2, 4]
[1] 15
> 
> # list内的对象名可当索引
> n.baskets.cal[["score.info"]]
    1st 2nd 3rd 4th 5th 6th
lin   7   8   6  11   9  12
ge   12   8   9  15   7  12
> n.baskets.cal[names(n.baskets.cal) != "great"]
$`season`
[1] "2018-12"

$score.info
    1st 2nd 3rd 4th 5th 6th
lin   7   8   6  11   9  12
ge   12   8   9  15   7  12

> 
> # 如何修改、添加、删除（赋值为NULL）list里元素的内容?
> # 如何合并list?
>

文本操作

> # 语句分割
> x <- c("Hello R World")
> x
[1] "Hello R World"
> strsplit(x, " ") # 以空格为界拆分。注意返回的是什么格式
[[1]]
[1] "Hello" "R"     "World"

> 
> # 沿用上例，拆分后存入向量对象内
> a <- strsplit(x, " ")[[1]]
> a
[1] "Hello" "R"     "World"
> 
> # toupper() 小写变大写
> # tolower() 
> 
> # unique() 使向量内容不重复出现
> 
> # paste()的collapse参数
> coffee.str <- c("boiling", "coffee", "brings", "out", "a", "bitterly", "taste")
> paste(coffee.str)
[1] "boiling"  "coffee"   "brings"   "out"      "a"        "bitterly" "taste"   
> paste(coffee.str, collapse = " ") # 字符串以空格相连
[1] "boiling coffee brings out a bitterly taste"
> 
> 
> # paste()主要作用是将两个或多个向量连接
> str_1 <- letters[1:6]
> str_2 <- 1:6
> paste(str_1, str_2)
[1] "a 1" "b 2" "c 3" "d 4" "e 5" "f 6"
> paste(str_1, str_2, sep = "") # 去掉空格
[1] "a1" "b2" "c3" "d4" "e5" "f6"
> paste(str_1, str_2, sep = "", collapse = " ")
[1] "a1 b2 c3 d4 e5 f6"
> 
> 
> # 使用索引值搜索
> # 列出state.name中每个州名的第2到第4个字符组成的子字符串
> substr(state.name, start = 2, stop = 4)
 [1] "lab" "las" "riz" "rka" "ali" "olo" "onn" "ela" "lor" "eor" "awa" "dah" "lli" "ndi" "owa" "ans" "ent" "oui" "ain" "ary" "ass" "ich" "inn" "iss" "iss" "ont" "ebr"
[28] "eva" "ew " "ew " "ew " "ew " "ort" "ort" "hio" "kla" "reg" "enn" "hod" "out" "out" "enn" "exa" "tah" "erm" "irg" "ash" "est" "isc" "yom"
> 
> # grep(pattern, x) pattern,搜索目标；x，字符串向量
> grep("M", state.name) # 返回的是索引值
[1] 19 20 21 22 23 24 25 26 31
> state.name[grep("M", state.name)]
[1] "Maine"         "Maryland"      "Massachusetts" "Michigan"      "Minnesota"     "Mississippi"   "Missouri"      "Montana"       "New Mexico"   
> state.name[grep(" ", state.name)]# 搜索州名中有空格的
 [1] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"       "North Carolina" "North Dakota"   "Rhode Island"   "South Carolina" "South Dakota"  
[10] "West Virginia" 
> 
> # 字符串内容更改
> # sub(pattern, replacement, x) 其中replacement用空字符""代替，相当于删除。
> 
> state.name[grep("New|South", state.name)] # New|South 不要有空格
[1] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"       "South Carolina" "South Dakota"  
> 
> str_a <- c("ch6.xls", "ch7.xls", "ch7.c", "ch7.doc", "ch8.xls")
> str_b <- c("ch.xls", "ch7.xls", "ch77.xls", "ch87.xls", "ch88.xls")
> str_a[grep("ch(6|7).xls", str_a)]
[1] "ch6.xls" "ch7.xls"
> str_b[grep("ch(7*|8*).xls",str_b)] # 注意，*代表0次或多次；+代表1次或多次
[1] "ch.xls"   "ch7.xls"  "ch77.xls" "ch88.xls"
> str_b[grep("ch(7+|8+).xls", str_b)]
[1] "ch7.xls"  "ch77.xls" "ch88.xls"

R，笔记03

基本运算

对数 指数

近似数

设置重复

简单统计

查询确认/转换数据类型

逻辑运算

which函数

NA的去除

any函数

names等函数

矩阵

factor

数据框

list

文本操作

推荐阅读更多精彩内容

对数指数