1 向量
- 创建
两种方式:vector()和c()
x1 <- vector(mode = "integer", length = 4) #创建长度为4的整数向量,元素初始值均为0(并非空向量)
x2 <- c(1:4)
x3 <- c(1,2,3,4)
- 类型转换
> as.character(x3)
[1] "1" "2" "3" "4"
> x4 <- c("a", "b", "c", "d") #转换应该有意义,否则结果为NA(并给出警告)
> as.numeric(x4)
[1] NA NA NA NA
- 长度
> length(x4)
[1] 4
- 元素命名
> names(x4) <- c("1", "2", "3", "4") #名称向量的长度不能大于向量长度;若较短,其余元素的名称为NA
> x4
1 2 3 4
"a" "b" "c" "d"
- 元素访问
> x4[1:3] #切片访问
1 2 3
"a" "b" "c"
> x4[c(1:3)]
1 2 3
"a" "b" "c"
> x4[c(1,2,3)] #枚举访问
1 2 3
"a" "b" "c"
> x4[-2] #与Python不同,负数索引不是从末尾倒着数,而是排除该位置的元素
1 3 4
"a" "c" "d"
2 数组和矩阵
2.1 数组
array函数,维度可大于2,默认按列存储
#创建一个二维数组
> y1 <- array(data = 1:6,
+ dim = c(2,3),
+ dimnames = list(
+ c("r1", "r2"),
+ c("c1", "c2", "c3")
+ ))
> y1
c1 c2 c3
r1 1 3 5
r2 2 4 6
#创建一个三维数组
> z1 <- array(data = 1:12,
+ dim = c(2,3,2))
> z1
, , 1
[,1] [,2] [,3]
[1,] 1 3 5
[2,] 2 4 6
, , 2
[,1] [,2] [,3]
[1,] 7 9 11
[2,] 8 10 12
- 属性
> dim(z1)
[1] 2 3 2
> attributes(z1)
$dim
[1] 2 3 2
> attributes(y1)
$dim
[1] 2 3
$dimnames
$dimnames[[1]]
[1] "r1" "r2"
$dimnames[[2]]
[1] "c1" "c2" "c3"
- 元素访问
> y1[2,]
c1 c2 c3
2 4 6
> y1[,1]
r1 r2
1 2
> y1[,2:3]
c2 c3
r1 3 5
r2 4 6
> y1[,c(2,3)]
c2 c3
r1 3 5
r2 4 6
> y1[,"c1"]
r1 r2
1 2
> y1["r1",]
c1 c2 c3
1 3 5
2.2 矩阵
> y2 <- matrix(data = 7:12,
+ nrow = 2,
+ ncol = 3,
+ dimnames = list(
+ c("r1", "r2"),
+ c("c1", "c2", "c3")
+ ))
> y2
c1 c2 c3
r1 7 9 11
r2 8 10 12
- 矩阵合并
rbind(): 按行合并
cbind(): 按列合并
行列的命名如果不同:rbind()的列名、cbind()的行名默认以第一个带有相应名称的矩阵为准;沿合并方向的名称(rbind()的行名、cbind()的列名)则各自保留
> rbind(y1, y2)
c1 c2 c3
r1 1 3 5
r2 2 4 6
r1 7 9 11
r2 8 10 12
> rbind(y2, y1)
c1 c2 c3
r1 7 9 11
r2 8 10 12
r1 1 3 5
r2 2 4 6
> cbind(y1, y2)
c1 c2 c3 c1 c2 c3
r1 1 3 5 7 9 11
r2 2 4 6 8 10 12
2.3 矩阵运算
> y1 * y2 #注意这是逐元素相乘,不是矩阵乘法(矩阵乘法用 %*%)
c1 c2 c3
r1 7 27 55
r2 16 40 72
> y1 + y2
c1 c2 c3
r1 8 12 16
r2 10 14 18
> y1 - y2
c1 c2 c3
r1 -6 -6 -6
r2 -6 -6 -6
> t(y1) #转置
r1 r2
c1 1 2
c2 3 4
c3 5 6
> y1 ^ -1 #注意这样不能求逆矩阵,只是逐元素取倒数(方阵求逆用solve())
c1 c2 c3
r1 1.0 0.33333333 0.20000000
r2 0.5 0.25000000 0.16666667
3 数据框
- 创建
> df = data.frame(id=1:4,
+ name=c("a", "b", "c", "d"),
+ score=c(60,70,80,90))
> df
id name score
1 1 a 60
2 2 b 70
3 3 c 80
4 4 d 90
- 命名
> rownames(df) #取行名
[1] "1" "2" "3" "4"
> rownames(df) <- 5:8
> df
id name score
5 1 a 60
6 2 b 70
7 3 c 80
8 4 d 90
> colnames(df) #取列名
[1] "id" "name" "score"
- 元素访问
> df[c("5","6"),c("score","name")]
score name
5 60 a
6 70 b
> df$name #$符号快速选取一列(R 4.0之前data.frame()默认把字符列转为因子,所以下面显示Levels;R 4.0及以后默认得到字符向量)
[1] a b c d
Levels: a b c d
> subset(df, score >= 80)
id name score
7 3 c 80
8 4 d 90
> subset(df, score >= 80, c(id,name))
id name
7 3 c
8 4 d