R语言之数据结构

1 向量

  • 创建
    两种方式:vector()和c()
x1 <- vector(mode = "integer", length = 4)    #创建长度为4的整数向量(元素初始值均为0)
x2 <- c(1:4)
x3 <- c(1,2,3,4)
  • 类型转换
> as.character(x3)
[1] "1" "2" "3" "4"

> x4 <- c("a", "b", "c", "d")                 #转换应该具有意义
> as.numeric(x4)
[1] NA NA NA NA
  • 长度
> length(x4)
[1] 4
  • 元素命名
> names(x4) <- c("1", "2", "3", "4")        #长度应该不大于向量长度  
> x4
  1   2   3   4 
"a" "b" "c" "d"    
  • 元素访问
> x4[1:3]              #切片访问
  1   2   3 
"a" "b" "c" 

> x4[c(1:3)]
  1   2   3 
"a" "b" "c"

> x4[c(1,2,3)]     #枚举访问
  1   2   3 
"a" "b" "c" 

> x4[-2]         #与Python不同,负数下标不是从末尾倒数,而是排除该位置的元素
  1   3   4 
"a" "c" "d" 

2 数组和矩阵

2.1 数组

array函数,维度可大于2,默认按列存储

#创建一个二维数组
> y1 <- array(data = 1:6, 
+             dim = c(2,3), 
+             dimnames = list(
+                 c("r1", "r2"),
+                 c("c1", "c2", "c3")
+             ))
> y1
   c1 c2 c3
r1  1  3  5
r2  2  4  6

#创建一个三维数组
> z1 <- array(data = 1:12,
+             dim = c(2,3,2))
> z1
, , 1

     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

, , 2

     [,1] [,2] [,3]
[1,]    7    9   11
[2,]    8   10   12
  • 属性
> dim(z1)
[1] 2 3 2

> attributes(z1)
$dim
[1] 2 3 2

> attributes(y1)
$dim
[1] 2 3

$dimnames
$dimnames[[1]]
[1] "r1" "r2"

$dimnames[[2]]
[1] "c1" "c2" "c3"
  • 元素访问
> y1[2,]
c1 c2 c3 
 2  4  6 

> y1[,1]
r1 r2 
 1  2 

> y1[,2:3]
   c2 c3
r1  3  5
r2  4  6

> y1[,c(2,3)]
   c2 c3
r1  3  5
r2  4  6

> y1[,"c1"]
r1 r2 
 1  2 

> y1["r1",]
c1 c2 c3 
 1  3  5 
2.2 矩阵
> y2 <- matrix(data = 7:12,
+              nrow = 2,
+              ncol = 3,
+              dimnames = list(
+                  c("r1", "r2"),
+                  c("c1", "c2", "c3")
+              ))
> y2
   c1 c2 c3
r1  7  9 11
r2  8 10 12
  • 矩阵合并
    rbind(): 按行合并
    cbind(): 按列合并
    合并方向以外的维度名(rbind的列名、cbind的行名)如果不同,默认以第一个矩阵为准
> rbind(y1, y2)
   c1 c2 c3
r1  1  3  5
r2  2  4  6
r1  7  9 11
r2  8 10 12

> rbind(y2, y1)
   c1 c2 c3
r1  7  9 11
r2  8 10 12
r1  1  3  5
r2  2  4  6

> cbind(y1, y2)
   c1 c2 c3 c1 c2 c3
r1  1  3  5  7  9 11
r2  2  4  6  8 10 12
2.3 矩阵运算
> y1 * y2            #注意这是逐元素相乘,不是矩阵乘法(矩阵乘法用 %*%)
   c1 c2 c3
r1  7 27 55
r2 16 40 72

> y1 + y2
   c1 c2 c3
r1  8 12 16
r2 10 14 18

> y1 - y2
   c1 c2 c3
r1 -6 -6 -6
r2 -6 -6 -6

> t(y1)              #转置
   r1 r2
c1  1  2
c2  3  4
c3  5  6

> y1 ^ -1                   #注意这是逐元素取倒数,不能求逆矩阵(方阵求逆用solve())
    c1         c2         c3
r1 1.0 0.33333333 0.20000000
r2 0.5 0.25000000 0.16666667

3 数据框

  • 创建
> df = data.frame(id=1:4, 
+                 name=c("a", "b", "c", "d"),
+                 score=c(60,70,80,90))
> df
  id name score
1  1    a    60
2  2    b    70
3  3    c    80
4  4    d    90
  • 命名
> rownames(df)                #取行名
[1] "1" "2" "3" "4"

> rownames(df) <- 5:8
> df
  id name score
5  1    a    60
6  2    b    70
7  3    c    80
8  4    d    90

> colnames(df)                #取列名
[1] "id"    "name"  "score"
  • 元素访问
> df[c("5","6"),c("score","name")]
  score name
5    60    a
6    70    b

> df$name                        #$符号快速选取一列(R 4.0之前字符列默认转为因子,故输出带Levels)
[1] a b c d
Levels: a b c d

> subset(df, score >= 80)
  id name score
7  3    c    80
8  4    d    90

> subset(df, score >= 80, c(id,name))
  id name
7  3    c
8  4    d
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容