19.2 列表列
dataframe
- data.frame()默认把列表当作"由多个列组成的列表"来处理(列表的每个元素各自成为一列)
> data.frame(x=list(1:3,3:5))
x.1.3 x.3.5
1 1 3
2 2 4
3 3 5
解决:使用I()
> data.frame(
+ x=I(list(1:3,3:5)),
+ y=c("1,2","3,4,5")
+ )
x y
1 1, 2, 3 1,2
2 3, 4, 5 3,4,5
tibble
- 不需要修改输入
> tibble(
+ x=list(1:3,3:5),
+ y=c("1,2","3,4,5")
+ )
# A tibble: 2 x 2
x y
<list> <chr>
1 <int [3]> 1,2
2 <int [3]> 3,4,5
> tb <- tibble(
+ x=list(1:3,3:5),
+ y=c("1,2","3,4,5")
+ )
> tb$x
[[1]]
[1] 1 2 3
[[2]]
[1] 3 4 5
> tb$y
[1] "1,2" "3,4,5"
tribble()
- tribble()比tibble()更方便:它能自动识别出需要用列表列来存放的内容,不必手动写list()
> trb <- tribble(
+ ~x,~y,
+ 1:3,"1,2",
+ 3:5,"3,4,5"
+ )
> trb
# A tibble: 2 x 2
x y
<list> <chr>
1 <int [3]> 1,2
2 <int [3]> 3,4,5
19.3 创建列表列
19.3.1 使用嵌套; nest()
- 嵌套数据框
行 | 元观测 |
---|---|
列 (列表列) | 组成元观测的具体观测 |
列(其他) | 定义观测的变量 |
- nest() 的使用
- 用于分组数据框:保留用于分组的列,而将其他所有数据归并到列表列中
- 用于未分组数据框: 需要指定嵌套哪些列
19.3.2 使用向量化函数
> df <- tribble(
+ ~x1,
+ "a,b,c",
+ "d,e,f,g"
+ )
> str_split(df$x1,",")
[[1]]
[1] "a" "b" "c"
[[2]]
[1] "d" "e" "f" "g"
#嵌套:mutate(function()) [function()生成一个list]
> df %>%
+ mutate(x2= str_split(x1,","))
# A tibble: 2 x 2
x1 x2
<chr> <list>
1 a,b,c <chr [3]>
2 d,e,f,g <chr [4]>
# 还原嵌套:unnest()
> df %>%
+ mutate(x2= str_split(x1,",")) %>%
+ unnest()
# A tibble: 7 x 2
x1 x2
<chr> <chr>
1 a,b,c a
2 a,b,c b
3 a,b,c c
4 d,e,f,g d
5 d,e,f,g e
6 d,e,f,g f
7 d,e,f,g g
# 调用不同函数
> sim <- tribble(
+ ~f, ~params,
+ "runif", list(min=-1,max=-1),
+ "rnorm", list(sd=5),
+ "rpois", list(lambda= 10)
+ )
> sim %>%
+ mutate(sims = invoke_map(f,params,n=10))
# A tibble: 3 x 3
f params sims
<chr> <list> <list>
1 runif <named list [2]> <dbl [10]>
2 rnorm <named list [1]> <dbl [10]>
3 rpois <named list [1]> <int [10]>
19.3.3 使用多值摘要
summarize()只能使用返回单一值的摘要函数;对于返回更长向量的函数(如quantile()),可以将结果包装在一个list中
> mtcars %>%
+ group_by(cyl) %>%
+ summarize(q = quantile(mpg))
`summarise()` regrouping output by 'cyl' (override with `.groups` argument)
# A tibble: 15 x 2
# Groups: cyl [3]
cyl q
<dbl> <dbl>
1 4 21.4
2 4 22.8
3 4 26
4 4 30.4
5 4 33.9
6 6 17.8
7 6 18.6
8 6 19.7
9 6 21
10 6 21.4
11 8 10.4
12 8 14.4
13 8 15.2
14 8 16.2
15 8 19.2
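这里没有报错的原因:自 dplyr 1.0.0 起,summarise()允许摘要函数返回多个值,此时每组输出多行(quantile()返回5个值,所以每组5行);书中使用的旧版本会在这里报错。另外,dplyr 1.1.0 之后这种用法已不推荐,应改用 reframe() /(ㄒoㄒ)/~~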
* 修改代码: 将结果包装为list
> mtcars %>%
+ group_by(cyl) %>%
+ summarize(q = list(quantile(mpg)))
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 3 x 2
cyl q
<dbl> <list>
1 4 <dbl [5]>
2 6 <dbl [5]>
3 8 <dbl [5]>
酱紫就对啦O(∩_∩)O
By the way, group_by 在不搭配其他函数使用就没什么用
> mtcars %>% group_by(cyl)
# A tibble: 32 x 11
# Groups: cyl [3]
mpg cyl disp hp drat wt qsec vs am gear carb
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 21 6 160 110 3.9 2.62 16.5 0 1 4 4
2 21 6 160 110 3.9 2.88 17.0 0 1 4 4
3 22.8 4 108 93 3.85 2.32 18.6 1 1 4 1
4 21.4 6 258 110 3.08 3.22 19.4 1 0 3 1
5 18.7 8 360 175 3.15 3.44 17.0 0 0 3 2
6 18.1 6 225 105 2.76 3.46 20.2 1 0 3 1
7 14.3 8 360 245 3.21 3.57 15.8 0 0 3 4
8 24.4 4 147. 62 3.69 3.19 20 1 0 4 2
9 22.8 4 141. 95 3.92 3.15 22.9 1 0 4 2
10 19.2 6 168. 123 3.92 3.44 18.3 1 0 4 4
看起来数据啥也没变,只是输出里多了一行 "# Groups: cyl [3]":group_by()只记录分组信息,要配合summarize()等函数才会体现效果,所以没有毛病就是酱紫
直接用unnest()展开并显示结果
> mtcars %>%
+ group_by(cyl) %>%
+ summarize(q = list(quantile(mpg))) %>%
+ unnest()
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 15 x 2
cyl q
<dbl> <dbl>
1 4 21.4
2 4 22.8
3 4 26
4 4 30.4
5 4 33.9
6 6 17.8
7 6 18.6
8 6 19.7
9 6 21
10 6 21.4
11 8 10.4
12 8 14.4
13 8 15.2
14 8 16.2
15 8 19.2
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(q)`
为了让结果和对应的概率值(分位点)一同显示(需要先定义 probs,例如 probs <- c(0.01, 0.25, 0.5, 0.75, 0.99))
> mtcars %>%
+ group_by(cyl) %>%
+ summarise(p=list(probs),q=list(quantile(mpg,probs))) %>%
+ unnest()
`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 15 x 3
cyl p q
<dbl> <dbl> <dbl>
1 4 0.01 21.4
2 4 0.25 22.8
3 4 0.5 26
4 4 0.75 30.4
5 4 0.99 33.8
6 6 0.01 17.8
7 6 0.25 18.6
8 6 0.5 19.7
9 6 0.75 21
10 6 0.99 21.4
11 8 0.01 10.4
12 8 0.25 14.4
13 8 0.5 15.2
14 8 0.75 16.2
15 8 0.99 19.1
19.3.4 使用命名列表
> x <- list(
+ a=1:5,
+ b=3:4,
+ c=5:6
+ )
> x
$a
[1] 1 2 3 4 5
$b
[1] 3 4
$c
[1] 5 6
> df <- enframe(x)
> df
# A tibble: 3 x 2
name value
<chr> <list>
1 a <int [5]>
2 b <int [2]>
3 c <int [2]>
> library(stringr)
> ?str_c
> x <- list(
+ a=1:5,
+ b=3:4,
+ c=5:6
+ )
> x
$a
[1] 1 2 3 4 5
$b
[1] 3 4
$c
[1] 5 6
> df <- enframe(x)
> df
# A tibble: 3 x 2
name value
<chr> <list>
1 a <int [5]>
2 b <int [2]>
3 c <int [2]>
> library(stringr)
> df %>%
+ mutate(
+ smry= map2_chr(
+ name,
+ value,
+ ~str_c(.x,":",y[1])
+ )
+ )
Error: Problem with `mutate()` input `smry`.
x object 'y' not found
i Input `smry` is `map2_chr(name, value, ~str_c(.x, ":", y[1]))`.
Run `rlang::last_error()` to see where the error occurred.
> df
# A tibble: 3 x 2
name value
<chr> <list>
1 a <int [5]>
2 b <int [2]>
3 c <int [2]>
> df %>%
+ mutate(
+ smry= map2_chr(
+ name,
+ value,
+ ~str_c(.x,":",.y[1])
+ )
+ )
# A tibble: 3 x 3
name value smry
<chr> <list> <chr>
1 a <int [5]> a:1
2 b <int [2]> b:3
3 c <int [2]> c:5
注意:str_c(.x,":",.y[1]) 中 .x 和 .y 前面的点不能漏掉,否则会报错(如上面的 object 'y' not found)
异构列表筛选必备!!!(。^▽^)
- 根据类型筛选
> df %>%
+ mutate(
+ smry= map2_chr(
+ name,
+ value,
+ ~str_c(.x,":",y[1])
+ )
+ )
Error: Problem with `mutate()` input `smry`.
x object 'y' not found
i Input `smry` is `map2_chr(name, value, ~str_c(.x, ":", y[1]))`.
19.4 简化列表列
> df <- tribble(
+ ~x,
+ letters[1:5],
+ 1:3,
+ runif(5)
+ )
> df
# A tibble: 3 x 1
x
<list>
1 <chr [5]>
2 <int [3]>
3 <dbl [5]>
> df %>%
+ mutate(
+ type= map_chr(x,typeof),
+ length= map_int(x,length)
+ )
# A tibble: 3 x 3
x type length
<list> <chr> <int>
1 <chr [5]> character 5
2 <int [3]> integer 3
3 <dbl [5]> double 5
- 从列表列x的每个元素中,按名称提取指定成分的内容
.null=NA_real_ :当某个元素中不存在该名称时,用它作为返回的缺失值
df2 <- tribble(
~x,
list(a=1,b=2),
list(a=2,c=4)
)
> df2 %>%
+ mutate(
+ a= map_dbl(x,"a"),
+ b= map_dbl(x,"b",.null=NA_real_)
+ )
# A tibble: 2 x 3
x a b
<list> <dbl> <dbl>
1 <named list [2]> 1 2
2 <named list [2]> 2 NA
19.4.2 嵌套还原
> tibble(
+ x=1:2,
+ y=list(1:4,1)
+ )
# A tibble: 2 x 2
x y
<int> <list>
1 1 <int [4]>
2 2 <dbl [1]>
> tibble(
+ x=1:2,
+ y=list(1:4,1)
+ ) %>%
+ unnest()
# A tibble: 5 x 2
x y
<int> <dbl>
1 1 1
2 1 2
3 1 3
4 1 4
5 2 1
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(y)`
第一行重复了4次(y的第一个元素长度为4),第二行只重复了一次,这意味着:
- 不能同时还原包含不同数量元素的两个列表列
df1 <- tribble(
~x,~y,~z,
1,c("a","b"),1:2,
2,"c",3
)
df1
df1 %>% unnest()
y和z每行中元素数量相等,可以正常运行
> df1 <- tribble(
+ ~x,~y,~z,
+ 1,c("a","b"),1:2,
+ 2,c("b","c"),3
+ )
> df1
# A tibble: 2 x 3
x y z
<dbl> <list> <list>
1 1 <chr [2]> <int [2]>
2 2 <chr [2]> <dbl [1]>
> df1 %>% unnest()
# A tibble: 4 x 3
x y z
<dbl> <chr> <dbl>
1 1 a 1
2 1 b 2
3 2 b 3
4 2 c 3
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(y, z)`
(lll¬ω¬)书上这里应该跑不出来……原因是 tidyr 1.0.0 之后 unnest() level up 了:同时展开多个列表列时,同一行中长度为1的元素会被自动循环补齐(这里第二行的 z 长度为1,被重复成与 y 相同的长度2);如果同一行中各列表列的长度不同且都不为1,仍然会报错