例如我们需要将一下数据的第二列从and处拆分为两列:
before = data.frame(attr = c(1,30,4,6), type=c('foo_and_bar','foo_and_bar_2'))
attr type
1 1 foo_and_bar
2 30 foo_and_bar_2
3 4 foo_and_bar
4 6 foo_and_bar_2
===>
attr type_1 type_2
1 1 foo bar
2 30 foo bar_2
3 4 foo bar
4 6 foo bar_2
- 使用内置函数
do.call()
before <- data.frame(attr = c(1,30,4,6), type=c('foo_and_bar','foo_and_bar_2'))
out <- strsplit(as.character(before$type),'_and_')
do.call(rbind, out)
使用sapply 以及 "["
before$type_1 < sapply(strsplit(as.character(before$type),'_and_'), "[", 1)
before$type_2 < sapply(strsplit(as.character(before$type),'_and_'), "[", 2)
或者
before <- data.frame(attr = c(1,30,4,6), type=c('foo_and_bar','foo_and_bar_2'))
after <- with(before, data.frame(attr = attr))
after <- cbind(after, data.frame(t(sapply(out, `[`))))
names(after)[2:3] <- paste("type", 1:2, sep = "_")
- 使用stringr包的str_split_fixed函数
str_split_fixed()
library(stringr)
str_split_fixed(before$type, "_and_", 2)
- 使用tidyr包
separate()
library(tidyr)
separate(data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE, extra = "warn", fill = "warn", ...)
data: 数据框格式
col: 要分解的列,可以是列名c(“col1”, “col2”),也可以是位置c(1,2)
into: 分解后的列名
sep: 分隔符
remove: 去除被分解的列