首先给出的是 h2o 在 R 语言里面的下载和安装方法：
# Remove any previously attached / installed H2O package so that the release
# pinned below is installed cleanly.
if ("package:h2o" %in% search()) {
  detach("package:h2o", unload = TRUE)
}
if ("h2o" %in% rownames(installed.packages())) {
  remove.packages("h2o")
}

# Next, install the packages that H2O depends on. The installed-package table
# is scanned once and every missing dependency is installed in a single call,
# instead of re-scanning the whole library on each loop iteration.
pkgs <- c("RCurl", "jsonlite")
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# Now download and install the H2O package for R from the rel-wright/8
# release repository. The repository is addressed over HTTPS so the package
# source is not fetched over an unencrypted connection.
install.packages(
  "h2o",
  type = "source",
  repos = "https://h2o-release.s3.amazonaws.com/h2o/rel-wright/8/R"
)

# Finally, load H2O and start up an H2O cluster.
library(h2o)
h2o.init()
直接复制粘贴到你的 R 里面运行就好了。
Load file
# Load a CSV file into H2O. Two alternative loaders are listed; each result is
# assigned to `df`, so the second assignment overwrites the first.
# NOTE(review): both calls are written without arguments as a function
# listing; they presumably need at least a file path -- confirm against the
# h2o documentation before running.
df <- h2o.importFile()
df <- h2o.uploadFile()
# Load directly from an object in the R session.
# NOTE(review): the object to convert is omitted here -- TODO supply it.
as.h2o()
数据管理
# Manage data
# NOTE(review): the calls below are listed without arguments as a quick
# reference; check the h2o documentation for what each one expects (h2o.rm()
# presumably needs the object to remove -- confirm) before running them.
h2o.rm()
h2o.removeAll()
h2o.ls()
数据的基本描述
# Data summaries
# NOTE(review): `data` is not assigned anywhere in the visible snippets;
# presumably it stands for an H2O frame such as the `iris.h2o` created in the
# group-by example -- confirm and substitute a real frame before running.
h2o.describe(data)
h2o.quantile(data)
h2o.levels(data)
一些简单的函数
# Data transformation
# NOTE(review): listed without arguments as a quick reference; going by their
# names these compute a standard deviation, a mean and a correlation --
# confirm the required arguments in the h2o documentation.
h2o.sd()
h2o.mean()
h2o.cor()
分组聚合
# Group-by aggregation
# Convert the built-in iris data frame into an H2O frame.
iris.h2o <- as.h2o(iris)
# Per-species row count plus the mean of each of the four measurement columns.
h2o.group_by(
  iris.h2o,
  by = "Species",
  nrow("Species"),
  mean("Sepal.Length"),
  mean("Sepal.Width"),
  mean("Petal.Length"),
  mean("Petal.Width")
)
Species nrow mean_Sepal.Length mean_Sepal.Width
1 setosa 50 5.006 3.428
2 versicolor 50 5.936 2.770
3 virginica 50 6.588 2.974
mean_Petal.Length mean_Petal.Width
1 1.462 0.246
2 4.260 1.326
3 5.552 2.026
[3 rows x 6 columns]
数据划分
# Data splitting
# Two ratios (0.6 and 0.2) are handed to h2o.splitFrame(); the result is then
# unpacked below into three separate pieces.
part <- h2o.splitFrame(data = iris.h2o, ratios = c(0.6, 0.2))
trin  <- part[[1]]  # first piece
test  <- part[[2]]  # second piece
valid <- part[[3]]  # third piece
rm(part)            # drop the temporary container; the three pieces are kept
Export data
# Get data out of h2o
# NOTE(review): the calls below are listed without arguments as a quick
# reference. Going by their names they cover downloading a frame as CSV,
# downloading a model as MOJO / POJO, exporting a frame to a file and saving a
# model -- confirm the required arguments in the h2o documentation.
h2o.downloadCSV()
h2o.download_mojo()
h2o.download_pojo()
h2o.exportFile()
h2o.saveModel()