1. Kylin源码编译
1.1 安装Maven
[root@compile opt]# wget http://mirrors.tuna.tsinghua.edu.cn/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz
[root@compile opt]# tar -zxvf apache-maven-3.6.3-bin.tar.gz
[root@compile opt]# mv apache-maven-3.6.3 maven-3.6.3
[root@compile opt]# ln -s maven-3.6.3 maven
[root@compile opt]# chown -R root:root maven*
[root@compile opt]# rm -rf apache-maven-3.6.3-bin.tar.gz
# 配置环境变量
[root@compile opt]# vim /etc/profile
export MAVEN_HOME=/opt/maven
export MAVEN_OPTS="-Xms1024m -Xmx2048m"
export PATH=$MAVEN_HOME/bin:$PATH
[root@compile opt]# source /etc/profile
# maven配置
[root@compile maven]# vim conf/settings.xml
<localRepository>/opt/maven/repo</localRepository>
<mirrors>
<mirror>
<id>nexus-aliyun</id>
<name>nexus-aliyun</name>
<url>http://maven.aliyun.com/nexus/content/groups/public</url>
<mirrorOf>central</mirrorOf>
</mirror>
<mirror>
<id>osc_thirdparty</id>
<url>http://maven.aliyun.com/nexus/content/repositories/thirdparty/</url>
<mirrorOf>thirdparty</mirrorOf>
</mirror>
</mirrors>
<profiles>
<profile>
<id>jdk-1.8</id>
<activation>
<activeByDefault>true</activeByDefault>
<jdk>1.8</jdk>
</activation>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<maven.compiler.compilerVersion>1.8</maven.compiler.compilerVersion>
</properties>
</profile>
</profiles>
[root@compile maven]# mkdir -p /opt/maven/repo
1.2 安装Git
[root@compile maven]# yum -y install git
1.3 安装nodejs
注意:nodejs版本不能太高,11即可,否则和gulp不兼容
[root@compile maven]# curl --silent --location https://rpm.nodesource.com/setup_11.x | bash -
# 弹出来的警告信息不用管
================================================================================
================================================================================
DEPRECATION WARNING
Node.js 11.x is no longer actively supported!
...
================================================================================
================================================================================
Continuing in 20 seconds ...
...
## Run `sudo yum install -y nodejs` to install Node.js 11.x and npm.
...
# 安装nodejs
[root@compile opt]# yum install -y nodejs
# 安装cnpm
[root@compile opt]# npm install -g cnpm --registry=https://registry.npm.taobao.org
1.4 获取Kylin源码
[root@compile opt]# cd ~
[root@compile ~]# mkdir src
[root@compile ~]# cd src/
[root@compile src]# git clone https://github.com/apache/kylin.git
[root@compile src]# cd kylin/
# kylin版本切换
# 本文是基于HDP-3.1.4.0-315安装部署Kylin,HDP-3.1.4.0-315的Hadoop版本是3.1.1
# 故切换到2.6.x-hadoop3.1版本
[root@compile kylin]# git checkout 2.6.x-hadoop3.1
1.5 Kylin源码修改
# 1.修改kylin/pom.xml
# 修改guava版本
<!--<guava.version>14.0</guava.version>-->
<guava.version>28.0-jre</guava.version>
# 注释一个仓库
<!--
<repository>
<id>kyligence</id>
<name>Kyligence Repository</name>
<url>https://repository.kyligence.io/repository/maven-public/</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
-->
# 2.修改kylin/core-metadata/src/main/java/org/apache/kylin/metadata/cachesync/Broadcaster.java
// import com.google.common.base.Objects;
import com.google.common.base.MoreObjects;
@Override
public String toString() {
// return Objects.toStringHelper(this).add("entity", entity).add("event", event).add("cacheKey", cacheKey).toString();
return MoreObjects.toStringHelper(this).add("entity", entity).add("event", event).add("cacheKey", cacheKey).toString();
}
# 3.修改kylin/core-metadata/src/main/java/org/apache/kylin/source/SourcePartition.java
// import com.google.common.base.Objects;
import com.google.common.base.MoreObjects;
@Override
public String toString() {
// return Objects.toStringHelper(this).add("tsRange", tsRange).add("segRange", segRange).add("sourcePartitionOffsetStart", sourcePartitionOffsetStart.toString()).add("sourcePartitionOffsetEnd", sourcePartitionOffsetEnd.toString()).toString();
return MoreObjects.toStringHelper(this).add("tsRange", tsRange).add("segRange", segRange).add("sourcePartitionOffsetStart", sourcePartitionOffsetStart.toString()).add("sourcePartitionOffsetEnd", sourcePartitionOffsetEnd.toString()).toString();
}
# 4.修改kylin/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyColDesc.java
// import com.google.common.base.Objects;
import com.google.common.base.MoreObjects;
@Override
public String toString() {
// return Objects.toStringHelper(this).add("column", column).add("encoding", encoding).toString();
return MoreObjects.toStringHelper(this).add("column", column).add("encoding", encoding).toString();
}
# 5.修改kylin/core-cube/src/main/java/org/apache/kylin/cube/model/RowKeyDesc.java
// import com.google.common.base.Objects;
import com.google.common.base.MoreObjects;
@Override
public String toString() {
// return Objects.toStringHelper(this).add("RowKeyColumns", Arrays.toString(rowkeyColumns)).toString();
return MoreObjects.toStringHelper(this).add("RowKeyColumns", Arrays.toString(rowkeyColumns)).toString();
}
# 6.修改kylin/core-cube/src/main/java/org/apache/kylin/cube/model/DimensionDesc.java
// import com.google.common.base.Objects;
import com.google.common.base.MoreObjects;
@Override
public String toString() {
// return Objects.toStringHelper(this).add("name", name).add("table", table).add("column", column).add("derived", Arrays.toString(derived)).add("join", join).toString();
return MoreObjects.toStringHelper(this).add("name", name).add("table", table).add("column", column).add("derived", Arrays.toString(derived)).add("join", join).toString();
}
# 7.修改kylin/core-job/src/main/java/org/apache/kylin/job/execution/AbstractExecutable.java
// import com.google.common.base.Objects;
import com.google.common.base.MoreObjects;
@Override
public String toString() {
// return Objects.toStringHelper(this).add("id", getId()).add("name", getName()).add("state", getStatus()).toString();
return MoreObjects.toStringHelper(this).add("id", getId()).add("name", getName()).add("state", getStatus()).toString();
}
# 8.修改kylin/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
// Stopwatch sw = new Stopwatch();
Stopwatch sw = Stopwatch.createUnstarted();
# 9.修改kylin/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing2/DoggedCubeBuilder2.java
import java.util.concurrent.TimeUnit;
// Stopwatch sw = new Stopwatch();
Stopwatch sw = Stopwatch.createUnstarted();
// Stopwatch stopwatch = new Stopwatch().start();
Stopwatch stopwatch = Stopwatch.createStarted();
// long sleepTime = stopwatch.elapsedMillis();
long sleepTime = stopwatch.elapsed(TimeUnit.MILLISECONDS);
// logger.info("Dogged Cube Build2 splits complete, took " + sw.elapsedMillis() + " ms");
logger.info("Dogged Cube Build2 splits complete, took " + sw.elapsed(TimeUnit.MILLISECONDS) + " ms");
// logger.info("Dogged Cube Build2 end, totally took " + sw.elapsedMillis() + " ms");
logger.info("Dogged Cube Build2 end, totally took " + sw.elapsed(TimeUnit.MILLISECONDS) + " ms");
# 10.修改kylin/core-cube/src/main/java/org/apache/kylin/cube/inmemcubing2/InMemCubeBuilder2.java
import java.util.concurrent.TimeUnit;
// Stopwatch sw = new Stopwatch();
Stopwatch sw = Stopwatch.createUnstarted();
// logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, sw.elapsedMillis());
logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, sw.elapsed(TimeUnit.MILLISECONDS));
// return updateCuboidResult(baseCuboidId, baseCuboid, count, sw.elapsedMillis(), 0, input.inputConverterUnit.ifChange());
return updateCuboidResult(baseCuboidId, baseCuboid, count, sw.elapsed(TimeUnit.MILLISECONDS), 0, input.inputConverterUnit.ifChange());
// Stopwatch sw = new Stopwatch();
Stopwatch sw = Stopwatch.createUnstarted();
// logger.info("Cuboid {} has {} rows, build takes {}ms", cuboidId, count, sw.elapsedMillis());
logger.info("Cuboid {} has {} rows, build takes {}ms", cuboidId, count, sw.elapsed(TimeUnit.MILLISECONDS));
// return updateCuboidResult(cuboidId, newGridTable, count, sw.elapsedMillis(), 0);
return updateCuboidResult(cuboidId, newGridTable, count, sw.elapsed(TimeUnit.MILLISECONDS), 0);
# 11.修改kylin/core-cube/src/test/java/org/apache/kylin/cube/cuboid/algorithm/CuboidStatsUtilTest.java
import java.util.concurrent.TimeUnit;
// Stopwatch sw = new Stopwatch();
Stopwatch sw = Stopwatch.createUnstarted();
// System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsedMillis());
System.out.println("Time elapsed for creating sorted cuboid list: " + sw.elapsed(TimeUnit.MILLISECONDS));
// System.out.println("Time elapsed for creating direct children cache: " + sw.elapsedMillis());
System.out.println("Time elapsed for creating direct children cache: " + sw.elapsed(TimeUnit.MILLISECONDS));
# 12.修改kylin/core-cube/src/test/java/org/apache/kylin/gridtable/AggregationCacheMemSizeTest.java
import java.util.concurrent.TimeUnit;
// final Stopwatch stopwatch = new Stopwatch();
final Stopwatch stopwatch = Stopwatch.createUnstarted();
// estimateMillis += stopwatch.elapsedMillis();
estimateMillis += stopwatch.elapsed(TimeUnit.MILLISECONDS);
// actualMillis += stopwatch.elapsedMillis();
actualMillis += stopwatch.elapsed(TimeUnit.MILLISECONDS);
# 13.修改kylin/core-cube/src/test/java/org/apache/kylin/cube/cuboid/CuboidUtilTest.java
// Stopwatch sw = new Stopwatch();
Stopwatch sw = Stopwatch.createUnstarted();
# 14.修改kylin/core-cube/src/main/java/org/apache/kylin/cube/util/CubingUtils.java
// row_hashcodes[i] = hc.putString(cell).hash().asBytes();
row_hashcodes[i] = hc.putUnencodedChars(cell).hash().asBytes();
# 15.修改kylin/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/FactDistinctColumnsMapper.java
// rowHashCodes[i] = hc.putString(colValue).hash().asBytes();
rowHashCodes[i] = hc.putUnencodedChars(colValue).hash().asBytes();
// byte[] bytes = hc.putString(colValue).hash().asBytes();
byte[] bytes = hc.putUnencodedChars(colValue).hash().asBytes();
# 16.修改kylin/engine-mr/src/main/java/org/apache/kylin/engine/mr/steps/CalculateStatsFromBaseCuboidMapper.java
// rowHashCodes[i] = hc.putString(colValue).hash().asBytes();
rowHashCodes[i] = hc.putUnencodedChars(colValue).hash().asBytes();
// byte[] bytes = hc.putString(colValue).hash().asBytes();
byte[] bytes = hc.putUnencodedChars(colValue).hash().asBytes();
# 17.修改kylin/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/NewCubeSamplingMethodTest.java
// colHashValues[x++] = hc.putString(field).hash().asBytes();
colHashValues[x++] = hc.putUnencodedChars(field).hash().asBytes();
// byte[] bytes = hc.putString(x + field).hash().asBytes();
byte[] bytes = hc.putUnencodedChars(x + field).hash().asBytes();
// colHashValues[x++] = hc.putString(field).hash().asBytes();
colHashValues[x++] = hc.putUnencodedChars(field).hash().asBytes();
// byte[] bytes = hc.putString(x + field).hash().asBytes();
byte[] bytes = hc.putUnencodedChars(x + field).hash().asBytes();
# 18.修改kylin/engine-mr/src/test/java/org/apache/kylin/engine/mr/steps/CubeSamplingTest.java
// row_index[x++] = hc.putString(field).hash().asBytes();
row_index[x++] = hc.putUnencodedChars(field).hash().asBytes();
# 19.修改kylin/engine-spark/src/main/java/org/apache/kylin/engine/spark/SparkFactDistinct.java
// rowHashCodes[i] = hc.putString(colValue).hash().asBytes();
rowHashCodes[i] = hc.putUnencodedChars(colValue).hash().asBytes();
// byte[] bytes = hc.putString(colValue).hash().asBytes();
byte[] bytes = hc.putUnencodedChars(colValue).hash().asBytes();
# 20.修改kylin/server-base/src/main/java/org/apache/kylin/rest/security/KylinAuthenticationProvider.java
// byte[] hashKey = hf.hashString(authentication.getName() + authentication.getCredentials()).asBytes();
byte[] hashKey = hf.hashUnencodedChars(authentication.getName() + authentication.getCredentials()).asBytes();
# 21.修改kylin/tool-assembly/pom.xml
# 注释掉以下内容
<!--
<relocation>
<pattern>com.google.common</pattern>
<shadedPattern>${shadeBase}.com.google.common</shadedPattern>
</relocation>
-->
# 22.修改kylin/build/script/build.sh
# npm install -g bower || { exit 1; }
cnpm install -g bower || { exit 1; }
# npm install || { exit 1; }
cnpm install || { exit 1; }
# npm install -g grunt-cli || { exit 1; }
cnpm install -g grunt-cli || { exit 1; }
1.6 编译源码
# 1.编译安装Kylin版calcite到本地maven仓库
# 拉源码的时候,注意执行命令的目录,不要拉到kylin的源码中
[root@compile src]# git clone https://github.com/Kyligence/calcite.git
[root@compile src]# ll
total 8
drwxr-xr-x 19 root root 4096 2021-04-08 23:06 calcite
drwxr-xr-x 35 root root 4096 2021-04-08 21:41 kylin
[root@compile src]# cd calcite/
[root@compile calcite]# git checkout 1.16.0-kylin-r2
[root@compile calcite]# mvn clean install -DskipTests
# 2.安装javax.el-3.0.1-b08.jar到本地maven仓库
[root@compile src]# mvn install:install-file -DgroupId=org.glassfish -DartifactId=javax.el -Dversion=3.0.1-b08 -Dpackaging=jar -Dfile=/root/src/javax.el-3.0.1-b08.jar
# 3.安装phantomjs
[root@compile src]# wget https://github.com/Medium/phantomjs/releases/download/v1.9.19/phantomjs-1.9.8-linux-x86_64.tar.bz2
[root@compile src]# tar -jxvf phantomjs-1.9.8-linux-x86_64.tar.bz2 -C /usr/local/
[root@compile src]# mv /usr/local/phantomjs-1.9.8-linux-x86_64 /usr/local/phantomjs
[root@compile src]# vim /etc/profile
export PHANTOMJS_HOME=/usr/local/phantomjs
export PATH=$PHANTOMJS_HOME/bin:$PATH
[root@compile src]# source /etc/profile
# 4.修改git配置
[root@compile src]# git config --global url."git://".insteadOf https://
# 5.编译kylin源码
# 如果不是root用户,先切换到root用户下
[root@compile src]# cd kylin/
[root@compile kylin]# build/script/package.sh -Dcheckstyle.skip
# 最终看到
Package ready: dist/apache-kylin-2.6.6-bin.tar.gz
# 如果后端编译完成后,编译前端的时候有问题,尝试清空cnpm的缓存后再尝试:cnpm cache clean -f
2. Kylin-2.6安装部署
2.1 集群规划
| | hdp01 | hdp02 | hdp03 | hdp04 |
|---|---|---|---|---|
| HDFS | NameNode DataNode HDFS Client | Secondary NameNode DataNode HDFS Client | DataNode HDFS Client | DataNode HDFS Client |
| YARN | ResourceManager NodeManager TimelineService V2.0 YARN Client | NodeManager TimelineService V1.5 YARN Client | NodeManager YARN Client | NodeManager YARN Client |
| HBase | HBaseMaster RegionServer HBase Client | HBaseMaster RegionServer HBase Client | RegionServer HBase Client | RegionServer HBase Client |
| Hive | Hive Client | HiveServer2 Hive Metastore Hive Client | Hive Client | Hive Client |
| Spark | Spark2 Thrift Server Spark2 History Server Spark2 Client | Spark2 Client | Spark2 Client | Spark2 Client |
| Kylin | Kylin | | | |
-
保证Kylin所在的节点可以使用HDFS Client、YARN Client、HBase Client、Hive Client访问到对应的集群
# 在kylin所在节点应该都有这些客户端
[admin@hdp01 ~]$ which hdfs
/usr/bin/hdfs
[admin@hdp01 ~]$ which hive
/usr/bin/hive
[admin@hdp01 ~]$ which hbase
/usr/bin/hbase
[admin@hdp01 ~]$ which yarn
/usr/bin/yarn
- 本文基于HDP3.1大数据平台安装部署Kylin,大数据平台的安装部署参考博主的其他文章
2.2 安装部署
# 1.上传编译好的kylin安装包并解压
[admin@hdp01 apps]$ tar -zxvf apache-kylin-2.6.6-bin.tar.gz
[admin@hdp01 apps]$ mv apache-kylin-2.6.6-bin kylin-2.6.6
# 2.安装Spark的两个jar包
# HDP3.1中安装部署的Spark中缺少Kylin需要使用的两个jar包,这里下载开源版spark,获取这两个jar包
[admin@hdp01 apps]$ wget http://archive.apache.org/dist/spark/spark-2.3.2/spark-2.3.2-bin-hadoop2.7.tgz
[admin@hdp01 apps]$ tar -zxvf spark-2.3.2-bin-hadoop2.7.tgz
[admin@hdp01 apps]$ sudo cp spark-2.3.2-bin-hadoop2.7/jars/xercesImpl-2.9.1.jar /usr/hdp/current/spark2-client/jars/
[admin@hdp01 apps]$ sudo cp spark-2.3.2-bin-hadoop2.7/jars/commons-configuration-1.6.jar /usr/hdp/current/spark2-client/jars/
[admin@hdp01 apps]$ sudo chown root:root /usr/hdp/current/spark2-client/jars/xercesImpl-2.9.1.jar
[admin@hdp01 apps]$ sudo chown root:root /usr/hdp/current/spark2-client/jars/commons-configuration-1.6.jar
# 3.配置环境变量
[admin@hdp01 apps]$ sudo vim /etc/profile
export HIVE_HOME=/usr/hdp/current/hive-client
export HIVE_CONF=/etc/hive/conf
export PATH=$HIVE_HOME/bin:$PATH
export HCAT_HOME=/usr/hdp/current/hive-webhcat
export PATH=$HCAT_HOME/bin:$PATH
export SPARK_HOME=/usr/hdp/current/spark2-client
export PATH=$SPARK_HOME/bin:$PATH
export KYLIN_HOME=/opt/apps/kylin-2.6.6
export PATH=$KYLIN_HOME/bin:$PATH
[admin@hdp01 apps]$ source /etc/profile
# 4.kylin配置
[admin@hdp01 apps]$ cd kylin-2.6.6
[admin@hdp01 kylin]$ vim conf/kylin.properties
# 未列出的配置保持默认即可
# 因为后续有可能会部署Kylin3.x,所以在这里修改了metadata url,以区分这里是2.x版本
kylin.metadata.url=kylin_2_metadata@hbase
kylin.env.hdfs-working-dir=/kylin2
kylin.env.zookeeper-base-path=/kylin2
kylin.server.mode=all
kylin.server.cluster-servers=hdp01:7070
kylin.web.timezone=GMT+8
kylin.source.hive.client=cli
kylin.storage.url=hbase
kylin.source.hive.quote-enabled=false
kylin.engine.spark-conf.spark.eventLog.dir=hdfs:///kylin2/spark-history
kylin.engine.spark-conf.spark.history.fs.logDirectory=hdfs:///kylin2/spark-history
# 5.修改bin/find-hive-dependency.sh
# 注释这一行
# hive_env=`hive ${hive_conf_properties} -e set 2>&1 | grep 'env:CLASSPATH'`
# 在下方添加
hive -e set >/tmp/hive_env.txt 2>&1
hive_env=`grep 'env:CLASSPATH' /tmp/hive_env.txt`
# 修改这一行
# if [ -z $hive_env ]
if [ -z "$hive_env" ]
# 6.修改tomcat/conf/catalina.properties
# 108行,下面定义了扫描时要跳过的jar包
tomcat.util.scan.StandardJarScanFilter.jarsToSkip=\
annotations-api.jar,\
ant-junit*.jar,\
ant-launcher.jar,\
...
# 将这些jar包删除掉,修改为
tomcat.util.scan.StandardJarScanFilter.jarsToSkip=*.jar
# 7.生成keystore
[admin@hdp01 kylin]$ keytool -genkey -alias kylin -keyalg RSA
# 我这里是123456
Enter keystore password:
Re-enter new password:
# 下面直接回车
What is your first and last name?
[Unknown]:
What is the name of your organizational unit?
[Unknown]:
What is the name of your organization?
[Unknown]:
What is the name of your City or Locality?
[Unknown]:
What is the name of your State or Province?
[Unknown]:
What is the two-letter country code for this unit?
[Unknown]:
Is CN=Unknown, OU=Unknown, O=Unknown, L=Unknown, ST=Unknown, C=Unknown correct?
# 手动输入y
[no]: y
# 123456
Enter key password for <kylin>
(RETURN if same as keystore password):
# 123456
Re-enter new password:
Warning:
The JKS keystore uses a proprietary format. It is recommended to migrate to PKCS12 which is an industry standard format using "keytool -importkeystore -srckeystore /home/admin/.keystore -destkeystore /home/admin/.keystore -deststoretype pkcs12".
[admin@hdp01 kylin]$ mv ~/.keystore tomcat/conf/
# 配置Kylin web server的SSL
# 修改tomcat/conf/server.xml
<Connector port="7443" protocol="org.apache.coyote.http11.Http11Protocol"
maxThreads="150" SSLEnabled="true" scheme="https" secure="true"
keystoreFile="conf/.keystore" keystorePass="123456"
clientAuth="false" sslProtocol="TLS" />
-
Hive Client权限变更
# 9. 检查环境
[admin@hdp01 kylin]$ sudo su -
[root@hdp01 ~]# cd /opt/apps/kylin-2.6.6/
[root@hdp01 kylin-2.6.6]# source /etc/profile
[root@hdp01 kylin-2.6.6]# bin/check-env.sh
Retrieving hadoop conf dir...
# 看到这一行说明环境没有问题了
KYLIN_HOME is set to /opt/apps/kylin
# 10.设置HDFS权限,admin是Linux的普通用户
[admin@hdp01 kylin]$ sudo usermod -a -G hadoop admin
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -mkdir /kylin2
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -chown hdfs:hadoop /kylin2
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -chmod 775 /kylin2
# 11.启动Kylin
[admin@hdp01 kylin]$ bin/kylin.sh start
...
A new Kylin instance is started by admin. To stop it, run 'kylin.sh stop'
Check the log at /opt/apps/kylin/logs/kylin.log
Web UI is at http://hdp01:7070/kylin
到这里,Kylin2.6就安装部署成功了!
3. Kylin入门使用
Kylin官网提供了入门案例,其中5张表的信息如下:
表名 | 类型 | 数据量 | 描述 |
---|---|---|---|
kylin_account | 维表 | 10000 | 账户表,买家卖家都在这里,一个用户既可以是买家也可以是卖家 |
kylin_cal_dt | 维表 | 731 | 时间扩展信息表,日期所在的年始、月始、周始、年份、月份等 |
kylin_category_groupings | 维表 | 144 | 商品分类信息表 |
kylin_country | 维表 | 244 | 国家信息表,国家名字、代码、纬度、经度 |
kylin_sales | 事实表 | 10000 | 销售明细信息表,例如卖家、商品分类、订单金额、数量等等 |
- 导入示例数据
# /warehouse是hive的数据目录
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -chown -R hdfs:hadoop /user
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -chown -R hdfs:hadoop /warehouse
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -chmod -R 775 /warehouse
[admin@hdp01 kylin]$ sudo -u hdfs hdfs dfs -chmod -R 775 /user
[admin@hdp01 kylin]$ bin/sample.sh
# 最后打印出这两行日志说明数据导入成功
Sample cube is created successfully in project 'learn_kylin'.
Restart Kylin Server or click Web UI => System Tab => Reload Metadata to take effect
查看Hive表:
[admin@hdp01 kylin]$ hive
0: jdbc:hive2://hdp02:2181,hdp03:2181,hdp04:2> use default;
0: jdbc:hive2://hdp02:2181,hdp03:2181,hdp04:2> show tables;
+--------+
| _c0 |
+--------+
| 10000 |
+--------+
查看Kylin的Model和Cube:
-
构建Cube
在Hive中查询示例数据的日期区间:
0: jdbc:hive2://hdp02:2181,hdp03:2181,hdp04:2> select min(part_dt),max(part_dt) from kylin_sales;
+-------------+-------------+
|     _c0     |     _c1     |
+-------------+-------------+
| 2012-01-01  | 2014-01-01  |
+-------------+-------------+
这里可以选择时间范围,说明Kylin是支持增量构建的
构建过程是一个MapReduce任务,比较耗时,构建之前确保MapReduce History Server是启动的,否则会报错
-
查询Cube
上面的复杂查询耗时0.56s,我们在Hive中测试一下:
0: jdbc:hive2://hdp02:2181,hdp03:2181,hdp04:2> select part_dt, sum(price) as total_selled, count(distinct seller_id) as sellers from kylin_sales group by part_dt order by part_dt;
...
731 rows selected (7.692 seconds)
可见经过Kylin预计算后,大大提升了查询性能。