一 下载软件
具体版本
apache-livy-0.6.0-incubating-bin
hadoop-2.7.0
jdk1.8.0_141
spark-2.3.1-bin-hadoop2.6
apache-kylin-3.1.0-bin-hbase1x
apache-maven-3.6.1
hbase-1.1.2
kafka_2.11-1.1.1
apache-hive-1.2.1-bin
创建目录/root/kylin,下载以下软件到/root/kylin
1 下载kylin
export KYLIN_VERSION=3.1.0
wget https://downloads.apache.org/kylin/apache-kylin-$KYLIN_VERSION/apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz
tar zxvf apache-kylin-$KYLIN_VERSION-bin-hbase1x.tar.gz
2 下载hive
export HIVE_VERSION=1.2.1
wget https://archive.apache.org/dist/hive/hive-$HIVE_VERSION/apache-hive-$HIVE_VERSION-bin.tar.gz
tar zxvf apache-hive-$HIVE_VERSION-bin.tar.gz
3 下载hadoop
export HADOOP_VERSION=2.7.0
wget https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
tar zxvf hadoop-$HADOOP_VERSION.tar.gz
4 下载hbase
export HBASE_VERSION=1.1.2
wget https://archive.apache.org/dist/hbase/$HBASE_VERSION/hbase-$HBASE_VERSION-bin.tar.gz
tar zxvf hbase-$HBASE_VERSION-bin.tar.gz
5 下载kafka
wget https://archive.apache.org/dist/kafka/1.1.1/kafka_2.11-1.1.1.tgz
tar zxvf kafka_2.11-1.1.1.tgz
二 环境配置
1 免密码
1.1 在每台机器上操作
ssh-keygen -t rsa
cd ~/.ssh
cat id_rsa.pub >> authorized_keys
chmod 600 authorized_keys
1.2 互拷公钥
把177上的id_rsa.pub内容拷贝到178,179机器中的authorized_keys
把178上的id_rsa.pub内容拷贝到177,179机器中的authorized_keys
把179上的id_rsa.pub内容拷贝到177,178机器中的authorized_keys
1.3 测试
ssh 192.168.0.177 #看是否可以正常登录
2 安装java
具体过程请参考相关文档,版本大于等于1.8.0
配置~/.bashrc
export JAVA_HOME=/root/kylin/jdk1.8.0_141
export JRE_HOME=$JAVA_HOME/jre
export PATH=$HADOOP_HOME/bin:$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
3 配置/etc/hosts
192.168.0.177 kylin-cluster1
192.168.0.178 kylin-cluster2
192.168.0.179 kylin-cluster3
三 安装
创建目录/root/kylin/data/,/root/kylin/log
1 安装hadoop
1.1 在/root/kylin/data/ 下创建以下目录
[root@kylin-cluster1 kylin]# cd data/
[root@kylin-cluster1 data]# mkdir hadoop
[root@kylin-cluster1 data]# mkdir hadoop/dfs -p
[root@kylin-cluster1 data]# mkdir hadoop/dfs/name -p
[root@kylin-cluster1 data]# mkdir hadoop/dfs/data -p
[root@kylin-cluster1 data]# mkdir hadoop/tmp -p
[root@kylin-cluster1 data]# mkdir hadoop/var -p
1.2 修改配置文件
在 /root/kylin/hadoop-2.7.0/etc/hadoop 目录下
1.2.1 配置core-site.xml文件
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://kylin-cluster1:9000</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/root/kylin/data/hadoop/tmp</value>
<description>Abase for other temporary directories.</description>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://kylin-cluster1:9000</value>
</property>
</configuration>
1.2.2 配置hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.rpc-address</name>
<value>kylin-cluster1:9000</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/root/kylin/data/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/root/kylin/data/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>kylin-cluster1:50090</value>
</property>
</configuration>
1.2.3 配置 mapred-site.xml
mv mapred-site.xml.template mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
1.2.4 配置yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>kylin-cluster1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>2048</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>2</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
1.2.5 配置slaves文件
kylin-cluster1
kylin-cluster2
kylin-cluster3
1.3 启动历史服务器
mr-jobhistory-daemon.sh start historyserver
1.4 启动
sbin/start-all.sh
1.5 测试
hadoop dfs -ls / #运行成功
2 安装zookeeper
2.1 将 conf/zoo_sample.cfg 复制为 conf/zoo.cfg 并配置
dataDir=/root/kylin/data/zookeeper/data
server.1=kylin-cluster1:2888:3888
server.2=kylin-cluster2:2888:3888
server.3=kylin-cluster3:2888:3888
2.2 拷贝zookeeper到其他节点并配置
在kylin-cluster1节点:
echo 1 > /root/kylin/data/zookeeper/data/myid
在kylin-cluster2节点:
echo 2 > /root/kylin/data/zookeeper/data/myid
在kylin-cluster3节点:
echo 3 > /root/kylin/data/zookeeper/data/myid
2.3 启动
每个节点分别运行:
bin/zkServer.sh start
2.4 测试
bin/zkCli.sh -server 192.168.0.178:2181
3 安装hbase
修改配置 /root/kylin/hbase-1.1.2/conf下的文件
3.1 配置hbase-env.sh
export JAVA_HOME=/root/kylin/jdk1.8.0_141
export HBASE_CLASSPATH=/root/kylin/hbase-1.1.2/conf
export HBASE_MANAGES_ZK=false
3.2 配置hbase-site.xml
<configuration>
<!--是否开启分布式-->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!--对应的zookeeper集群,不用加端口-->
<property>
<name>hbase.zookeeper.quorum</name>
<value>kylin-cluster1,kylin-cluster2,kylin-cluster3</value>
</property>
<!--指定Zookeeper数据存储目录-->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/root/kylin/data/hbase/data</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.rootdir</name>
<value>hdfs://kylin-cluster1:9000/hbase</value>
</property>
</configuration>
3.3 配置regionservers
kylin-cluster1
kylin-cluster2
kylin-cluster3
3.4 创建数据目录
mkdir /root/kylin/data/hbase/data -p
3.5 同步hbase到其他节点和创建目录
kylin-cluster2:
scp -r hbase-1.1.2 root@kylin-cluster2:/root/kylin
mkdir /root/kylin/data/hbase/data -p
kylin-cluster3:
scp -r hbase-1.1.2 root@kylin-cluster3:/root/kylin
mkdir /root/kylin/data/hbase/data -p
3.6 启动
在kylin-cluster1运行以下命令,其他节点不用运行
bin/start-hbase.sh
启动后jps查看HBASE进程HMaster、HRegionServer
3.7 测试
./bin/hbase shell #可以正常登录进去
4 安装hive
创建目录
mkdir /root/kylin/data/hive/tmp -p
4.1 配置 文件
cp hive-env.sh.template hive-env.sh
cp hive-default.xml.template hive-site.xml
4.1.1配置hive-env.sh
HADOOP_HOME=${HADOOP_HOME}
4.1.2 配置hive-site.xml
把 ${system:java.io.tmpdir} 改成 /root/kylin/data/hive/tmp
把 ${system:user.name} 改成 ${user.name}
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://kylin-cluster1:3306/kylin_hive?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>kylin@1234</value>
<description>password to use against metastore database</description>
</property>
</configuration>
4.2 hdfs 中创建下面的目录 ,并且授权
hdfs dfs -mkdir -p /user/hive/warehouse
hdfs dfs -mkdir -p /user/hive/tmp
hdfs dfs -mkdir -p /user/hive/log
hdfs dfs -chmod -R 777 /user/hive/warehouse
hdfs dfs -chmod -R 777 /user/hive/tmp
hdfs dfs -chmod -R 777 /user/hive/log
4.3 安装mysql5.7版本
4.4 配置mysql-connector-java
配置完后这里需要注意一下,因为访问mysql,需要mysql的驱动jar包,需要把mysql的驱动jar包放到hive的lib目录下,找一个jar包放进去,我这里放的是mysql-connector-java-5.1.43.jar这个版本的,要注意mysql和jdbc的配合。
mv mysql-connector-java-5.1.43.jar /root/kylin/apache-hive-1.2.1-bin/lib/
初始化hive
schematool -dbType mysql -initSchema
4.5 启动
nohup bin/hiveserver2 > /root/kylin/log/hive/hive.log 2>&1 &
4.6 测试
hive #可以正常登录进去
5 安装spark(可选)
$KYLIN_HOME/bin/download-spark.sh
6 安装flink(可选)
$KYLIN_HOME/bin/download-flink.sh
7 安装kafka
mkdir /root/kylin/log/kafka-logs
7.1 配置 server.properties
broker.id=0
zookeeper.connect=kylin-cluster1:2181,kylin-cluster2:2181,kylin-cluster3:2181
log.dirs=/root/kylin/log/kafka-logs
7.2 配置 zookeeper.properties
dataDir=/root/kylin/data/zookeeper/data
7.3 拷贝kafka到其他节点并修改
7.3.1 kylin-cluster2节点:
scp -r kafka_2.11-1.1.1 root@kylin-cluster2:/root/kylin
配置 server.properties
broker.id=1
7.3.2 kylin-cluster3节点:
scp -r kafka_2.11-1.1.1 root@kylin-cluster3:/root/kylin
broker.id=2
7.4 分别启动各节点
bin/kafka-server-start.sh -daemon config/server.properties
运行jps命令:
会看到 Kafka
7.5 测试
通过zookeeper 中zkCli.sh来看下
8 安装kylin
8.1 配置环境
修改~/.bashrc
export HADOOP_HOME=/root/kylin/hadoop-2.7.0
export HBASE_HOME=/root/kylin/hbase-1.1.2
export HIVE_HOME=/root/kylin/apache-hive-1.2.1-bin
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
export HIVE_CONF_DIR=$HIVE_HOME/conf
export KYLIN_HOME=/root/kylin/apache-kylin-3.1.0-bin-hbase1x
export HCAT_HOME=$HIVE_HOME/hcatalog
export KYLIN_CONF_HOME=$KYLIN_HOME/conf
export tomcat_root=$KYLIN_HOME/tomcat
export KAFKA_HOME=/root/kylin/kafka_2.11-1.1.1
export hive_dependency=$HIVE_HOME/conf:$HIVE_HOME/lib/*:$HCAT_HOME/share/hcatalog/hive-hcatalog-core-1.2.1.jar
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$KAFKA_HOME/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HBASE_HOME/bin:$HIVE_HOME/bin:$KYLIN_HOME/bin:$PATH
8.2 配置kylin.sh
export HBASE_CLASSPATH_PREFIX=$hive_dependency:${kafka_dependency}:${spark_dependency}:${flink_dependency}:$HBASE_CLASSPATH_PREFIX
8.3 配置kylin.properties
kylin.env.hdfs-working-dir=/kylin
kylin.metadata.url=kylin_metadata@hbase
kylin.server.mode=all
8.4 启动
bin/check-env.sh
bin/kylin.sh start
8.5 测试
8.5.1 通过浏览器访问 http://kylin-cluster1:7070/kylin
登录用户名:ADMIN 密码:KYLIN
8.5.2 测试例子
运行bin/sample.sh
9 错误处理
9.1 org.apache.kylin.engine.mr.exception.MapReduceException: Exception: java.net.ConnectException: Call From kylin-cluster1/192.168.0.177 to 0.0.0.0:10020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
java.net.ConnectException: Call From kylin-cluster1/192.168.0.177 to 0.0.0.0:10020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see: http://wiki.apache.org/hadoop/ConnectionRefused
at org.apache.kylin.engine.mr.common.MapReduceExecutable.doWork(MapReduceExecutable.java:223)
解决办法:
[root@kylin-cluster1 hadoop-2.7.0]# mr-jobhistory-daemon.sh start historyserver
参考
https://hbase.apache.org/
https://hbase.apache.org/book.html#quickstart
https://zookeeper.apache.org/doc/r3.5.8/zookeeperProgrammers.html
https://hive.apache.org/
http://kafka.apache.org/quickstart
http://spark.apache.org/
https://blog.csdn.net/gaoxs_csdn/article/details/107629048