Offline CDH Distributed Cluster Installation from Tarballs


Machines

172.21.51.87
172.21.51.88
172.21.51.89

Edit /etc/hosts on every machine

vim /etc/hosts
172.21.51.87 dev5187
172.21.51.88 dev5188
172.21.51.89 dev5189

Disable the firewall and SELinux (on every machine)
sudo systemctl stop firewalld.service
sudo systemctl disable firewalld.service
vim /etc/selinux/config
Change SELINUX=enforcing to SELINUX=disabled
Disable swap:
Temporary: swapoff -a
Permanent: echo 'vm.swappiness=0' | sudo tee -a /etc/sysctl.conf, then reboot
(note: sudo echo 'vm.swappiness=0' >> /etc/sysctl.conf does not work, because the redirect runs without sudo)
Create the user (on every machine)

groupadd hadoop
useradd hadoop -g hadoop
passwd hadoop

Grant sudo rights (on every machine)
vim /etc/sudoers (the file is read-only by default, so the first time you may need chmod 640 /etc/sudoers; restore the original permissions afterwards)

hadoop ALL=(ALL) NOPASSWD: ALL
Raise resource limits
vim /etc/security/limits.conf (as root, or via sudo from another user)

* soft nofile 32768
* soft nproc 65536
* hard nofile 1048576
* hard nproc unlimited
* hard memlock unlimited
* soft memlock unlimited

Location of the .tar.gz packages
/APS/usr/vdsappas/package

Location of the extracted files
/APS/usr/vdsappas/soft

1. Set up ZooKeeper
a. Extract
tar -xzvf zookeeper-3.4.5-cdh5.14.4.tar.gz -C /APS/usr/vdsappas/soft/
b. Create a symlink (run inside /APS/usr/vdsappas/soft)
cd /APS/usr/vdsappas/soft
ln -s zookeeper-3.4.5-cdh5.14.4 zookeeper
c. Configure environment variables
vim ~/.bash_profile

ZOOKEEPER_HOME=/APS/usr/vdsappas/soft/zookeeper/bin
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$UPSQL_CLIENT_HOME:$ZOOKEEPER_HOME

source ~/.bash_profile
d. Edit the configuration file
cd /APS/usr/vdsappas/soft/zookeeper/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg

# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial 
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between 
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just 
# example sakes.

# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the 
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
dataDir=/APS/usr/vdsappas/softdata/zookeeper/data
dataLogDir=/APS/usr/vdsappas/softdata/zookeeper/logs
server.1=172.21.51.87:2888:3888
server.2=172.21.51.88:2888:3888
server.3=172.21.51.89:2888:3888

e. Edit the logging configuration
vim /APS/usr/vdsappas/soft/zookeeper/conf/log4j.properties

zookeeper.log.dir=/APS/usr/vdsappas/softdata/zookeeper/logs
zookeeper.tracelog.dir=/APS/usr/vdsappas/softdata/zookeeper/logs

f. Create the data directories and the myid file
mkdir -p /APS/usr/vdsappas/softdata/zookeeper/data /APS/usr/vdsappas/softdata/zookeeper/logs
echo 1 > /APS/usr/vdsappas/softdata/zookeeper/data/myid
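Each server's myid must match its server.N line in zoo.cfg; on the other two hosts, after creating the same directories:

echo 2 > /APS/usr/vdsappas/softdata/zookeeper/data/myid   # on 172.21.51.88 (server.2)
echo 3 > /APS/usr/vdsappas/softdata/zookeeper/data/myid   # on 172.21.51.89 (server.3)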

g. Repeat the steps above on the other servers

h. Start ZooKeeper (on every server where it is installed)
zkServer.sh start
Check the status:
zkServer.sh status
When the ensemble is healthy, one server reports "Mode: leader" and the others "Mode: follower".

2. Configure SSH
In the current user's home directory on every server:
cd ~
Generate a key pair:
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa

On 172.21.51.87, set up SSH access to the other servers:
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys # add our own public key to the local authorized_keys
chmod 600 ~/.ssh/authorized_keys
Send 87's public key to the other servers:
scp ~/.ssh/id_rsa.pub vdsappas@172.21.51.89:~/.ssh/authorized_keys

On 172.21.51.88, set up SSH access to the other servers:
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys # add our own public key to the local authorized_keys
chmod 600 ~/.ssh/authorized_keys
Send 88's public key to the other servers:
scp ~/.ssh/id_rsa.pub vdsappas@172.21.51.89:~/.ssh/id_rsa.pub.2
Then, on 172.21.51.89, append 88's public key to the authentication file:
cat ~/.ssh/id_rsa.pub.2 >> ~/.ssh/authorized_keys
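A quick hedged check that passwordless login works (hostname taken from the /etc/hosts entries above):

ssh vdsappas@dev5189 hostname   # should print dev5189 without asking for a password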

3. Configure Hadoop
a. Extract
tar -xzvf hadoop-2.6.0-cdh5.14.4.tar.gz -C /APS/usr/vdsappas/soft/
ln -s hadoop-2.6.0-cdh5.14.4 hadoop   # run inside /APS/usr/vdsappas/soft
b. Edit the configuration files
cd /APS/usr/vdsappas/soft/hadoop/etc/hadoop

[hdfs-site.xml]

<configuration>
    <property>
          <name>dfs.nameservices</name>
          <value>unionpayCluster</value>
      </property>
      <property>
          <name>dfs.ha.namenodes.unionpayCluster</name>
         <value>nn1,nn2</value>
      </property>
      <property>
          <name>dfs.namenode.rpc-address.unionpayCluster.nn1</name>
          <value>172.21.51.87:8020</value>
      </property>
      <property>
          <name>dfs.namenode.rpc-address.unionpayCluster.nn2</name>
          <value>172.21.51.88:8020</value>
      </property>
      <property>
          <name>dfs.namenode.http-address.unionpayCluster.nn1</name>
          <value>172.21.51.87:50070</value>
      </property>
      <property>
          <name>dfs.namenode.http-address.unionpayCluster.nn2</name>
          <value>172.21.51.88:50070</value>
      </property>
      <property>
          <name>dfs.namenode.shared.edits.dir</name>
          <value>qjournal://172.21.51.87:8485;172.21.51.88:8485;172.21.51.89:8485/unionpayCluster</value>
      </property>
      <property>
          <name>dfs.client.failover.proxy.provider.unionpayCluster</name>
          <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
      </property>
      <property>
          <name>dfs.ha.fencing.methods</name>
          <value>
              sshfence
              shell(/bin/true)
          </value>
      </property>
      <property>
          <name>dfs.ha.fencing.ssh.private-key-files</name>
          <value>/APS/usr/vdsappas/.ssh/id_rsa</value>
      </property>
      <property>
          <name>dfs.journalnode.edits.dir</name>
          <value>/APS/usr/vdsappas/softdata/hadoop/journal</value>
      </property>
      <property>
          <name>dfs.replication</name>
          <value>3</value>
      </property>                    
      <property>
          <name>dfs.ha.automatic-failover.enabled</name>
          <value>true</value>
      </property>
      <property>
          <name>dfs.namenode.name.dir</name>
          <value>/APS/usr/vdsappas/softdata/hadoop/hdfs/dfs/name</value>
      </property>
      <property>
          <name>dfs.permissions.superusergroup</name>
          <value>hadoop</value>
      </property>
      <property>
          <name>dfs.datanode.data.dir</name>
          <value>/APS/usr/vdsappas/softdata/hadoop/diskb/dfs</value>
      </property>
      <property>
          <name>dfs.permissions</name>
          <value>false</value>
      </property>
</configuration>

[core-site.xml]

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://unionpayCluster</value>
    </property>
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>172.21.51.87:2181,172.21.51.88:2181,172.21.51.89:2181</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/APS/usr/vdsappas/softdata/hadoop/log</value>
    </property>
</configuration>

[yarn-site.xml]

<configuration>
  <property>
     <name>yarn.nodemanager.aux-services</name>
     <value>mapreduce_shuffle</value>
  </property>    
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>172.21.51.87:2181,172.21.51.88:2181,172.21.51.89:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.client.failover-sleep-base-ms</name>
    <value>100</value>
  </property>
  <property>
    <name>yarn.client.failover-sleep-max-ms</name>
    <value>2000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarncluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>172.21.51.87:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>172.21.51.87:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>172.21.51.87:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>172.21.51.87:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>172.21.51.87:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>172.21.51.87:8090</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>172.21.51.89:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>172.21.51.89:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>172.21.51.89:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>172.21.51.89:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>172.21.51.89:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>172.21.51.89:8090</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.client.thread-count</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.client.thread-count</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.client.thread-count</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>1024</value>
  </property>
  <property>
    <name>yarn.scheduler.increment-allocation-mb</name>
    <value>512</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>16384</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.increment-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>8</value>
  </property>
  <property>
    <name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.am.liveness-monitor.expiry-interval-ms</name>
    <value>600000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.am.max-attempts</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name>
    <value>600000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
    <value>600000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
    <value>50</value>
  </property>
  <property>
    <name>yarn.application.classpath</name>
    <value>$HADOOP_CLIENT_CONF_DIR,$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.max-completed-applications</name>
    <value>10000</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>hdfs://unionpayCluster/var/log/hadoop-yarn/apps</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
    <value>logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <description>List of directories to store localized files in.</description>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/APS/usr/vdsappas/softdata/hadoop/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
  </property>
  <property>
    <description>Where to store container logs.</description>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/APS/usr/vdsappas/softdata/hadoop/hadoop-yarn/log/containers</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/user</value>
  </property>
</configuration>
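Since yarn-site.xml selects the FairScheduler, queues for that scheduler are normally defined in an allocations file referenced by yarn.scheduler.fair.allocation.file (conventionally fair-scheduler.xml in the conf directory). A minimal hedged sketch, not part of the original setup:

[fair-scheduler.xml]

<?xml version="1.0"?>
<allocations>
  <!-- one example queue; weight sets its relative share of cluster resources -->
  <queue name="default">
    <weight>1.0</weight>
  </queue>
</allocations>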

Configure queues (optional)

[capacity-scheduler.xml]
Note: the yarn-site.xml above selects the FairScheduler, so the CapacityScheduler queue settings below only take effect if yarn.resourcemanager.scheduler.class is switched to org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler.

<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

  <property>
    <name>yarn.scheduler.capacity.maximum-applications</name>
    <value>10000</value>
    <description>
      Maximum number of applications that can be pending and running.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.1</value>
    <description>
      Maximum percent of resources in the cluster which can be used to run 
      application masters i.e. controls number of concurrent running
      applications.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.resource-calculator</name>
    <value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
    <description>
      The ResourceCalculator implementation to be used to compare 
      Resources in the scheduler.
      The default i.e. DefaultResourceCalculator only uses Memory while
      DominantResourceCalculator uses dominant-resource to compare 
      multi-dimensional resources such as Memory, CPU etc.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.queues</name>
    <value>default,queue1,queue2,queue3</value>
    <description>
      The queues at this level (root is the root queue).
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.capacity</name>
    <value>56</value>
    <description>Default queue target capacity.</description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
    <value>1.0</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
    <value>100</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>

  <property>
    <name>yarn.scheduler.capacity.node-locality-delay</name>
    <value>40</value>
    <description>
      Number of missed scheduling opportunities after which the CapacityScheduler 
      attempts to schedule rack-local containers. 
      Typically this should be set to the number of nodes in the cluster. By
      default it is set to approximately the number of nodes in one rack (40).
    </description>
  </property>
  <!-- Capacity share of root.queue1 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue1.capacity</name>
    <value>10</value>
    <description>Default queue target capacity.</description>
  </property>

  
  <!-- Maximum capacity root.queue1 may grow to while other queues are idle; when other queues are busy, resources are shared according to the configured ratios -->
    <property>
    <name>yarn.scheduler.capacity.root.queue1.maximum-capacity</name>
    <value>10</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>
  
  
  <!-- Per-user limit for root.queue1: a factor of 1 lets a single user use up to 100% of the queue's capacity -->
    <property>
    <name>yarn.scheduler.capacity.root.queue1.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>
  
  
  <!-- State of root.queue1 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue1.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>
  
  
 <!-- Submit ACL for root.queue1 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue1.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>
  
  
  <!-- Administer ACL for root.queue1 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue1.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>
  
  <!-- Capacity share of root.queue2 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue2.capacity</name>
    <value>4</value>
    <description>Default queue target capacity.</description>
  </property>

  
  <!-- Maximum capacity root.queue2 may grow to while other queues are idle; when other queues are busy, resources are shared according to the configured ratios -->
    <property>
    <name>yarn.scheduler.capacity.root.queue2.maximum-capacity</name>
    <value>4</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>
  
  
  <!-- Per-user limit for root.queue2: a factor of 1 lets a single user use up to 100% of the queue's capacity -->
    <property>
    <name>yarn.scheduler.capacity.root.queue2.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>
  
  
  <!-- State of root.queue2 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue2.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>
  
  
 <!-- Submit ACL for root.queue2 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue2.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>
  
  
  <!-- Administer ACL for root.queue2 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue2.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>
  
  
    <!-- Capacity share of root.queue3 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue3.capacity</name>
    <value>30</value>
    <description>Default queue target capacity.</description>
  </property>

  
  <!-- Maximum capacity root.queue3 may grow to while other queues are idle; when other queues are busy, resources are shared according to the configured ratios -->
    <property>
    <name>yarn.scheduler.capacity.root.queue3.maximum-capacity</name>
    <value>30</value>
    <description>
      The maximum capacity of the default queue. 
    </description>
  </property>
  
  
  <!-- Per-user limit for root.queue3: a factor of 1 lets a single user use up to 100% of the queue's capacity -->
    <property>
    <name>yarn.scheduler.capacity.root.queue3.user-limit-factor</name>
    <value>1</value>
    <description>
      Default queue user limit a percentage from 0.0 to 1.0.
    </description>
  </property>
  
  
  <!-- State of root.queue3 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue3.state</name>
    <value>RUNNING</value>
    <description>
      The state of the default queue. State can be one of RUNNING or STOPPED.
    </description>
  </property>
  
  
 <!-- Submit ACL for root.queue3 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue3.acl_submit_applications</name>
    <value>*</value>
    <description>
      The ACL of who can submit jobs to the default queue.
    </description>
  </property>
  
  
  <!-- Administer ACL for root.queue3 -->
    <property>
    <name>yarn.scheduler.capacity.root.queue3.acl_administer_queue</name>
    <value>*</value>
    <description>
      The ACL of who can administer jobs on the default queue.
    </description>
  </property>

</configuration>

[mapred-site.xml]

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.intermediate-done-dir</name>
        <value>/user/history/done_intermediate</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.done-dir</name>
        <value>/user/history/done</value>
    </property>
</configuration>
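The two jobhistory directories above are only populated once the MapReduce JobHistory Server is running; it can be started with the script shipped in Hadoop's sbin directory (a hedged sketch):

mr-jobhistory-daemon.sh start historyserver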

[slaves]

172.21.51.87
172.21.51.88
172.21.51.89

c. Configure environment variables
vim ~/.bash_profile

HADOOP_HOME=/APS/usr/vdsappas/soft/hadoop
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$UPSQL_CLIENT_HOME:$ZOOKEEPER_HOME:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

d. Create the required directories (see the sketch below)
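A sketch gathering the local directories referenced by hdfs-site.xml, core-site.xml, and yarn-site.xml (run on every node):

mkdir -p /APS/usr/vdsappas/softdata/hadoop/journal
mkdir -p /APS/usr/vdsappas/softdata/hadoop/hdfs/dfs/name
mkdir -p /APS/usr/vdsappas/softdata/hadoop/diskb/dfs
mkdir -p /APS/usr/vdsappas/softdata/hadoop/log
mkdir -p /APS/usr/vdsappas/softdata/hadoop/hadoop-yarn/log/containers
mkdir -p /APS/usr/vdsappas/softdata/hadoop/hadoop-yarn/cache   # ${user.name}/nm-local-dir is created at runtime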
e. Start the JournalNodes (on all three servers)
hadoop-daemon.sh start journalnode
f. Format the NameNode (on 172.21.51.87)
hadoop namenode -format
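With automatic failover enabled as configured above, two more one-time steps are normally needed before starting the cluster; a hedged sketch using the standard Hadoop 2.x tools:

hdfs zkfc -formatZK              # on 172.21.51.87: create the HA znode in ZooKeeper
hdfs namenode -bootstrapStandby  # on 172.21.51.88: copy the freshly formatted NameNode metadata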
g. Start everything
start-all.sh
h. Verify
jps   # check that every service came up
Browse to http://172.21.51.87:50070
and http://172.21.51.88:50070
to inspect the NameNode web UIs.
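A quick functional check of HDFS from any node (the /tmp/smoke path is an arbitrary example):

hdfs dfs -mkdir -p /tmp/smoke
hdfs dfs -put /etc/hosts /tmp/smoke/
hdfs dfs -ls /tmp/smoke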

4. Install Hive
a. Extract
tar -xzvf hive-1.1.0-cdh5.14.4.tar.gz -C /APS/usr/vdsappas/soft/
b. Create a symlink (run inside /APS/usr/vdsappas/soft)
ln -s hive-1.1.0-cdh5.14.4 hive
c. Configure environment variables
vim ~/.bash_profile

HIVE_HOME=/APS/usr/vdsappas/soft/hive/bin
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$UPSQL_CLIENT_HOME:$ZOOKEEPER_HOME:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME

d. Create the metastore database in MySQL
mysql -uroot -proot

CREATE DATABASE metastore;

e. Edit the configuration files

[hive-site.xml]

<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://172.21.51.87:3306/metastore</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>root</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>root</value>
    </property>
    <property>
        <name>datanucleus.autoCreateSchema</name>
        <value>true</value>
    </property>
    <property>
        <name>datanucleus.fixedDatastore</name>
        <value>true</value>
    </property>
</configuration>

[hive-env.sh]
Set HADOOP_HOME (note: the /soft/... prefixes below come from a different install root; adjust them to /APS/usr/vdsappas/soft if that is where everything was extracted):

 HADOOP_HOME=/APS/usr/vdsappas/soft/hadoop
 export HIVE_CONF_DIR=/APS/usr/vdsappas/soft/hive/conf
 export HIVE_AUX_JARS_PATH=/soft/hive/lib,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/common,/soft/hive/lib,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/common/lib,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/mapreduce,/soft/hadoop-2.6.0-cdh5.14.4/etc/hadoop,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/hdfs,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/yarn,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/yarn/lib,/soft/hadoop-2.6.0-cdh5.14.4/share/hadoop/mapreduce/lib

[hive-log4j.properties]
Change the log file path:
hive.log.dir=/APS/usr/vdsappas/softdata/hive/logs

f. Initialize the metastore schema (table structures) in MySQL
schematool -dbType mysql -initSchema
g. Start the metastore and hiveserver2 services
hive --service metastore &
hive --service hiveserver2 &
h. Test
netstat -anop | grep 10000   # check that hiveserver2 is listening
Verify with beeline.
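A hedged beeline smoke test (the user name vdsappas is taken from the scp commands above; if HiveServer2 runs without authentication, any user name works):

beeline -u jdbc:hive2://172.21.51.87:10000 -n vdsappas -e 'show databases;'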

5. Install Kafka (reusing the existing Kafka on 87-89)
a. Extract
tar -xzvf kafka-0.10.0-kafka2.1.1.tar.gz -C /APS/usr/vdsappas/soft/
b. Create a symlink (run inside /APS/usr/vdsappas/soft)
ln -s kafka-0.10.0-kafka2.1.1 kafka
c. Configure environment variables
vim ~/.bash_profile

KAFKA_HOME=/APS/usr/vdsappas/soft/kafka/bin
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$UPSQL_CLIENT_HOME:$ZOOKEEPER_HOME:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME:$KAFKA_HOME

d. Edit the configuration file
cd /APS/usr/vdsappas/soft/kafka/config
vim server.properties

broker.id=1
listeners=PLAINTEXT://:9092
log.dirs=/APS/usr/vdsappas/softdata/kafka/logs
zookeeper.connect=172.21.51.87:2181,172.21.51.88:2181,172.21.51.89:2181
# data retention period (168 hours = 7 days)
log.retention.hours=168
# allow deleting created topics
delete.topic.enable=true

Note: broker.id must differ on each server (e.g. 1, 2, 3 on 87, 88, 89). Trailing comments are not valid in a Java properties file, so keep comments on their own lines as above.

e. Start Kafka on every server
kafka-server-start.sh -daemon /APS/usr/vdsappas/soft/kafka/config/server.properties
(the -daemon flag runs the broker in the background; omit it to run in the foreground)
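A hedged smoke test with the standard Kafka 0.10 CLI tools (the topic name smoke-test is an arbitrary example):

kafka-topics.sh --create --zookeeper 172.21.51.87:2181 --replication-factor 3 --partitions 3 --topic smoke-test
kafka-console-producer.sh --broker-list 172.21.51.87:9092 --topic smoke-test
kafka-console-consumer.sh --zookeeper 172.21.51.87:2181 --topic smoke-test --from-beginning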
6. Install Spark
a. Extract
tar -xzvf spark-1.6.0-cdh5.14.4.tar.gz -C /APS/usr/vdsappas/soft/
b. Create a symlink (run inside /APS/usr/vdsappas/soft)
ln -s spark-1.6.0-cdh5.14.4 spark
c. Configure environment variables
vim ~/.bash_profile

export SPARK_HOME=/APS/usr/vdsappas/soft/spark
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$JAVA_HOME/bin:$UPSQL_CLIENT_HOME:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HIVE_HOME:$HIVE_HOME/bin:$SPARK_HOME/bin

d. Edit the configuration files
cd /APS/usr/vdsappas/soft/spark/conf/

vim spark-env.sh
Add the following:

export HADOOP_CONF_DIR=/APS/usr/vdsappas/soft/hadoop/etc/hadoop
export SPARK_CONF_DIR=/APS/usr/vdsappas/soft/spark/conf
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER -Dspark.deploy.zookeeper.url=172.21.51.87:2181,172.21.51.88:2181,172.21.51.89:2181 -Dspark.deploy.zookeeper.dir=/spark"
export SPARK_DIST_CLASSPATH=$(/APS/usr/vdsappas/soft/hadoop/bin/hadoop classpath)

vim spark-defaults.conf

# Add the following:
spark.driver.memory              5g
spark.eventLog.enabled           true
spark.eventLog.compress          true
spark.serializer                 org.apache.spark.serializer.KryoSerializer
spark.master                     yarn

vim slaves

# Add the following:
172.21.51.87
172.21.51.88
172.21.51.89

Upload the following jars into /APS/usr/vdsappas/soft/hadoop/share/hadoop/common/lib:

jackson-annotations-2.4.0.jar
jackson-core-2.4.2.jar 
jackson-databind-2.4.2.jar
parquet-hadoop-1.4.3.jar

Repeat the steps above on all three machines.

e. Start the Spark cluster
cd /APS/usr/vdsappas/soft/spark/sbin
./start-all.sh
On the other two machines, run start-master.sh from /APS/usr/vdsappas/soft/spark/sbin to bring up standby masters for Spark master HA.
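A hedged end-to-end check that submits the bundled SparkPi example to YARN (the examples jar path is an assumption based on the usual CDH Spark 1.6 tarball layout):

spark-submit --class org.apache.spark.examples.SparkPi --master yarn --deploy-mode cluster $SPARK_HOME/lib/spark-examples*.jar 100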
