1、准备工作
- 准备6台虚拟机,每台内存1G
- 虚拟机节点分配情况
序号 | 机器 | 分配 |
---|---|---|
1 | Park01(主机名CH0) | Zookeeper、NameNode(active)、ResourceManager(active) |
2 | Park02(主机名CH1) | Zookeeper、NameNode(standby) |
3 | Park03(主机名CH2) | Zookeeper、ResourceManager(standby) |
4 | Park04(主机名CH3) | DataNode、NodeManager、JournalNode |
5 | Park05(主机名CH4) | DataNode、NodeManager、JournalNode |
6 | Park06(主机名CH5) | DataNode、NodeManager、JournalNode |
2、Zookeeper安装配置
[root@CH0 soft]# mkdir -p /usr/lib/zookeeper
[root@CH0 soft]# tar -zxvf zookeeper-3.4.7.tar.gz -C /usr/lib/zookeeper
2.1 zoo.cfg配置
[root@CH0 soft]# cd /usr/lib/zookeeper/zookeeper-3.4.7/conf/
[root@CH0 conf]# cp zoo_sample.cfg zoo.cfg
[root@CH0 conf]# vim zoo.cfg
<!--完整的zoo.cfg配置如下-->
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/lib/zookeeper/zookeeper-3.4.7/tmp
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=192.168.6.20:2888:3888
server.2=192.168.6.21:2888:3888
server.3=192.168.6.22:2888:3888
2.2 tmp目录配置
[root@CH0 conf]# cd ..
[root@CH0 zookeeper-3.4.7]# mkdir tmp
[root@CH0 zookeeper-3.4.7]# cd tmp/
[root@CH0 tmp]# vim myid
<!--内容如下-->
1
2.3 其他两台虚拟机Zookeeper配置
<!-- Zookeeper目录发送给CH1、CH2 -->
[root@CH0 tmp]# cd ../../
[root@CH0 zookeeper]# cd ../
[root@CH0 lib]# scp -r zookeeper CH1:/usr/lib/
[root@CH0 lib]# scp -r zookeeper CH2:/usr/lib/
[root@CH1 ~]# cd /usr/lib/zookeeper/zookeeper-3.4.7/tmp/
[root@CH1 tmp]# ls
myid
[root@CH1 tmp]# vim myid
[root@CH1 tmp]# cat myid
2
[root@CH2 ~]# cd /usr/lib/zookeeper/zookeeper-3.4.7/tmp/
[root@CH2 tmp]# ls
myid
[root@CH2 tmp]# vim myid
[root@CH2 tmp]# cat myid
3
3、Hadoop安装配置
[root@CH0 soft]# mkdir -p /usr/lib/hadoop
[root@CH0 soft]# tar -zxvf hadoop-2.7.1.tar.gz -C /usr/lib/hadoop
[root@CH0 soft]# cd /usr/lib/hadoop/hadoop-2.7.1/etc/hadoop/
3.1、hadoop-env.sh配置
[root@CH0 hadoop]# vim hadoop-env.sh
<!--配置如下-->
export JAVA_HOME=/usr/lib/jvm/jdk1.8.0_171
export HADOOP_CONF_DIR=/usr/lib/hadoop/hadoop-2.7.1/etc/hadoop
3.2、core-site.xml配置
[root@CH0 hadoop]# vim core-site.xml
<!--配置如下-->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns</value>
</property>
<!-- 用来指定hadoop运行时产生文件的存放位置 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/lib/hadoop/hadoop-2.7.1/tmp</value>
</property>
<!-- 指定zookeeper地址-->
<property>
<name>ha.zookeeper.quorum</name>
<value>CH0:2181,CH1:2181,CH2:2181</value>
</property>
</configuration>
3.3、hdfs-site.xml配置
[root@CH0 hadoop]# vim hdfs-site.xml
<!--配置如下-->
<configuration>
<property>
<name>dfs.nameservices</name>
<value>ns</value>
</property>
<!-- ns下有两台namenode,分别是nn1,nn2 -->
<property>
<name>dfs.ha.namenodes.ns</name>
<value>nn1,nn2</value>
</property>
<!-- nn1的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns.nn1</name>
<value>CH0:9000</value>
</property>
<!-- nn1的HTTP通信地址 -->
<property>
<name>dfs.namenode.http-address.ns.nn1</name>
<value>CH0:50070</value>
</property>
<!-- nn2的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns.nn2</name>
<value>CH1:9000</value>
</property>
<!-- nn2的HTTP通信地址 -->
<property>
<name>dfs.namenode.http-address.ns.nn2</name>
<value>CH1:50070</value>
</property>
<!-- 指定namenode的元数据在JournalNode上的存放位置,这样,namenode2可以从集群jn
里获取最新的namenode的信息,可以达到热备的效果 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://CH3:8485;CH4:8485;CH5:8485/ns</value>
</property>
<!-- 指定JournalNode的存放位置 -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/lib/hadoop/hadoop-2.7.1/journal</value>
</property>
<!-- 开启namenode故障时自动切换 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- 配置切换的实现方式 -->
<property>
<name>dfs.client.failover.proxy.provider.ns</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- 配置隔离机制-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- 配置隔离机制的SSH登录密钥所在的位置 -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- 配置namenode数据存放的位置,可以不配置,如果不配置,默认使用的是
core-site.xml里的hadoop.tmp.dir的路径-->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///usr/lib/hadoop/hadoop-2.7.1/tmp/namenode</value>
</property>
<!-- 配置datanode数据存放的位置,可以不配置,如果不配置,默认使用的是
core-site.xml里的hadoop.tmp.dir的路径-->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///usr/lib/hadoop/hadoop-2.7.1/tmp/datanode</value>
</property>
<!-- 配置block副本的数量-->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- 设置hdfs的操作权限,false表示任何用户都可以在hdfs上操作文件 -->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
3.4、mapred-site.xml配置
[root@CH0 hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@CH0 hadoop]# vim mapred-site.xml
<!--配置如下-->
<configuration>
<!-- 指定mapreduce运行在yarn上 -->
<property>
<name>mapreduce.framework.name</name>
<!-- yarn是资源协调工具 -->
<value>yarn</value>
</property>
</configuration>
3.5、yarn-site.xml配置
[root@CH0 hadoop]# vim yarn-site.xml
<!--配置如下-->
<configuration>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- 配置rm1,rm2的主机 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>CH0</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>CH2</value>
</property>
<!-- 开启yarn恢复机制-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- 指定yarn恢复机制实现类-->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- 配置zookeeper的地址-->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>CH0:2181,CH1:2181,CH2:2181</value>
<description>For multiple zk services, separate them with comma</description>
</property>
<!-- 指定yarn ha的名称-->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-ha</value>
</property>
<!-- 指定yarn的老大,resourcemanager的地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<!-- CH0是主机名 -->
<value>CH0</value>
</property>
<!-- Site specific YARN configuration properties -->
<!-- nodemanager获取数据的方式 -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
3.6、slaves配置
[root@CH0 hadoop]# vim slaves
CH3
CH4
CH5
3.7、目录创建
[root@CH0 hadoop]# cd ../..
[root@CH0 hadoop-2.7.1]# mkdir tmp
[root@CH0 hadoop-2.7.1]# mkdir journal
[root@CH0 hadoop-2.7.1]# cd tmp
[root@CH0 tmp]# mkdir namenode
[root@CH0 tmp]# mkdir datanode
3.8、发送给其他五台虚拟机
[root@CH0 hadoop-2.7.1]# cd ../../
[root@CH0 lib]# scp -r hadoop CH1:/usr/lib/
[root@CH0 lib]# scp -r hadoop CH2:/usr/lib/
[root@CH0 lib]# scp -r hadoop CH3:/usr/lib/
[root@CH0 lib]# scp -r hadoop CH4:/usr/lib/
[root@CH0 lib]# scp -r hadoop CH5:/usr/lib/
4、环境变量配置
[root@CH0 ~]# vim /etc/profile
#java env
JAVA_HOME=/usr/lib/jvm/jdk1.8.0_171
HADOOP_HOME=/usr/lib/hadoop/hadoop-2.7.1
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export JAVA_HOME PATH CLASSPATH HADOOP_HOME
4.1、配置文件发送其他五台虚拟机
[root@CH0 ~]# scp /etc/profile CH1:/etc/
[root@CH0 ~]# scp /etc/profile CH2:/etc/
[root@CH0 ~]# scp /etc/profile CH3:/etc/
[root@CH0 ~]# scp /etc/profile CH4:/etc/
[root@CH0 ~]# scp /etc/profile CH5:/etc/
<!-- 每台虚拟机都需要执行如下命令,使环境变量生效 -->
[root@CH0 ~]# source /etc/profile
5、Hadoop分布式启动
5.1、zookeeper启动
Park01启动
[root@CH0 ~]# cd /usr/lib/zookeeper/zookeeper-3.4.7/bin/
[root@CH0 bin]# sh zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/lib/zookeeper/zookeeper-3.4.7/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
Park02启动
[root@CH1 ~]# cd /usr/lib/zookeeper/zookeeper-3.4.7/bin/
[root@CH1 bin]# sh zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/lib/zookeeper/zookeeper-3.4.7/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
Park03启动
[root@CH2 ~]# cd /usr/lib/zookeeper/zookeeper-3.4.7/bin/
[root@CH2 bin]# sh zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /usr/lib/zookeeper/zookeeper-3.4.7/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
Park01启动状态验证
[root@CH0 bin]# sh zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/lib/zookeeper/zookeeper-3.4.7/bin/../conf/zoo.cfg
Mode: follower
Park02启动状态验证
[root@CH1 bin]# sh zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/lib/zookeeper/zookeeper-3.4.7/bin/../conf/zoo.cfg
Mode: leader
Park03启动状态验证
[root@CH2 bin]# sh zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /usr/lib/zookeeper/zookeeper-3.4.7/bin/../conf/zoo.cfg
Mode: follower
5.2、format zkfc
[root@CH0 bin]# hdfs zkfc -formatZK
18/05/29 16:08:35 INFO ha.ActiveStandbyElector: Successfully created /hadoop-ha/ns in ZK.
18/05/29 16:08:35 INFO zookeeper.ZooKeeper: Session: 0x263aae518180000 closed
[root@CH1 bin]# hdfs zkfc -formatZK
18/05/29 16:09:45 INFO ha.ActiveStandbyElector: Successfully created /hadoop-ha/ns in ZK.
18/05/29 16:09:45 INFO zookeeper.ZooKeeper: Session: 0x263aae518180000 closed
5.3、JournalNode启动
[root@CH3 ~]# cd /usr/lib/hadoop/hadoop-2.7.1/sbin/
[root@CH3 sbin]# sh hadoop-daemon.sh start journalnode
[root@CH3 sbin]# jps
1464 JournalNode
1513 Jps
[root@CH4 sbin]# sh hadoop-daemon.sh start journalnode
[root@CH4 sbin]# jps
1445 JournalNode
1494 Jps
[root@CH5 sbin]# sh hadoop-daemon.sh start journalnode
[root@CH5 sbin]# jps
1449 JournalNode
1498 Jps
5.4、format NameNode
[root@CH0 bin]# hadoop namenode -format
18/05/29 16:21:00 INFO common.Storage: Storage directory /usr/lib/hadoop/hadoop-2.7.1/tmp/namenode has been successfully formatted.
18/05/29 16:21:00 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
18/05/29 16:21:00 INFO util.ExitUtil: Exiting with status 0
18/05/29 16:21:00 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************/
[root@CH0 ~]# cd /usr/lib/hadoop/hadoop-2.7.1/sbin/
[root@CH0 sbin]# sh hadoop-daemon.sh start namenode
starting namenode, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-namenode-CH0.out
[root@CH0 sbin]# jps
1604 NameNode
1676 Jps
1438 QuorumPeerMain
5.5、bootstrapStandby NameNode
[root@CH1 bin]# hdfs namenode -bootstrapStandby
18/05/29 16:29:12 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
18/05/29 16:29:12 INFO namenode.NameNode: createNameNode [-bootstrapStandby]
=====================================================
About to bootstrap Standby ID nn2 from:
Nameservice ID: ns
Other Namenode ID: nn1
Other NN's HTTP address: http://CH0:50070
Other NN's IPC address: CH0/192.168.6.20:9000
Namespace ID: 1549916029
Block pool ID: BP-605564131-192.168.6.20-1527582060609
Cluster ID: CID-a2fc36bc-6fc9-4d5f-8d95-b1c3da1557bd
Layout version: -63
isUpgradeFinalized: true
=====================================================
18/05/29 16:29:13 INFO common.Storage: Storage directory /usr/lib/hadoop/hadoop-2.7.1/tmp/namenode has been successfully formatted.
18/05/29 16:29:14 INFO namenode.TransferFsImage: Opening connection to http://CH0:50070/imagetransfer?getimage=1&txid=0&storageInfo=-63:1549916029:0:CID-a2fc36bc-6fc9-4d5f-8d95-b1c3da1557bd
18/05/29 16:29:14 INFO namenode.TransferFsImage: Image Transfer timeout configured to 60000 milliseconds
18/05/29 16:29:14 INFO namenode.TransferFsImage: Transfer took 0.00s at 0.00 KB/s
18/05/29 16:29:14 INFO namenode.TransferFsImage: Downloaded file fsimage.ckpt_0000000000000000000 size 351 bytes.
18/05/29 16:29:14 INFO util.ExitUtil: Exiting with status 0
18/05/29 16:29:14 INFO namenode.NameNode: SHUTDOWN_MSG:
5.6、NameNode start
[root@CH1 ~]# cd /usr/lib/hadoop/hadoop-2.7.1/sbin/
[root@CH1 sbin]# sh hadoop-daemon.sh start namenode
starting namenode, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-namenode-CH1.out
[root@CH1 sbin]# jps
1572 NameNode
1644 Jps
1406 QuorumPeerMain
5.7、DataNode start
[root@CH3 sbin]# sh hadoop-daemon.sh start datanode
starting datanode, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-datanode-CH3.out
[root@CH3 sbin]# jps
1617 Jps
1543 DataNode
1464 JournalNode
[root@CH4 sbin]# sh hadoop-daemon.sh start datanode
starting datanode, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-datanode-CH4.out
[root@CH4 sbin]# jps
1523 DataNode
1445 JournalNode
1597 Jps
[root@CH5 sbin]# sh hadoop-daemon.sh start datanode
starting datanode, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-datanode-CH5.out
[root@CH5 sbin]# jps
1601 Jps
1527 DataNode
1449 JournalNode
5.8、ZKFC Start
[root@CH0 sbin]# sh hadoop-daemon.sh start zkfc
starting zkfc, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-zkfc-CH0.out
[root@CH0 sbin]# jps
1987 Jps
1604 NameNode
1438 QuorumPeerMain
2240 DFSZKFailoverController
[root@CH1 sbin]# sh hadoop-daemon.sh start zkfc
starting zkfc, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/hadoop-root-zkfc-CH1.out
[root@CH1 sbin]# jps
1572 NameNode
1780 DFSZKFailoverController
1844 Jps
1406 QuorumPeerMain
5.9、Yarn Start
[root@CH0 sbin]# sh start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/yarn-root-resourcemanager-CH0.out
CH5: starting nodemanager, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/yarn-root-nodemanager-CH5.out
CH4: starting nodemanager, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/yarn-root-nodemanager-CH4.out
CH3: starting nodemanager, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/yarn-root-nodemanager-CH3.out
[root@CH0 sbin]# jps
2240 DFSZKFailoverController
1604 NameNode
2356 ResourceManager
2621 Jps
1438 QuorumPeerMain
[root@CH2 sbin]# sh yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /usr/lib/hadoop/hadoop-2.7.1/logs/yarn-root-resourcemanager-CH2.out
[root@CH2 sbin]# jps
1604 ResourceManager
1643 Jps
1439 QuorumPeerMain