1. Extract Hadoop and ZooKeeper, and configure the environment variables
#JAVA_HOME
export JAVA_HOME=/home/admin/modules/jdk1.8.0_144
export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$PATH:$JAVA_HOME/bin
#HADOOP_HOME
export HADOOP_HOME=/home/admin/modules/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
#ZOOKEEPER_HOME
export ZOOKEEPER_HOME=/home/admin/modules/zookeeper-3.4.10
export PATH=$PATH:$ZOOKEEPER_HOME/bin
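After saving these exports (assuming they live in /etc/profile or ~/.bashrc), reload the shell and confirm each tool resolves; a quick sanity check:
source /etc/profile
java -version
hadoop version
echo $ZOOKEEPER_HOME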
2. Edit the configuration files
zoo.cfg
[root@node3 conf]# cat zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/admin/modules/zookeeper-3.4.10/zkData
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
#server.4=node4:2888:3888
3. Create the zkData directory inside the extracted ZooKeeper folder, then write the node ID to myid
mkdir zkData
cd zkData
touch myid
echo 1 > myid
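Each server's myid must match its server.N entry in zoo.cfg, so node2 and node3 need different values; a sketch, assuming the same path on every node:
# on node2
echo 2 > /home/admin/modules/zookeeper-3.4.10/zkData/myid
# on node3
echo 3 > /home/admin/modules/zookeeper-3.4.10/zkData/myid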
4. Configure Hadoop
Add JAVA_HOME to hadoop-env.sh, mapred-env.sh, and yarn-env.sh:
export JAVA_HOME=/home/admin/modules/jdk1.8.0_144
5. Add the worker nodes
Edit the workers file and add the three nodes:
node1
node2
node3
6. Edit core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Directory for files Hadoop generates at runtime -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/admin/modules/hadoop-3.1.3/hadoop-data</value>
</property>
<!-- ZooKeeper quorum used for HA monitoring -->
<property>
<name>ha.zookeeper.quorum</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
Edit hdfs-site.xml
<!-- Number of replicas -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>node1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>node2:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>node1:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>node2:9870</value>
</property>
<!-- JournalNode quorum holding the shared edit log -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node1:8485;node2:8485;node3:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing via passwordless SSH -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- JournalNode data directory -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/admin/modules/hadoop-3.1.3/journalnode</value>
</property>
<!-- Automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Disable permission checking -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Enable WebHDFS -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
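The sshfence method configured above only works if each NameNode can SSH to the other without a password using /root/.ssh/id_rsa; a minimal setup sketch, assuming the root account and that node1 and node2 host the two NameNodes:
# run on node1, then repeat on node2
ssh-keygen -t rsa      # accept the default path /root/.ssh/id_rsa
ssh-copy-id root@node1
ssh-copy-id root@node2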
Edit mapred-site.xml
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory Server address (default port 10020) -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>node1:10020</value>
</property>
<!-- JobHistory Server web UI address (default port 19888) -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node1:19888</value>
</property>
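Note that the JobHistory Server is not started by start-dfs.sh or start-yarn.sh; assuming it should run on node1 as configured above, start it separately:
mapred --daemon start historyserver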
Edit yarn-site.xml
<!-- Shuffle service through which reducers fetch map output -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster1</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- ResourceManager hostnames -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>node2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>node3</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>node2:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>node3:8088</value>
</property>
<!-- ZooKeeper quorum -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
<!-- Log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://node1:19888/jobhistory/logs/</value>
</property>
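With log aggregation enabled, the logs of finished applications can be pulled back through the history server; a usage sketch, where <application_id> is a placeholder for a real YARN application ID:
yarn logs -applicationId <application_id>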
7. Edit the scripts under sbin
Add the following at the top of start-dfs.sh and stop-dfs.sh:
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
Add the following at the top of start-yarn.sh and stop-yarn.sh:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
8. Distribute Hadoop and ZooKeeper to every node
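A sketch of the distribution step, assuming passwordless SSH and the same directory layout on node2 and node3 (remember to correct each node's myid afterwards, as in step 3):
scp -r /home/admin/modules/hadoop-3.1.3 node2:/home/admin/modules/
scp -r /home/admin/modules/hadoop-3.1.3 node3:/home/admin/modules/
scp -r /home/admin/modules/zookeeper-3.4.10 node2:/home/admin/modules/
scp -r /home/admin/modules/zookeeper-3.4.10 node3:/home/admin/modules/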
9. Start ZooKeeper on every node:
zkServer.sh start
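Once all three nodes are up, each one should report itself as either leader or follower; a quick check:
zkServer.sh status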
10. Start the JournalNode on every node:
hdfs --daemon start journalnode
11. Format the primary NameNode (run on nn1 only):
hdfs namenode -format
12. Start the primary NameNode:
hdfs --daemon start namenode
13. Sync the standby NameNode from the primary (run on nn2):
hdfs namenode -bootstrapStandby
14. Start the standby NameNode:
hdfs --daemon start namenode
15. Initialize the HA state in ZooKeeper (run on the primary node):
hdfs zkfc -formatZK
16. Start the HDFS cluster from the primary node:
sbin/start-dfs.sh
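To confirm the HA roles after startup, query the state of each NameNode (nn1 and nn2 are the IDs defined in hdfs-site.xml); one should report active and the other standby:
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2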
17. Check the processes:
[root@node1 hadoop-3.1.3]# jps
3824 DataNode
1413 QuorumPeerMain
4422 NodeManager
4729 Jps
3692 NameNode
4108 JournalNode
4287 DFSZKFailoverController
18. Start ResourceManager HA:
sbin/start-yarn.sh
Check the processes:
[root@node2 hadoop-3.1.3]# jps
1713 NameNode
3971 Jps
1571 JournalNode
3350 NodeManager
1254 QuorumPeerMain
2438 DataNode
2606 DFSZKFailoverController
3231 ResourceManager
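To confirm ResourceManager HA, query each RM's state (rm1 and rm2 are the IDs defined in yarn-site.xml); one should be active and the other standby:
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2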
The cluster setup is now complete.
Visit the NameNode web UI at node1:9870.
A perfect setup!
Thanks, everyone~