1. Cluster Planning
# Node information
192.168.171.129 rocketmq-nameserver2 (NameNode, DataNode, ResourceManager, NodeManager)
192.168.171.130 node1.it.cn (SecondaryNameNode, DataNode, NodeManager)
192.168.171.128 node3.it.cn (DataNode, NodeManager)
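For these hostnames to resolve, every node needs matching entries in its hosts file. A minimal sketch, assuming the IPs above and root access:
# Append the cluster mappings to /etc/hosts on every node
cat >> /etc/hosts <<'EOF'
192.168.171.129 rocketmq-nameserver2
192.168.171.130 node1.it.cn
192.168.171.128 node3.it.cn
EOF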
2. Prepare the Base Server Environment
# 1. Disable the firewall
systemctl stop firewalld.service # stop the firewall now
systemctl disable firewalld.service # keep it from starting on boot
# 2. Passwordless SSH (from rocketmq-nameserver2 to itself, node1.it.cn, and node3.it.cn)
ssh-keygen # press Enter four times to generate the public/private key pair
ssh-copy-id rocketmq-nameserver2
ssh-copy-id node3.it.cn
ssh-copy-id node1.it.cn
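To confirm the keys were copied correctly, each command below should print the remote hostname without prompting for a password (a quick sanity check, not part of the original steps):
ssh rocketmq-nameserver2 hostname
ssh node1.it.cn hostname
ssh node3.it.cn hostname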
# 3. Synchronize cluster time
yum -y install ntpdate
ntpdate ntp4.aliyun.com
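ntpdate only syncs the clock once, at the moment it runs; to keep the nodes aligned over time, a periodic cron entry is one option (a sketch, interval chosen arbitrarily):
# On every node: crontab -e, then add the line below
*/30 * * * * /usr/sbin/ntpdate ntp4.aliyun.com > /dev/null 2>&1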
# 4. Install JDK 1.8
[root@node3 software]# java -version
java version "1.8.0_291"
Java(TM) SE Runtime Environment (build 1.8.0_291-b10)
Java HotSpot(TM) 64-Bit Server VM (build 25.291-b10, mixed mode)
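If java is not yet on the PATH, the JDK can be wired up in /etc/profile like this (a sketch; the install path is an assumption chosen to match the JAVA_HOME set in hadoop-env.sh below):
export JAVA_HOME=/usr/local/software/jdk/jdk1.8
export PATH=$PATH:$JAVA_HOME/bin
Then run source /etc/profile and re-check java -version.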
3. Upload and Extract the Installation Package
[root@node1 hadoop-cluster]# tar -zxvf hadoop-3.1.4-bin-snappy-CentOS7.tar.gz
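A quick look at the extracted directory confirms the layout (expect bin/, etc/, sbin/, share/ and friends):
[root@node1 hadoop-cluster]# ls hadoop-3.1.4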
4. Edit the Hadoop Configuration Files
# 1. Configure /usr/local/software/hadoop-cluster/hadoop-3.1.4/etc/hadoop/hadoop-env.sh
[root@node1 hadoop]# vim hadoop-env.sh
# Set JAVA_HOME
export JAVA_HOME=/usr/local/software/jdk/jdk1.8
# Set the users allowed to run each daemon's shell commands
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# 2. Configure /usr/local/software/hadoop-cluster/hadoop-3.1.4/etc/hadoop/core-site.xml
[root@node1 hadoop]# vim core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Name of the default filesystem; the URI scheme selects the filesystem type: -->
<!-- file:// local filesystem, hdfs:// Hadoop distributed filesystem, gfs://, etc. -->
<!-- HDFS access address: hdfs://nn_host:8020. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://rocketmq-nameserver2:8020</value>
</property>
<!-- Local data directory for Hadoop; generated automatically by the format step -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/software/hadoop-cluster/data/hadoop-3.1.4</value>
</property>
<!-- Username used when browsing HDFS from the Web UI. -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
</configuration>
# 3. Configure /usr/local/software/hadoop-cluster/hadoop-3.1.4/etc/hadoop/hdfs-site.xml
[root@node1 hadoop]# vim hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
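<!-- HTTP address of the SecondaryNameNode (node1.it.cn per the cluster plan in section 1) -->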
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node1.it.cn:9868</value>
</property>
</configuration>
# 4. Configure /usr/local/software/hadoop-cluster/hadoop-3.1.4/etc/hadoop/mapred-site.xml
[root@node1 hadoop]# vim mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Default execution mode for MR jobs: yarn (cluster mode) or local -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Environment variables for the MR App Master. -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- Environment variables for MR map tasks. -->
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- Environment variables for MR reduce tasks. -->
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
</configuration>
# 5. Configure /usr/local/software/hadoop-cluster/hadoop-3.1.4/etc/hadoop/yarn-site.xml
[root@node1 hadoop]# vim yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!-- Host that runs the ResourceManager, YARN's master role. -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>rocketmq-nameserver2</value>
</property>
<!-- Auxiliary service run on each NodeManager; must be mapreduce_shuffle for MR jobs to run. -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Minimum memory (in MB) a container may request. -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<!-- Maximum memory (in MB) a container may request. -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>2048</value>
</property>
<!-- Allowed ratio of virtual memory to physical memory per container. -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>4</value>
</property>
</configuration>
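A worked example of how the two scheduler limits above behave, assuming the default resource calculator, which normalizes container requests up to the next multiple of the minimum allocation:
# request 1000 MB -> granted 1024 MB (rounded up to a multiple of 512)
# request 2000 MB -> granted 2048 MB (rounded up to the cap)
# request 3000 MB -> rejected, exceeds yarn.scheduler.maximum-allocation-mb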
# 6. Configure /usr/local/software/hadoop-cluster/hadoop-3.1.4/etc/hadoop/workers
[root@node1 hadoop]# vim workers
rocketmq-nameserver2
node1.it.cn
node3.it.cn
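Before distributing the package in the next step, the XML files can be sanity-checked with the built-in conftest subcommand (it appears in the hadoop usage output in step 5; run it from the install root, since hadoop is not on the PATH yet):
[root@node1 hadoop-3.1.4]# bin/hadoop conftest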
# 7. Distribute the Hadoop package to the other nodes
[root@rocketmq-nameserver2 software]# scp -r hadoop-cluster/ root@node1.it.cn:$PWD
[root@rocketmq-nameserver2 software]# scp -r hadoop-cluster/ root@node3.it.cn:$PWD
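A quick spot check confirms the package landed on both nodes:
ssh node1.it.cn ls /usr/local/software/hadoop-cluster
ssh node3.it.cn ls /usr/local/software/hadoop-cluster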
5. Configure the Hadoop Environment
# 1. Set the Hadoop environment variables
[root@node1 hadoop-3.1.4]# vim /etc/profile
# hadoop environment
export HADOOP_HOME=/usr/local/software/hadoop-cluster/hadoop-3.1.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# 2. Reload the environment variables
[root@node1 hadoop-3.1.4]# source /etc/profile
# 3. Verify the variables took effect
[root@node1 hadoop-3.1.4]# hadoop
Usage: hadoop [OPTIONS] SUBCOMMAND [SUBCOMMAND OPTIONS]
 or    hadoop [OPTIONS] CLASSNAME [CLASSNAME OPTIONS]
  where CLASSNAME is a user-provided Java class

  OPTIONS is none or any of:
    buildpaths                        attempt to add class files from build tree
    --config dir                      Hadoop config directory
    --debug                           turn on shell script debug mode
    --help                            usage information
    hostnames list[,of,host,names]    hosts to use in slave mode
    hosts filename                    list of hosts to use in slave mode
    loglevel level                    set the log4j level for this command
    workers                           turn on worker mode

  SUBCOMMAND is one of:

    Admin Commands:
      daemonlog     get/set the log level for each daemon

    Client Commands:
      archive       create a Hadoop archive
      checknative   check native Hadoop and compression libraries availability
      classpath     prints the class path needed to get the Hadoop jar and the required libraries
      conftest      validate configuration XML files
      credential    interact with credential providers
      distch        distributed metadata changer
      distcp        copy file or directories recursively
      dtutil        operations related to delegation tokens
      envvars       display computed Hadoop environment variables
      fs            run a generic filesystem user client
      gridmix       submit a mix of synthetic job, modeling a profiled from production load
      jar <jar>     run a jar file. NOTE: please use "yarn jar" to launch YARN applications, not this command.
      jnipath       prints the java.library.path
      kdiag         Diagnose Kerberos Problems
      kerbname      show auth_to_local principal conversion
      key           manage keys via the KeyProvider
      rumenfolder   scale a rumen input trace
      rumentrace    convert logs into a rumen trace
      s3guard       manage metadata on S3
      trace         view and modify Hadoop tracing settings
      version       print the version

    Daemon Commands:
      kms           run KMS, the Key Management Server

SUBCOMMAND may print help when invoked w/o parameters or with -h.
6. Format the NameNode
[root@node1 hadoop-3.1.4]# hdfs namenode -format
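A successful format prints a line ending in "has been successfully formatted." and, with the core-site.xml above, creates the NameNode metadata under hadoop.tmp.dir (HDFS appends dfs/name by default). A quick check:
ls /usr/local/software/hadoop-cluster/data/hadoop-3.1.4/dfs/name/current
Note that formatting is run exactly once, on the NameNode host; re-running it wipes existing HDFS metadata.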
7. Start the Hadoop Cluster: Manual Per-Daemon Start/Stop
# HDFS daemons
hdfs --daemon start namenode|datanode|secondarynamenode
hdfs --daemon stop namenode|datanode|secondarynamenode
# YARN daemons
yarn --daemon start resourcemanager|nodemanager
yarn --daemon stop resourcemanager|nodemanager
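Whichever way the daemons are started, jps on each node shows which JVMs are running. Per the plan in section 1, rocketmq-nameserver2 should list NameNode, DataNode, ResourceManager and NodeManager; node1.it.cn should list SecondaryNameNode, DataNode and NodeManager:
jps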
8. Start the Hadoop Cluster: One-Command Start/Stop
# Out of the box, Hadoop 3 refuses to run the one-command cluster scripts as root,
# so the startup users must be declared in the scripts.
# HDFS scripts: edit start-dfs.sh and stop-dfs.sh (under $HADOOP_HOME/sbin)
# and add the following at the top of both files:
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
# YARN scripts: edit start-yarn.sh and stop-yarn.sh
# and add the following at the top of both files:
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
# Start HDFS and YARN
start-dfs.sh
start-yarn.sh
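Once both scripts finish, a bundled example job makes a quick end-to-end smoke test (the examples jar ships under share/hadoop/mapreduce; the exact filename below assumes this 3.1.4 build):
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.4.jar pi 2 4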
# Stop HDFS and YARN
stop-dfs.sh
stop-yarn.sh
9. Access the Web UIs
# NameNode
http://192.168.171.129:9870/
# YARN ResourceManager
http://192.168.171.129:8088/
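Both UIs can also be checked from the shell; a 200 response means the service is answering (standard curl options, nothing Hadoop-specific):
curl -s -o /dev/null -w "%{http_code}\n" http://192.168.171.129:9870/
curl -s -o /dev/null -w "%{http_code}\n" http://192.168.171.129:8088/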
# To reach Hadoop conveniently from Windows, map the hostnames there as well:
1. Open the hosts file in C:\Windows\System32\drivers\etc as administrator
2. Append the following hostname-to-IP mappings at the end of the file
192.168.171.129 rocketmq-nameserver2
192.168.171.130 node1.it.cn
192.168.171.128 node3.it.cn