Building a Fully Distributed Hadoop 3.1.3 Cluster

1. Extract Hadoop and ZooKeeper, then configure the environment variables

#JAVA_HOME

export JAVA_HOME=/home/admin/modules/jdk1.8.0_144

export CLASSPATH=.:$JAVA_HOME/jre/lib/rt.jar:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

export PATH=$PATH:$JAVA_HOME/bin

#HADOOP_HOME

export HADOOP_HOME=/home/admin/modules/hadoop-3.1.3

export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

#ZOOKEEPER_HOME

export ZOOKEEPER_HOME=/home/admin/modules/zookeeper-3.4.10

export PATH=$PATH:$ZOOKEEPER_HOME/bin
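Assuming the exports above were added to /etc/profile (adjust if you used ~/.bashrc instead), reload the file so they take effect in the current shell:

source /etc/profile

echo $HADOOP_HOME    # should print /home/admin/modules/hadoop-3.1.3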

2. Modify the configuration files

zoo.cfg

[root@node3 conf]# cat zoo.cfg

# The number of milliseconds of each tick

tickTime=2000

# The number of ticks that the initial

# synchronization phase can take

initLimit=10

# The number of ticks that can pass between

# sending a request and getting an acknowledgement

syncLimit=5

# the directory where the snapshot is stored.

# do not use /tmp for storage, /tmp here is just

# example sakes.

dataDir=/home/admin/modules/zookeeper-3.4.10/zkData

# the port at which the clients will connect

clientPort=2181

# the maximum number of client connections.

# increase this if you need to handle more clients

#maxClientCnxns=60

#

# Be sure to read the maintenance section of the

# administrator guide before turning on autopurge.

#

# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance

#

# The number of snapshots to retain in dataDir

#autopurge.snapRetainCount=3

# Purge task interval in hours

# Set to "0" to disable auto purge feature

#autopurge.purgeInterval=1

server.1=node1:2888:3888

server.2=node2:2888:3888

server.3=node3:2888:3888

#server.4=node4:2888:3888

3. Create a zkData directory in the ZooKeeper installation directory and add a myid file

touch myid

echo 1 > myid
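Note that each node's myid must match its server.N entry in zoo.cfg, so the value differs per node. A minimal sketch, assuming the same install path on all three machines:

# on every node, create the data directory
mkdir -p /home/admin/modules/zookeeper-3.4.10/zkData

# then write a node-specific id
echo 1 > /home/admin/modules/zookeeper-3.4.10/zkData/myid   # node1
echo 2 > /home/admin/modules/zookeeper-3.4.10/zkData/myid   # node2
echo 3 > /home/admin/modules/zookeeper-3.4.10/zkData/myid   # node3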

4. Configure Hadoop

Modify the env scripts

Add JAVA_HOME to hadoop-env.sh, mapred-env.sh, and yarn-env.sh:

export JAVA_HOME=/home/admin/modules/jdk1.8.0_144

5. Add the worker nodes

Edit the workers file and list the three nodes:

node1

node2

node3

6. Modify core-site.xml

<property>

  <name>fs.defaultFS</name>

  <value>hdfs://mycluster</value>

</property>

<!-- Directory where Hadoop stores its runtime data -->

<property>

<name>hadoop.tmp.dir</name>

<value>/home/admin/modules/hadoop-3.1.3/hadoop-data</value>

</property>

<!-- ZooKeeper quorum used for HA monitoring -->

  <property>

  <name>ha.zookeeper.quorum</name>

  <value>node1:2181,node2:2181,node3:2181</value>

</property>

Modify hdfs-site.xml

<!-- Number of replicas -->

<property>

<name>dfs.replication</name>

<value>3</value>

</property>

<property>

  <name>dfs.nameservices</name>

  <value>mycluster</value>

</property>

<property>

  <name>dfs.ha.namenodes.mycluster</name>

  <value>nn1,nn2</value>

</property>

<property>

  <name>dfs.namenode.rpc-address.mycluster.nn1</name>

  <value>node1:8020</value>

</property>

<property>

  <name>dfs.namenode.rpc-address.mycluster.nn2</name>

  <value>node2:8020</value>

</property>

<property>

  <name>dfs.namenode.http-address.mycluster.nn1</name>

  <value>node1:9870</value>

</property>

<property>

  <name>dfs.namenode.http-address.mycluster.nn2</name>

  <value>node2:9870</value>

</property>

<!-- Shared edits directory provided by the JournalNode quorum -->

<property>

  <name>dfs.namenode.shared.edits.dir</name>

  <value>qjournal://node1:8485;node2:8485;node3:8485/mycluster</value>

</property>

<property>

  <name>dfs.client.failover.proxy.provider.mycluster</name>

  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>

</property>

<!-- Passwordless SSH and fencing -->

    <property>

      <name>dfs.ha.fencing.methods</name>

      <value>sshfence</value>

    </property>

    <property>

      <name>dfs.ha.fencing.ssh.private-key-files</name>

      <value>/root/.ssh/id_rsa</value>

    </property>

<!-- Local directory where the JournalNodes store edits -->

<property>

  <name>dfs.journalnode.edits.dir</name>

  <value>/home/admin/modules/hadoop-3.1.3/journalnode</value>

</property>

<!-- Enable automatic failover -->

<property>

  <name>dfs.ha.automatic-failover.enabled</name>

  <value>true</value>

</property>

<!-- Disable permission checking -->

<property>

<name>dfs.permissions.enabled</name>

<value>false</value>

</property>

<!-- Enable WebHDFS -->

<property>

<name>dfs.webhdfs.enabled</name>

<value>true</value>

</property>
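The sshfence method configured above relies on passwordless SSH between the two NameNode hosts, using the private key given in dfs.ha.fencing.ssh.private-key-files. A minimal sketch, assuming the root user on node1 and node2:

# run on node1, then repeat on node2
ssh-keygen -t rsa -f /root/.ssh/id_rsa -N ""
ssh-copy-id root@node1
ssh-copy-id root@node2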

Modify mapred-site.xml

<!-- Run MapReduce on YARN -->

<property>

<name>mapreduce.framework.name</name>

<value>yarn</value>

</property>

<!-- MapReduce JobHistory Server address, default port 10020 -->

<property>

<name>mapreduce.jobhistory.address</name>

<value>node1:10020</value>

</property>

<!-- MapReduce JobHistory Server web UI address, default port 19888 -->

<property>

<name>mapreduce.jobhistory.webapp.address</name>

<value>node1:19888</value>

</property>

Modify yarn-site.xml

<!-- Shuffle service used by reducers to fetch data -->

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<!-- Enable ResourceManager HA -->

<property>

  <name>yarn.resourcemanager.ha.enabled</name>

  <value>true</value>

</property>

<property>

  <name>yarn.resourcemanager.cluster-id</name>

  <value>cluster1</value>

</property>

<property>

  <name>yarn.resourcemanager.ha.rm-ids</name>

  <value>rm1,rm2</value>

</property>

<!-- ResourceManager hostnames -->

<property>

  <name>yarn.resourcemanager.hostname.rm1</name>

  <value>node2</value>

</property>

<property>

  <name>yarn.resourcemanager.hostname.rm2</name>

  <value>node3</value>

  </property>


  <property>

  <name>yarn.resourcemanager.webapp.address.rm1</name>

  <value>node2:8088</value>

</property>

<property>

  <name>yarn.resourcemanager.webapp.address.rm2</name>

  <value>node3:8088</value>

</property>

<!-- ZooKeeper quorum addresses -->

  <property>

  <name>yarn.resourcemanager.zk-address</name>

  <value>node1:2181,node2:2181,node3:2181</value>

</property>

<!-- Log aggregation -->

<property>

<name>yarn.log-aggregation-enable</name>

<value>true</value>

  </property>

  <property>

<name>yarn.log-aggregation.retain-seconds</name>

<value>86400</value>

  </property>

  <property>

<name>yarn.log.server.url</name>

<value>http://node1:19888/jobhistory/logs/</value>

  </property>

7. Modify the startup scripts under sbin

Add the following at the top of start-dfs.sh and stop-dfs.sh:

HDFS_DATANODE_USER=root

HDFS_DATANODE_SECURE_USER=hdfs

HDFS_NAMENODE_USER=root

HDFS_JOURNALNODE_USER=root

HDFS_ZKFC_USER=root

Add the following at the top of start-yarn.sh and stop-yarn.sh:

YARN_RESOURCEMANAGER_USER=root

HADOOP_SECURE_DN_USER=yarn

YARN_NODEMANAGER_USER=root

8. Distribute Hadoop and ZooKeeper to every node
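For example, with scp (assuming the same directory layout and passwordless SSH to node2 and node3; remember to set a different myid on each node afterwards, as in step 3):

scp -r /home/admin/modules/hadoop-3.1.3 root@node2:/home/admin/modules/
scp -r /home/admin/modules/hadoop-3.1.3 root@node3:/home/admin/modules/
scp -r /home/admin/modules/zookeeper-3.4.10 root@node2:/home/admin/modules/
scp -r /home/admin/modules/zookeeper-3.4.10 root@node3:/home/admin/modules/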

9. Start ZooKeeper on every node

zkServer.sh start
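Once all three have started, you can check the role each node has taken; one should report leader and the others follower:

zkServer.sh status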

10. Start the JournalNode on every node

hdfs --daemon start journalnode

11. Format the primary NameNode (nn1, i.e. node1)

hdfs namenode -format

12. Start the primary NameNode

hdfs --daemon start namenode

13. On the standby NameNode, sync the metadata from the primary

hdfs namenode -bootstrapStandby

14. Start the standby NameNode

hdfs --daemon start namenode

15. Initialize the HA state in ZooKeeper

On the primary node: hdfs zkfc -formatZK

16. Start the HDFS cluster from the primary node

sbin/start-dfs.sh

17. Check the processes

[root@node1 hadoop-3.1.3]# jps

3824 DataNode

1413 QuorumPeerMain

4422 NodeManager

4729 Jps

3692 NameNode

4108 JournalNode

4287 DFSZKFailoverController
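You can also confirm which NameNode is active and which is standby (nn1 and nn2 are the ids defined in hdfs-site.xml):

hdfs haadmin -getServiceState nn1

hdfs haadmin -getServiceState nn2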

18. Start YARN with ResourceManager HA

sbin/start-yarn.sh    

Check the processes:

[root@node2 hadoop-3.1.3]# jps

1713 NameNode

3971 Jps

1571 JournalNode

3350 NodeManager

1254 QuorumPeerMain

2438 DataNode

2606 DFSZKFailoverController

3231 ResourceManager
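Similarly, you can check the ResourceManager HA state (rm1 and rm2 are the ids from yarn-site.xml; one should be active, the other standby):

yarn rmadmin -getServiceState rm1

yarn rmadmin -getServiceState rm2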

At this point, the cluster setup is complete.

Access the NameNode web UI on port 9870:


A perfect setup!

Thanks, everyone~~~~~
