Prerequisites
1. A Docker environment; I am using 19.03.1.
2. Install docker-compose
sudo curl -L https://get.daocloud.io/docker/compose/releases/download/1.16.1/docker-compose-`uname -s`-`uname -m` -o /usr/local/bin/docker-compose
sudo chmod +x /usr/local/bin/docker-compose
docker-compose --version
3. Prepare the images
harisekhon/hadoop
harisekhon/hbase
wurstmeister/zookeeper
Because my ZooKeeper runs in a container that was set up earlier, this article does not cover installing ZooKeeper.
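If you want to fetch the images ahead of time, a minimal sketch (step 11 below pins harisekhon/hbase to the 1.3 tag):
docker pull harisekhon/hadoop
docker pull harisekhon/hbase:1.3
docker pull wurstmeister/zookeeper   # only needed if you do not already have a ZooKeeper container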
4. Edit core-site.xml under /data/hadoop/etc/hadoop
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hadoop-master:9000</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/hadoop/tmp</value>
    </property>
</configuration>
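Once the containers are running (step 11 onwards), you can confirm from inside hadoop-master that this setting was picked up; hdfs getconf is a standard Hadoop CLI:
docker exec hadoop-master hdfs getconf -confKey fs.defaultFS   # should print hdfs://hadoop-master:9000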
5. Edit hdfs-site.xml under /data/hadoop/etc/hadoop
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/hadoop/data</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/hadoop/name</value>
    </property>
</configuration>
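After the NameNode has been formatted and the containers restarted (steps 19-20), a quick health check; with dfs.replication set to 1 and a single hadoop-slave, the report should show one live DataNode:
docker exec hadoop-master hdfs dfsadmin -report   # expect 1 live datanode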
6. Edit mapred-site.xml under /data/hadoop/etc/hadoop
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapred.job.tracker</name>
        <value>hadoop-master:9001</value>
    </property>
</configuration>
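To see MapReduce actually run on YARN, you can submit the bundled example job once the cluster is up. This is only a sketch: the exact jar path depends on the image layout, and it assumes $HADOOP_HOME is set inside the container:
docker exec hadoop-master bash -c 'hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 2 10'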
7. Edit yarn-site.xml under /data/hadoop/etc/hadoop
<configuration>
    <property>
        <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
        <value>100</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>hadoop-master:8032</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>hadoop-master:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>hadoop-master:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>hadoop-master:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>hadoop-master:8089</value>
    </property>
</configuration>
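Once everything is running, you can confirm that the NodeManager on hadoop-slave has registered with the ResourceManager; yarn node -list is a standard YARN CLI:
docker exec hadoop-master yarn node -list   # expect one node in RUNNING state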
8. Edit slaves under /data/hadoop/etc/hadoop
hadoop-slave
9. Edit hbase-site.xml under /data/hbase/conf
<configuration>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
        <description>The mode the cluster will be in. Possible values are
        false: standalone and pseudo-distributed setups with managed Zookeeper
        true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh)
        </description>
    </property>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://hadoop-master:9000/hbase</value>
    </property>
    <property>
        <name>hbase.hregion.memstore.flush.size</name>
        <value>536870912</value>
        <description>
        Memstore will be flushed to disk if size of the memstore
        exceeds this number of bytes. Value is checked by a thread that runs
        every hbase.server.thread.wakefrequency.
        </description>
    </property>
    <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
        <description>Property from ZooKeeper's config zoo.cfg.
        The port at which the clients will connect.
        </description>
    </property>
    <property>
        <name>zookeeper.session.timeout</name>
        <value>120000</value>
    </property>
    <property>
        <name>hbase.zookeeper.property.tickTime</name>
        <value>6000</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>172.17.0.3</value>
        <description>Comma separated list of servers in the ZooKeeper Quorum.
        </description>
    </property>
    <property>
        <name>hbase.master.info.port</name>
        <value>60010</value>
    </property>
</configuration>
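Before starting HBase it is worth checking that the ZooKeeper at 172.17.0.3:2181 is reachable; ruok is a standard ZooKeeper four-letter command, and this sketch assumes nc (netcat) is available on the host:
echo ruok | nc 172.17.0.3 2181   # a healthy ZooKeeper answers "imok"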
10. Edit regionservers under /data/hbase/conf
hbase-slave
11. Edit the docker-compose.yml file
version: '2'
services:
  hadoop-master:
    image: harisekhon/hadoop
    container_name: hadoop-master
    ports:
      - "50070:50070"
      - "8088:8088"
      - "9000:9000"
      - "8031:8031"
      - "8032:8032"
      - "8033:8033"
    restart: always
    volumes:
      - /data/hadoop/etc/:/hadoop/etc/
  hadoop-slave:
    image: harisekhon/hadoop
    container_name: hadoop-slave
    depends_on:
      - hadoop-master
    restart: always
    volumes:
      - /data/hadoop/etc/:/hadoop/etc/
  hbase-master:
    image: harisekhon/hbase:1.3
    container_name: hbase-master
    depends_on:
      - hadoop-master
      - hadoop-slave
    ports:
      - "16010:16010"
    restart: always
    volumes:
      - /data/hbase/conf/:/hbase/conf/
  hbase-slave:
    image: harisekhon/hbase:1.3
    container_name: hbase-slave
    depends_on:
      - hadoop-master
      - hadoop-slave
      - hbase-master
    restart: always
    volumes:
      - /data/hbase/conf/:/hbase/conf/
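With the file in place, bring the stack up from the directory containing docker-compose.yml; the remaining steps assume all four containers are already running:
docker-compose up -d
docker-compose ps   # all four containers should show State "Up"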
12. Enter hadoop-master and generate a key pair
docker exec -it hadoop-master /bin/bash
ssh-keygen -t rsa
Press Enter at every prompt to generate the key pair.
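If you prefer to skip the prompts, the same key can be generated non-interactively; -N "" sets an empty passphrase and -f picks the default key path:
ssh-keygen -t rsa -N "" -f /root/.ssh/id_rsa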
13. Enter hadoop-slave and repeat step 12.
14. Copy the public keys out of the containers and merge them
docker cp hadoop-slave:/root/.ssh/id_rsa.pub ./hadoop-slave.key
docker cp hadoop-master:/root/.ssh/id_rsa.pub ./hadoop-master.key
cat ./hadoop-master.key >> authorized_keys
cat ./hadoop-slave.key >> authorized_keys
15. Copy the merged authorized_keys file back into both containers
docker cp ./authorized_keys hadoop-master:/root/.ssh/authorized_keys
docker cp ./authorized_keys hadoop-slave:/root/.ssh/authorized_keys
16. Enter the hadoop-slave container and fix the ownership and permissions of the key file; without this step, passwordless login will not take effect
chmod 600 /root/.ssh/authorized_keys
chown root /root/.ssh/authorized_keys
chgrp root /root/.ssh/authorized_keys
17. Enter the hadoop-master container and repeat step 16.
18. In hadoop-master and hadoop-slave respectively, run the following ssh commands to verify that passwordless login works
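The container names resolve on the compose network, so a round trip like this should succeed without a password (accept the host-key prompt the first time). From inside hadoop-master:
ssh hadoop-slave    # should log you in without a password prompt
exit
Then from inside hadoop-slave:
ssh hadoop-master
exit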
19. Enter hadoop-master and format the NameNode
hdfs namenode -format
exit
20. Restart the containers
docker restart hadoop-master
docker restart hadoop-slave
docker restart hbase-master
docker restart hbase-slave
21. Enter hadoop-master and hadoop-slave and run jps to check whether everything started successfully.
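As a rough guide to what success looks like: on hadoop-master, jps should typically list NameNode, SecondaryNameNode and ResourceManager; on hadoop-slave, DataNode and NodeManager. A quick end-to-end smoke test from the host (a sketch; the hbase shell check assumes hbase-master came up cleanly):
docker exec hadoop-master jps
docker exec hadoop-slave jps
docker exec hadoop-master hdfs dfs -mkdir -p /smoketest   # write something into HDFS
docker exec hadoop-master hdfs dfs -ls /                  # the new directory should be listed
echo "status" | docker exec -i hbase-master hbase shell   # should report 1 active master and 1 region server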