I've been playing with Hadoop on and off, and to make the environment easy to migrate I put together three Hadoop setups on Docker so I can spin one up whenever I want to play. Enough talk, straight to the source.
Local Dockerfile
FROM registry.cn-beijing.aliyuncs.com/douguohai/centos8-jdk8:v2
LABEL maintainer="douguohai <douguohai@gmail.com>"
# Copy in the Hadoop release tarball, unpack it, and install which
COPY hadoop-2.9.2.tar.gz /usr/local/
ENV HADOOP_HOME=/usr/local/hadoop-2.9.2
ENV PATH=${PATH}:${HADOOP_HOME}/bin
RUN yum clean all && yum -y update && yum -y install which && yum clean all \
    && cd /usr/local/ && tar -xvf hadoop-2.9.2.tar.gz && rm -f hadoop-2.9.2.tar.gz && cd hadoop-2.9.2 \
    && mkdir input && cp etc/hadoop/*.xml input/ \
    && bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.2.jar grep input output 'dfs[a-z.]+' && cat output/* \
    && echo "export HADOOP_HOME=${HADOOP_HOME}" >> /etc/profile && echo "export PATH=${PATH}:${HADOOP_HOME}/bin" >> /etc/profile
# Working directory
WORKDIR /usr/local/hadoop-2.9.2
# Default to an interactive shell (the grep smoke test already ran at build time)
CMD ["bash"]
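To try the local image, here's a minimal build-and-run sketch. It assumes hadoop-2.9.2.tar.gz sits next to the Dockerfile; the hadoop-local:v1 tag is my own choice, not anything published above:

docker build -t hadoop-local:v1 .
docker run -it --rm hadoop-local:v1
# The standalone grep example already ran during the build (its result was cat'ed then).
# To rerun it by hand, write to a fresh directory, since output/ already exists:
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.2.jar grep input output2 'dfs[a-z.]+'
cat output2/*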
Pseudo-distributed Dockerfile
FROM centos8-ssh-java:v1
LABEL maintainer="douguohai <douguohai@gmail.com>"
# Download the Hadoop release tarball, unpack it, and install which
# COPY hadoop-2.9.2.tar.gz /usr/local/
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
ENV HADOOP_HOME=/usr/local/hadoop-2.9.2
ENV PATH=${PATH}:${HADOOP_HOME}/bin
RUN wget -P /usr/local/ https://code.aliyun.com/douguohai/sourcepack/raw/6414d2419c9f7c01648eccafa19de265090162fd/hadoop-2.9.2.tar.gz \
    && yum clean all && yum -y update && yum -y install which && yum clean all && cd /usr/local/ \
    && tar -xvf hadoop-2.9.2.tar.gz && rm -f hadoop-2.9.2.tar.gz && rm -rf /usr/local/hadoop-2.9.2/share/doc && cd hadoop-2.9.2 \
    && echo "export HADOOP_HOME=${HADOOP_HOME}" >> /etc/profile && echo "export PATH=${PATH}:${HADOOP_HOME}/bin" >> /etc/profile \
    && chmod +x /usr/local/bin/entrypoint.sh
COPY ./hadoop-2.9.2/etc/hadoop/core-site.xml /usr/local/hadoop-2.9.2/etc/hadoop/core-site.xml
COPY ./hadoop-2.9.2/etc/hadoop/hadoop-env.sh /usr/local/hadoop-2.9.2/etc/hadoop/hadoop-env.sh
COPY ./hadoop-2.9.2/etc/hadoop/hdfs-site.xml /usr/local/hadoop-2.9.2/etc/hadoop/hdfs-site.xml
COPY ./hadoop-2.9.2/etc/hadoop/mapred-site.xml /usr/local/hadoop-2.9.2/etc/hadoop/mapred-site.xml
COPY ./hadoop-2.9.2/etc/hadoop/yarn-env.sh /usr/local/hadoop-2.9.2/etc/hadoop/yarn-env.sh
COPY ./hadoop-2.9.2/etc/hadoop/yarn-site.xml /usr/local/hadoop-2.9.2/etc/hadoop/yarn-site.xml
# Working directory
WORKDIR /usr/local/hadoop-2.9.2
ENTRYPOINT ["sh", "/usr/local/bin/entrypoint.sh"]
EXPOSE 50070
EXPOSE 8088
# Default to an interactive shell, handed to the entrypoint as "$@"
CMD ["/bin/bash"]
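The entrypoint.sh copied in above isn't shown in this post. As a purely hypothetical sketch (not the author's actual script), a pseudo-distributed entrypoint usually does three things: start sshd (Hadoop's start scripts ssh to localhost), format the NameNode on first boot only, then bring up HDFS and YARN:

#!/bin/bash
# Hypothetical sketch only; the real entrypoint.sh ships with the image.
/usr/sbin/sshd                                    # the centos8-ssh-java base is assumed to provide sshd and host keys
if [ ! -d /tmp/hadoop-root/dfs/name ]; then       # default name dir under hadoop.tmp.dir when run as root
  ${HADOOP_HOME}/bin/hdfs namenode -format -force # format only on first start
fi
${HADOOP_HOME}/sbin/start-dfs.sh
${HADOOP_HOME}/sbin/start-yarn.sh
exec "$@"                                         # hand off to CMD (/bin/bash here)

Building and running it with the two web UIs published (the hadoop-pseudo:v1 tag is again my own):

docker build -t hadoop-pseudo:v1 .
docker run -dit -p 50070:50070 -p 8088:8088 --name hadoop-pseudo hadoop-pseudo:v1
# NameNode UI:        http://localhost:50070
# ResourceManager UI: http://localhost:8088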
Fully distributed docker-compose
# Example YAML configuration
version: '3'
services:
  hadoop-master:
    image: registry.cn-beijing.aliyuncs.com/douguohai/hadoop-distributed-master:v1
    container_name: hadoop-master
    ports:
      - "50070:50070"
      - "8088:8088"
    networks:
      mynetwork:
        ipv4_address: 199.18.0.2
    environment:
      HOSTNAME: hadoop-master
    depends_on:
      - hadoop-slave3
      - hadoop-slave4
      - hadoop-slave5
  hadoop-slave3:
    image: registry.cn-beijing.aliyuncs.com/douguohai/hadoop-distributed-slave:v1
    container_name: hadoop-slave3
    networks:
      mynetwork:
        ipv4_address: 199.18.0.3
    environment:
      HOSTNAME: hadoop-slave3
  hadoop-slave4:
    image: registry.cn-beijing.aliyuncs.com/douguohai/hadoop-distributed-slave:v1
    container_name: hadoop-slave4
    networks:
      mynetwork:
        ipv4_address: 199.18.0.4
    environment:
      HOSTNAME: hadoop-slave4
  hadoop-slave5:
    image: registry.cn-beijing.aliyuncs.com/douguohai/hadoop-distributed-slave:v1
    container_name: hadoop-slave5
    networks:
      mynetwork:
        ipv4_address: 199.18.0.5
    environment:
      HOSTNAME: hadoop-slave5
networks:
  mynetwork:
    ipam:
      config:
        - subnet: 199.18.0.0/16
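Bringing the cluster up and sanity-checking it. The service and container names come from the compose file above; I'm assuming the distributed images put ${HADOOP_HOME}/bin on PATH like the two Dockerfiles earlier:

docker-compose up -d
docker exec -it hadoop-master /bin/bash
# Inside the master, confirm all three DataNodes registered:
hdfs dfsadmin -report
# And that the expected daemons are running:
jps
# From the host: NameNode UI at http://localhost:50070, ResourceManager at http://localhost:8088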