1. Spark
- Service
```yaml
apiVersion: v1
kind: Service
metadata:
  name: spark-master
  namespace: yarn
  labels:
    app: spark-master
spec:
  ports:
    - name: webui
      port: 8080
      protocol: TCP
      targetPort: 8080
    - name: master
      port: 7077
      protocol: TCP
      targetPort: 7077
    - name: rm
      port: 8032
      protocol: TCP
      targetPort: 8032
    - name: tracker
      port: 8031
      protocol: TCP
      targetPort: 8031
  selector:
    app: spark-master
```
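A quick sanity check after applying the manifest (the file name spark-master-svc.yaml is just an assumption for this example):

```sh
# Apply the Service and confirm the ports are exposed in the yarn namespace.
kubectl apply -f spark-master-svc.yaml
kubectl -n yarn get svc spark-master
```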
- ConfigMap
```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: spark-cm
  namespace: yarn
data:
  core-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hdfs-namenode.yarn.svc.cluster.local:9000</value>
        <description>namenode address</description>
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
      </property>
    </configuration>
  mapred-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>0.0.0.0:10020</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>0.0.0.0:19888</value>
      </property>
    </configuration>
  yarn-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>yarn-rm.yarn.svc.cluster.local</value>
      </property>
      <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/data/hadoop/yarn/local-dirs</value>
      </property>
      <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data/hadoop/yarn/log-dirs</value>
      </property>
      <property>
        <name>yarn.log.server.url</name>
        <value>http://0.0.0.0:19888/jobhistory/logs</value>
      </property>
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
        <description>whether to enable log aggregation</description>
      </property>
      <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>10080</value>
        <description>how long aggregated logs are retained, in seconds</description>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/yarn/app/logs</value>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
        <value>logs</value>
      </property>
    </configuration>
  spark-defaults.conf: |-
    spark.eventLog.enabled            true
    spark.eventLog.dir                hdfs://hdfs-namenode.yarn.svc.cluster.local:9000/spark/event
    spark.yarn.historyServer.address  http://0.0.0.0:18080
  spark-env.sh: |-
    export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdfs-namenode.yarn.svc.cluster.local:9000/spark/event -Dspark.history.retainedApplications=30"
    export HADOOP_CONF_DIR="/usr/local/hadoop/etc/hadoop"
    export SPARK_CONF_DIR="/usr/local/spark/conf"
    export SPARK_LOG_DIR="/data/spark/logs"
    export YARN_CONF_DIR="/usr/local/hadoop/etc/hadoop"
    export SPARK_MASTER_HOST=0.0.0.0
    export SPARK_MASTER_PORT=7077
    export SPARK_MASTER_WEBUI_PORT=8080
    export SPARK_WORKER_PORT=7078
    export SPARK_WORKER_WEBUI_PORT=8081
```
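The event-log directory referenced by spark.eventLog.dir must exist in HDFS before the first job writes to it, so create it up front: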
```sh
hdfs dfs -mkdir -p /spark/event
```
- Master
```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: spark-master
  namespace: yarn
spec:
  replicas: 1
  selector:
    matchLabels:
      app: spark-master
  template:
    metadata:
      labels:
        app: spark-master
    spec:
      containers:
        - name: historyserver
          image: spark:3.3.2
          command: ["spark-class"]
          args:
            - "org.apache.spark.deploy.history.HistoryServer"
          ports:
            - containerPort: 18080
              name: historyserver
        - name: master
          image: spark:3.3.2
          command: ["spark-class"]
          args:
            - "org.apache.spark.deploy.master.Master"
            - "--properties-file"
            - "/usr/local/spark/conf/spark-defaults.conf"
          ports:
            - containerPort: 7077
              name: master
            - containerPort: 8080
              name: webui
          volumeMounts:
            - name: spark-cm
              mountPath: /usr/local/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: spark-cm
              mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: spark-cm
              mountPath: /usr/local/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: spark-cm
              mountPath: /usr/local/spark/conf/spark-env.sh
              subPath: spark-env.sh
            - name: spark-cm
              mountPath: /usr/local/spark/conf/spark-defaults.conf
              subPath: spark-defaults.conf
            - name: spark-logs
              mountPath: /data/spark/logs
      volumes:
        - name: spark-cm
          configMap:
            name: spark-cm
            items:
              - key: yarn-site.xml
                path: yarn-site.xml
              - key: core-site.xml
                path: core-site.xml
              - key: mapred-site.xml
                path: mapred-site.xml
              - key: spark-env.sh
                path: spark-env.sh
              - key: spark-defaults.conf
                path: spark-defaults.conf
        - name: spark-logs
          hostPath:
            path: /data/spark/logs
            type: Directory
      nodeSelector:
        spark-master: "true"
      restartPolicy: Always
```
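The pod is pinned via nodeSelector to a node labeled spark-master=true, so label one before applying. A SparkPi run then makes a reasonable smoke test; the node name, manifest file name, and examples-jar path below are assumptions, adjust them to your image layout:

```sh
# Label the target node and apply the manifest (names are examples).
kubectl label node node-1 spark-master=true
kubectl apply -f spark-master.yaml

# Smoke test: submit SparkPi to the standalone master through the Service.
kubectl -n yarn exec -it spark-master-0 -c master -- \
  spark-submit \
    --master spark://spark-master.yarn.svc.cluster.local:7077 \
    --class org.apache.spark.examples.SparkPi \
    /usr/local/spark/examples/jars/spark-examples_2.12-3.3.2.jar 100
```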
TODO: the base image needs to be replaced.
2. Dockerfile
- jdk
```dockerfile
FROM alpine:3.4

# A few problems with compiling Java from source:
#  1. Oracle. Licensing prevents us from redistributing the official JDK.
#  2. Compiling OpenJDK also requires the JDK to be installed, and it gets
#     really hairy.

# Default to UTF-8 file.encoding
ENV LANG C.UTF-8

# add a simple script that can auto-detect the appropriate JAVA_HOME value
# based on whether the JDK or only the JRE is installed
RUN { \
      echo '#!/bin/sh'; \
      echo 'set -e'; \
      echo; \
      echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \
    } > /usr/local/bin/docker-java-home \
  && chmod +x /usr/local/bin/docker-java-home

ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
ENV PATH $PATH:/usr/lib/jvm/java-1.8-openjdk/jre/bin:/usr/lib/jvm/java-1.8-openjdk/bin

ENV JAVA_VERSION 8u111
ENV JAVA_ALPINE_VERSION 8.111.14-r0

RUN set -x \
  && apk add --no-cache \
       bash \
       openjdk8="$JAVA_ALPINE_VERSION" \
  && [ "$JAVA_HOME" = "$(docker-java-home)" ]
```
```sh
docker build -t jdk:1.8 .
```
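A quick sanity check of the resulting image (version output varies with the exact OpenJDK build):

```sh
docker run --rm jdk:1.8 java -version
```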
- hadoop
```dockerfile
FROM jdk:1.8

WORKDIR /usr/local/hadoop
ADD hadoop /usr/local/hadoop

ENV HADOOP_HOME /usr/local/hadoop
ENV PATH $PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
```
```sh
docker build -t hadoop:3.2.4 .
```
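Assuming the local hadoop/ directory added into the image is a complete, unpacked Hadoop 3.2.4 distribution, the image can be verified with:

```sh
docker run --rm hadoop:3.2.4 hadoop version
```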
- spark
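The spark Dockerfile was left as a stub; here is a minimal sketch following the same pattern as the hadoop image, assuming a Spark 3.3.2 distribution unpacked into a local spark/ directory:

```dockerfile
# Sketch only: mirrors the hadoop image above; the local spark/ directory
# is assumed to contain an unpacked Spark 3.3.2 distribution.
FROM hadoop:3.2.4

WORKDIR /usr/local/spark
ADD spark /usr/local/spark

ENV SPARK_HOME /usr/local/spark
ENV PATH $PATH:/usr/local/spark/bin:/usr/local/spark/sbin
```

```sh
docker build -t spark:3.3.2 .
```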
3. Configuring a private image registry for containerd
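The registry mirrors and credentials are configured in /etc/containerd/config.toml; the relevant fragment looks like this: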
```toml
[plugins]
  [plugins."io.containerd.grpc.v1.cri"]
    sandbox_image = "k8s.gcr.io/pause:3.8"
    max_container_log_line_size = -1
    enable_unprivileged_ports = false
    enable_unprivileged_icmp = false
    [plugins."io.containerd.grpc.v1.cri".containerd]
      default_runtime_name = "runc"
      snapshotter = "overlayfs"
      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
          runtime_type = "io.containerd.runc.v2"
          runtime_engine = ""
          runtime_root = ""
          base_runtime_spec = "/etc/containerd/cri-base.json"
          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
            SystemdCgroup = true
    [plugins."io.containerd.grpc.v1.cri".registry]
      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
          endpoint = ["https://registry-1.docker.io"]
        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."registry.cn-hangzhou.aliyuncs.com"]
          endpoint = ["https://registry.cn-hangzhou.aliyuncs.com"]
      [plugins."io.containerd.grpc.v1.cri".registry.configs]
        [plugins."io.containerd.grpc.v1.cri".registry.configs."registry.cn-hangzhou.aliyuncs.com".tls]
          insecure_skip_verify = true
        [plugins."io.containerd.grpc.v1.cri".registry.configs."registry.cn-hangzhou.aliyuncs.com".auth]
          username = "<username>"
          password = "<password>"
```
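After editing the config, restart containerd and pull an image through the CRI to confirm the credentials are picked up (the image reference is a placeholder):

```sh
systemctl restart containerd
# crictl goes through the CRI plugin, so this exercises the registry config above.
crictl pull registry.cn-hangzhou.aliyuncs.com/<namespace>/<image>:<tag>
```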