Spark on Kubernetes

一、Spark

  • Service

    apiVersion: v1
    kind: Service
    metadata:
      name: spark-master
      namespace: yarn
      labels:
        app: spark-master
    spec:
      ports:
      - name: webui
        port: 8080
        protocol: TCP
        targetPort: 8080
      - name: master
        port: 7077
        protocol: TCP
        targetPort: 7077
      - name: rm
        port: 8032
        protocol: TCP
        targetPort: 8032
      - name: tracker
        port: 8031
        protocol: TCP
        targetPort: 8031
      selector:
        app: spark-master
    
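    Apply the manifest and confirm the four ports are exposed (the file name spark-master-svc.yaml is an assumed name for the Service above):

    kubectl apply -f spark-master-svc.yaml
    kubectl -n yarn get svc spark-master
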
  • ConfigMap

    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: spark-cm
      namespace: yarn
    data:
      core-site.xml: |
        <?xml version="1.0" encoding="UTF-8"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
          <property>
            <name>fs.defaultFS</name>
            <value>hdfs://hdfs-namenode.yarn.svc.cluster.local:9000</value>
            <description>namenode address</description>
          </property>
          <property>
            <name>io.file.buffer.size</name>
            <value>131072</value>
          </property>
          <property>
            <name>hadoop.tmp.dir</name>
            <value>/data/hadoop/tmp</value>
          </property>
        </configuration>
      mapred-site.xml: |
        <?xml version="1.0" encoding="UTF-8"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
            </property>
            <property>
                      <name>mapreduce.jobhistory.address</name>
                  <value>0.0.0.0:10020</value>
              </property>
            <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>0.0.0.0:19888</value>
            </property>
        </configuration>
      yarn-site.xml: |
        <?xml version="1.0" encoding="UTF-8"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
          <property>
              <name>yarn.resourcemanager.hostname</name>
              <value>yarn-rm.yarn.svc.cluster.local</value>
          </property>
          <property>
              <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
              <value>logs</value>
          </property>
          <property>
              <name>yarn.nodemanager.local-dirs</name>
              <value>/data/hadoop/yarn/local-dirs</value>
          </property>
          <property>
              <name>yarn.nodemanager.log-dirs</name>
              <value>/data/hadoop/yarn/log-dirs</value>
          </property>
          <property>
              <name>yarn.log.server.url</name>
              <value>http://0.0.0.0:19888/jobhistory/logs</value>
          </property>
          <property>
              <name>yarn.log-aggregation-enable</name>
              <value>true</value>
              <description>Whether to enable log aggregation</description>
          </property>
          <property>
              <name>yarn.log-aggregation.retain-seconds</name>
              <value>10080</value>
              <description>How long to retain aggregated logs, in seconds</description>
          </property>
          <property>
              <name>yarn.nodemanager.remote-app-log-dir</name>
              <value>/yarn/app/logs</value>
          </property>
        </configuration>
      spark-defaults.conf: |-
        spark.eventLog.enabled           true
        spark.eventLog.dir               hdfs://hdfs-namenode.yarn.svc.cluster.local:9000/spark/event
        spark.yarn.historyServer.address 0.0.0.0:18080
      spark-env.sh: |-
        export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdfs-namenode.yarn.svc.cluster.local:9000/spark/event -Dspark.history.retainedApplications=30"
        export HADOOP_CONF_DIR="/usr/local/hadoop/etc/hadoop"
        export SPARK_CONF_DIR="/usr/local/spark/conf"
        export SPARK_LOG_DIR="/data/spark/logs"
        export YARN_CONF_DIR="/usr/local/hadoop/etc/hadoop"
        export SPARK_MASTER_HOST=0.0.0.0
        export SPARK_MASTER_PORT=7077
        export SPARK_MASTER_WEBUI_PORT=8080
        export SPARK_WORKER_PORT=7078
        export SPARK_WORKER_WEBUI_PORT=8081
    

    Create the event log directory on HDFS that spark.eventLog.dir points to:

    hdfs dfs -mkdir -p /spark/event
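
    The ConfigMap can be applied and inspected the same way (the file name spark-cm.yaml is assumed):

    kubectl apply -f spark-cm.yaml
    kubectl -n yarn describe configmap spark-cm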

  • Master

    apiVersion: apps/v1
    kind: StatefulSet
    metadata:
      name: spark-master
      namespace: yarn
    spec:
      replicas: 1
      selector:
        matchLabels:
          app: spark-master
      template:
        metadata:
          labels:
            app: spark-master
        spec:
          containers:
            - name: historyserver
              image: spark:3.3.2
              command: ["spark-class"]
              args:
                - "org.apache.spark.deploy.history.HistoryServer"
              ports:
              - containerPort: 18080
                name: historyserver
            - name: master
              image: spark:3.3.2
              command: ["spark-class"]
              args:
                - "org.apache.spark.deploy.master.Master"
                - "--properties-file"
                - "/usr/local/spark/conf/spark-defaults.conf"
              ports:
              - containerPort: 7077
                name: master
              - containerPort: 8080
                name: webui
              volumeMounts:
              - name: spark-cm
                mountPath: /usr/local/hadoop/etc/hadoop/yarn-site.xml
                subPath: yarn-site.xml
              - name: spark-cm
                mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
                subPath: core-site.xml
              - name: spark-cm
                mountPath: /usr/local/hadoop/etc/hadoop/mapred-site.xml
                subPath: mapred-site.xml
              - name: spark-cm
                mountPath: /usr/local/spark/conf/spark-env.sh
                subPath: spark-env.sh
              - name: spark-cm
                mountPath: /usr/local/spark/conf/spark-defaults.conf
                subPath: spark-defaults.conf
              - name: spark-logs
                mountPath: /data/spark/logs
          volumes:
          - name: spark-cm
            configMap:
              name: spark-cm
              items:
              - key: yarn-site.xml
                path: yarn-site.xml
              - key: core-site.xml
                path: core-site.xml
              - key: mapred-site.xml
                path: mapred-site.xml
              - key: spark-env.sh
                path: spark-env.sh
              - key: spark-defaults.conf
                path: spark-defaults.conf
          - name: spark-logs
            hostPath:
              path: /data/spark/logs
              type: Directory
          nodeSelector:
            spark-master: "true"
          restartPolicy: Always
    

    TODO:

    The base image needs to be replaced.
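
    Once spark-master-0 is Running, a minimal smoke test against the standalone master (the examples jar path and the presence of spark-submit on the PATH are assumptions about the image layout):

    kubectl -n yarn exec -it spark-master-0 -- \
      spark-submit --master spark://spark-master:7077 \
      --class org.apache.spark.examples.SparkPi \
      /usr/local/spark/examples/jars/spark-examples_2.12-3.3.2.jar 100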

二、Dockerfile

  • jdk

    FROM alpine:3.4
    
    # A few problems with compiling Java from source:
    #  1. Oracle.  Licensing prevents us from redistributing the official JDK.
    #  2. Compiling OpenJDK also requires the JDK to be installed, and it gets
    #       really hairy.
    
    # Default to UTF-8 file.encoding
    ENV LANG C.UTF-8
    
    # add a simple script that can auto-detect the appropriate JAVA_HOME value
    # based on whether the JDK or only the JRE is installed
    RUN { \
                    echo '#!/bin/sh'; \
                    echo 'set -e'; \
                    echo; \
                    echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \
            } > /usr/local/bin/docker-java-home \
            && chmod +x /usr/local/bin/docker-java-home
    ENV JAVA_HOME /usr/lib/jvm/java-1.8-openjdk
    ENV PATH $PATH:/usr/lib/jvm/java-1.8-openjdk/jre/bin:/usr/lib/jvm/java-1.8-openjdk/bin
    
    ENV JAVA_VERSION 8u111
    ENV JAVA_ALPINE_VERSION 8.111.14-r0
    
    RUN set -x \
            && apk add --no-cache bash \
                    openjdk8="$JAVA_ALPINE_VERSION" \
            && [ "$JAVA_HOME" = "$(docker-java-home)" ]
    

    docker build -t jdk:1.8 .
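
    A quick sanity check that the JDK inside the image is usable:

    docker run --rm jdk:1.8 java -version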

  • hadoop

    FROM jdk:1.8
    
    WORKDIR /usr/local/hadoop
    # "hadoop" is the unpacked Hadoop distribution in the build context
    ADD hadoop /usr/local/hadoop

    ENV HADOOP_HOME /usr/local/hadoop
    ENV PATH $PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
    

    docker build -t hadoop:3.2.4 .

  • spark
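
    The original leaves this section empty; a minimal sketch following the same pattern as the hadoop image, assuming an unpacked Spark 3.3.2 distribution named "spark" sits in the build context:

    FROM hadoop:3.2.4

    WORKDIR /usr/local/spark
    # "spark" is the unpacked Spark 3.3.2 distribution in the build context
    ADD spark /usr/local/spark

    ENV SPARK_HOME /usr/local/spark
    ENV PATH $PATH:/usr/local/spark/bin:/usr/local/spark/sbin


    docker build -t spark:3.3.2 .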

三、Configuring a private registry for containerd

[plugins]
  [plugins."io.containerd.grpc.v1.cri"]
    sandbox_image = "k8s.gcr.io/pause:3.8"
    max_container_log_line_size = -1
    enable_unprivileged_ports = false
    enable_unprivileged_icmp = false
    [plugins."io.containerd.grpc.v1.cri".containerd]
      default_runtime_name = "runc"
      snapshotter = "overlayfs"
      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
        [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
          runtime_type = "io.containerd.runc.v2"
          runtime_engine = ""
          runtime_root = ""
          base_runtime_spec = "/etc/containerd/cri-base.json"

          [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
            SystemdCgroup = true
    [plugins."io.containerd.grpc.v1.cri".registry]
      [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
          endpoint = ["https://registry-1.docker.io"]
        [plugins."io.containerd.grpc.v1.cri".registry.mirrors."registry.cn-hangzhou.aliyuncs.com"]
          endpoint = ["https://registry.cn-hangzhou.aliyuncs.com"]
      [plugins."io.containerd.grpc.v1.cri".registry.configs]
        [plugins."io.containerd.grpc.v1.cri".registry.configs."registry.cn-hangzhou.aliyuncs.com".tls]
          insecure_skip_verify = true
        [plugins."io.containerd.grpc.v1.cri".registry.configs."registry.cn-hangzhou.aliyuncs.com".auth]
          username = "<username>"
          password = "<password>"
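
After editing the config, restart containerd and verify a pull through the mirror (the image path below is only an illustrative example):

systemctl restart containerd
crictl pull registry.cn-hangzhou.aliyuncs.com/<namespace>/spark:3.3.2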
