一、HDFS
-
Service
# ClusterIP Service in front of the HDFS NameNode pod(s) labelled
# app=hdfs-namenode. The original manifest declared `selector` twice; it is
# declared exactly once here (duplicate mapping keys are invalid YAML).
apiVersion: v1
kind: Service
metadata:
  name: hdfs-namenode
  namespace: yarn
  labels:
    app: hdfs-namenode
spec:
  type: ClusterIP
  sessionAffinity: None
  selector:
    app: hdfs-namenode
  ports:
    # NameNode RPC port; same port as fs.defaultFS in the hdfs-cm ConfigMap.
    - name: fs
      port: 9000
      protocol: TCP
      targetPort: 9000
    # NameNode web UI; same port as dfs.namenode.http-address in hdfs-cm.
    - name: webui
      port: 50070
      protocol: TCP
      targetPort: 50070
-
Configmap
# Hadoop configuration files for HDFS, stored as ConfigMap keys.
#   core-site.xml / hdfs-site.xml        -> consumed by the NameNode
#   dn-core-site.xml / dn-hdfs-site.xml  -> consumed by the DataNode
apiVersion: v1
kind: ConfigMap
metadata:
  name: hdfs-cm
  namespace: yarn
data:
  # NameNode side: fs.defaultFS uses the wildcard address 0.0.0.0.
  core-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://0.0.0.0:9000</value>
        <description>namenode address</description>
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
      </property>
    </configuration>
  # DataNode side: fs.defaultFS points at the NameNode Service DNS name.
  dn-core-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hdfs-namenode.yarn.svc.cluster.local:9000</value>
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
      </property>
    </configuration>
  # NameNode storage directory, web UI address and registration check.
  hdfs-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop/dfs/name</value>
      </property>
      <property>
        <name>dfs.namenode.http-address</name>
        <value>0.0.0.0:50070</value>
      </property>
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
    </configuration>
  # DataNode storage directory and hostname-related switches.
  dn-hdfs-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop/dfs/data</value>
      </property>
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>false</value>
      </property>
    </configuration>
域名的后缀取决于集群的配置
-
StatefulSet(namenode)
# Single-replica StatefulSet running the HDFS NameNode on a node labelled
# hdfs-namenode=true, with metadata and logs kept on hostPath directories.
#
# Changes from the original manifest:
#   * The init container used to run `hdfs namenode -format -force` on every
#     pod start, which re-formats (wipes) existing NameNode metadata on each
#     restart. It now formats only when current/VERSION is absent.
#   * The init container now mounts the same config files as the main
#     container, so the format targets the configured dfs.namenode.name.dir.
#   * Declared container ports now match the configured ports (9000 RPC,
#     50070 web UI) instead of 8020.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hdfs-namenode
  namespace: yarn
spec:
  serviceName: hdfs-namenode
  replicas: 1
  selector:
    matchLabels:
      app: hdfs-namenode
  template:
    metadata:
      labels:
        app: hdfs-namenode
    spec:
      # NOTE(review): ClusterFirstWithHostNet is normally paired with
      # hostNetwork: true, which is not set here — confirm the intent.
      dnsPolicy: ClusterFirstWithHostNet
      restartPolicy: Always
      nodeSelector:
        hdfs-namenode: "true"
      initContainers:
        - name: format
          image: registry.cn-hangzhou.aliyuncs.com/davisgao/hadoop:3.2.4
          command: ["sh", "-c"]
          args:
            # Format only on first start. -nonInteractive makes the command
            # fail instead of prompting if the directory holds unexpected data.
            - |
              if [ -f /data/hadoop/dfs/name/current/VERSION ]; then
                echo "NameNode metadata already present; skipping format"
              else
                hdfs --config /usr/local/hadoop/etc/hadoop namenode -format -nonInteractive
              fi
          volumeMounts:
            - name: hdfs-cm
              mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-cm
              mountPath: /usr/local/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: namenode-dir
              mountPath: /data/hadoop/dfs/name
      containers:
        - name: hdfs-namenode
          image: registry.cn-hangzhou.aliyuncs.com/davisgao/hadoop:3.2.4
          command: ["hdfs"]
          args:
            - "--config"
            - "/usr/local/hadoop/etc/hadoop"
            - "namenode"
          env:
            - name: HADOOP_LOG_DIR
              value: "/data/hadoop/name/logs"
            - name: HADOOP_CONF_DIR
              value: "/usr/local/hadoop/etc/hadoop"
          resources:
            limits:
              cpu: "2"
              memory: 6000Mi
            requests:
              cpu: "1"
              memory: 4Gi
          ports:
            # RPC port from fs.defaultFS (core-site.xml in hdfs-cm).
            - containerPort: 9000
              name: fs
            # Web UI port from dfs.namenode.http-address (hdfs-site.xml).
            - containerPort: 50070
              name: webui
          volumeMounts:
            - name: hdfs-cm
              mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-cm
              mountPath: /usr/local/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: namenode-dir
              mountPath: /data/hadoop/dfs/name
            - name: hadoop-log-dir
              mountPath: /data/hadoop/name/logs
      volumes:
        - name: hdfs-cm
          configMap:
            name: hdfs-cm
            items:
              - key: hdfs-site.xml
                path: hdfs-site.xml
              - key: core-site.xml
                path: core-site.xml
        # hostPath type Directory requires the directory to already exist on
        # the node.
        - name: namenode-dir
          hostPath:
            path: /data/hadoop/dfs/name
            type: Directory
        - name: hadoop-log-dir
          hostPath:
            path: /data/hadoop/name/logs
            type: Directory
namenode建议是用单独盘或者pv存储
mkdir -p /data/hadoop/dfs/name
mkdir -p /data/hadoop/name/logs
TODO:
在/data/hadoop/dfs/name下的current/VERSION中的
blockpoolID=BP-2063912425-192.168.0.67-1680162780839
依然和IP绑定
-
StatefulSet(datanode)
# Single-replica StatefulSet running an HDFS DataNode on a node labelled
# hdfs-datanode=true, with block data and logs kept on hostPath directories.
#
# Change from the original manifest: the declared container port was 8020,
# which is not a DataNode port. The ports below are the Hadoop 3.x DataNode
# defaults; the dn-* config files in hdfs-cm do not override them.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: hdfs-datanode
  namespace: yarn
spec:
  serviceName: hdfs-datanode
  replicas: 1
  selector:
    matchLabels:
      app: hdfs-datanode
  template:
    metadata:
      labels:
        app: hdfs-datanode
    spec:
      # NOTE(review): ClusterFirstWithHostNet is normally paired with
      # hostNetwork: true, which is not set here — confirm the intent.
      dnsPolicy: ClusterFirstWithHostNet
      restartPolicy: Always
      nodeSelector:
        hdfs-datanode: "true"
      containers:
        - name: hdfs-datanode
          image: registry.cn-hangzhou.aliyuncs.com/davisgao/hadoop:3.2.4
          command: ["hdfs"]
          args:
            - "--config"
            - "/usr/local/hadoop/etc/hadoop"
            - "datanode"
          env:
            - name: HADOOP_LOG_DIR
              value: "/data/hadoop/data/logs"
            - name: HADOOP_CONF_DIR
              value: "/usr/local/hadoop/etc/hadoop"
          resources:
            limits:
              cpu: "2"
              memory: 4Gi
            requests:
              cpu: "1"
              memory: 2Gi
          ports:
            # dfs.datanode.address (block data transfer)
            - containerPort: 9866
              name: data
            # dfs.datanode.http.address (web UI)
            - containerPort: 9864
              name: http
            # dfs.datanode.ipc.address
            - containerPort: 9867
              name: ipc
          volumeMounts:
            - name: hdfs-cm
              mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hdfs-cm
              mountPath: /usr/local/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: datanode-dir
              mountPath: /data/hadoop/dfs/data
            - name: hadoop-log-dir
              mountPath: /data/hadoop/data/logs
      volumes:
        # The dn-* keys are projected under the standard Hadoop file names.
        - name: hdfs-cm
          configMap:
            name: hdfs-cm
            items:
              - key: dn-hdfs-site.xml
                path: hdfs-site.xml
              - key: dn-core-site.xml
                path: core-site.xml
        # hostPath type Directory requires the directory to already exist on
        # the node.
        - name: datanode-dir
          hostPath:
            path: /data/hadoop/dfs/data
            type: Directory
        - name: hadoop-log-dir
          hostPath:
            path: /data/hadoop/data/logs
            type: Directory
datanode建议是用单独盘
mkdir -p /data/hadoop/dfs/data
mkdir -p /data/hadoop/data/logs
二、使用Yarn做资源管理
-
service
# Service in front of the YARN ResourceManager pod(s) labelled app=yarn-rm.
#
# Change from the original manifest: the scheduler port (8030, the default
# for yarn.resourcemanager.scheduler.address) is now exposed. ApplicationMasters
# that resolve the ResourceManager through this Service name need it to
# register and request containers.
apiVersion: v1
kind: Service
metadata:
  name: yarn-rm
  namespace: yarn
  labels:
    app: yarn-rm
spec:
  selector:
    app: yarn-rm
  ports:
    # ResourceManager web UI
    - name: webui
      port: 8088
      protocol: TCP
      targetPort: 8088
    # Client RPC (application submission)
    - name: rm
      port: 8032
      protocol: TCP
      targetPort: 8032
    # Resource tracker (NodeManager registration and heartbeats)
    - name: tracker
      port: 8031
      protocol: TCP
      targetPort: 8031
    # Scheduler (ApplicationMaster to ResourceManager)
    - name: scheduler
      port: 8030
      protocol: TCP
      targetPort: 8030
-
Configmap
# Hadoop configuration files for YARN, stored as ConfigMap keys.
#   yarn-site.xml     -> ResourceManager (hostname is the wildcard 0.0.0.0)
#   nm-yarn-site.xml  -> NodeManager (hostname is the yarn-rm Service DNS name)
#   core-site.xml / mapred-site.xml -> shared by both
#
# Changes from the original manifest:
#   * yarn.log-aggregation-enable was set to a directory path and declared
#     twice in both yarn-site files. It is a boolean switch, so it is now
#     declared once with the value `true`.
#   * The duplicate yarn.nodemanager.remote-app-log-dir-suffix entry in
#     nm-yarn-site.xml was removed.
#
# NOTE(review): yarn.log-aggregation.retain-seconds is 10080, i.e. about
# 2.8 hours. 10080 is also the number of minutes in 7 days — confirm whether
# 604800 (7 days in seconds) was intended.
apiVersion: v1
kind: ConfigMap
metadata:
  name: yarn-cm
  namespace: yarn
data:
  core-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://hdfs-namenode.yarn.svc.cluster.local:9000</value>
        <description>namenode address</description>
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/tmp</value>
      </property>
    </configuration>
  mapred-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
    </configuration>
  yarn-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/data/hadoop/yarn/local-dirs</value>
      </property>
      <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data/hadoop/yarn/log-dirs</value>
      </property>
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
        <description>是否启用日志聚集功能</description>
      </property>
      <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>10080</value>
        <description>日志存储时间</description>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/yarn/app/logs</value>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
        <value>logs</value>
      </property>
    </configuration>
  nm-yarn-site.xml: |
    <?xml version="1.0" encoding="UTF-8"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>yarn-rm.yarn.svc.cluster.local</value>
      </property>
      <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/data/hadoop/yarn/local-dirs</value>
      </property>
      <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/data/hadoop/yarn/log-dirs</value>
      </property>
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
        <description>是否启用日志聚集功能</description>
      </property>
      <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>10080</value>
        <description>日志存储时间</description>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/yarn/app/logs</value>
      </property>
      <property>
        <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
        <value>logs</value>
      </property>
    </configuration>
配置说明:
yarn-site.xml:
yarn.log-aggregation-enable: 设置为“true”,表示打开该功能,日志会被收集到HDFS目录中
-
resourcemanager
# Single-replica StatefulSet running the YARN ResourceManager on a node
# labelled yarn-rm=true. Configuration comes from the yarn-cm ConfigMap;
# working directories are hostPath volumes.
#
# Change from the original manifest: the scheduler port 8030 (default for
# yarn.resourcemanager.scheduler.address) is now declared with the other
# ResourceManager ports.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: yarn-rm
  namespace: yarn
spec:
  serviceName: "yarn-rm"
  replicas: 1
  selector:
    matchLabels:
      app: yarn-rm
  template:
    metadata:
      labels:
        app: yarn-rm
    spec:
      restartPolicy: Always
      nodeSelector:
        yarn-rm: "true"
      containers:
        - name: yarn-rm
          image: registry.cn-hangzhou.aliyuncs.com/davisgao/hadoop:3.2.4
          command: ["yarn"]
          args:
            - "--config"
            - "/usr/local/hadoop/etc/hadoop"
            - "resourcemanager"
          env:
            # NOTE(review): no volume is mounted at this path, so logs land on
            # the container filesystem — confirm that is intended.
            - name: HADOOP_LOG_DIR
              value: "/data/hadoop/yarn/logs"
            - name: HADOOP_CONF_DIR
              value: "/usr/local/hadoop/etc/hadoop"
            - name: SPARK_CONF_DIR
              value: "/usr/local/hadoop/etc/hadoop"
          ports:
            - containerPort: 8032
              name: rm
            - containerPort: 8088
              name: webui
            - containerPort: 8031
              name: tracker
            - containerPort: 8030
              name: scheduler
          volumeMounts:
            - name: yarn-cm
              mountPath: /usr/local/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: yarn-cm
              mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: yarn-cm
              mountPath: /usr/local/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-local-dirs
              mountPath: /data/hadoop/yarn/local-dirs
            - name: yarn-log-dirs
              mountPath: /data/hadoop/yarn/log-dirs
            - name: hadoop-tmp
              mountPath: /data/hadoop/tmp
      volumes:
        - name: yarn-cm
          configMap:
            name: yarn-cm
            items:
              - key: yarn-site.xml
                path: yarn-site.xml
              - key: core-site.xml
                path: core-site.xml
              - key: mapred-site.xml
                path: mapred-site.xml
        # hostPath type Directory requires the directory to already exist on
        # the node.
        - name: yarn-local-dirs
          hostPath:
            path: /data/hadoop/yarn/local-dirs
            type: Directory
        - name: yarn-log-dirs
          hostPath:
            path: /data/hadoop/yarn/log-dirs
            type: Directory
        - name: hadoop-tmp
          hostPath:
            path: /data/hadoop/tmp
            type: Directory
resourcemanager建议是用单独盘
mkdir -p /data/hadoop/yarn/local-dirs
mkdir -p /data/hadoop/yarn/log-dirs
mkdir -p /data/hadoop/tmp
wget https://download.java.net/openjdk/jdk8u42/ri/openjdk-8u42-b03-linux-x64-14_jul_2022.tar.gz
-
nodemanager
# Single-replica StatefulSet running a YARN NodeManager on a node labelled
# yarn-nm=true. The nm-yarn-site.xml key of yarn-cm is projected as
# yarn-site.xml, so the NodeManager finds the ResourceManager via the yarn-rm
# Service name.
#
# Changes from the original manifest:
#   * spec.serviceName was missing; the StatefulSet API lists it as a
#     required field and the other StatefulSets in this document set it.
#     NOTE(review): no Service named yarn-nm is defined in this document —
#     create a headless one if stable per-pod DNS names are needed.
#   * Declared ports were 8032/8088, which are ResourceManager ports. They are
#     replaced with the NodeManager defaults: 8040 (localizer) and 8042
#     (web UI).
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: yarn-nm
  namespace: yarn
spec:
  serviceName: yarn-nm
  replicas: 1
  selector:
    matchLabels:
      app: yarn-nm
  template:
    metadata:
      labels:
        app: yarn-nm
    spec:
      restartPolicy: Always
      nodeSelector:
        yarn-nm: "true"
      containers:
        - name: yarn-nm
          image: registry.cn-hangzhou.aliyuncs.com/davisgao/hadoop:3.2.4
          command: ["yarn"]
          args:
            - "--config"
            - "/usr/local/hadoop/etc/hadoop"
            - "nodemanager"
          env:
            # NOTE(review): no volume is mounted at this path, so logs land on
            # the container filesystem — confirm that is intended.
            - name: HADOOP_LOG_DIR
              value: "/data/hadoop/yarn/logs"
            - name: HADOOP_CONF_DIR
              value: "/usr/local/hadoop/etc/hadoop"
            - name: SPARK_CONF_DIR
              value: "/usr/local/hadoop/etc/hadoop"
          ports:
            # yarn.nodemanager.localizer.address
            - containerPort: 8040
              name: localizer
            # yarn.nodemanager.webapp.address
            - containerPort: 8042
              name: webui
          volumeMounts:
            - name: yarn-cm
              mountPath: /usr/local/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: yarn-cm
              mountPath: /usr/local/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: yarn-cm
              mountPath: /usr/local/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: yarn-local-dirs
              mountPath: /data/hadoop/yarn/local-dirs
            - name: yarn-log-dirs
              mountPath: /data/hadoop/yarn/log-dirs
      volumes:
        - name: yarn-cm
          configMap:
            name: yarn-cm
            items:
              - key: nm-yarn-site.xml
                path: yarn-site.xml
              - key: core-site.xml
                path: core-site.xml
              - key: mapred-site.xml
                path: mapred-site.xml
        # hostPath type Directory requires the directory to already exist on
        # the node.
        - name: yarn-local-dirs
          hostPath:
            path: /data/hadoop/yarn/local-dirs
            type: Directory
        - name: yarn-log-dirs
          hostPath:
            path: /data/hadoop/yarn/log-dirs
            type: Directory
nodemanager建议是用单独盘
mkdir -p /data/hadoop/yarn/local-dirs
mkdir -p /data/hadoop/yarn/log-dirs