手动清理
#!/bin/bash
# Clean up leftover Flink submission artifacts and aggregated YARN logs on HDFS
# for applications that are no longer running.
export HADOOP_USER_NAME=hdfs
# Application IDs currently in RUNNING state on YARN.
running_apps=$(yarn application -list -appStates RUNNING | awk '{print $1}' | grep application)
# Application IDs of every submitted Flink job (one directory per app under /user/hdfs/.flink/).
submit_apps=$(hadoop fs -ls /user/hdfs/.flink/ | awk '{print $NF}' | awk -F"/" '{print $NF}' | grep application)
# Non-empty when the CDH-style aggregated log directory exists.
cdh_log_exist=
if hadoop fs -test -e /tmp/logs/hdfs/logs; then
  cdh_log_exist=1
fi
#1 - Remove stale Flink submission packages and their logs (CDH layout).
# comm -23 keeps lines present only in the first (sorted) list:
# apps that were submitted but are not currently running.
invalid_submit_apps=$(comm -23 <(echo "$submit_apps" | tr ' ' '\n' | sort) <(echo "$running_apps" | tr ' ' '\n' | sort))
for app_id in $invalid_submit_apps; do
  # Delete the Flink submission directory for this application.
  hadoop fs -rm -r -skipTrash "/user/hdfs/.flink/$app_id"
  # Delete the aggregated logs (CDH layout) when that directory exists.
  if [[ -n "$cdh_log_exist" ]]; then
    hadoop fs -rm -r -skipTrash "/tmp/logs/hdfs/logs/$app_id"
  fi
done
#2 - Remove stale Flink logs (vanilla/open-source Hadoop bucket layout).
# Path layout examples:
#   /tmp/logs/hdfs/bucket-logs-tfile/5025/application_1732099242233_5025
#   /tmp/logs/hdfs/bucket-logs-tfile/3592/application_1732099242233_13592
# i.e. the bucket directory name is the last 4 digits of the application id.
if hadoop fs -test -e /tmp/logs/hdfs/bucket-logs-tfile; then
  logs_apps=$(hadoop fs -ls /tmp/logs/hdfs/bucket-logs-tfile/*/ | awk -F '/' '{print $NF}' | grep application)
  # Apps that have aggregated logs but are no longer running.
  invalid_bucket_log_apps=$(comm -23 <(echo "$logs_apps" | tr ' ' '\n' | sort) <(echo "$running_apps" | tr ' ' '\n' | sort))
  for app_id in $invalid_bucket_log_apps; do
    # ${app_id: -4} -> last 4 characters = the bucket directory name.
    hadoop fs -rm -r -skipTrash "/tmp/logs/hdfs/bucket-logs-tfile/${app_id: -4}/$app_id"
  done
fi
运行日志自动清理
- 开启yarn的日志聚合, 设置合理的过期天数
- 开启history server的进程, 用来清理历史数据