flume安装

暂时没有配置高可用,后期可能会加
一个 agent 由 source、channel、sink 三种组件构成,三者可以自由组合

  1. flume下载
# Install under /opt/flume. Create and enter the directory BEFORE downloading,
# so the tarball lands where the tar command below looks for it.
mkdir -p /opt/flume && cd /opt/flume
# Mirrors only keep current releases; archive.apache.org keeps every release.
wget https://archive.apache.org/dist/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz
tar -zxvf apache-flume-1.8.0-bin.tar.gz
  1. 配置文件
# Create the agent config inside Flume's conf/ directory: the start command
# passes "--conf-file conf/test.conf" relative to the Flume home.
vim /opt/flume/apache-flume-1.8.0-bin/conf/test.conf

## Flume agent "a1": tail the nginx access log and write it to HDFS.
## NOTE: in a .properties file a comment must be on its own line; text after
## a value on the same line becomes part of the value.

## Agent: declare the component names
a1.sources = r1
a1.sinks = k1
a1.channels = c1

## Source r1
## Disabled alternative: spooling-directory source watching /opt/templog/
##a1.sources.r1.type = spooldir
##a1.sources.r1.spoolDir = /opt/templog/
a1.sources.r1.type = exec
## NOTE(review): deserializer.* looks like a spooling-directory-source option;
## the exec source presumably ignores it - confirm before relying on it.
a1.sources.r1.deserializer.outputCharset = UTF-8
## Log file to follow. It must be the file nginx actually writes: the nginx
## configuration in this guide sends the zjxxw server's access log to
## /var/log/nginx/zjxxw/access.log (nothing is written to /var/log/nginx/access.log).
a1.sources.r1.command = tail -F /var/log/nginx/zjxxw/access.log
a1.sources.r1.shell = /bin/bash -c

## Sink k1 (HDFS)
a1.sinks.k1.type = hdfs
## NOTE(review): with no roundValue/roundUnit set, rounding presumably falls
## back to the defaults (1 second) and has no visible effect - confirm.
a1.sinks.k1.hdfs.round = true
## Use the agent's local time (not an event header) for the %Y%m%d... escapes
a1.sinks.k1.hdfs.useLocalTimeStamp = true
##a1.sinks.k1.hdfs.path = hdfs://master:9000/user/flume/nginx_logs_temp/
##a1.sinks.k1.hdfs.path = hdfs://master:9000/user/flume/nginx_logs/20181124
##a1.sinks.k1.hdfs.filePrefix = 2018-11-24-%H interval=86400 rollSize=1000000 rollCount=10000
a1.sinks.k1.hdfs.path = hdfs://master:9000/user/flume/nginx_logs/zjxxw/%Y%m%d
a1.sinks.k1.hdfs.filePrefix = %Y-%m-%d-%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.minBlockReplicas = 1
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text
## Roll the file after this many seconds (default 30)
a1.sinks.k1.hdfs.rollInterval = 3600
## Roll the file once the temp file reaches this size in bytes (default 1024)
a1.sinks.k1.hdfs.rollSize = 524288
## Roll the file after this many events (default 10)
a1.sinks.k1.hdfs.rollCount = 2000
## Close the temp file and finalize it if nothing was written for this many seconds
a1.sinks.k1.hdfs.idleTimeout = 1800
## Number of events written per batch before flushing to HDFS
a1.sinks.k1.hdfs.batchSize = 20


## Channel c1
## Disabled alternative: file channel. Buffering in memory loses events if the
## agent dies; buffering in local files is safer.
#a1.channels.c1.type = file
#a1.channels.c1.checkpointDir = /opt/flume/apache-flume-1.8.0-bin/checkpoint
#a1.channels.c1.dataDirs = /opt/flume/apache-flume-1.8.0-bin/data
a1.channels.c1.type = memory
a1.channels.c1.capacity = 100
## Must be >= the sink's hdfs.batchSize
a1.channels.c1.transactionCapacity = 100


## Wire the three components together.
## A source takes "channels" (plural - it may feed several channels); a sink
## takes exactly one "channel". The original "a1.sources.r1.channel" used the
## wrong key for a source.
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
## Disabled: this key addressed a channel named r1, but the only declared
## channel is c1 (r1 is the source), so it configured nothing.
#a1.channels.r1.threads = 10
  1. 创建hdfs目录赋予权限
创建目录(-p 会同时创建不存在的上级目录):
    hadoop fs -mkdir -p /user/flume 或者 hdfs dfs -mkdir -p /user/flume
修改目录属主(chmod 只接受权限模式,如 755;把目录交给 root 用户应使用 chown):
    hadoop fs -chown -R root /user/flume 或者 hdfs dfs -chown -R root /user/flume

到这里flume安装完成

  1. 更改nginx日志文件格式
# For more information on configuration, see:
#   * Official English Documentation: http://nginx.org/en/docs/
#   * Official Russian Documentation: http://nginx.org/ru/docs/

user nginx;
worker_processes auto;
pid /var/run/nginx.pid;

# Load dynamic modules. See /usr/share/nginx/README.dynamic.
include /usr/share/nginx/modules/*.conf;

events {
    worker_connections  1024;
}


http {
    # Disabled alternative: JSON access-log format.
    # log_format  main  '{\"$remote_addr\"';
    # log_format main '{ "timestamp": "$time_local", '
    #           '"remote_addr": "$remote_addr", '
    #           '"remote_user": "$remote_user", '
    #           '"request": "$request", '
    #           '"status": $status, '
    #           '"bytes": $body_bytes_sent, '
    #           '"referer": "$http_referer", '
    #           '"agent": "$http_user_agent", '
    #           '"http_x_forwarded_for": "$http_x_forwarded_for", '
    #           '"response_time": "$upstream_response_time", '
    #           '"request_time": "$request_time", '
    #           '"http_cookie": "$http_cookie"'
    #           ' }';

    # Access-log format used by both servers below: the usual combined fields
    # plus request time, upstream response time and the Cookie header.
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for" '
                    '"$request_time" "$upstream_response_time" "$http_cookie"';

    # A second format named "error" (identical to "main") used to be defined
    # here. It was never referenced by any access_log, and error_log cannot use
    # a log_format: the second argument of error_log is a severity level.

    sendfile            on;
    tcp_nopush          on;
    tcp_nodelay         on;
    keepalive_timeout   65;
    types_hash_max_size 2048;
    client_max_body_size 200M;

    include             /etc/nginx/mime.types;
    default_type        application/octet-stream;

    # Load modular configuration files from the /etc/nginx/conf.d directory.
    # See http://nginx.org/en/docs/ngx_core_module.html#include
    # for more information.
    upstream zjxxw {
        server localhost:8005;
        #server localhost:8105;
    }

    upstream test {
        server 139.224.233.92:80;
    }

    # Port 80 -> upstream "zjxxw".
    # The log directories must already exist; nginx does not create them.
    server {
        listen       80;
        server_name  localhost;
        access_log  /var/log/nginx/zjxxw/access.log  main;
        # "error" is the minimum severity level to log, not a format name.
        error_log  /var/log/nginx/zjxxw/error.log  error;

        location / {
            proxy_redirect off;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_pass http://zjxxw;
        }

        location /static {
            proxy_pass http://localhost:8005/static;
        }
    }

    # Port 6300 -> upstream "test".
    server {
        listen       6300;
        server_name  localhost;

        access_log  /var/log/nginx/test/access.log  main;
        # "error" is the minimum severity level to log, not a format name.
        error_log  /var/log/nginx/test/error.log  error;

        location / {
            proxy_redirect off;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_pass http://test;
        }

        # NOTE(review): static files are proxied to localhost:8005 (the zjxxw
        # backend), not to the "test" upstream - confirm this is intended.
        location /static {
            proxy_pass http://localhost:8005/static;
        }
    }
    #include /etc/nginx/conf.d/*.conf;
}
  1. flume启动
cd /opt/flume/apache-flume-1.8.0-bin
# flume.root.logger takes "<level>,<appender>". With a level only (DEBUG) no
# appender is attached and nothing is logged; LOGFILE writes to logs/flume.log.
nohup flume-ng agent --conf conf/ --conf-file conf/test.conf --name a1 -Dflume.root.logger=DEBUG,LOGFILE &

# Check that the agent is running ([f]lume keeps grep from matching itself)
ps -ef | grep '[f]lume'
  1. 配置flume监控(因为我的flume会莫名其妙挂掉,看日志也不报错)
vim flume_monitor.sh

# 添加如下内容
#!/bin/bash
export JAVA_HOME=/usr/local/jdk8
. /etc/profile
. ~/.bash_profile
    
proc_name="zjxxw_flume.conf"        #进程名  
    
proc_num()                      #查询进程数量  
{  
    num=`ps -ef | grep $proc_name | grep -v grep | wc -l`  
    return $num  
}  
  
proc_num    
number=$?                       #获取进程数量  
if [ $number -eq 0 ]            #如果进程数量为0  
then                            #重新启动服务器,或者扩展其它内容。  
    cd /opt/flume/apache-flume-1.8.0-bin 
    nohup flume-ng agent --conf conf/ --conf-file conf/zjxxw_flume.conf --name a1 -Dflume.root.logger=DEBUG  &  
fi
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容