暂时没有配置高可用,后期可能会加
一个 agent 由 source、channel、sink 三部分组成,三者可以自由组合
- flume下载
# Install under /opt/flume. Create the directory and enter it BEFORE
# downloading, so the archive ends up where tar looks for it.
mkdir -p /opt/flume
cd /opt/flume
# archive.apache.org keeps every past release; ordinary mirrors drop old
# versions such as 1.8.0.
wget https://archive.apache.org/dist/flume/1.8.0/apache-flume-1.8.0-bin.tar.gz
tar -zxvf apache-flume-1.8.0-bin.tar.gz
- 配置文件(放在 flume 的 conf 目录下,与后面启动命令中的 conf/test.conf 对应)
vim /opt/flume/apache-flume-1.8.0-bin/conf/test.conf
## Agent a1: one source (r1), one sink (k1), one channel (c1)
a1.sources = r1
a1.sinks = k1
a1.channels = c1

## Source r1
## Alternative kept for reference: watch a spool directory instead of tailing a file
##a1.sources.r1.type = spooldir
##a1.sources.r1.spoolDir = /opt/templog/
a1.sources.r1.type = exec
## NOTE(review): deserializer.* keys are documented for the spooling-directory
## source; presumably the exec source ignores this line - confirm.
a1.sources.r1.deserializer.outputCharset = UTF-8
## Log file to follow.
## NOTE(review): the nginx configuration later in these notes writes the access
## log to /var/log/nginx/zjxxw/access.log - confirm that the path below is the
## file that actually receives traffic.
a1.sources.r1.command = tail -F /var/log/nginx/access.log
a1.sources.r1.shell = /bin/bash -c

## Sink k1: write events to HDFS
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.round = true
## Use the agent's local clock to resolve the %Y%m%d / %H escapes below
a1.sinks.k1.hdfs.useLocalTimeStamp = true
## Earlier path/prefix experiments, kept for reference
##a1.sinks.k1.hdfs.path = hdfs://master:9000/user/flume/nginx_logs_temp/
##a1.sinks.k1.hdfs.path = hdfs://master:9000/user/flume/nginx_logs/20181124
##a1.sinks.k1.hdfs.filePrefix = 2018-11-24-%H interval=86400 rollSize=1000000 rollCount=10000
a1.sinks.k1.hdfs.path = hdfs://master:9000/user/flume/nginx_logs/zjxxw/%Y%m%d
a1.sinks.k1.hdfs.filePrefix = %Y-%m-%d-%H
a1.sinks.k1.hdfs.fileSuffix = .log
a1.sinks.k1.hdfs.minBlockReplicas = 1
a1.sinks.k1.hdfs.fileType = DataStream
a1.sinks.k1.hdfs.writeFormat = Text
## Roll the file after this many seconds (default 30)
a1.sinks.k1.hdfs.rollInterval = 3600
## Roll the file once the temporary file reaches this size in bytes (default 1024)
a1.sinks.k1.hdfs.rollSize = 524288
## Roll the file after this many events (default 10)
a1.sinks.k1.hdfs.rollCount = 2000
## Close the temporary file and publish it when nothing has been written for this many seconds
a1.sinks.k1.hdfs.idleTimeout = 1800
## Number of events written per batch before flushing to HDFS
a1.sinks.k1.hdfs.batchSize = 20

## Channel c1
## File-channel alternative, kept for reference
#a1.channels.c1.type = file
#a1.channels.c1.checkpointDir = /opt/flume/apache-flume-1.8.0-bin/checkpoint
#a1.channels.c1.dataDirs = /opt/flume/apache-flume-1.8.0-bin/data
## Buffering events in memory is not safe against data loss; a local file
## buffer (above) is the safer choice.
a1.channels.c1.type = memory
a1.channels.c1.capacity = 100
a1.channels.c1.transactionCapacity = 100

## Wire the source and the sink to the channel.
## A source is bound with the plural key "channels", a sink with the singular
## key "channel". The former "a1.channels.r1.threads = 10" line is dropped:
## r1 is a source, not a channel, so that key configured nothing.
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
- 创建hdfs目录赋予权限
创建目录:
hadoop fs -mkdir -p /user/flume 或者 hdfs dfs -mkdir -p /user/flume
修改目录属主(chmod 需要的是权限模式,这里要改的是属主,应使用 chown):
hadoop fs -chown -R root /user/flume 或者 hdfs dfs -chown -R root /user/flume
到这里flume安装完成
- 更改nginx日志文件格式
# Main nginx configuration: defines the access-log formats and two
# reverse-proxy virtual servers (ports 80 and 6300), each with its own log files.
# For more information on configuration, see:
# * Official English Documentation: http://nginx.org/en/docs/
# * Official Russian Documentation: http://nginx.org/ru/docs/
user nginx;
worker_processes auto;
pid /var/run/nginx.pid;
# Load dynamic modules. See /usr/share/nginx/README.dynamic.
include /usr/share/nginx/modules/*.conf;
events {
worker_connections 1024;
}
http {
# Earlier JSON-style access-log format, currently disabled.
# log_format main '{\"$remote_addr\"';
# log_format main '{ "timestamp": "$time_local", '
# '"remote_addr": "$remote_addr", '
# '"remote_user": "$remote_user", '
# '"request": "$request", '
# '"status": $status, '
# '"bytes": $body_bytes_sent, '
# '"referer": "$http_referer", '
# '"agent": "$http_user_agent", '
# '"http_x_forwarded_for": "$http_x_forwarded_for", '
# '"response_time": "$upstream_response_time", '
# '"request_time": "$request_time", '
# '"http_cookie": "$http_cookie"'
# ' }';
# Active access-log format "main": client address, user, time, request line,
# status, body size, referer, user agent, X-Forwarded-For, request time,
# upstream response time and cookie header.
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'"$request_time" "$upstream_response_time" "$http_cookie"';
# Format "error": same field list as "main".
# NOTE(review): no access_log in this file references this format, and the
# second argument of the error_log directives below is a severity level, not
# a log_format name - confirm whether this definition is still needed.
log_format error '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for" '
'"$request_time" "$upstream_response_time" "$http_cookie"';
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
client_max_body_size 200M;
include /etc/nginx/mime.types;
default_type application/octet-stream;
# Load modular configuration files from the /etc/nginx/conf.d directory.
# See http://nginx.org/en/docs/ngx_core_module.html#include
# for more information.
# (The conf.d include itself is commented out at the end of this http block.)
# Backend group for the port-80 site; a second backend is commented out.
upstream zjxxw{
server localhost:8005;
#server localhost:8105;
}
# Backend group for the port-6300 site.
upstream test{
server 139.224.233.92:80;
}
# Port 80: proxies every request to upstream "zjxxw" and logs to
# /var/log/nginx/zjxxw/ using the "main" format.
server {
listen 80;
server_name localhost;
access_log /var/log/nginx/zjxxw/access.log main;
error_log /var/log/nginx/zjxxw/error.log error;
location / {
proxy_redirect off;
# Pass the original host and client address on to the backend.
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_pass http://zjxxw;
}
# /static is proxied directly to localhost:8005 rather than through the upstream group.
location /static {
proxy_pass http://localhost:8005/static;
}
}
# Port 6300: same layout as the port-80 server, but proxies to upstream
# "test" and logs to /var/log/nginx/test/.
server {
listen 6300;
server_name localhost;
access_log /var/log/nginx/test/access.log main;
error_log /var/log/nginx/test/error.log error;
location / {
proxy_redirect off;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_pass http://test;
}
# NOTE(review): /static here still points at localhost:8005, the backend of
# the port-80 site, not at upstream "test" - confirm this is intended.
location /static {
proxy_pass http://localhost:8005/static;
}
}
#include /etc/nginx/conf.d/*.conf;
}
- flume启动
cd /opt/flume/apache-flume-1.8.0-bin
# Start agent a1 in the background. flume.root.logger takes "LEVEL,APPENDER";
# a bare level names no appender, so log4j has nowhere to write. LOGFILE is the
# rolling-file appender of Flume's stock conf/log4j.properties (logs/flume.log).
nohup flume-ng agent --conf conf/ --conf-file conf/test.conf --name a1 -Dflume.root.logger=DEBUG,LOGFILE &
# Check that the agent is running; the [f] keeps grep from matching its own
# command line.
ps -ef | grep '[f]lume'
- 配置flume监控(因为我的flume会莫名其妙挂掉,看日志也不报错)
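vim flume_monitor.sh
# 添加如下内容(脚本每次运行只检查一次,需要配合 crontab 定时执行才能起到监控作用,
# 例如每 5 分钟一次:*/5 * * * * /bin/bash /opt/flume/flume_monitor.sh)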
#!/bin/bash
export JAVA_HOME=/usr/local/jdk8
. /etc/profile
. ~/.bash_profile
proc_name="zjxxw_flume.conf" #进程名
proc_num() #查询进程数量
{
num=`ps -ef | grep $proc_name | grep -v grep | wc -l`
return $num
}
proc_num
number=$? #获取进程数量
if [ $number -eq 0 ] #如果进程数量为0
then #重新启动服务器,或者扩展其它内容。
cd /opt/flume/apache-flume-1.8.0-bin
nohup flume-ng agent --conf conf/ --conf-file conf/zjxxw_flume.conf --name a1 -Dflume.root.logger=DEBUG &
fi