hadoop2.7.1
kafka
1 . 启动
bin/kafka-server-start.sh config/server.properties &
2 . 创建topic
bin/kafka-topics.sh --zookeeper localhost:2181 --create --topic eventOLAP --partitions 1 --replication-factor 1
3 . 查看topic
bin/kafka-topics.sh --zookeeper localhost:2181 --describe
pinot
1 . 启动 Controller
bin/pinot-admin.sh StartController -zkAddress localhost:2181 &
2 . 启动Broker
bin/pinot-admin.sh StartBroker -zkAddress localhost:2181 &
3 . 启动Server
bin/pinot-admin.sh StartServer -zkAddress localhost:2181 -dataDir /data/pinot_server_data -segmentDir /data_config/pinot_server_segment &
4 . 生成测试数据
bin/pinot-admin.sh GenerateData -numRecords 10000000 -numFiles 1 -outDir ./event_outdir -schemaFile /data/fht_pinot_schema.json
5 . 利用pinot工具发送数据到kafka,模拟实际的数据.
bin/pinot-admin.sh StreamAvroIntoKafka -avroFile ./event_outdir/part-0.avro -kafkaTopic eventOLAP -kafkaBrokerList localhost:9092 -zkAddress localhost:2181 &
gobblin
1 . Build, 修改源码的hadoop版本为自己需要依赖的版本,然后编译.
cd /path/gobblin  # 源码路径
./gradlew clean build -PhadoopVersion=2.7.1 -x test
2.解压(Build完会生成一个tar包,解压后然后复制到工作目录)
tar zxvf gobblin-dist*.tar
mv /path/gobblin-dist /opt
3.配置环境变量
cd /opt/gobblin-dist/bin
vim gobblin-env.sh
# Set Gobblin specific environment variables here.
export HADOOP_BIN_DIR=/opt/hadoop-2.7.1/bin
export FWDIR=/opt/gobblin-dist
export GOBBLIN_WORK_DIR=/opt/gobblin-dist/work
4 . 运行gobblin.
cd /opt/gobblin-dist
bin/gobblin-mapreduce.sh --workdir /opt/gobblin-dist/work/ --conf /opt/gobblin-dist/conf/kafka-gobblin-hdfs-avro.pull