1. 优雅的退出消费者程序
package com.bonc.rdpe.kafka110.consumer;
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;
/**
* @author YangYunhe
* @date 2018-07-17 11:05:39
* @description: 优雅的退出消费者
*/
public class QuitConsumer {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "rdpecore4:9092,rdpecore5:9092,rdpecore6:9092");
        props.put("group.id", "dev3-yangyunhe-topic001-group005");
        props.put("auto.offset.reset", "earliest");
        // FIX: offsets are committed manually below (commitAsync/commitSync), so
        // disable auto-commit; the default (true) would commit concurrently with
        // the manual commits and could silently move offsets.
        props.put("enable.auto.commit", "false");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);
        consumer.subscribe(Arrays.asList("dev3-yangyunhe-topic001"));

        final Thread mainThread = Thread.currentThread();

        /*
         * To exit the poll loop, another thread must call consumer.wakeup(),
         * which makes poll() throw a WakeupException. We do not need to handle
         * the WakeupException — it exists only as a way to break out of the loop.
         * wakeup() is the only KafkaConsumer method that is safe to call from
         * another thread; since the loop runs on the main thread, we call it
         * from a JVM shutdown hook.
         */
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                System.out.println("Starting exit...");
                consumer.wakeup();
                try {
                    // Wait for the main thread so it can close the consumer and
                    // commit the final offsets before the JVM exits.
                    mainThread.join();
                } catch (InterruptedException e) {
                    // Restore the interrupt flag instead of swallowing it.
                    Thread.currentThread().interrupt();
                }
            }
        });

        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(1000);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.println("topic = " + record.topic() + ", partition = " + record.partition()
                        + ", offset = " + record.offset());
                }
                consumer.commitAsync();
            }
        } catch (WakeupException e) {
            // Expected during shutdown — intentionally ignored.
        } finally {
            // Calling close() before exiting is essential: it commits anything not
            // yet committed and tells the group coordinator that this consumer is
            // leaving the group, triggering an immediate rebalance instead of
            // waiting for the session timeout.
            consumer.commitSync();
            consumer.close();
            System.out.println("Closed consumer and we are done");
        }
    }
}
2. 多线程消费者
KafkaConsumer是非线程安全的,多线程需要处理好线程同步,多线程的实现方式有多种,这里介绍一种:每个线程各自实例化一个KafkaConsumer对象,这种方式的缺点是:当这些线程属于同一个消费组时,线程的数量受限于分区数,当消费者线程的数量大于分区数时,就有一部分消费线程一直处于空闲状态
多线程消费者的线程实现类代码如下:
package com.bonc.rdpe.kafka110.thread;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.errors.WakeupException;
import org.apache.kafka.common.serialization.StringDeserializer;
/**
* @author YangYunhe
* @date 2018-07-17 10:48:45
* @description: 多线程消费者的线程实现类
*/
public class ConsumerLoop implements Runnable {

    private final KafkaConsumer<String, String> consumer;
    private final List<String> topics;
    private final int id;

    /**
     * Builds one consumer worker with its own KafkaConsumer instance
     * (KafkaConsumer is not thread-safe, so each thread owns one).
     *
     * @param id      numeric identifier printed with every record
     * @param groupId consumer group this worker joins
     * @param topics  topics to subscribe to
     */
    public ConsumerLoop(int id, String groupId, List<String> topics) {
        this.id = id;
        this.topics = topics;
        Properties config = new Properties();
        config.put("bootstrap.servers", "rdpecore4:9092,rdpecore5:9092,rdpecore6:9092");
        config.put("group.id", groupId);
        config.put("auto.offset.reset", "earliest");
        config.put("key.deserializer", StringDeserializer.class.getName());
        config.put("value.deserializer", StringDeserializer.class.getName());
        this.consumer = new KafkaConsumer<>(config);
    }

    /**
     * Polls indefinitely, printing partition/offset/value for every record,
     * until shutdown() wakes the consumer up.
     */
    @Override
    public void run() {
        try {
            consumer.subscribe(topics);
            while (true) {
                ConsumerRecords<String, String> batch = consumer.poll(Long.MAX_VALUE);
                for (ConsumerRecord<String, String> rec : batch) {
                    Map<String, Object> data = new HashMap<>();
                    data.put("partition", rec.partition());
                    data.put("offset", rec.offset());
                    data.put("value", rec.value());
                    System.out.println(this.id + ": " + data);
                }
            }
        } catch (WakeupException e) {
            // Expected: wakeup() is how shutdown() breaks the poll loop.
        } finally {
            consumer.close();
        }
    }

    /** Safe to call from any thread; makes the blocked poll() throw WakeupException. */
    public void shutdown() {
        consumer.wakeup();
    }
}
多线程消费者主程序代码如下:
package com.bonc.rdpe.kafka110.consumer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import com.bonc.rdpe.kafka110.thread.ConsumerLoop;
/**
* @author YangYunhe
* @date 2018-07-17 10:39:25
* @description: 多线程消费者主程序
*/
public class MultiThreadConsumer {

    public static void main(String[] args) {
        final int numConsumers = 3;
        final String groupId = "dev3-yangyunhe-topic001-group004";
        final List<String> topics = Arrays.asList("dev3-yangyunhe-topic001");

        final ExecutorService executor = Executors.newFixedThreadPool(numConsumers);
        final List<ConsumerLoop> consumers = new ArrayList<>();

        // One ConsumerLoop (and thus one KafkaConsumer) per thread;
        // all workers share the same consumer group.
        for (int workerId = 0; workerId < numConsumers; workerId++) {
            ConsumerLoop worker = new ConsumerLoop(workerId, groupId, topics);
            consumers.add(worker);
            executor.submit(worker);
        }

        // On JVM shutdown: wake each worker so its poll loop exits, then give
        // the pool a bounded amount of time to terminate cleanly.
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                for (ConsumerLoop worker : consumers) {
                    worker.shutdown();
                }
                executor.shutdown();
                try {
                    executor.awaitTermination(5000, TimeUnit.MILLISECONDS);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
        });
    }
}
运行结果:
2: {partition=2, offset=1216, value=...}
......
1: {partition=1, offset=1329, value=...}
......
0: {partition=0, offset=1292, value=...}
......
3. 独立消费者
有时候你可能只需要一个消费者从一个主题的所有分区或者某个特定的分区读取数据。这个时候就不需要消费者群组和再均衡了,只需要把主题或者分区分配给消费者,然后开始读取消息并提交偏移量。如果是这样的话,就不需要订阅主题,取而代之的是为自己分配分区。一个消费者可以订阅主题(并加入消费者群组),或者为自己分配分区,但不能同时做这两件事情。以下是独立消费者的示例代码:
package com.bonc.rdpe.kafka110.consumer;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
/**
* @author YangYunhe
* @date 2018-07-17 12:44:50
* @description: 独立消费者
*/
public class AloneConsumer {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "rdpecore4:9092,rdpecore5:9092,rdpecore6:9092");
        // A standalone consumer assigns partitions itself, so no group id is required.
        // props.put("group.id", "dev3-yangyunhe-topic001-group003");
        props.put("auto.offset.reset", "earliest");
        // FIX: the correct config key is "enable.auto.commit". The original
        // "auto.commit.offset" is not a valid Kafka property and was silently
        // ignored, leaving auto-commit enabled while this code also commits manually.
        props.put("enable.auto.commit", "false");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props);

        /*
         * partitionsFor(topic) returns the topic's current partition metadata.
         * It does not detect later partition changes, so either call it
         * periodically or rerun this program when the partition count changes.
         */
        List<PartitionInfo> partitionInfos = consumer.partitionsFor("dev3-yangyunhe-topic001");
        List<TopicPartition> partitions = new ArrayList<>();
        if (partitionInfos != null && !partitionInfos.isEmpty()) {
            for (PartitionInfo partition : partitionInfos) {
                partitions.add(new TopicPartition(partition.topic(), partition.partition()));
            }
            // assign() replaces subscribe(): a consumer may subscribe to topics
            // (joining a group) or self-assign partitions, but not both.
            consumer.assign(partitions);
            try {
                while (true) {
                    ConsumerRecords<String, String> records = consumer.poll(1000);
                    for (ConsumerRecord<String, String> record : records) {
                        System.out.println("partition = " + record.partition() + ", offset = " + record.offset());
                    }
                    consumer.commitAsync();
                }
            } finally {
                // Commit whatever is outstanding, then release the consumer's resources.
                consumer.commitSync();
                consumer.close();
            }
        }
    }
}