公司业务中有一些实时流计算任务,需要在作业运行期间在线更新配置内容(无需重启作业),因此使用 Flink 的广播状态(Broadcast State)来实现,测试 demo 如下:
import org.apache.flink.api.scala._
import com.xuehai.utils.Constants
import org.apache.flink.api.common.state.{BroadcastState, MapStateDescriptor}
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.streaming.util.serialization.SimpleStringSchema
import org.apache.flink.util.Collector
/**
 * Demo job showing how to update lookup configuration of a running stream
 * through Flink broadcast state.
 *
 * Two inputs are wired together:
 *  - a configuration stream (Kafka topic "PK-Rank" unioned with the lines of a
 *    local text file) whose records have the form `key,value`; each record is
 *    stored in the broadcast state;
 *  - a data stream (Kafka topic `topic`) whose records are looked up as keys in
 *    that broadcast state; the mapped value is printed.
 *
 * `props` and `topic` are not defined in this file — presumably they are
 * inherited from `Constants` (TODO confirm).
 */
object StreamBroadCastDemo extends Constants {

  /**
   * Builds the job graph and runs it.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Basic settings.
    env.setStateBackend(new FsStateBackend("file:///D:\\checkpoint"))
    // Enable checkpointing, taking one checkpoint per minute.
    env.enableCheckpointing(60000)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
    // Whether a task should fail when a checkpoint fails (defaults to true).
    env.getCheckpointConfig.setFailOnCheckpointingErrors(false)
    env.setParallelism(1)

    // Kafka consumer that delivers configuration updates at runtime.
    val configKafkaConsumer =
      new FlinkKafkaConsumer010[String]("PK-Rank", new SimpleStringSchema(), props)
    configKafkaConsumer.setStartFromLatest()

    // Read the initial configuration file and build the broadcast stream.
    val mapStateDescriptor = new MapStateDescriptor[String, String](
      "codeConfig",
      BasicTypeInfo.STRING_TYPE_INFO,
      BasicTypeInfo.STRING_TYPE_INFO)
    val init = env.readTextFile("D:\\code.txt")
    val broadStream = env
      .addSource(configKafkaConsumer)
      .union(init)
      .broadcast(mapStateDescriptor)

    // Kafka consumer for the real-time data stream.
    val kafkaConsumer =
      new FlinkKafkaConsumer010[String](topic, new SimpleStringSchema(), props)
    kafkaConsumer.setStartFromLatest()

    // Join the real-time stream with the broadcast configuration and print matches.
    env
      .addSource(kafkaConsumer)
      .connect(broadStream)
      .process(new BroadcastProcessFunction[String, String, String] {

        /** Stores one `key,value` configuration record in the broadcast state. */
        override def processBroadcastElement(
            value: String,
            ctx: BroadcastProcessFunction[String, String, String]#Context,
            out: Collector[String]): Unit = {
          val configMap: BroadcastState[String, String] =
            ctx.getBroadcastState(mapStateDescriptor)
          // Split once; as before, only the first two comma-separated fields are used.
          value.split(",") match {
            case Array(key, mapped, _*) => configMap.put(key, mapped)
            // Fewer than two fields (e.g. "abc" or "abc,"): skip the record
            // instead of failing the job with an ArrayIndexOutOfBoundsException.
            case _ =>
          }
        }

        /** Emits the configured value for `value`; emits nothing when no mapping exists. */
        override def processElement(
            value: String,
            ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext,
            out: Collector[String]): Unit = {
          val configMap = ctx.getBroadcastState(mapStateDescriptor)
          // Never emit null: the original author observed that as soon as a null
          // is output the configuration file is loaded again and the configuration
          // received from Kafka is overwritten (presumably the null record fails
          // downstream and the job restarts — TODO confirm). Keys without a
          // mapping are therefore dropped.
          Option(configMap.get(value)).foreach(name => out.collect(name))
        }
      })
      .print()

    env.execute("stream broadCast demo")
  }
}