Flink Streaming Broadcast Demo

Some of our real-time stream-computing jobs need to pick up changes to a configuration file while the job is running, which calls for Flink's broadcast stream. The test demo is as follows:

import org.apache.flink.api.scala._
import com.xuehai.utils.Constants
import org.apache.flink.api.common.state.{BroadcastState, MapStateDescriptor}
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import org.apache.flink.runtime.state.filesystem.FsStateBackend
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010
import org.apache.flink.streaming.util.serialization.SimpleStringSchema
import org.apache.flink.util.Collector

object StreamBroadCastDemo extends Constants {
    def main(args: Array[String]): Unit = {
        val env = StreamExecutionEnvironment.getExecutionEnvironment

        // Basic environment settings
        env.setStateBackend(new FsStateBackend("file:///D:\\checkpoint"))
        env.enableCheckpointing(60000) // Enable checkpointing, taking a checkpoint every 60 seconds
        env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)
        env.getCheckpointConfig.setFailOnCheckpointingErrors(false) // Whether the task fails when a checkpoint fails; defaults to true
        env.setParallelism(1)

        // Kafka consumer for the config-update (broadcast) stream
        val configKafkaConsumer = new FlinkKafkaConsumer010[String]("PK-Rank", new SimpleStringSchema(), props)
        configKafkaConsumer.setStartFromLatest()

        // Read the config file, union it with the config topic, and create the broadcast state
        val mapStateDescriptor = new MapStateDescriptor[String, String]("codeConfig", BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
        val init = env.readTextFile("D:\\code.txt")
        val broadStream = env.addSource(configKafkaConsumer).union(init).broadcast(mapStateDescriptor)

        // Kafka consumer for the main data stream
        val kafkaConsumer = new FlinkKafkaConsumer010[String](topic, new SimpleStringSchema(), props)
        kafkaConsumer.setStartFromLatest()

        // Read the real-time stream, connect it with the broadcast config, and print the result
        env.addSource(kafkaConsumer).connect(broadStream).process(new BroadcastProcessFunction[String, String, String] {
            override def processBroadcastElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#Context, out: Collector[String]): Unit = {
                // Each broadcast record is a comma-separated "key,value" pair that updates the broadcast map state
                val configMap: BroadcastState[String, String] = ctx.getBroadcastState(mapStateDescriptor)
                val parts = value.split(",")
                configMap.put(parts(0), parts(1))
            }

            override def processElement(value: String, ctx: BroadcastProcessFunction[String, String, String]#ReadOnlyContext, out: Collector[String]): Unit = {
                val configMap = ctx.getBroadcastState(mapStateDescriptor)
                val name: String = configMap.get(value)

                // Keys that are not in the config file have to be checked and handled; otherwise
                // the config file gets reloaded, and whenever that happens the broadcast entries
                // previously received from Kafka are overwritten. Here unknown keys are simply emitted as null.
                if (name == null) out.collect(null)
                else out.collect(name)
            }
        }).print()
        env.execute("stream broadCast demo")
    }
}
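
The demo imports a company-internal helper, com.xuehai.utils.Constants, which is not shown in the post; it only has to supply the Kafka consumer properties (props) and the main topic name (topic) used above. Below is a minimal sketch of such a trait, assuming exactly those two members; the broker address, consumer group, and topic name are placeholders, not the real values.

import java.util.Properties

trait Constants {
    // Kafka consumer properties shared by both FlinkKafkaConsumer010 instances
    val props: Properties = {
        val p = new Properties()
        p.setProperty("bootstrap.servers", "localhost:9092") // placeholder broker address
        p.setProperty("group.id", "stream-broadcast-demo")   // placeholder consumer group
        p
    }

    // Topic of the main data stream; the config stream reads the "PK-Rank" topic directly
    val topic: String = "demo-topic" // placeholder topic name
}

With this in place, each broadcast record (a line of D:\code.txt or a message on PK-Rank) is expected to be a comma-separated "key,value" pair such as 1001,Alice, while each record on the main topic is a bare key such as 1001; the job prints the mapped value, or null when the key is not yet in the broadcast state.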
