-
环境准备
-
SQL Server 中创建测试库表
-- Create the database used throughout this walkthrough.
-- GO is a batch separator for sqlcmd/SSMS and must sit on a line by itself.
CREATE DATABASE test;
GO

-- Switch to the new database; without this the table below would be created
-- in whatever database the session is currently connected to.
USE test;
GO

-- Source table for the CDC test: identity primary key plus a name column.
CREATE TABLE [dbo].[cdc_test] (
    [id]   int IDENTITY(1,1) NOT NULL,
    [name] varchar(60) COLLATE Chinese_PRC_CI_AS NOT NULL,
    CONSTRAINT [PK_cdc_test] PRIMARY KEY CLUSTERED ([id])
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];
GO

ALTER TABLE [dbo].[cdc_test] SET (LOCK_ESCALATION = TABLE);
GO
-
对测试库表启用 CDC
-- Enable CDC on the test database.
USE test;
GO
EXECUTE sys.sp_cdc_enable_db;
GO

-- Enable CDC on the dbo.cdc_test table.
USE test;
GO
EXEC sys.sp_cdc_enable_table
    @source_schema = N'dbo',
    @source_name = N'cdc_test',
    -- NULL: no gating role is used to restrict access to the change data.
    @role_name = NULL,
    -- 1: also generate the net-changes query function; per the SQL Server
    -- docs this requires a primary key or unique index on the source table.
    @supports_net_changes = 1;
GO
参考: https://blog.csdn.net/weixin_43215250/article/details/105813087
-
在 HUE 上创建 KUDU 表
-- Target database for the replicated data.
CREATE DATABASE IF NOT EXISTS test;

-- Kudu-backed target table with an id primary key and a name column,
-- hash-partitioned into 16 partitions.
CREATE TABLE IF NOT EXISTS test.cdc_test (
    id   INT,
    name STRING,
    PRIMARY KEY (id)
)
PARTITION BY HASH PARTITIONS 16
STORED AS KUDU;
-
-
创建 StreamSets 的 Pipeline
SQL Server CDC 客户端配置
Stream Selector 配置
${record:attribute('sdc.operation.type') == 5 }
${record:attribute('sdc.operation.type') == 5 }
Kudu 配置
-
启动 Pipelines