一、前言
写这篇文章的目的主要是为了实现无感知增量加载功能。
二、数据表
CREATE TABLE tb_test_partitions_one
(
`id` Int64,
`vipId` Int64,
`brandId` Int32,
`shopId` Int32,
`saleDate` Date,
`saleMoney` Float32
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(saleDate)
ORDER BY (brandId, shopId)
SETTINGS index_granularity = 8192;
CREATE TABLE tb_test_partitions_two
(
`id` Int64,
`vipId` Int64,
`brandId` Int32,
`shopId` Int32,
`saleDate` Date,
`saleMoney` Float32
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(saleDate)
ORDER BY (brandId, shopId)
SETTINGS index_granularity = 8192;
insert into tb_test_partitions_one values
(10001,8002,429,6001,'2020-11-02 14:15:23',300.50),
(10002,8001,430,6001,'2020-11-02 14:15:23',100.50),
(10003,8001,429,6001,'2020-10-02 14:15:23',100.50);
insert into tb_test_partitions_two values
(10004,8002,429,6001,'2020-11-02 14:15:23',300.50),
(10005,8001,430,6001,'2020-11-02 14:15:23',100.50),
(10006,8001,429,6001,'2020-12-02 14:15:23',100.50);
ch101 :) select * from tb_test_partitions_one;
Query id: 7383011e-2f22-4dad-9ec9-8f0649a1fe40
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10001 │ 8002 │ 429 │ 6001 │ 2020-11-02 │ 300.5 │
│ 10002 │ 8001 │ 430 │ 6001 │ 2020-11-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10003 │ 8001 │ 429 │ 6001 │ 2020-10-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
3 rows in set. Elapsed: 0.021 sec.
ch101 :) select * from tb_test_partitions_two;
Query id: 8d73ec62-4a07-4cf3-a551-1a125bd5e7dd
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10006 │ 8001 │ 429 │ 6001 │ 2020-12-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10004 │ 8002 │ 429 │ 6001 │ 2020-11-02 │ 300.5 │
│ 10005 │ 8001 │ 430 │ 6001 │ 2020-11-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
3 rows in set. Elapsed: 0.024 sec.
SELECT
partition,
name,
part_type,
active,
rows,
bytes_on_disk,
data_compressed_bytes
from system.parts where table='tb_test_partitions_one';
Query id: 6de7f0ea-63a5-4dd4-b037-b12dcef59c04
┌─partition─┬─name─────────┬─part_type─┬─active─┬─rows─┬─bytes_on_disk─┬─data_compressed_bytes─┐
│ 202010 │ 202010_2_2_0 │ Compact │ 1 │ 1 │ 419 │ 186 │
│ 202011 │ 202011_1_1_0 │ Compact │ 1 │ 2 │ 445 │ 212 │
└───────────┴──────────────┴───────────┴────────┴──────┴───────────────┴───────────────────────┘
2 rows in set. Elapsed: 0.016 sec.
SELECT
partition,
name,
part_type,
active,
rows,
bytes_on_disk,
data_compressed_bytes
from system.parts where table='tb_test_partitions_two';
Query id: 00344ec5-ef9c-441f-899d-87853ed06712
┌─partition─┬─name─────────┬─part_type─┬─active─┬─rows─┬─bytes_on_disk─┬─data_compressed_bytes─┐
│ 202011 │ 202011_1_1_0 │ Compact │ 1 │ 2 │ 445 │ 212 │
│ 202012 │ 202012_2_2_0 │ Compact │ 1 │ 1 │ 419 │ 186 │
└───────────┴──────────────┴───────────┴────────┴──────┴───────────────┴───────────────────────┘
2 rows in set. Elapsed: 0.014 sec.
三、跨分区替换 - 两个表都存在的分区
- 语法
ALTER TABLE tb_test_partitions_two REPLACE PARTITION 202011
FROM tb_test_partitions_one
- 实例
ch101 :) select * from tb_test_partitions_two;
Query id: facf5d87-7878-4744-943d-1fd160f4c9e2
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10006 │ 8001 │ 429 │ 6001 │ 2020-12-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10001 │ 8002 │ 429 │ 6001 │ 2020-11-02 │ 300.5 │
│ 10002 │ 8001 │ 430 │ 6001 │ 2020-11-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
3 rows in set. Elapsed: 0.011 sec.
四、跨分区替换 - 目标表里不存在对应分区
- 语法
ch101 :) ALTER TABLE tb_test_partitions_two REPLACE PARTITION 202010
FROM tb_test_partitions_one;
- 实例
ch101 :) select * from tb_test_partitions_two;
Query id: e8d3575c-4d98-4fb7-a278-3cb248db1f92
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10001 │ 8002 │ 429 │ 6001 │ 2020-11-02 │ 300.5 │
│ 10002 │ 8001 │ 430 │ 6001 │ 2020-11-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10006 │ 8001 │ 429 │ 6001 │ 2020-12-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
┌────id─┬─vipId─┬─brandId─┬─shopId─┬───saleDate─┬─saleMoney─┐
│ 10003 │ 8001 │ 429 │ 6001 │ 2020-10-02 │ 100.5 │
└───────┴───────┴─────────┴────────┴────────────┴───────────┘
4 rows in set. Elapsed: 0.028 sec.
SELECT
partition,
name,
part_type,
active,
rows,
bytes_on_disk,
data_compressed_bytes
FROM system.parts
WHERE table = 'tb_test_partitions_two';
Query id: fa593870-2374-4a57-91c1-407a319865ee
┌─partition─┬─name─────────┬─part_type─┬─active─┬─rows─┬─bytes_on_disk─┬─data_compressed_bytes─┐
│ 202010 │ 202010_6_6_0 │ Compact │ 1 │ 1 │ 419 │ 186 │
│ 202011 │ 202011_4_4_0 │ Compact │ 1 │ 2 │ 445 │ 212 │
│ 202012 │ 202012_2_2_0 │ Compact │ 1 │ 1 │ 419 │ 186 │
└───────────┴──────────────┴───────────┴────────┴──────┴───────────────┴───────────────────────┘
3 rows in set. Elapsed: 0.015 sec.
参考文章
https://blog.csdn.net/m0_37813354/article/details/110847747
后记
网络上看到一个方案,如下:
1. create tmp,insert new data to tmp table
2. alter table tmp detach partition ‘2018-07-08’
3. mv /ck_data_dir/data/tmp/detach/* /ck_data_dir/data/t/detach
4. alter table t drop partition ‘2018-07-08’
5. alter table t attach partition ‘2018-07-08’