在安装clickhouse集群前,先学习一下clickhouse集群的概念。
shard 数据分片
把一份数据切分为多份,分别放在不同的数据库服务器上,通过多台服务器的资源提升数据访问效率。主要用于提升性能。如下图所示:原Collection1有1TB数据,进行分片后,数据拆分为4份,每份256G。也就是说:原来1TB的数据需要1台服务器来计算,分片后改为由4台服务器来计算,每台服务器只计算256G数据。
replica 副本
一份shard的副本。主要用于保证shard的高可用。
上图中ShardA~ShardD这4个shard中的任意一个不可用时,它的replica将代替不可用的shard,对外保证Collection1数据一致性。
总结:
综上,要让clickhouse高性能、高可用地运行,至少需要4台服务器,其中2台做shard,另外2台做这2个shard的replica。如下图:
搭建前准备
- 准备一台虚拟机2c4G,安装配置docker,安装docker-compose:
# 安装docker-compose脚本。
[root@docker ~]# curl -L "https://github.com/docker/compose/releases/download/1.26.2/docker-compose-$(uname -s)-$(uname -m)" -o /usr/local/bin/docker-compose
# 授予执行权限
[root@docker ~]# chmod +x /usr/local/bin/docker-compose
- 编写docker-compose.yml文件
[root@docker opt]# mkdir -p /opt/cluster-ch
[root@docker opt]# cd /opt/cluster-ch/
# 拉取镜像
[root@docker cluster-ch]# docker pull yandex/clickhouse-server:20.3
# 编写 yml文件
[root@docker cluster-ch]# vi docker-compose.yml
# 在文件中添加
version: '3.7'
services:
clickHouse1:
image: yandex/clickhouse-server:20.3
container_name: clickHouse1
environment:
TZ: Asia/Shanghai
HOSTNAME: clickHouse1
networks:
- net_docker
ulimits:
nofile:
soft: 262144
hard: 262144
volumes:
- ./docker_compose_data/node1/ch_log:/var/log/clickhouse-server
- ./docker_compose_data/node1/ch_data:/var/lib/clickhouse
- ./docker_compose_data/clickhouse-server1:/etc/clickhouse-server
ports:
- 9001:9000
- 8121:8123
- 9011:9009
clickHouse2:
image: yandex/clickhouse-server:20.3
container_name: clickHouse2
environment:
TZ: Asia/Shanghai
HOSTNAME: clickHouse2
networks:
- net_docker
ulimits:
nofile:
soft: 262144
hard: 262144
volumes:
- ./docker_compose_data/node2/ch_log:/var/log/clickhouse-server
- ./docker_compose_data/node2/ch_data:/var/lib/clickhouse
- ./docker_compose_data/clickhouse-server2:/etc/clickhouse-server
ports:
- 9002:9000
- 8122:8123
- 9012:9009
networks:
net_docker:
external: true
- 编写clickhouse的集群配置文件
由于资源有限,本次搭建一个 2个 shard , 0个 replica 的集群环境(即每个 shard 只有自身这 1 份数据,没有额外的副本,对应下文的集群名 cluster_2s_1r)。
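# 启动一个clickhouse容器后,拷贝配置文件所在目录到 docker-compose.yml文件中指定的目录下。
# 镜像ID因环境而异,这里直接使用镜像名启动。
[root@docker cluster-ch]# docker run -d --name ch yandex/clickhouse-server:20.3
# 先创建目标目录:如果 docker_compose_data 不存在,docker cp 会把配置文件直接拷贝到 docker_compose_data 下,而不是 docker_compose_data/clickhouse-server。
[root@docker cluster-ch]# mkdir -p docker_compose_data
[root@docker cluster-ch]# docker cp ch:/etc/clickhouse-server docker_compose_data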
[root@docker cluster-ch]# cd docker_compose_data/clickhouse-server
[root@docker clickhouse-server]# rm -rf preprocessed
# 编写集群配置文件
[root@docker clickhouse-server]# vi metrika.xml
<yandex>
<!-- 集群配置 -->
<clickhouse_remote_servers>
<cluster_2s_1r>
<!-- 数据分片1 -->
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>clickHouse1</host>
<port>9000</port>
<user>default</user>
<password></password>
</replica>
</shard>
<!-- 数据分片2 -->
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>clickHouse2</host>
<port>9000</port>
<user>default</user>
<password></password>
</replica>
</shard>
</cluster_2s_1r>
</clickhouse_remote_servers>
<networks>
<ip>::/0</ip>
</networks>
<!-- 数据压缩算法 -->
<clickhouse_compression>
<case>
<min_part_size>10000000000</min_part_size>
<min_part_size_ratio>0.01</min_part_size_ratio>
<method>lz4</method>
</case>
</clickhouse_compression>
<macros>
<shard>shard01</shard>
<replica>replica_shard01</replica>
</macros>
</yandex>
# 配置完成后,执行:
[root@docker clickhouse-server]# cd ../
[root@docker docker_compose_data]# mv clickhouse-server/ clickhouse-server1
# 配置config.xml
[root@docker docker_compose_data]# vi clickhouse-server1/config.xml
# 在文件末尾,</yandex> 标签前添加如下内容:
<include_from>/etc/clickhouse-server/metrika.xml</include_from>
[root@docker docker_compose_data]# cp -r clickhouse-server1/ clickhouse-server2
[root@docker docker_compose_data]# ls
clickhouse-server1 clickhouse-server2
# 编辑shard2的配置文件,
[root@docker docker_compose_data]# vi clickhouse-server2/metrika.xml
# 将<macros>标签改成如下内容:
<macros>
<shard>shard02</shard>
<replica>replica_shard02</replica>
</macros>
- 启动clickhouse集群(docker-compose.yml 中的 net_docker 声明为 external 网络,如果尚未创建,需要先执行 docker network create net_docker)
[root@docker docker_compose_data]# docker-compose up
....
clickHouse2 | Processing configuration file '/etc/clickhouse-server/config.xml'.
clickHouse2 | Merging configuration file '/etc/clickhouse-server/config.d/docker_related_config.xml'.
clickHouse2 | Include not found: clickhouse_remote_servers
clickHouse2 | Include not found: clickhouse_compression
clickHouse2 | Saved preprocessed configuration to '/var/lib/clickhouse//preprocessed_configs/config.xml'.
clickHouse1 | Processing configuration file '/etc/clickhouse-server/config.xml'.
clickHouse1 | Merging configuration file '/etc/clickhouse-server/config.d/docker_related_config.xml'.
clickHouse1 | Include not found: clickhouse_remote_servers
clickHouse1 | Include not found: clickhouse_compression
clickHouse1 | Saved preprocessed configuration to '/var/lib/clickhouse//preprocessed_configs/config.xml'.
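# 没有报错输出,安装完成。
# 注意:日志中的 "Include not found: clickhouse_remote_servers" 可能表示本次启动没有加载到 metrika.xml 中的集群配置,建议用 clickhouse-client 执行 select * from system.clusters 确认 cluster_2s_1r 是否生效。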
可视化clickhouse客户端
网上有很多客户端,个人使用的是DBeaver免费版本,下面介绍一下安装使用经验:
DBeaver CE 和 EE版本安装文件,都下载64位 ZIP,解压 EE版后找到drivers文件夹,将文件夹复制到解压后DBeaver CE目录下。这样,再进行数据库连接就不用安装java的驱动jar包了。
启动DBeaver CE版本前,请先安装JDK。
最后连接效果如下图: