1. Editing the Ceph CRUSH map to store hot and cold data separately on SSD and HDD disks
1.1 Cluster maps
A Ceph cluster's mon servers maintain five kinds of cluster maps:
1. monitor map
2. OSD map
3. PG map
4. CRUSH map/Controlled Replication Under Scalable Hashing, a controlled, replicated and scalable placement algorithm based on consistent hashing
5. MDS map/CephFS metadata map
CRUSH map: when a new pool is created, a new set of PG-to-OSD mappings is generated from the OSD map to store the data.
How the CRUSH algorithm selects target nodes:
There are currently five bucket algorithms for node selection: Uniform, List, Tree, Straw and Straw2. Early versions used straw, invented by the founder of the Ceph project; it has since been superseded by the community-optimized straw2.
straw (draw-lots algorithm):
Each OSD draws a "straw" whose length is derived from the OSD's weight. When a pool is created and OSDs are assigned to PGs, the straw algorithm iterates over the available OSDs and prefers the one that draws the longest straw, so OSDs with higher weights are assigned more PGs and therefore store more data.
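A quick way to confirm which bucket algorithm the cluster's buckets actually use is to inspect the CRUSH dump; a rough sketch (the alg field should read straw2 on recent releases):
#list every bucket name together with its bucket algorithm
ceph osd crush dump -f json-pretty | grep -E '"name"|"alg"'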
1.2 CRUSH-based classified placement
When the CRUSH algorithm assigns PGs, it can spread each PG across OSDs on different hosts, giving host-level high availability; this is the default behavior. It cannot, however, guarantee that PGs land on hosts in different racks or rooms, nor can it by itself place project A's data on SSDs and project B's data on HDDs. To build failure domains at rack level or higher (e.g. per IDC), or to separate pools by disk type, export the CRUSH map, edit it by hand, and then import it back to replace the original map.
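As a side note, recent Ceph releases can usually achieve the same SSD/HDD split without hand-editing the map, by assigning device classes to the OSDs and creating class-aware rules; a hedged sketch (the class and rule names here are illustrative, not taken from the lab below):
#tag the SSD OSDs with the ssd device class (clear any existing class first)
ceph osd crush rm-device-class osd.4 osd.9 osd.14 osd.19
ceph osd crush set-device-class ssd osd.4 osd.9 osd.14 osd.19
#create replicated rules that only pick OSDs of a given class, with host as the failure domain
ceph osd crush rule create-replicated ssd_rule default host ssd
ceph osd crush rule create-replicated hdd_rule default host hdd
The rest of this section follows the manual-edit approach.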


1.3 Modify the CRUSH map and verify
1.3.1 Export the CRUSH map and decompile it to text
#export
root@ceph-deploy:/opt/crush# ceph osd getcrushmap -o ./crushmap-v1
54
#decompile to text
root@ceph-deploy:/opt/crush# apt install ceph-base #must be installed to get crushtool, which converts the map to text
root@ceph-deploy:/opt/crush# crushtool -d ./crushmap-v1 > ./crushmap-v1.txt
1.3.2 Add an HDD-only rule
Add CRUSH buckets and a rule that cover only the HDD disks, excluding the SSD disk of every storage node. The excluded disks are osd.4, osd.9, osd.14 and osd.19.
Three kinds of entries need to be added:
1. host buckets: which OSDs belong to which (logical) host
2. root: which host buckets the new root maps to
3. rule: the placement rule that pools will reference
root@ceph-deploy:/opt/crush# cat crushmap-v1.txt
...
#content to add is as follows
#HDD
host ceph-hddnode1 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
}
host ceph-hddnode2 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
}
host ceph-hddnode3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
}
host ceph-hddnode4 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.098
}
#HDD
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.954
alg straw2
hash 0 # rjenkins1
item ceph-hddnode1 weight 0.488
item ceph-hddnode2 weight 0.488
item ceph-hddnode3 weight 0.488
item ceph-hddnode4 weight 0.488
}
#HDD
rule hdd_replicated_rule {
id 30
type replicated
min_size 1
max_size 10
step take hdd #select OSDs from the hdd root defined above
step chooseleaf firstn 0 type host
step emit
}
The complete file now looks like this:
root@ceph-deploy:/opt/crush# cat crushmap-v1.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
# buckets
host ceph-node1 {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
item osd.4 weight 0.098
}
host ceph-node2 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
item osd.9 weight 0.098
}
host ceph-node3 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
item osd.14 weight 0.098
}
host ceph-node4 {
id -9 # do not change unnecessarily
id -10 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.098
item osd.19 weight 0.098
}
#HDD
host ceph-hddnode1 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
}
host ceph-hddnode2 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
}
host ceph-hddnode3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
}
host ceph-hddnode4 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.488
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.098
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 1.954
alg straw2
hash 0 # rjenkins1
item ceph-node1 weight 0.488
item ceph-node2 weight 0.488
item ceph-node3 weight 0.488
item ceph-node4 weight 0.488
}
#HDD
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.954
alg straw2
hash 0 # rjenkins1
item ceph-hddnode1 weight 0.488
item ceph-hddnode2 weight 0.488
item ceph-hddnode3 weight 0.488
item ceph-hddnode4 weight 0.488
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
#HDD
rule hdd_replicated_rule {
id 30
type replicated
min_size 1
max_size 10
step take hdd
step chooseleaf firstn 0 type host
step emit
}
# end crush map
1.3.3 Compile the text file back into a CRUSH map and import it
#compile
root@ceph-deploy:/opt/crush# crushtool -c ./crushmap-v1.txt -o crushmap-v2
#import
root@ceph-deploy:/opt/crush# ceph osd setcrushmap -i ./crushmap-v2
55
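Optionally, the compiled map can be dry-run with crushtool's test mode (ideally before importing) to confirm that the new rule only maps PGs onto the intended OSDs; a hedged sketch using rule id 30 as defined above:
#simulate 10 placements (x values 0-9) for rule 30 with 3 replicas;
#the mappings should never contain osd.4, osd.9, osd.14 or osd.19
crushtool -i ./crushmap-v2 --test --rule 30 --num-rep 3 --min-x 0 --max-x 9 --show-mappings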
1.3.4 Verify that the new CRUSH map is in effect
root@ceph-deploy:/opt/crush# ceph osd crush rule dump
[
{
"rule_id": 0,
"rule_name": "replicated_rule",
"ruleset": 0,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 30,
"rule_name": "hdd_replicated_rule",
"ruleset": 30,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -21,
"item_name": "hdd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
}
]
1.3.5 Apply the rule to an existing pool
Applying a new rule to an existing pool immediately triggers PG/data migration!
root@ceph-deploy:/opt/crush# ceph osd pool set cephfs-data crush_rule hdd_replicated_rule
set pool 6 crush_rule to hdd_replicated_rule
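A quick sanity check of the pool's effective rule:
#confirm which CRUSH rule the pool now uses (expected: crush_rule: hdd_replicated_rule)
ceph osd pool get cephfs-data crush_rule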
1.3.6 Test the HDD rule with a new pool
#create a pool and specify its CRUSH rule
root@ceph-deploy:/opt/crush# ceph osd pool create hddpool 32 32 hdd_replicated_rule
pool 'hddpool' created
#verify the PG mappings
#the PG/OSD combinations of hddpool should not include the SSD OSDs (osd.4, 9, 14, 19)
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool hddpool | awk '{print $1,$2,$15}'
PG OBJECTS ACTING
14.0 0 [2,8,13]p2
14.1 0 [15,13,6]p15
14.2 0 [12,8,0]p12
14.3 0 [18,6,1]p18
14.4 0 [15,1,8]p15
14.5 0 [17,12,2]p17
14.6 0 [10,1,6]p10
14.7 0 [15,2,5]p15
14.8 0 [0,7,10]p0
14.9 0 [10,3,15]p10
14.a 0 [17,8,3]p17
14.b 0 [0,6,11]p0
14.c 0 [13,18,2]p13
14.d 0 [6,18,13]p6
14.e 0 [1,12,18]p1
14.f 0 [11,5,1]p11
14.10 0 [6,1,17]p6
14.11 0 [16,2,7]p16
14.12 0 [2,11,15]p2
14.13 0 [8,3,17]p8
14.14 0 [1,5,13]p1
14.15 0 [7,18,13]p7
14.16 0 [12,8,3]p12
14.17 0 [17,5,0]p17
14.18 0 [5,3,16]p5
14.19 0 [3,6,10]p3
14.1a 0 [10,7,18]p10
14.1b 0 [16,7,0]p16
14.1c 0 [13,18,1]p13
14.1d 0 [0,18,5]p0
14.1e 0 [7,2,18]p7
14.1f 0 [8,11,0]p8
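The same thing can be checked with a rough scripted filter; if the grep prints nothing, no PG in hddpool uses an SSD OSD (a sketch):
#look for any of the excluded OSD ids as whole words in the ACTING column
ceph pg ls-by-pool hddpool | awk 'NR>1{print $15}' | grep -wE '4|9|14|19' || echo "no ssd OSDs (4/9/14/19) in hddpool"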
1.3.7 Add an SSD rule
Add CRUSH buckets and a rule for the SSD disks (osd.4, 9, 14, 19). The SSDs are simulated with HDDs, so their device class remains hdd.
#same procedure as for hdd above; the content to add is as follows
#SSD
host ceph-ssdnode1 {
id -33 # do not change unnecessarily
id -34 class hdd # do not change unnecessarily #the ssd is only simulated, the physical disk is still an hdd, so the class stays hdd
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.4 weight 0.098
}
host ceph-ssdnode2 {
id -35 # do not change unnecessarily
id -36 class hdd # do not change unnecessarily #the ssd is only simulated, the physical disk is still an hdd, so the class stays hdd
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.9 weight 0.098
}
host ceph-ssdnode3 {
id -37 # do not change unnecessarily
id -38 class hdd # do not change unnecessarily #the ssd is only simulated, the physical disk is still an hdd, so the class stays hdd
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.14 weight 0.098
}
host ceph-ssdnode4 {
id -39 # do not change unnecessarily
id -40 class hdd # do not change unnecessarily #the ssd is only simulated, the physical disk is still an hdd, so the class stays hdd
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.19 weight 0.098
}
#SSD
root ssd {
id -41 # do not change unnecessarily
id -42 class hdd # do not change unnecessarily #the ssd is only simulated, the physical disk is still an hdd, so the class stays hdd
# weight 1.952
alg straw2
hash 0 # rjenkins1
item ceph-ssdnode1 weight 0.488
item ceph-ssdnode2 weight 0.488
item ceph-ssdnode3 weight 0.488
item ceph-ssdnode4 weight 0.488
}
#SSD
rule ssd_replicated_rule {
id 50
type replicated
min_size 1
max_size 10
step take ssd #select OSDs from the ssd root defined above
step chooseleaf firstn 0 type host
step emit
}
The complete file now looks like this (crushmap-v2.txt here is the previously imported map decompiled back to text, e.g. crushtool -d ./crushmap-v2 > ./crushmap-v2.txt, with the SSD entries added):
root@ceph-deploy:/opt/crush# cat crushmap-v2.txt
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
tunable chooseleaf_vary_r 1
tunable chooseleaf_stable 1
tunable straw_calc_version 1
tunable allowed_bucket_algs 54
# devices
device 0 osd.0 class hdd
device 1 osd.1 class hdd
device 2 osd.2 class hdd
device 3 osd.3 class hdd
device 4 osd.4 class hdd
device 5 osd.5 class hdd
device 6 osd.6 class hdd
device 7 osd.7 class hdd
device 8 osd.8 class hdd
device 9 osd.9 class hdd
device 10 osd.10 class hdd
device 11 osd.11 class hdd
device 12 osd.12 class hdd
device 13 osd.13 class hdd
device 14 osd.14 class hdd
device 15 osd.15 class hdd
device 16 osd.16 class hdd
device 17 osd.17 class hdd
device 18 osd.18 class hdd
device 19 osd.19 class hdd
# types
type 0 osd
type 1 host
type 2 chassis
type 3 rack
type 4 row
type 5 pdu
type 6 pod
type 7 room
type 8 datacenter
type 9 zone
type 10 region
type 11 root
# buckets
host ceph-node1 {
id -3 # do not change unnecessarily
id -4 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
item osd.4 weight 0.098
}
host ceph-node2 {
id -5 # do not change unnecessarily
id -6 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
item osd.9 weight 0.098
}
host ceph-node3 {
id -7 # do not change unnecessarily
id -8 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
item osd.14 weight 0.098
}
host ceph-node4 {
id -9 # do not change unnecessarily
id -10 class hdd # do not change unnecessarily
# weight 0.490
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.098
item osd.19 weight 0.098
}
root default {
id -1 # do not change unnecessarily
id -2 class hdd # do not change unnecessarily
# weight 1.952
alg straw2
hash 0 # rjenkins1
item ceph-node1 weight 0.488
item ceph-node2 weight 0.488
item ceph-node3 weight 0.488
item ceph-node4 weight 0.488
}
host ceph-hddnode1 {
id -13 # do not change unnecessarily
id -14 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.0 weight 0.098
item osd.1 weight 0.098
item osd.2 weight 0.098
item osd.3 weight 0.098
}
host ceph-hddnode2 {
id -15 # do not change unnecessarily
id -16 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.5 weight 0.098
item osd.6 weight 0.098
item osd.7 weight 0.098
item osd.8 weight 0.098
}
host ceph-hddnode3 {
id -17 # do not change unnecessarily
id -18 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.10 weight 0.098
item osd.11 weight 0.098
item osd.12 weight 0.098
item osd.13 weight 0.098
}
host ceph-hddnode4 {
id -19 # do not change unnecessarily
id -20 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.15 weight 0.098
item osd.16 weight 0.098
item osd.17 weight 0.098
item osd.18 weight 0.098
}
root hdd {
id -21 # do not change unnecessarily
id -22 class hdd # do not change unnecessarily
# weight 1.952
alg straw2
hash 0 # rjenkins1
item ceph-hddnode1 weight 0.488
item ceph-hddnode2 weight 0.488
item ceph-hddnode3 weight 0.488
item ceph-hddnode4 weight 0.488
}
#SSD
host ceph-ssdnode1 {
id -33 # do not change unnecessarily
id -34 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.4 weight 0.098
}
host ceph-ssdnode2 {
id -35 # do not change unnecessarily
id -36 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.9 weight 0.098
}
host ceph-ssdnode3 {
id -37 # do not change unnecessarily
id -38 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.14 weight 0.098
}
host ceph-ssdnode4 {
id -39 # do not change unnecessarily
id -40 class hdd # do not change unnecessarily
# weight 0.392
alg straw2
hash 0 # rjenkins1
item osd.19 weight 0.098
}
#SSD
root ssd {
id -41 # do not change unnecessarily
id -42 class hdd # do not change unnecessarily
# weight 1.952
alg straw2
hash 0 # rjenkins1
item ceph-ssdnode1 weight 0.488
item ceph-ssdnode2 weight 0.488
item ceph-ssdnode3 weight 0.488
item ceph-ssdnode4 weight 0.488
}
# rules
rule replicated_rule {
id 0
type replicated
min_size 1
max_size 10
step take default
step chooseleaf firstn 0 type host
step emit
}
rule hdd_replicated_rule {
id 30
type replicated
min_size 1
max_size 10
step take hdd
step chooseleaf firstn 0 type host
step emit
}
#SSD
rule ssd_replicated_rule {
id 50
type replicated
min_size 1
max_size 10
step take ssd
step chooseleaf firstn 0 type host
step emit
}
# end crush map
1.3.8 Compile the map containing the SSD rule and import it
#compile
root@ceph-deploy:/opt/crush# crushtool -c ./crushmap-v2.txt -o crushmap-v3
#import
root@ceph-deploy:/opt/crush# ceph osd setcrushmap -i ./crushmap-v3
56
1.3.9 Verify that the new CRUSH map is in effect
root@ceph-deploy:/opt/crush# ceph osd crush rule dump
[
{
"rule_id": 0,
"rule_name": "replicated_rule",
"ruleset": 0,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -1,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 30,
"rule_name": "hdd_replicated_rule",
"ruleset": 30,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -21,
"item_name": "hdd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": 50,
"rule_name": "ssd_replicated_rule",
"ruleset": 50,
"type": 1,
"min_size": 1,
"max_size": 10,
"steps": [
{
"op": "take",
"item": -41,
"item_name": "ssd"
},
{
"op": "chooseleaf_firstn",
"num": 0,
"type": "host"
},
{
"op": "emit"
}
]
}
]
1.3.10 Apply the rule to an existing pool
Applying a new rule to an existing pool immediately triggers PG/data migration!
root@ceph-deploy:/opt/crush# ceph osd pool set cephfs-data crush_rule ssd_replicated_rule
1.3.11 Test the SSD rule with a new pool
#create a pool and specify its CRUSH rule; if no rule is given, a new pool uses the crush map's default replicated_rule
root@ceph-deploy:/opt/crush# ceph osd pool create ssdpool 32 32 ssd_replicated_rule
pool 'ssdpool' created
#verify the PG mappings
#the PG/OSD combinations of ssdpool should contain only the SSD OSDs (osd.4, 9, 14, 19)
root@ceph-deploy:/opt/crush# ceph pg ls-by-pool ssdpool | awk '{print $1,$2,$15}'
PG OBJECTS ACTING
15.0 0 [19,14,9]p19
15.1 0 [19,9,14]p19
15.2 0 [4,14,19]p4
15.3 0 [14,4,9]p14
15.4 0 [4,19,9]p4
15.5 0 [9,19,14]p9
15.6 0 [14,4,19]p14
15.7 0 [14,9,4]p14
15.8 0 [14,9,4]p14
15.9 0 [9,14,19]p9
15.a 0 [4,9,14]p4
15.b 0 [14,9,4]p14
15.c 0 [9,19,4]p9
15.d 0 [9,14,4]p9
15.e 0 [14,4,9]p14
15.f 0 [14,19,9]p14
15.10 0 [9,4,14]p9
15.11 0 [19,9,14]p19
15.12 0 [4,9,14]p4
15.13 0 [14,4,19]p14
15.14 0 [9,4,14]p9
15.15 0 [9,19,4]p9
15.16 0 [9,19,4]p9
15.17 0 [4,14,9]p4
15.18 0 [9,14,4]p9
15.19 0 [9,14,19]p9
15.1a 0 [4,9,19]p4
15.1b 0 [9,19,14]p9
15.1c 0 [9,4,14]p9
15.1d 0 [19,4,9]p19
15.1e 0 [4,9,19]p4
15.1f 0 [14,19,9]p14
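Conversely, a rough scripted check that ssdpool only ever uses the four SSD OSDs; the de-duplicated list of OSD ids in the ACTING column should contain nothing but 4, 9, 14 and 19 (a sketch):
#extract every OSD id appearing in ssdpool's ACTING column and de-duplicate it
ceph pg ls-by-pool ssdpool | awk 'NR>1{print $15}' | grep -oE '[0-9]+' | sort -n | uniq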
1.3.12 Test using the pool
#enable the rbd application on the pool
root@ceph-deploy:/opt/crush# ceph osd pool application enable ssdpool rbd
enabled application 'rbd' on pool 'ssdpool'
#create an image
root@ceph-deploy:/opt/crush# rbd create mysql-volume --size 100G --pool ssdpool --image-format 2 --image-feature layering
root@ceph-deploy:/opt/crush# rbd ls --pool ssdpool -l
NAME SIZE PARENT FMT PROT LOCK
mysql-volume 100 GiB 2
Use the image from a client
#create a regular user and grant permissions
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth add client.wei mon 'allow r' osd 'allow rwx pool=ssdpool'
added key for client.wei
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.wei
[client.wei]
key = AQC5DVdl1+MdEBAAcry5ArcWv4+2IaHWk9Mq3Q==
caps mon = "allow r"
caps osd = "allow rwx pool=ssdpool"
exported keyring for client.wei
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-authtool --create-keyring ceph.client.wei.keyring
creating ceph.client.wei.keyring
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.wei -o ceph.client.wei.keyring
exported keyring for client.wei
cephadmin@ceph-deploy:~/ceph-cluster$ cat ceph.client.wei.keyring
[client.wei]
key = AQC5DVdl1+MdEBAAcry5ArcWv4+2IaHWk9Mq3Q==
caps mon = "allow r"
caps osd = "allow rwx pool=ssdpool"
#copy the auth files to the client
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.wei.keyring root@172.20.20.129:/etc/ceph/
root@172.20.20.129's password:
ceph.conf 100% 620 137.6KB/s 00:00
ceph.client.wei.keyring 100% 120 76.9KB/s 00:00
Log in to the client and test
#check the client's ceph permissions
[root@localhost ~]# ceph --user wei -s
cluster:
id: 3586e7d1-9315-44e5-85bd-6bd3787ce574
health: HEALTH_WARN
1 client(s) laggy due to laggy OSDs
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3 (age 4h)
mgr: ceph-mgr1(active, since 3w), standbys: ceph-mgr2
mds: 2/2 daemons up, 2 standby
osd: 20 osds: 20 up (since 4h), 20 in (since 28h)
rgw: 2 daemons active (2 hosts, 1 zones)
data:
volumes: 1/1 healthy
pools: 13 pools, 417 pgs
objects: 431 objects, 180 MiB
usage: 6.2 GiB used, 1.9 TiB / 2.0 TiB avail
pgs: 417 active+clean
#map the rbd image on the client
[root@localhost ~]# rbd --user wei -p ssdpool map mysql-volume
/dev/rbd0
#format the image and mount it on the client
[root@localhost ~]# mkfs.xfs /dev/rbd0
[root@localhost ~]# mount /dev/rbd0 /mnt
[root@localhost ~]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 898M 0 898M 0% /dev
tmpfs 910M 0 910M 0% /dev/shm
tmpfs 910M 9.6M 901M 2% /run
tmpfs 910M 0 910M 0% /sys/fs/cgroup
/dev/mapper/centos-root 17G 1.8G 16G 11% /
/dev/sda1 1014M 195M 820M 20% /boot
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 127G 0 127G 0% /data
tmpfs 182M 0 182M 0% /run/user/0
/dev/rbd0 100G 33M 100G 1% /mnt
Deleting a Ceph pool
#allow pool deletion, then remove the pool
cephadmin@ceph-deploy:~/ceph-cluster$ ceph config set mon mon_allow_pool_delete true
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool rm hddpool hddpool --yes-i-really-really-mean-it
pool 'hddpool' removed
2. Enable the Ceph dashboard and monitor the cluster with Prometheus
The Ceph dashboard is a web UI for viewing the status of a running Ceph cluster and performing configuration tasks; it has to be installed on the mgr nodes.
ceph-mgr is a modular (plugin-based) component whose modules can be enabled or disabled individually (operate from the deploy server).
#the dashboard plugin is normally installed on mgr nodes by default; install it manually if it is missing
root@ceph-mgr1:~# apt-cache madison ceph-mgr-dashboard
root@ceph-mgr1:~# apt install -y ceph-mgr-dashboard
root@ceph-mgr2:~# apt-cache madison ceph-mgr-dashboard
root@ceph-mgr2:~# apt install -y ceph-mgr-dashboard
List the mgr modules
cephadmin@ceph-deploy:~/ceph-cluster$ ceph mgr module ls
{
"always_on_modules": [
"balancer",
"crash",
"devicehealth",
"orchestrator",
"pg_autoscaler",
"progress",
"rbd_support",
"status",
"telemetry",
"volumes"
],
"enabled_modules": [ #已开启模块
"iostat",
"nfs",
"restful"
],
"disabled_modules": [ #未开启模块
{
"name": "alerts",
"can_run": true,
"error_string": "",
"module_options": {
"interval": {
2.1 Enable the dashboard module
#dashboard will be enabled on all mgr nodes in the cluster
cephadmin@ceph-deploy:~/ceph-cluster$ ceph mgr module enable dashboard
module 'dashboard' is already enabled
#verify the module status
cephadmin@ceph-deploy:~$ ceph mgr module ls
{
"always_on_modules": [
"balancer",
"crash",
"devicehealth",
"orchestrator",
"pg_autoscaler",
"progress",
"rbd_support",
"status",
"telemetry",
"volumes"
],
"enabled_modules": [
"dashboard", #已开启
"iostat",
"nfs",
"prometheus",
"restful"
],
"disabled_modules": [
{
"name": "alerts",
"can_run": true,
2.2 Configure the dashboard
#disable SSL for the Ceph dashboard
cephadmin@ceph-deploy:~/ceph-cluster$ ceph config set mgr mgr/dashboard/ssl false
#set the dashboard listen address
cephadmin@ceph-deploy:~/ceph-cluster$ ceph config set mgr mgr/dashboard/ceph-mgr1/server_addr 172.20.20.224
#set the dashboard listen port (default 8080)
cephadmin@ceph-deploy:~/ceph-cluster$ ceph config set mgr mgr/dashboard/ceph-mgr1/server_port 9009
#check
cephadmin@ceph-deploy:~/ceph-cluster$ ceph mgr services
{
"dashboard": "http://172.20.20.224:9009/"
}
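If the new address or port does not take effect right away, bouncing the dashboard module usually applies it; a hedged sketch:
#restart the dashboard module so it rebinds to the new address/port, then re-check the URL
ceph mgr module disable dashboard
ceph mgr module enable dashboard
ceph mgr services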
2.3 Verify on the mgr nodes
root@ceph-mgr1:~# lsof -i:9009
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
ceph-mgr 2360 ceph 41u IPv6 48156 0t0 TCP *:9009 (LISTEN)
root@ceph-mgr2:~# lsof -i:9009
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
ceph-mgr 824 ceph 20u IPv6 43428 0t0 TCP *:9009 (LISTEN)
2.4 Verify dashboard access
There is no username or password by default.

2.5 Set a dashboard account and password
#create a file holding the password
cephadmin@ceph-deploy:~/ceph-cluster$ echo "123456" > passwd.txt
#set the username and password
cephadmin@ceph-deploy:~/ceph-cluster$ ceph dashboard set-login-credentials zhao5 -i passwd.txt
******************************************************************
*** WARNING: this command is deprecated. ***
*** Please use the ac-user-* related commands to manage users. ***
******************************************************************
Username and password updated
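As the deprecation warning suggests, the ac-user-* commands are the newer way to manage dashboard users; a hedged sketch that creates an administrator account (the user name and password file are illustrative):
#create a dashboard user with the administrator role, reading the password from a file
ceph dashboard ac-user-create zhao5 -i passwd.txt administrator
#list dashboard users to confirm
ceph dashboard ac-user-show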
2.6 Dashboard pages
- Login page (screenshot omitted)
The cluster status in the screenshot showed the warning "1 client(s) laggy due to laggy OSDs", yet the OSD latencies all read 0. The exact cause was not identified at the time; the warning disappeared on its own the next day while other experiments were running, which is a bit odd.
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd perf
osd commit_latency(ms) apply_latency(ms)
19 0 0
18 0 0
17 0 0
16 0 0
15 0 0
14 0 0
13 0 0
0 0 0
1 0 0
2 0 0
3 0 0
4 0 0
5 0 0
6 0 0
7 0 0
8 0 0
9 0 0
10 0 0
11 0 0
12 0 0
- Host information (screenshot omitted)
- Monitor nodes (screenshot omitted)
- OSD status (screenshot omitted)
- Pool status (screenshot omitted)
- Block storage (RBD image) status (screenshot omitted)
- CephFS status (screenshot omitted)
- Object storage: gateway service status, user information and bucket status (screenshots omitted)
2.7 Monitor the Ceph cluster with Prometheus
2.7.1 Deploy node_exporter on the storage nodes
#use the previously downloaded package
root@ceph-node4:~# mkdir /apps
root@ceph-node4:~# cd /apps/
root@ceph-node4:/apps# tar xvf node_exporter-1.3.1.linux-amd64.tar.gz
root@ceph-node4:/apps# ln -sv node_exporter-1.3.1.linux-amd64 node_exporter
#create the systemd service
root@ceph-node4:/apps# cat >>/etc/systemd/system/node-exporter.service <<EOF
> [Unit]
> Description=Prometheus Node Exporter
> Documentation=https://prometheus.io/docs/introduction/overview/
> After=network.target
>
> [Service]
> ExecStart=/apps/node_exporter/node_exporter
>
> [Install]
> WantedBy=multi-user.target
> EOF
#start the service
root@ceph-node4:/apps# systemctl daemon-reload
root@ceph-node4:/apps# systemctl restart node-exporter.service
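A quick check that the exporter is up and serving metrics (a sketch):
#enable the service at boot and verify the metrics endpoint responds
systemctl enable node-exporter.service
curl -s http://127.0.0.1:9100/metrics | head -n 5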
Access the exporter in a browser to verify (screenshot omitted)

2.7.2 Install and configure Prometheus
Reuse the Prometheus server deployed in a previous post.
#edit the configuration
root@prometheus-01:~# cat /apps/prometheus/prometheus.yml
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
scrape_configs:
  - job_name: "prometheus"
    static_configs:
      - targets: ["localhost:9090"]
  - job_name: 'ceph-node'
    static_configs:
      - targets: ["172.20.20.226:9100","172.20.20.227:9100","172.20.20.228:9100","172.20.20.229:9100"]
#restart the service
root@prometheus-01:~# systemctl restart prometheus.service
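The edited file can be validated before restarting, assuming promtool ships alongside the prometheus binary in /apps/prometheus (a sketch):
#syntax-check the configuration file
/apps/prometheus/promtool check config /apps/prometheus/prometheus.yml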
Open the Prometheus web UI and check the targets page (screenshot omitted)

2.8 Check the Ceph side
The ceph manager ships a prometheus module that listens on port 9283 of every manager node and exposes the collected metrics to Prometheus over HTTP.
2.8.1 Enable the prometheus module
cephadmin@ceph-deploy:~/ceph-cluster$ ceph mgr module enable prometheus
2.8.2 Verify the port on the mgr nodes
root@ceph-mgr1:~# lsof -i:9283
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
ceph-mgr 2360 ceph 28u IPv6 58988 0t0 TCP *:9283 (LISTEN)
root@ceph-mgr2:~# lsof -i:9283
COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME
ceph-mgr 824 ceph 20u IPv6 56933 0t0 TCP *:9283 (LISTEN)
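The endpoint can also be queried directly to confirm metrics are exported (a sketch):
#the mgr prometheus module serves ceph_* metrics over plain HTTP on port 9283
curl -s http://172.20.20.224:9283/metrics | grep -m 5 '^ceph_'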
2.8.3 Verify the data in a browser (screenshot omitted)

2.8.4 Configure Prometheus scraping
#add the ceph-cluster scrape job
root@prometheus-01:~# cat /apps/prometheus/prometheus.yml
...
  - job_name: 'ceph-node'
    static_configs:
      - targets: ["172.20.20.226:9100","172.20.20.227:9100","172.20.20.228:9100","172.20.20.229:9100"]
  - job_name: 'ceph-cluster'
    static_configs:
      - targets: ["172.20.20.224:9283","172.20.20.225:9283"]

2.9 Grafana dashboards
Reuse the Grafana instance deployed in a previous post.
- Configure the Prometheus data source (screenshot omitted)
- Pool dashboard, ID 5342 (screenshot omitted)
- Ceph OSD dashboard, ID 5336 (screenshot omitted)
- Ceph Cluster dashboard, ID 2842 (screenshot omitted)
3. Kubernetes data persistence with Ceph RBD and CephFS
For pods in k8s to use RBD images from Ceph as storage devices, the RBD image must exist in Ceph and the k8s nodes must be able to authenticate against Ceph.
When k8s uses Ceph for dynamically provisioned volumes, the kube-controller-manager component must be able to reach Ceph, so the auth files have to be synced to every node, k8s masters included.
3.1 Data persistence with Ceph block storage (RBD)
3.1.1 Create and initialize the RBD pool
#create the pool
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool create k8s-rbd-pool 32 32
pool 'k8s-rbd-pool' created
#verify the pools
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool ls
device_health_metrics
mypool
myrbd1
cephfs-metadata
cephfs-data
.rgw.root
default.rgw.log
default.rgw.control
default.rgw.meta
default.rgw.buckets.index
default.rgw.buckets.data
default.rgw.buckets.non-ec
k8s-rbd-pool #newly created pool
#enable the rbd application on the pool
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool application enable k8s-rbd-pool rbd
enabled application 'rbd' on pool 'k8s-rbd-pool'
#initialize the pool for rbd
cephadmin@ceph-deploy:~/ceph-cluster$ rbd pool init -p k8s-rbd-pool
3.1.2 Create an image
#create the image
cephadmin@ceph-deploy:~/ceph-cluster$ rbd create k8s-img --size 3G --pool k8s-rbd-pool --image-feature layering
cephadmin@ceph-deploy:~/ceph-cluster$ rbd ls --pool k8s-rbd-pool
k8s-img
#view the image details
cephadmin@ceph-deploy:~/ceph-cluster$ rbd --image k8s-img --pool k8s-rbd-pool info
rbd image 'k8s-img':
size 3 GiB in 768 objects
order 22 (4 MiB objects)
snapshot_count: 0
id: ae8f8c73dc3e
block_name_prefix: rbd_data.ae8f8c73dc3e
format: 2
features: layering
op_features:
flags:
create_timestamp: Sat Nov 18 02:57:23 2023
access_timestamp: Sat Nov 18 02:57:23 2023
modify_timestamp: Sat Nov 18 02:57:23 2023
3.1.3 Install ceph-common in the k8s cluster
ceph-common has to be installed on every k8s master and node; the steps are identical on each.
#configure the ceph apt repository
root@k8s-master3:~# apt install -y apt-transport-https ca-certificates curl software-properties-common
root@k8s-master3:~# wget -q -O- 'https://mirrors.tuna.tsinghua.edu.cn/ceph/keys/release.asc' | apt-key add -
root@k8s-master3:~# echo 'deb https://mirrors.tuna.tsinghua.edu.cn/ceph/debian-pacific/ focal main' >> /etc/apt/sources.list
#refresh the package index
root@k8s-master3:~# apt update
#install the same ceph-common version as the ceph cluster
root@k8s-master3:~# apt-cache madison ceph-common
root@k8s-master3:~# apt install ceph-common=16.2.14-1focal -y
3.1.4 Create a Ceph user and grant permissions
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get-or-create client.k8s mon 'allow r' osd 'allow * pool=k8s-rbd-pool'
[client.k8s]
key = AQD3K1hlDTebJRAAXDsxU44BxqRyspQyL8sNxQ==
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.k8s
[client.k8s]
key = AQD3K1hlDTebJRAAXDsxU44BxqRyspQyL8sNxQ==
caps mon = "allow r"
caps osd = "allow * pool=k8s-rbd-pool"
exported keyring for client.k8s
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.k8s -o ceph.client.k8s.keyring
exported keyring for client.k8s
cephadmin@ceph-deploy:~/ceph-cluster$ cat ceph.client.k8s.keyring
[client.k8s]
key = AQD3K1hlDTebJRAAXDsxU44BxqRyspQyL8sNxQ==
caps mon = "allow r"
caps osd = "allow * pool=k8s-rbd-pool"
#then copy ceph.conf and ceph.client.k8s.keyring to every node
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.k8s.keyring root@172.20.20.101:/etc/ceph/
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.k8s.keyring root@172.20.20.102:/etc/ceph/
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.k8s.keyring root@172.20.20.103:/etc/ceph/
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.k8s.keyring root@172.20.20.111:/etc/ceph/
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.k8s.keyring root@172.20.20.112:/etc/ceph/
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.k8s.keyring root@172.20.20.113:/etc/ceph/
Verify the user's permissions from a k8s node with ceph --user k8s -s
root@k8s-master2:~# ceph --user k8s -s
cluster:
id: 3586e7d1-9315-44e5-85bd-6bd3787ce574
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3 (age 77s)
mgr: ceph-mgr1(active, since 19h), standbys: ceph-mgr2
mds: 2/2 daemons up, 2 standby
osd: 20 osds: 20 up (since 40s), 20 in (since 2d)
rgw: 2 daemons active (2 hosts, 1 zones)
data:
volumes: 1/1 healthy
pools: 13 pools, 417 pgs
objects: 480 objects, 331 MiB
usage: 6.7 GiB used, 1.9 TiB / 2.0 TiB avail
pgs: 417 active+clean
Verify access to the image
root@k8s-master2:~# rbd --user k8s ls --pool k8s-rbd-pool
k8s-img
3.1.5 Add hosts entries on all k8s nodes
ceph.conf references the ceph cluster's hostnames, so hostname resolution must be configured on every k8s master and node.
root@k8s-master2:~# cat >> /etc/hosts <<EOF
172.20.20.220 ceph-deploy
172.20.20.221 ceph-mon1
172.20.20.222 ceph-mon2
172.20.20.223 ceph-mon3
172.20.20.224 ceph-mgr1
172.20.20.225 ceph-mgr2
172.20.20.226 ceph-node1
172.20.20.227 ceph-node2
172.20.20.228 ceph-node3
172.20.20.229 ceph-node4
EOF
3.1.6 Mount RBD with a keyring file
Storage volumes backed by Ceph RBD can be provided to pods in two ways: by mounting the RBD with a keyring file present on the node, or by storing the keyring's key in a k8s secret and letting the pod mount the RBD through that secret.
3.1.6.1 Mount directly with the keyring file (busybox Pod)
#write the yaml file
# cat case1-busybox-keyring.yaml
apiVersion: v1
kind: Pod
metadata:
  name: busybox
  namespace: default
spec:
  containers:
  - image: busybox
    command:
    - sleep
    - "3600"
    imagePullPolicy: Always
    name: busybox
    #restartPolicy: Always
    volumeMounts:
    - name: rbd-data1
      mountPath: /data
  volumes:
  - name: rbd-data1
    rbd:
      monitors:
      - '172.20.20.221:6789'
      - '172.20.20.222:6789'
      - '172.20.20.223:6789'
      pool: k8s-rbd-pool
      image: k8s-img
      fsType: xfs
      readOnly: false
      user: k8s
      keyring: /etc/ceph/ceph.client.k8s.keyring
#apply
# kubectl apply -f case1-busybox-keyring.yaml
#check
# kubectl get pod
NAME READY STATUS RESTARTS AGE
busybox 1/1 Running 0 2m
#exec into the pod and check the mount
# kubectl exec -it busybox sh
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
/ # df -h
Filesystem Size Used Available Use% Mounted on
overlay 18.5G 13.0G 4.6G 74% /
tmpfs 64.0M 0 64.0M 0% /dev
tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup
/dev/rbd0 3.0G 53.9M 2.9G 2% /data #the mounted directory
/dev/mapper/ubuntu--vg-ubuntu--lv
18.5G 13.0G 4.6G 74% /etc/hosts
/dev/mapper/ubuntu--vg-ubuntu--lv
18.5G 13.0G 4.6G 74% /dev/termination-log
/dev/mapper/ubuntu--vg-ubuntu--lv
18.5G 13.0G 4.6G 74% /etc/hostname
3.1.6.2 Mount with the keyring file in a Deployment
#write the yaml file
# cat case2-nginx-keyring.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 1
  selector:
    matchLabels: #rs or deployment
      app: ng-deploy-80
  template:
    metadata:
      labels:
        app: ng-deploy-80
    spec:
      containers:
      - name: ng-deploy-80
        image: nginx
        ports:
        - containerPort: 80
        volumeMounts:
        - name: rbd-data1
          mountPath: /data
      volumes:
      - name: rbd-data1
        rbd:
          monitors:
          - '172.20.20.221:6789'
          - '172.20.20.222:6789'
          - '172.20.20.223:6789'
          pool: k8s-rbd-pool
          image: k8s-img
          fsType: xfs
          readOnly: false
          user: k8s
          keyring: /etc/ceph/ceph.client.k8s.keyring
#apply
# kubectl apply -f case2-nginx-keyring.yaml
#check the pod
# kubectl get pod
NAME READY STATUS RESTARTS AGE
nginx-deployment-5c995dc5b6-jkz7h 1/1 Running 0 71s
#exec into the pod and check the mount
root@k8s-master2:~/20221006/ceph-case-n70# kubectl exec -it nginx-deployment-5c995dc5b6-jkz7h bash
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
root@nginx-deployment-5c995dc5b6-jkz7h:/# df -h
Filesystem Size Used Avail Use% Mounted on
overlay 19G 14G 4.4G 76% /
tmpfs 64M 0 64M 0% /dev
tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup
/dev/rbd0 3.0G 54M 3.0G 2% /data #the mounted directory
/dev/mapper/ubuntu--vg-ubuntu--lv 19G 14G 4.4G 76% /etc/hosts
3.1.6.3 Verify the RBD on the host
Inside the pod the RBD appears to be mounted by the pod, but since pods use the host kernel, the mapping and mount actually happen on the host.
#find the host the pod is running on
# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deployment-5c995dc5b6-jkz7h 1/1 Running 0 2m50s 10.200.107.200 172.20.20.113 <none> <none>
#check on that host
root@k8s-node3:~# rbd showmapped
id pool namespace image snap device
0 k8s-rbd-pool k8s-img - /dev/rbd0
root@k8s-node3:~# df -h |grep /dev/rbd0
/dev/rbd0 3.0G 54M 3.0G 2% /var/lib/kubelet/plugins/kubernetes.io/rbd/mounts/k8s-rbd-pool-image-k8s-img
3.1.7 Mount RBD via a secret
Define the key as a secret and mount it into the pod, so the keyring file no longer has to be kept on every k8s node.
3.1.7.1 Create a secret for the regular user
First create the secret. It mainly contains the key from the keyring authorized in Ceph; base64-encode the key and use the result to create the secret.
#print the key
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth print-key client.k8s
AQD3K1hlDTebJRAAXDsxU44BxqRyspQyL8sNxQ==
#base64 encode it
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth print-key client.k8s | base64
QVFEM0sxaGxEVGViSlJBQVhEc3hVNDRCeHFSeXNwUXlMOHNOeFE9PQ==
#write the yaml file
# cat case3-secret-client-k8s.yaml
apiVersion: v1
kind: Secret
metadata:
  name: ceph-secret-k8s
type: "kubernetes.io/rbd"
data:
  key: QVFEM0sxaGxEVGViSlJBQVhEc3hVNDRCeHFSeXNwUXlMOHNOeFE9PQ==
#apply
# kubectl apply -f case3-secret-client-k8s.yaml
secret/ceph-secret-k8s created
#verify
# kubectl get secrets
NAME TYPE DATA AGE
ceph-secret-k8s kubernetes.io/rbd 1 10s
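The stored key can be decoded and compared with the Ceph-side key as a sanity check (a sketch):
#decode the key held in the secret; it should match the output of `ceph auth print-key client.k8s`
kubectl get secret ceph-secret-k8s -o jsonpath='{.data.key}' | base64 -d; echo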
3.1.7.2 Create the pod
#write the yaml file
# cat case4-nginx-secret.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 1
  selector:
    matchLabels: #rs or deployment
      app: ng-deploy-80
  template:
    metadata:
      labels:
        app: ng-deploy-80
    spec:
      containers:
      - name: ng-deploy-80
        image: nginx
        ports:
        - containerPort: 80
        volumeMounts:
        - name: rbd-data1
          mountPath: /usr/share/nginx/html/rbd
      volumes:
      - name: rbd-data1
        rbd:
          monitors:
          - '172.20.20.221:6789'
          - '172.20.20.222:6789'
          - '172.20.20.223:6789'
          pool: k8s-rbd-pool
          image: k8s-img
          fsType: xfs
          readOnly: false
          user: k8s
          secretRef:
            name: ceph-secret-k8s
#apply
# kubectl apply -f case4-nginx-secret.yaml
deployment.apps/nginx-deployment created
#verify
# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deployment-5559556647-8nbg5 1/1 Running 0 73s 10.200.107.212 172.20.20.113 <none> <none>
#verify the mount inside the pod
# kubectl exec -it nginx-deployment-5559556647-8nbg5 bash
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
root@nginx-deployment-5559556647-8nbg5:/# df -h
Filesystem Size Used Avail Use% Mounted on
overlay 19G 14G 4.4G 76% /
tmpfs 64M 0 64M 0% /dev
tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup
/dev/mapper/ubuntu--vg-ubuntu--lv 19G 14G 4.4G 76% /etc/hosts
shm 64M 0 64M 0% /dev/shm
/dev/rbd0 3.0G 54M 3.0G 2% /usr/share/nginx/html/rbd #the mounted directory
tmpfs 3.6G 12K 3.6G 1% /run/secrets/kubernetes.io/serviceaccount
#verify on the host
root@k8s-node3:~# rbd showmapped
id pool namespace image snap device
0 k8s-rbd-pool k8s-img - /dev/rbd0
3.1.8 Dynamic volume provisioning (requires a binary-installed k8s)
Storage volumes can be provisioned dynamically by the kube-controller-manager component, which suits stateful services that need many volumes.
Define the ceph admin user's key as a k8s secret so k8s can call Ceph with admin privileges to create volumes on demand; images no longer have to be created in advance, k8s asks Ceph to create them when they are needed.
3.1.8.1 Create a secret for the admin user
#get the admin user's key and base64 encode it
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth print-key client.admin | base64
QVFBMTN6bGxaY2RyRXhBQUNtSzB5SlVSNm5IZW93Q1RKUHJsRlE9PQ==
#write the yaml file
# cat case5-secret-admin.yaml
apiVersion: v1
kind: Secret
metadata:
  name: ceph-secret-admin
type: "kubernetes.io/rbd"
data:
  key: QVFBMTN6bGxaY2RyRXhBQUNtSzB5SlVSNm5IZW93Q1RKUHJsRlE9PQ==
#apply
# kubectl apply -f case5-secret-admin.yaml
secret/ceph-secret-admin created
#check
# kubectl get secrets
NAME TYPE DATA AGE
ceph-secret-admin kubernetes.io/rbd 1 8s
ceph-secret-k8s kubernetes.io/rbd 1 5m2s
3.1.8.2 Create a StorageClass using the regular user from 3.1.7.1
#write the yaml file
# cat case6-ceph-storage-class.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-storage-class-k8s
  annotations:
    storageclass.kubernetes.io/is-default-class: "false" #whether this is the default storage class (not the default here)
provisioner: kubernetes.io/rbd
parameters:
  monitors: 172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789
  adminId: admin
  adminSecretName: ceph-secret-admin
  adminSecretNamespace: default
  pool: k8s-rbd-pool
  userId: k8s
  userSecretName: ceph-secret-k8s
#apply
# kubectl apply -f case6-ceph-storage-class.yaml
storageclass.storage.k8s.io/ceph-storage-class-k8s created
#check
# kubectl get storageclasses
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
ceph-storage-class-k8s kubernetes.io/rbd Delete Immediate false 12s
3.1.8.3 Create a PVC backed by the StorageClass
#write the yaml file
# cat case7-mysql-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: mysql-data-pvc
spec:
  accessModes:
  - ReadWriteOnce
  storageClassName: ceph-storage-class-k8s
  resources:
    requests:
      storage: '5Gi'
#apply
# kubectl apply -f case7-mysql-pvc.yaml
persistentvolumeclaim/mysql-data-pvc created
#check
# kubectl get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
mysql-data-pvc Bound pvc-b4becfb2-47bf-46b4-95d6-90980fd3a72e 5Gi RWO ceph-storage-class-k8s 5s
# kubectl get pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
pvc-b4becfb2-47bf-46b4-95d6-90980fd3a72e 5Gi RWO Delete Bound default/mysql-data-pvc ceph-storage-class-k8s 14s
#verify in ceph that an image was created automatically
cephadmin@ceph-deploy:~/ceph-cluster$ rbd ls --pool k8s-rbd-pool
k8s-img
kubernetes-dynamic-pvc-a3cd1ecb-63db-4de3-b2d6-e2b2ae90d9d5 #dynamically created
3.1.8.4 Run a single-instance MySQL
#write the yaml file
# cat case8-mysql-single.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: mysql
spec:
  selector:
    matchLabels:
      app: mysql
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: mysql
    spec:
      containers:
      - image: mysql:5.6.46
        name: mysql
        env:
        # Use secret in real usage
        - name: MYSQL_ROOT_PASSWORD
          value: "123456"
        ports:
        - containerPort: 3306
          name: mysql
        volumeMounts:
        - name: mysql-persistent-storage
          mountPath: /var/lib/mysql
      volumes:
      - name: mysql-persistent-storage
        persistentVolumeClaim:
          claimName: mysql-data-pvc
---
kind: Service
apiVersion: v1
metadata:
  labels:
    app: mysql-service-label
  name: mysql-service
spec:
  type: NodePort
  ports:
  - name: http
    port: 3306
    protocol: TCP
    targetPort: 3306
    nodePort: 33306
  selector:
    app: mysql
#apply
# kubectl apply -f case8-mysql-single.yaml
deployment.apps/mysql created
service/mysql-service created
#check
# kubectl get pod
NAME READY STATUS RESTARTS AGE
mysql-7778c69d96-v8x2q 1/1 Running 0 2m26s
#check the mount
# kubectl exec -it mysql-7778c69d96-v8x2q bash
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
root@mysql-7778c69d96-v8x2q:/# df -h
Filesystem Size Used Avail Use% Mounted on
overlay 19G 14G 4.0G 78% /
tmpfs 64M 0 64M 0% /dev
tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup
/dev/mapper/ubuntu--vg-ubuntu--lv 19G 14G 4.0G 78% /etc/hosts
shm 64M 0 64M 0% /dev/shm
/dev/rbd1 4.9G 109M 4.8G 3% /var/lib/mysql #the mounted directory
#verify mysql access
# kubectl get svc
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
kubernetes ClusterIP 10.100.0.1 <none> 443/TCP 107d
mysql-service NodePort 10.100.165.124 <none> 3306:33306/TCP 3m14s
# apt install mysql-client
# mysql -uroot -p123456 -h172.20.20.111 -P33306
mysql: [Warning] Using a password on the command line interface can be insecure.
Welcome to the MySQL monitor. Commands end with ; or \g.
Your MySQL connection id is 1
Server version: 5.6.46 MySQL Community Server (GPL)
Copyright (c) 2000, 2023, Oracle and/or its affiliates.
Oracle is a registered trademark of Oracle Corporation and/or its
affiliates. Other names may be trademarks of their respective
owners.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
mysql> use mysql;
Reading table information for completion of table and column names
You can turn off this feature to get a quicker startup with -A
Database changed
mysql> show tables;
+---------------------------+
| Tables_in_mysql |
+---------------------------+
| columns_priv |
| db |
| event |
| func |
| general_log |
| help_category |
| help_keyword |
| help_relation |
| help_topic |
| innodb_index_stats |
| innodb_table_stats |
| ndb_binlog_index |
| plugin |
| proc |
| procs_priv |
| proxies_priv |
| servers |
| slave_master_info |
| slave_relay_log_info |
| slave_worker_info |
| slow_log |
| tables_priv |
| time_zone |
| time_zone_leap_second |
| time_zone_name |
| time_zone_transition |
| time_zone_transition_type |
| user |
+---------------------------+
28 rows in set (0.01 sec)
mysql> exit
Bye
3.2 Data persistence with CephFS
Pods in k8s mount the shared CephFS filesystem, giving the workload shared, persistent, high-performance and highly available data.
3.2.1 Create pods using the admin secret from 3.1.8.1
#write the yaml file
# cat case9-nginx-cephfs.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nginx-deployment
spec:
  replicas: 3
  selector:
    matchLabels: #rs or deployment
      app: ng-deploy-80
  template:
    metadata:
      labels:
        app: ng-deploy-80
    spec:
      containers:
      - name: ng-deploy-80
        image: nginx
        ports:
        - containerPort: 80
        volumeMounts:
        - name: k8s-staticdata-cephfs
          mountPath: /usr/share/nginx/html/cephfs
      volumes:
      - name: k8s-staticdata-cephfs
        cephfs:
          monitors:
          - '172.20.20.221:6789'
          - '172.20.20.222:6789'
          - '172.20.20.223:6789'
          path: /
          user: admin
          secretRef:
            name: ceph-secret-admin
---
kind: Service
apiVersion: v1
metadata:
  labels:
    app: ng-deploy-80-service-label
  name: ng-deploy-80-service
spec:
  type: NodePort
  ports:
  - name: http
    port: 80
    protocol: TCP
    targetPort: 80
    nodePort: 33380
  selector:
    app: ng-deploy-80
#apply
# kubectl apply -f case9-nginx-cephfs.yaml
deployment.apps/nginx-deployment created
service/ng-deploy-80-service created
#check the pods
# kubectl get pod
NAME READY STATUS RESTARTS AGE
centos-net-test1 1/1 Running 2 (151m ago) 62d
nginx-deployment-9f889bf9c-9kxl9 1/1 Running 0 67s
nginx-deployment-9f889bf9c-qvpcp 1/1 Running 0 67s
nginx-deployment-9f889bf9c-t26fl 1/1 Running 0 67s
#check the mount
# kubectl exec -it nginx-deployment-9f889bf9c-9kxl9 bash
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
root@nginx-deployment-9f889bf9c-9kxl9:/# df -h
Filesystem Size Used Avail Use% Mounted on
overlay 19G 12G 5.9G 67% /
tmpfs 64M 0 64M 0% /dev
tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup
/dev/mapper/ubuntu--vg-ubuntu--lv 19G 12G 5.9G 67% /etc/hosts
shm 64M 0 64M 0% /dev/shm
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 127G 0 127G 0% /usr/share/nginx/html/cephfs #the mounted directory
tmpfs 3.6G 12K 3.6G 1% /run/secrets/kubernetes.io/serviceaccount
3.2.2 Verify across pod replicas
# kubectl get pod -o wide
NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
nginx-deployment-9f889bf9c-9kxl9 1/1 Running 0 2m52s 10.200.169.142 worker-02 <none> <none>
nginx-deployment-9f889bf9c-qvpcp 1/1 Running 0 2m52s 10.200.107.219 172.20.20.113 <none> <none>
nginx-deployment-9f889bf9c-t26fl 1/1 Running 0 2m52s 10.200.36.67 worker-01 <none> <none>
3.2.3 Verify on the hosts
root@k8s-node3:~# df -h |grep ceph
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 127G 0 127G 0% /var/lib/kubelet/pods/b52cf45c-abda-4b90-8917-98a284128332/volumes/kubernetes.io~cephfs/k8s-staticdata-cephfs
root@k8s-node2:~# df -h |grep ceph
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 127G 0 127G 0% /var/lib/kubelet/pods/6ea21caf-b91e-45c1-8b1b-7b69d6a3ab2c/volumes/kubernetes.io~cephfs/k8s-staticdata-cephfs
root@k8s-node1:~# df -h |grep ceph
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 127G 0 127G 0% /var/lib/kubelet/pods/e27e1b6a-1cc0-4a76-a8f6-55dafbce91d3/volumes/kubernetes.io~cephfs/k8s-staticdata-cephfs
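Since all three replicas mount the same CephFS path, a file written from one pod should be visible from the others and through the NodePort service; a hedged sketch using the pod names and node IP from the listings above:
#write a test page from one replica
kubectl exec nginx-deployment-9f889bf9c-9kxl9 -- sh -c 'echo cephfs-shared > /usr/share/nginx/html/cephfs/index.html'
#read it back from another replica; identical content shows the volume is shared
kubectl exec nginx-deployment-9f889bf9c-qvpcp -- cat /usr/share/nginx/html/cephfs/index.html
#or fetch it through the NodePort service
curl http://172.20.20.113:33380/cephfs/index.html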











