Introduction to Ceph Block Storage
A block is a sequence of bytes (for example, a 512-byte block of data). Block-based storage interfaces are the most common way to store data on rotating media such as hard disks, CDs, floppy disks, and even traditional 9-track tape. The ubiquity of block device interfaces makes a virtual block device an ideal fit for interacting with a mass storage system like Ceph.
Ceph block devices are thin-provisioned, resizable, and stripe their data across multiple OSDs in the cluster. They leverage RADOS capabilities such as snapshots, replication, and consistency. Ceph's RADOS Block Device (RBD) talks to the OSDs through either the kernel module or the librbd library.
Note: the kernel module can use the Linux page cache. For librbd-based applications, Ceph provides RBD caching.
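For applications that use librbd (QEMU, for example), RBD caching is configured on the client side in ceph.conf. A minimal sketch with illustrative values (on recent releases the cache is already enabled by default):
[client]
rbd cache = true
# cache size and dirty limit in bytes (32 MiB / 24 MiB here, purely illustrative)
rbd cache size = 33554432
rbd cache max dirty = 25165824
# stay in writethrough mode until the first flush is seen, safer for old guests
rbd cache writethrough until flush = true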
Ceph block devices deliver high performance with effectively unlimited scalability, whether consumed through the kernel module or through KVM (kernel virtual machines): cloud platforms such as OpenStack and CloudStack integrate with Ceph block devices via libvirt and QEMU. You can run the Ceph RADOS Gateway, CephFS, and Ceph block devices on the same cluster at the same time.
Ceph Block Device (RBD)
2.1 Create a Ceph Pool
### ceph osd pool create {pool-name} {pg-num} [{pgp-num}]
### pg-num and pgp-num can only be increased, never decreased
# Create a test pool
[root@ceph01 ceph]# ceph osd pool create rbd 32 32
pool 'rbd' created
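Since pg_num and pgp_num can only grow, a pool that was created too small can be expanded later. A short sketch (the value 64 is illustrative):
# raise the placement-group count; pgp_num should be raised to match
ceph osd pool set rbd pg_num 64
ceph osd pool set rbd pgp_num 64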
2.2 View the Created Ceph Pool
[root@ceph01 ceph]# ceph osd pool ls
rbd
[root@ceph01 ceph]# ceph osd pool stats rbd
pool rbd id 11
nothing is going on
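Other pool settings, such as the replica count and PG count, can be inspected the same way; for example:
# show individual pool settings
ceph osd pool get rbd size
ceph osd pool get rbd pg_num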
2.3 Create a Client Account
Create the account:
[root@ceph01 ceph]# ceph auth get-or-create client.rbd mon 'allow r' osd 'allow class-read object_prefix rbd_children,allow rwx pool=rbd'
[client.rbd]
key = AQAEdYJgD4ceJRAA/ARqDJoeDWc7QkT8nRelGQ==
View the created client account's permissions:
[root@ceph01 ceph]# ceph auth get client.rbd
exported keyring for client.rbd
[client.rbd]
key = AQAEdYJgD4ceJRAA/ARqDJoeDWc7QkT8nRelGQ==
caps mon = "allow r"
caps osd = "allow class-read object_prefix rbd_children,allow rwx pool=rbd"
# Export the client keyring
[root@ceph01 ceph]# ceph auth get client.rbd -o ./ceph.client.rbd.keyring
exported keyring for client.rbd
2.4 Enable RBD on the Pool
[root@ceph01 ceph]# ceph osd pool application enable rbd rbd
enabled application 'rbd' on pool 'rbd'
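On Luminous and later releases, rbd pool init can be used instead; it prepares the pool for RBD and tags the rbd application in one step (a sketch, not run above):
# initialize the pool for RBD use
rbd pool init rbd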
Client Configuration
3.1 Install the Client
yum install ceph-common -y
3.2 Sync the rbd User's Keyring and the Configuration File
[root@ceph01 ceph]# cp ceph.client.rbd.keyring /etc/ceph/
[root@ceph01 ceph]# ls /etc/ceph/
ceph.client.admin.keyring ceph.conf tmpr6slZ5
ceph.client.rbd.keyring rbdmap
# Check the cluster status with the newly created rbd user
[root@ceph01 ceph]# ceph -s --user rbd
cluster:
id: 2cecb349-bd00-42de-98e9-d9948157de50
health: HEALTH_OK
services:
mon: 1 daemons, quorum ceph01
mgr: ceph01(active), standbys: ceph03, ceph02
mds: 32-1/1/1 up {0=ceph01=up:active}, 1 up:standby
osd: 3 osds: 3 up, 3 in
data:
pools: 8 pools, 232 pgs
objects: 241 objects, 8.4 KiB
usage: 20 GiB used, 40 GiB / 60 GiB avail
pgs: 232 active+clean
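In this walkthrough the "client" is the monitor node ceph01 itself. For a dedicated client host (the name client01 below is hypothetical), the cluster configuration and the rbd keyring would be copied over first, for example:
# copy the cluster config and the rbd keyring to a hypothetical client host
scp /etc/ceph/ceph.conf /etc/ceph/ceph.client.rbd.keyring client01:/etc/ceph/
# then check the cluster status from that host
ssh client01 ceph -s --user rbd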
3.3 Create Ceph Images
[root@ceph01 ceph]# rbd create rbd1 -p rbd --size 1G --user rbd
[root@ceph01 ceph]# rbd create rbd/rbd2 --size 2G --user rbd
3.4 View the Created Images
# List the created images
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 1 GiB 2
rbd2 2 GiB 2
# View the list in JSON format
[root@ceph01 ceph]# rbd ls -p rbd -l --format json --user rbd --pretty-format
[
{
"image": "rbd1",
"size": 1073741824,
"format": 2
},
{
"image": "rbd2",
"size": 2147483648,
"format": 2
}
]
# Show detailed information about an image
[root@ceph01 ceph]# rbd info rbd1 --user rbd
rbd image 'rbd1':
size 1 GiB in 256 objects
order 22 (4 MiB objects)
id: 126f6b8b4567
block_name_prefix: rbd_data.126f6b8b4567
format: 2
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
op_features:
flags:
create_timestamp: Fri Apr 23 15:37:59 2021
3.5 Disable Image Features
By default an image is created with the following features:
features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
For a kernel-mapped RBD image, only layering is normally needed (older kernels, such as the CentOS 7 kernel, do not support most of the other features), so disable the rest.
# Disable image features
[root@ceph01 ceph]# rbd feature disable rbd/rbd1 exclusive-lock, object-map, fast-diff, deep-flatten --user rbd
[root@ceph01 ceph]# rbd feature disable rbd/rbd2 exclusive-lock, object-map, fast-diff, deep-flatten --user rbd
# View the detailed info again
[root@ceph01 ceph]# rbd info rbd/rbd1 --user rbd
rbd image 'rbd1':
size 1 GiB in 256 objects
order 22 (4 MiB objects)
id: 126f6b8b4567
block_name_prefix: rbd_data.126f6b8b4567
format: 2
features: layering
op_features:
flags:
create_timestamp: Fri Apr 23 15:37:59 2021
[root@ceph01 ceph]# rbd info rbd/rbd2 --user rbd
rbd image 'rbd2':
size 2 GiB in 512 objects
order 22 (4 MiB objects)
id: 12716b8b4567
block_name_prefix: rbd_data.12716b8b4567
format: 2
features: layering
op_features:
flags:
create_timestamp: Fri Apr 23 15:42:42 2021
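Instead of disabling features after creation, an image can also be created with only the layering feature from the start. A sketch (rbd3 is a hypothetical image name; --image-feature may be repeated to enable several features):
# create an image that has only the layering feature
rbd create rbd/rbd3 --size 1G --image-feature layering --user rbd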
3.6 Map the Image on the Client
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 1 GiB 2
rbd2 2 GiB 2
# Map the RBD image to the client host
[root@ceph01 ceph]# rbd map rbd/rbd1 --user rbd
/dev/rbd0
# Verify the mapped device on the client
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
rbd0 252:0 0 1G 0 disk
3.7 Create a Filesystem
# Format the device
[root@ceph01 ceph]# mkfs.xfs /dev/rbd0
meta-data=/dev/rbd0 isize=512 agcount=8, agsize=32768 blks
= sectsz=512 attr=2, projid32bit=1
= crc=1 finobt=0, sparse=0
data = bsize=4096 blocks=262144, imaxpct=25
= sunit=1024 swidth=1024 blks
naming =version 2 bsize=4096 ascii-ci=0 ftype=1
log =internal log bsize=4096 blocks=2560, version=2
= sectsz=512 sunit=8 blks, lazy-count=1
realtime =none extsz=4096 blocks=0, rtextents=0
# Create a mount point and mount the device
[root@ceph01 ceph]# mkdir /data/ceph-disk0
[root@ceph01 ceph]# mount /dev/rbd0 /data/ceph-disk0
[root@ceph01 ceph]# df -Th
Filesystem Type Size Used Avail Use% Mounted on
devtmpfs devtmpfs 1.9G 0 1.9G 0% /dev
tmpfs tmpfs 1.9G 0 1.9G 0% /dev/shm
tmpfs tmpfs 1.9G 59M 1.8G 4% /run
tmpfs tmpfs 1.9G 0 1.9G 0% /sys/fs/cgroup
/dev/mapper/centos-root xfs 17G 2.2G 15G 13% /
/dev/sda1 xfs 1014M 150M 865M 15% /boot
tmpfs tmpfs 1.9G 52K 1.9G 1% /var/lib/ceph/osd/ceph-0
tmpfs tmpfs 378M 0 378M 0% /run/user/0
/dev/rbd0 xfs 1014M 33M 982M 4% /data/ceph-disk0
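The mapping and mount above do not survive a reboot. ceph-common ships an rbdmap service that re-maps listed images at boot; a minimal sketch, assuming the keyring path used earlier:
# /etc/ceph/rbdmap -- one image per line: pool/image followed by map options
# rbd/rbd1    id=rbd,keyring=/etc/ceph/ceph.client.rbd.keyring
# /etc/fstab -- mount via the persistent /dev/rbd/<pool>/<image> symlink;
# noauto lets the rbdmap service mount it after the image has been mapped
# /dev/rbd/rbd/rbd1    /data/ceph-disk0    xfs    noauto    0 0
systemctl enable rbdmap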
3.8 Unmount and Unmap on the Client
[root@ceph01 ceph]# umount /dev/rbd0
# View local image mappings
[root@ceph01 ceph]# rbd showmapped --user rbd
id pool image snap device
0 rbd rbd1 - /dev/rbd0
# Unmap the image
[root@ceph01 ceph]# rbd unmap rbd/rbd1 --user rbd
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
3.9 Resize an Image
[root@ceph01 ceph]# rbd resize -s 3G rbd/rbd1 --user rbd
Resizing image: 100% complete...done.
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
rbd2 2 GiB 2
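Growing the image does not grow the filesystem inside it. If rbd1 is mapped and its XFS filesystem is mounted, the filesystem can then be expanded online; a sketch assuming the mount point used earlier:
# grow the mounted XFS filesystem to fill the resized image
xfs_growfs /data/ceph-disk0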
3.10 Delete an Image
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
rbd2 2 GiB 2
[root@ceph01 ceph]# rbd rm rbd2 --user rbd
Removing image: 100% complete...done.
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
3.11 Move an Image to the Trash
# List the images
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
# Move rbd1 to the trash
[root@ceph01 ceph]# rbd trash move rbd/rbd1 --user rbd
[root@ceph01 ceph]# rbd ls -l --user rbd
# View the trash
[root@ceph01 ceph]# rbd trash list -p rbd --user rbd
126f6b8b4567 rbd1
3.12 Restore an Image from the Trash
[root@ceph01 ceph]# rbd trash list -p rbd --user rbd
126f6b8b4567 rbd1
[root@ceph01 ceph]# rbd trash restore -p rbd --image rbd1 --image-id 126f6b8b4567 --user rbd
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
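Had the image not been restored, it could instead have been deleted permanently from the trash by its image id; a sketch using the id shown above:
# permanently remove an image from the trash (this cannot be undone)
rbd trash remove -p rbd 126f6b8b4567 --user rbd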
RBD Snapshots
4.1 Preparation Before Taking a Snapshot
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
[root@ceph01 ceph]# rbd map rbd/rbd1 --user rbd
/dev/rbd0
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
rbd0 252:0 0 3G 0 disk
[root@ceph01 ceph]# mount /dev/rbd0 /data/ceph-disk0
[root@ceph01 ceph]# echo "This is Test 01 " >/data/ceph-disk0/test01.txt
[root@ceph01 ceph]# echo "This is Test 02 " >/data/ceph-disk0/test02.txt
[root@ceph01 ceph]# ls /data/ceph-disk0/
test01.txt test02.txt
4.2 Create a Snapshot
[root@ceph01 ceph]# rbd snap create rbd/rbd1@snap1-20210423 --user rbd
[root@ceph01 ceph]# rbd snap list rbd/rbd1 --user rbd
SNAPID NAME SIZE TIMESTAMP
4 snap1-20210423 3 GiB Fri Apr 23 17:53:38 2021
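A snapshot taken while the filesystem is mounted and receiving writes is only crash-consistent. For a cleaner snapshot, I/O can be quiesced first by freezing the filesystem; a sketch (snap2-20210423 is a hypothetical snapshot name):
# freeze the filesystem, take the snapshot, then thaw it
fsfreeze --freeze /data/ceph-disk0
rbd snap create rbd/rbd1@snap2-20210423 --user rbd
fsfreeze --unfreeze /data/ceph-disk0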
4.3 Delete Test Data to Verify the Rollback Later
[root@ceph01 ceph]# ls /data/ceph-disk0/
test01.txt test02.txt
# To verify that the rollback restores data correctly, delete test02.txt here
[root@ceph01 ceph]# rm /data/ceph-disk0/test02.txt -rf
[root@ceph01 ceph]# ls /data/ceph-disk0/
test01.txt
4.4 Unmount and Unmap the Image
[root@ceph01 ceph]# umount /dev/rbd0
[root@ceph01 ceph]# rbd unmap rbd/rbd1 --user rbd
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
4.5 Roll Back the Snapshot
[root@ceph01 ceph]# rbd snap list rbd/rbd1 --user rbd
SNAPID NAME SIZE TIMESTAMP
4 snap1-20210423 3 GiB Fri Apr 23 17:53:38 2021
[root@ceph01 ceph]# rbd snap rollback rbd/rbd1@snap1-20210423 --user rbd
Rolling back to snapshot: 100% complete...done
4.6 Re-map the Image and Verify the Data
[root@ceph01 ceph]# rbd map rbd/rbd1 --user rbd
/dev/rbd0
[root@ceph01 ceph]# lsblk
NAME MAJ:MIN RM SIZE RO TYPE MOUNTPOINT
sda 8:0 0 20G 0 disk
├─sda1 8:1 0 1G 0 part /boot
└─sda2 8:2 0 19G 0 part
├─centos-root 253:0 0 17G 0 lvm /
└─centos-swap 253:1 0 2G 0 lvm [SWAP]
sdb 8:16 0 20G 0 disk
└─ceph--2c2a40e1--5672--4d7b--a2fa--54e87e2af0b1-osd--block--894dedaf--6740--4607--8375--1a459e57c705
253:2 0 20G 0 lvm
sr0 11:0 1 1024M 0 rom
rbd0 252:0 0 3G 0 disk
[root@ceph01 ceph]# mount /dev/rbd0 /data/ceph-disk0
# Verify the data is back to the state captured by the snapshot
[root@ceph01 ceph]# ls /data/ceph-disk0/
test01.txt test02.txt
[root@ceph01 ceph]# cat /data/ceph-disk0/test02.txt
This is Test 02
4.7 Delete a Snapshot
[root@ceph01 ceph]# rbd snap list rbd/rbd1 --user rbd
SNAPID NAME SIZE TIMESTAMP
4 snap1-20210423 3 GiB Fri Apr 23 17:53:38 2021
[root@ceph01 ceph]# rbd snap rm rbd/rbd1@snap1-20210423 --user rbd
Removing snap: 100% complete...done.
[root@ceph01 ceph]# rbd snap list rbd/rbd1 --user rbd
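To delete every snapshot of an image in one step, rbd snap purge can be used (it does not remove protected snapshots); a sketch:
# remove all unprotected snapshots of rbd1 at once
rbd snap purge rbd/rbd1 --user rbd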
4.8 Limit the Number of Snapshots
[root@ceph01 ceph]# rbd snap limit set rbd/rbd1 --limit 10 --user rbd
4.9 Clear the Snapshot Limit
[root@ceph01 ceph]# rbd snap limit clear rbd/rbd1 --user rbd
Clones
Clones are built on snapshots and use copy-on-write (COW), i.e. "copy when written". A newly created clone only records a logical mapping to its source snapshot; no physical space is allocated for it yet. Although the snapshot itself is read-only, a clone created from it is both readable and writable. Only when the clone is written to does the cluster allocate real physical space for the written data; reads of unwritten regions are served from the parent snapshot. Whether or not it has been written to, a clone behaves like an ordinary image. Because a clone depends on its parent snapshot, deleting that snapshot would destroy the clone, so the snapshot must be protected, as the workflow below shows.
5.1 Create a Clone
# Create a snapshot
[root@ceph01 ceph]# rbd snap create rbd/rbd1@snap1-20210423 --user rbd
# Before cloning, the parent snapshot must be protected; the error below is the result of skipping that step
[root@ceph01 ceph]# rbd clone rbd/rbd1@snap1-20210423 rbd/rbd1-snap1-20210423-clone --user rbd
2021-04-23 18:12:37.968 7f8f7081c840 -1 librbd::image::CloneRequest: parent snapshot must be protected
rbd: clone error: (22) Invalid argument
# Protect the snapshot
[root@ceph01 ceph]# rbd snap protect rbd/rbd1@snap1-20210423 --user rbd
[root@ceph01 ceph]# rbd snap list rbd/rbd1 --user rbd
SNAPID NAME SIZE TIMESTAMP
6 snap1-20210423 3 GiB Fri Apr 23 18:11:57 2021
# Create the clone
[root@ceph01 ceph]# rbd clone rbd/rbd1@snap1-20210423 rbd/rbd1-snap1-20210423-clone --user rbd
# View the clone
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
rbd1@snap1-20210423 3 GiB 2 yes
rbd1-snap1-20210423-clone 3 GiB rbd/rbd1@snap1-20210423 2
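The clones that depend on a given snapshot can also be listed directly; a short sketch:
# list all clones of the protected snapshot
rbd children rbd/rbd1@snap1-20210423 --user rbd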
# View detailed information about the clone
[root@ceph01 ceph]# rbd info rbd1-snap1-20210423-clone --user rbd
rbd image 'rbd1-snap1-20210423-clone':
size 3 GiB in 768 objects
order 22 (4 MiB objects)
id: 12bc6b8b4567
block_name_prefix: rbd_data.12bc6b8b4567
format: 2
features: layering
op_features:
flags:
create_timestamp: Fri Apr 23 18:16:08 2021
parent: rbd/rbd1@snap1-20210423
overlap: 3 GiB
A freshly cloned image depends on its parent snapshot, as the parent and overlap fields show.
To make the clone independent of the snapshot, flatten it, which copies the parent data into the clone.
# Flatten the clone
[root@ceph01 ceph]# rbd flatten rbd/rbd1-snap1-20210423-clone --user rbd
Image flatten: 100% complete...done.
# Verify the clone now stands alone: the parent and overlap fields are gone
[root@ceph01 ceph]# rbd info rbd/rbd1-snap1-20210423-clone --user rbd
rbd image 'rbd1-snap1-20210423-clone':
size 3 GiB in 768 objects
order 22 (4 MiB objects)
id: 12bc6b8b4567
block_name_prefix: rbd_data.12bc6b8b4567
format: 2
features: layering
op_features:
flags:
create_timestamp: Fri Apr 23 18:16:08 2021
# Once the snapshot is no longer needed, it can be deleted here (note: a protected snapshot must be unprotected first)
[root@ceph01 ceph]# rbd info rbd/rbd1@snap1-20210423 --user rbd
rbd image 'rbd1':
size 3 GiB in 768 objects
order 22 (4 MiB objects)
id: 126f6b8b4567
block_name_prefix: rbd_data.126f6b8b4567
format: 2
features: layering
op_features:
flags:
create_timestamp: Fri Apr 23 15:37:59 2021
protected: True
# Unprotect the snapshot
[root@ceph01 ceph]# rbd snap unprotect rbd/rbd1@snap1-20210423 --user rbd
[root@ceph01 ceph]# rbd info rbd/rbd1@snap1-20210423 --user rbd
rbd image 'rbd1':
size 3 GiB in 768 objects
order 22 (4 MiB objects)
id: 126f6b8b4567
block_name_prefix: rbd_data.126f6b8b4567
format: 2
features: layering
op_features:
flags:
create_timestamp: Fri Apr 23 15:37:59 2021
protected: False
# Delete the snapshot
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
rbd1@snap1-20210423 3 GiB 2
rbd1-snap1-20210423-clone 3 GiB 2
[root@ceph01 ceph]# rbd snap rm rbd/rbd1@snap1-20210423 --user rbd
Removing snap: 100% complete...done.
[root@ceph01 ceph]# rbd ls -l --user rbd
NAME SIZE PARENT FMT PROT LOCK
rbd1 3 GiB 2
rbd1-snap1-20210423-clone 3 GiB 2
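The flattened clone can be used like any other image. A sketch of mapping and mounting it alongside rbd1 (the mount point /data/ceph-clone0 is hypothetical, and the device name depends on what rbd map prints); because the clone carries the same XFS UUID as rbd1, nouuid is required while rbd1 is still mounted:
# map the clone and mount it at a separate, hypothetical mount point
rbd map rbd/rbd1-snap1-20210423-clone --user rbd
mkdir -p /data/ceph-clone0
mount -o nouuid /dev/rbd1 /data/ceph-clone0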