1. Mounting CephFS as a regular user (multiple hosts can mount simultaneously using either a secret or a secretfile)
A client can mount CephFS in two ways: from kernel space or from user space. A kernel-space mount requires the kernel to support the ceph module, while a user-space mount requires installing ceph-fuse; the kernel mount is normally recommended.
A kernel-space mount can authenticate either with a secretfile or with a secret passed on the command line, and multiple hosts can mount the same filesystem at the same time.
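For reference, a minimal user-space mount sketch with ceph-fuse (an assumption-laden example: it presumes ceph-fuse is installed on the client and that the client.tom credentials created in 1.1 below have already been copied to /etc/ceph/; the mount point /data-fuse is only an example name):
#User-space mount sketch with ceph-fuse
[root@localhost ~]# yum install ceph-fuse -y
[root@localhost ~]# mkdir -p /data-fuse
[root@localhost ~]# ceph-fuse -n client.tom -m 172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789 /data-fuse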
1.1 Create a client account
#Create the account
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth add client.tom mon 'allow r' mds 'allow rw' osd 'allow rwx pool=cephfs-data'
added key for client.tom
#Verify the account
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.tom
[client.tom]
key = AQBuulVlnzDbLRAASxw0t/S2xyVSocSaGVn2dA==
caps mds = "allow rw"
caps mon = "allow r"
caps osd = "allow rwx pool=cephfs-data"
exported keyring for client.tom
#Create the keyring file
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth get client.tom -o ceph.client.tom.keyring
exported keyring for client.tom
#Create the key file
cephadmin@ceph-deploy:~/ceph-cluster$ ceph auth print-key client.tom > tom.key
#Verify the user's keyring file
cephadmin@ceph-deploy:~/ceph-cluster$ cat ceph.client.tom.keyring
[client.tom]
key = AQBuulVlnzDbLRAASxw0t/S2xyVSocSaGVn2dA==
caps mds = "allow rw"
caps mon = "allow r"
caps osd = "allow rwx pool=cephfs-data"
1.2 Install the Ceph client
#The client servers run CentOS 7; execute the following on both client servers
[root@localhost data]# yum install epel-release -y
[root@localhost ~]# yum install https://mirrors.aliyun.com/ceph/rpm-octopus/el7/noarch/ceph-release-1-1.el7.noarch.rpm
[root@localhost ~]# yum install ceph-common -y
1.3 Distribute the client authentication files
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.tom.keyring tom.key root@172.20.20.129:/etc/ceph/
root@172.20.20.129's password:
ceph.conf 100% 314 206.7KB/s 00:00
ceph.client.tom.keyring 100% 147 127.7KB/s 00:00
tom.key 100% 40 35.4KB/s 00:00
cephadmin@ceph-deploy:~/ceph-cluster$ scp ceph.conf ceph.client.tom.keyring tom.key root@172.20.20.130:/etc/ceph/
root@172.20.20.130's password:
ceph.conf 100% 314 185.8KB/s 00:00
ceph.client.tom.keyring 100% 147 117.1KB/s 00:00
tom.key 100% 40 36.2KB/s 00:00
1.4 Verify permissions on the client
[root@localhost ceph]# ceph --user tom -s
cluster:
id: 3586e7d1-9315-44e5-85bd-6bd3787ce574
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3 (age 5h)
mgr: ceph-mgr1(active, since 3w), standbys: ceph-mgr2
mds: 1/1 daemons up
osd: 20 osds: 20 up (since 4h), 20 in (since 4h)
data:
volumes: 1/1 healthy
pools: 5 pools, 161 pgs
objects: 118 objects, 143 MiB
usage: 6.0 GiB used, 1.9 TiB / 2.0 TiB avail
pgs: 161 active+clean
1.5 Mount on the client using the key file
#Create the mount point
[root@localhost /]# mkdir /data
#Mount CephFS
[root@localhost /]# mount -t ceph 172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ /data -o name=tom,secretfile=/etc/ceph/tom.key
#Parameter explanation:
#-t ceph: the filesystem type; the server needs ceph-common installed so the kernel can support the ceph filesystem.
#172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ : the Ceph cluster mon nodes and the ceph-mon service port; the source path being mounted is /.
#-o name: the Ceph user, i.e. the authentication user created on the Ceph cluster side.
#-o secretfile: the key file of the Ceph authentication user.
#Verify the mount
[root@localhost /]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 898M 0 898M 0% /dev
tmpfs 910M 0 910M 0% /dev/shm
tmpfs 910M 9.5M 901M 2% /run
tmpfs 910M 0 910M 0% /sys/fs/cgroup
/dev/mapper/centos-root 17G 1.8G 16G 11% /
/dev/sda1 1014M 195M 820M 20% /boot
tmpfs 182M 0 182M 0% /run/user/0
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 632G 0 632G 0% /data
#Test writing a file
[root@localhost /]# cp /etc/passwd /data/
[root@localhost /]# cd /data/
[root@localhost data]# ls
passwd
1.6 Mount on the client using the key
#Keep the existing key-file mount on the first server, and mount on another server using the key string directly; multiple hosts can mount via either the key file or the key.
#Mount CephFS
[root@localhost ceph]# cat /etc/ceph/tom.key
AQBuulVlnzDbLRAASxw0t/S2xyVSocSaGVn2dA==
[root@localhost ceph]# mkdir /data
[root@localhost ceph]# mount -t ceph 172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ /data -o name=tom,secret=AQBuulVlnzDbLRAASxw0t/S2xyVSocSaGVn2dA==
#Verify the mount
[root@localhost ceph]# df -h
Filesystem Size Used Avail Use% Mounted on
devtmpfs 898M 0 898M 0% /dev
tmpfs 910M 0 910M 0% /dev/shm
tmpfs 910M 9.6M 901M 2% /run
tmpfs 910M 0 910M 0% /sys/fs/cgroup
/dev/mapper/centos-root 17G 1.9G 16G 11% /
/dev/sda1 1014M 226M 789M 23% /boot
tmpfs 182M 0 182M 0% /run/user/0
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ 632G 0 632G 0% /data
#Check the file written from the first server, then test writing again
[root@localhost ceph]# cd /data/
[root@localhost data]# ls
passwd
[root@localhost data]# cp /etc/issue /data/
Check the written files on both servers.
1.7 Mount at boot
#Add the CephFS mount to /etc/fstab
[root@localhost ~]# cat /etc/fstab
...
172.20.20.221:6789,172.20.20.222:6789,172.20.20.223:6789:/ /data ceph defaults,name=tom,secretfile=/etc/ceph/tom.key,_netdev 0 0
[root@localhost ~]# mount -a
#Verify after reboot
[root@localhost ~]# reboot
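A quick post-reboot check (a sketch; the exact output depends on the environment):
#Confirm the fstab entry was mounted automatically after the reboot
[root@localhost ~]# mount | grep ceph
[root@localhost ~]# df -h /data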
2. Implementing a multi-active plus standby high-availability MDS architecture
Ceph MDS is the access entry point for CephFS, so it needs both high performance and redundancy. MDS supports a multi-MDS architecture, even one similar to the Redis Cluster multi-master/replica model, to provide high performance and high availability. For example, with 4 MDS daemons running and max_mds set to 2, two MDS daemons become active and the other two act as standbys.
You can also dedicate a standby MDS to each active one, so that if an active MDS fails, its standby takes over immediately and continues serving metadata reads and writes. The common options for configuring standby MDS daemons are:
mds_standby_replay
Value is true or false.
true enables replay mode, in which the standby continuously replays the active MDS's journal, so that if the active MDS goes down the standby can take over quickly.
If false, the standby only replays the journal after the active MDS fails, which causes a short interruption.
mds_standby_for_name
Makes the current MDS daemon a standby only for the MDS with the specified name.
mds_standby_for_rank
Makes the current MDS daemon a standby only for the specified rank, usually given as the rank number. When multiple CephFS filesystems exist, mds_standby_for_fscid can additionally be used to target a specific filesystem.
mds_standby_for_fscid
Specifies the CephFS filesystem ID and works together with mds_standby_for_rank: if mds_standby_for_rank is set, the standby covers that rank of the specified filesystem; if not, it covers all ranks of the specified filesystem.
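Note: on recent releases (including the Pacific version used here), standby-replay is usually enabled per filesystem rather than through the per-daemon mds_standby_* options above; a hedged one-liner for reference (the sections below keep the ceph.conf approach):
#Enable standby-replay at the filesystem level
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs set share1 allow_standby_replay true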
2.1 Add MDS servers
#Check the current MDS status
cephadmin@ceph-deploy:~/ceph-cluster$ ceph mds stat
share1:1 {0=ceph-mgr1=up:active}
#Add ceph-mgr2, ceph-mon2 and ceph-mon3 to the Ceph cluster in the MDS role, to build a two-active/two-standby MDS architecture for high availability and performance
#Install the ceph-mds service
root@ceph-mgr2:~# apt install ceph-mds -y
root@ceph-mon2:~# apt install ceph-mds -y
root@ceph-mon3:~# apt install ceph-mds -y
#Add the MDS servers to the cluster
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy mds create ceph-mgr2
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy mds create ceph-mon2
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy mds create ceph-mon3
#Verify the current MDS status
cephadmin@ceph-deploy:~/ceph-cluster$ ceph mds stat
share1:1 {0=ceph-mgr1=up:active} 3 up:standby # current state: one active, three standby
2.2 Verify the current state of the Ceph cluster
One MDS server is currently active and three MDS servers are in standby.
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs status
share1 - 2 clients
======
RANK STATE MDS ACTIVITY DNS INOS DIRS CAPS
0 active ceph-mgr1 Reqs: 0 /s 12 15 12 6
POOL TYPE USED AVAIL
cephfs-metadata metadata 176k 631G
cephfs-data data 24.0k 631G
STANDBY MDS
ceph-mgr2
ceph-mon2
ceph-mon3
MDS version: ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable)
2.3 Current filesystem state
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs get share1
Filesystem 'share1' (1)
fs_name share1
epoch 8
flags 12
created 2023-11-04T05:21:13.602962+0000
modified 2023-11-16T07:06:29.951089+0000
tableserver 0
root 0
session_timeout 60
session_autoclose 300
max_file_size 1099511627776
required_client_features {}
last_failure 0
last_failure_osd_epoch 0
compat compat={},rocompat={},incompat={1=base v0.20,2=client writeable ranges,3=default file layouts on dirs,4=dir inode in separate object,5=mds uses versioned encoding,6=dirfrag is stored in omap,7=mds uses inline data,8=no anchor table,9=file layout v2,10=snaprealm v2}
max_mds 1
in 0
up {0=5589}
failed
damaged
stopped
data_pools [6]
metadata_pool 5
inline_data disabled
balancer
standby_count_wanted 1
[mds.ceph-mgr1{0:5589} state up:active seq 4998 addr [v2:172.20.20.224:6802/3687737459,v1:172.20.20.224:6803/3687737459] compat {c=[1],r=[1],i=[7ff]}]
2.4 Set the number of active MDS daemons
There are currently four MDS servers in a one-active/three-standby layout; the deployment can be optimized into two active and two standby.
#Set the maximum number of simultaneously active MDS daemons to 2
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs set share1 max_mds 2
#Verify the state: ceph-mgr1 and ceph-mon3 are active, ceph-mgr2 and ceph-mon2 are standby
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs status
share1 - 2 clients
======
RANK STATE MDS ACTIVITY DNS INOS DIRS CAPS
0 active ceph-mgr1 Reqs: 0 /s 12 15 12 6
1 active ceph-mon3 Reqs: 0 /s 10 13 11 0
POOL TYPE USED AVAIL
cephfs-metadata metadata 224k 631G
cephfs-data data 24.0k 631G
STANDBY MDS
ceph-mgr2
ceph-mon2
MDS version: ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable)
2.5 MDS high-availability tuning
Currently ceph-mgr1 and ceph-mon3 are active while ceph-mgr2 and ceph-mon2 are standby. We can now make ceph-mgr2 the standby for ceph-mgr1 and ceph-mon3 the standby for ceph-mon2, so that each active MDS has a fixed standby.
If all four MDS daemons were made active, filesystem read/write throughput would improve, but the failure of any node would trigger an MDS rank migration and cause read latency.
#Edit the configuration file
cephadmin@ceph-deploy:~/ceph-cluster$ cat ceph.conf
[global]
fsid = 3586e7d1-9315-44e5-85bd-6bd3787ce574
public_network = 172.20.20.0/24
cluster_network = 192.168.20.0/24
mon_initial_members = ceph-mon1,ceph-mon2,ceph-mon3
mon_host = 172.20.20.221,172.20.20.222,172.20.20.223
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
#Add the following
[mds.ceph-mgr2]
mds_standby_for_name = ceph-mgr1
mds_standby_replay = true
[mds.ceph-mgr1]
mds_standby_for_name = ceph-mgr2
mds_standby_replay = true
[mds.ceph-mon3]
mds_standby_for_name = ceph-mon2
mds_standby_replay = true
[mds.ceph-mon2]
mds_standby_for_name = ceph-mon3
mds_standby_replay = true
2.6 Push the configuration and restart the MDS services
#Push the configuration file to each MDS server, then restart the services for it to take effect
#--overwrite-conf: use the current ceph.conf of ceph-deploy as the source of truth and overwrite /etc/ceph/ceph.conf on the listed nodes
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy --overwrite-conf config push ceph-mon2
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy --overwrite-conf config push ceph-mon3
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy --overwrite-conf config push ceph-mgr1
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy --overwrite-conf config push ceph-mgr2
#Reload and restart the MDS services; restart the standby MDS nodes first and the active nodes last, which triggers an active role switchover
root@ceph-mon2:~# systemctl daemon-reload
root@ceph-mon2:~# systemctl restart ceph-mds@ceph-mon2.service
root@ceph-mgr2:~# systemctl daemon-reload
root@ceph-mgr2:~# systemctl restart ceph-mds@ceph-mgr2.service
root@ceph-mon3:~# systemctl daemon-reload
root@ceph-mon3:~# systemctl restart ceph-mds@ceph-mon3.service
root@ceph-mgr1:~# systemctl daemon-reload
root@ceph-mgr1:~# systemctl restart ceph-mds@ceph-mgr1.service
2.7 Ceph cluster MDS high-availability state
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs status
share1 - 2 clients
======
RANK STATE MDS ACTIVITY DNS INOS DIRS CAPS
0 active ceph-mon3 Reqs: 0 /s 12 15 12 2
1 active ceph-mon2 Reqs: 0 /s 10 13 11 0
POOL TYPE USED AVAIL
cephfs-metadata metadata 248k 631G
cephfs-data data 24.0k 631G
STANDBY MDS
ceph-mgr1
ceph-mgr2
MDS version: ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable)
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs get share1
Filesystem 'share1' (1)
fs_name share1
epoch 28
flags 12
created 2023-11-04T05:21:13.602962+0000
modified 2023-11-16T08:58:46.106448+0000
tableserver 0
root 0
session_timeout 60
session_autoclose 300
max_file_size 1099511627776
required_client_features {}
last_failure 0
last_failure_osd_epoch 222
compat compat={},rocompat={},incompat={1=base v0.20,2=client writeable ranges,3=default file layouts on dirs,4=dir inode in separate object,5=mds uses versioned encoding,6=dirfrag is stored in omap,7=mds uses inline data,8=no anchor table,9=file layout v2,10=snaprealm v2}
max_mds 2
in 0,1
up {0=25706,1=15622}
failed
damaged
stopped
data_pools [6]
metadata_pool 5
inline_data disabled
balancer
standby_count_wanted 1
[mds.ceph-mon3{0:25706} state up:active seq 5 addr [v2:172.20.20.223:6800/2282954885,v1:172.20.20.223:6801/2282954885] compat {c=[1],r=[1],i=[7ff]}]
[mds.ceph-mon2{1:15622} state up:active seq 12 addr [v2:172.20.20.222:6800/4266848202,v1:172.20.20.222:6801/4266848202] compat {c=[1],r=[1],i=[7ff]}]
2.8 MDS failover process
Failure --> replay (the standby replays the MDS journal) --> resolve (uncommitted cross-MDS operations are resolved) --> reconnect (clients reconnect) --> rejoin (the cache is rejoined into the MDS cluster) --> active (takeover complete)
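To watch these state transitions while running the restart test, one option (a sketch; any refresh interval works) is to keep the MDS status on screen from the deploy node:
#Watch the MDS states change during the failover test
cephadmin@ceph-deploy:~/ceph-cluster$ watch -n 1 "ceph fs status; ceph mds stat"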
#After again restarting the standby nodes first and then the active node, check the active node's log
cephadmin@ceph-deploy:~/ceph-cluster$ ceph fs status
share1 - 2 clients
======
RANK STATE MDS ACTIVITY DNS INOS DIRS CAPS
0 active ceph-mgr1 Reqs: 0 /s 12 15 12 2
1 active ceph-mon3 Reqs: 0 /s 10 13 3 0
POOL TYPE USED AVAIL
cephfs-metadata metadata 248k 631G
cephfs-data data 24.0k 631G
STANDBY MDS
ceph-mgr2
ceph-mon2
MDS version: ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable)
root@ceph-mgr1:~# tail -100f /var/log/ceph/ceph-mds.ceph-mgr1.log
2023-11-16T07:02:09.858+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 6 from mon.0
2023-11-16T07:06:21.046+0000 7f76c0f3d700 0 log_channel(cluster) log [WRN] : evicting unresponsive client localhost.localdomain:tom (6066), after 303.324 seconds
2023-11-16T07:06:21.046+0000 7f76c0f3d700 1 mds.0.3 Evicting (and blocklisting) client session 6066 (v1:172.20.20.129:0/1080825910)
2023-11-16T07:06:21.046+0000 7f76c0f3d700 0 log_channel(cluster) log [INF] : Evicting (and blocklisting) client session 6066 (v1:172.20.20.129:0/1080825910)
2023-11-16T07:06:22.134+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 7 from mon.0
2023-11-16T07:06:29.982+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 8 from mon.0
2023-11-16T08:54:33.010+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 13 from mon.0
2023-11-16T08:54:33.018+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 14 from mon.0
2023-11-16T08:54:34.026+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 15 from mon.0
2023-11-16T08:54:36.662+0000 7f76bff3b700 -1 mds.pinger is_rank_lagging: rank=1 was never sent ping request.
2023-11-16T08:58:34.544+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 18 from mon.0
2023-11-16T08:58:34.544+0000 7f76c2740700 1 mds.0.cache handle_mds_failure mds.1 : recovery peers are
2023-11-16T08:58:34.556+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 19 from mon.0
2023-11-16T08:58:34.556+0000 7f76c2740700 1 mds.0.cache handle_mds_failure mds.1 : recovery peers are
2023-11-16T08:58:35.576+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 20 from mon.0
2023-11-16T08:58:35.576+0000 7f76c2740700 1 mds.0.3 recovery set is 1
2023-11-16T08:58:36.052+0000 7f76c4f45700 0 --2- [v2:172.20.20.224:6802/3687737459,v1:172.20.20.224:6803/3687737459] >> [v2:172.20.20.223:6800/3883060193,v1:172.20.20.223:6801/3883060193] conn(0x56038d9e9800 0x56038dbdc300 crc :-1 s=SESSION_ACCEPTING pgs=24 cs=0 l=0 rev1=1 rx=0 tx=0).handle_reconnect no existing connection exists, reseting client
2023-11-16T08:58:36.580+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 21 from mon.0
2023-11-16T08:58:37.584+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 22 from mon.0
2023-11-16T08:58:37.584+0000 7f76c2740700 1 mds.0.3 rejoin_joint_start
2023-11-16T08:58:38.588+0000 7f76c2740700 1 mds.ceph-mgr1 Updating MDS map to version 23 from mon.0
2023-11-16T08:58:38.588+0000 7f76c2740700 1 mds.0.3 cluster recovered.
2023-11-16T08:58:41.676+0000 7f76bff3b700 -1 mds.pinger is_rank_lagging: rank=1 was never sent ping request.
2023-11-16T08:58:41.940+0000 7f76c3f43700 -1 received signal: Terminated from /sbin/init maybe-ubiquity (PID: 1) UID: 0
2023-11-16T08:58:41.940+0000 7f76c3f43700 -1 mds.ceph-mgr1 *** got signal Terminated ***
2023-11-16T08:58:41.940+0000 7f76c3f43700 1 mds.ceph-mgr1 suicide! Wanted state up:active
2023-11-16T08:58:45.608+0000 7f76c3f43700 1 mds.0.3 shutdown: shutting down rank 0
2023-11-16T08:58:45.608+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038dbd8380 osd_map(222..222 src has 1..222) v4 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:45.608+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da829c0 mdsmap(e 24) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:45.608+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da809c0 mdsmap(e 4294967295) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:45.608+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da80b60 mdsmap(e 25) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:45.608+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da80d00 mdsmap(e 26) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:45.608+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da80ea0 mdsmap(e 27) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:48.544+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da81040 mdsmap(e 28) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:48.544+0000 7f76c2740700 0 ms_deliver_dispatch: unhandled message 0x56038da811e0 mdsmap(e 29) v2 from mon.0 v2:172.20.20.221:3300/0
2023-11-16T08:58:48.840+0000 7f03ee903780 0 set uid:gid to 64045:64045 (ceph:ceph)
2023-11-16T08:58:48.840+0000 7f03ee903780 0 ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable), process ceph-mds, pid 126392
2023-11-16T08:58:48.840+0000 7f03ee903780 1 main not setting numa affinity
2023-11-16T08:58:48.840+0000 7f03ee903780 0 pidfile_write: ignore empty --pid-file
2023-11-16T08:58:48.852+0000 7f03ea0a0700 1 mds.ceph-mgr1 Updating MDS map to version 29 from mon.2
2023-11-16T08:58:49.124+0000 7f03ea0a0700 1 mds.ceph-mgr1 Updating MDS map to version 30 from mon.2
2023-11-16T08:58:49.124+0000 7f03ea0a0700 1 mds.ceph-mgr1 Monitors have assigned me to become a standby.
2023-11-16T09:27:33.612+0000 7f03eb8a3700 -1 received signal: Terminated from /sbin/init maybe-ubiquity (PID: 1) UID: 0
2023-11-16T09:27:33.612+0000 7f03eb8a3700 -1 mds.ceph-mgr1 *** got signal Terminated ***
2023-11-16T09:27:33.612+0000 7f03eb8a3700 1 mds.ceph-mgr1 suicide! Wanted state up:standby
2023-11-16T09:27:36.952+0000 7f03ea0a0700 0 ms_deliver_dispatch: unhandled message 0x55c7b8053d40 mdsmap(e 32) v2 from mon.2 v2:172.20.20.223:3300/0
2023-11-16T09:27:36.952+0000 7f03ea0a0700 0 ms_deliver_dispatch: unhandled message 0x55c7b8009380 mdsmap(e 4294967295) v2 from mon.2 v2:172.20.20.223:3300/0
2023-11-16T09:27:37.028+0000 7f525bff9780 0 set uid:gid to 64045:64045 (ceph:ceph)
2023-11-16T09:27:37.028+0000 7f525bff9780 0 ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable), process ceph-mds, pid 127144
2023-11-16T09:27:37.028+0000 7f525bff9780 1 main not setting numa affinity
2023-11-16T09:27:37.028+0000 7f525bff9780 0 pidfile_write: ignore empty --pid-file
2023-11-16T09:27:37.036+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 32 from mon.2
2023-11-16T09:27:37.180+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 33 from mon.2
2023-11-16T09:27:37.184+0000 7f5257796700 1 mds.ceph-mgr1 Monitors have assigned me to become a standby.
2023-11-16T09:27:47.880+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 36 from mon.2
2023-11-16T09:27:47.884+0000 7f5257796700 1 mds.0.36 handle_mds_map i am now mds.0.36
2023-11-16T09:27:47.884+0000 7f5257796700 1 mds.0.36 handle_mds_map state change up:standby --> up:replay #journal replay begins
2023-11-16T09:27:47.884+0000 7f5257796700 1 mds.0.36 replay_start
2023-11-16T09:27:47.884+0000 7f5257796700 1 mds.0.36 waiting for osdmap 226 (which blocklists prior instance)
2023-11-16T09:27:47.920+0000 7f5250f89700 0 mds.0.cache creating system inode with ino:0x100
2023-11-16T09:27:47.920+0000 7f5250f89700 0 mds.0.cache creating system inode with ino:0x1
2023-11-16T09:27:47.940+0000 7f524ff87700 1 mds.0.36 Finished replaying journal
2023-11-16T09:27:47.940+0000 7f524ff87700 1 mds.0.36 making mds journal writeable
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 37 from mon.2
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.0.36 handle_mds_map i am now mds.0.36
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.0.36 handle_mds_map state change up:replay --> up:resolve #resolving uncommitted operations
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.0.36 resolve_start
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.0.36 reopen_log
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.0.36 recovery set is 1
2023-11-16T09:27:48.932+0000 7f5257796700 1 mds.0.36 recovery set is 1
2023-11-16T09:27:48.940+0000 7f5257796700 1 mds.0.36 resolve_done
2023-11-16T09:27:49.924+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 38 from mon.2
2023-11-16T09:27:49.924+0000 7f5257796700 1 mds.0.36 handle_mds_map i am now mds.0.36
2023-11-16T09:27:49.924+0000 7f5257796700 1 mds.0.36 handle_mds_map state change up:resolve --> up:reconnect #clients reconnect
2023-11-16T09:27:49.924+0000 7f5257796700 1 mds.0.36 reconnect_start
2023-11-16T09:27:49.924+0000 7f5257796700 1 mds.0.server reconnect_clients -- 2 sessions
2023-11-16T09:27:49.928+0000 7f5257796700 0 log_channel(cluster) log [DBG] : reconnect by client.15526 v1:172.20.20.130:0/4383656 after 0.00399999
2023-11-16T09:27:49.928+0000 7f5257796700 0 log_channel(cluster) log [DBG] : reconnect by client.15529 v1:172.20.20.129:0/2205456526 after 0.00399999
2023-11-16T09:27:49.928+0000 7f5257796700 1 mds.0.36 reconnect_done
2023-11-16T09:27:50.940+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 39 from mon.2
2023-11-16T09:27:50.940+0000 7f5257796700 1 mds.0.36 handle_mds_map i am now mds.0.36
2023-11-16T09:27:50.940+0000 7f5257796700 1 mds.0.36 handle_mds_map state change up:reconnect --> up:rejoin #rejoining the MDS cluster
2023-11-16T09:27:50.940+0000 7f5257796700 1 mds.0.36 rejoin_start
2023-11-16T09:27:50.940+0000 7f5257796700 1 mds.0.36 rejoin_joint_start
2023-11-16T09:27:50.948+0000 7f525178a700 1 mds.0.36 rejoin_done
2023-11-16T09:27:51.976+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 40 from mon.2
2023-11-16T09:27:51.976+0000 7f5257796700 1 mds.0.36 handle_mds_map i am now mds.0.36
2023-11-16T09:27:51.976+0000 7f5257796700 1 mds.0.cache handle_mds_failure mds.1 : recovery peers are 1
2023-11-16T09:27:51.976+0000 7f5257796700 1 mds.0.36 handle_mds_map state change up:rejoin --> up:active #takeover complete
2023-11-16T09:27:51.976+0000 7f5257796700 1 mds.0.36 recovery_done -- successful recovery!
2023-11-16T09:27:51.980+0000 7f5257796700 1 mds.0.36 active_start
2023-11-16T09:27:52.004+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 41 from mon.2
2023-11-16T09:27:52.004+0000 7f5257796700 1 mds.0.cache handle_mds_failure mds.1 : recovery peers are 1
2023-11-16T09:27:53.012+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 42 from mon.2
2023-11-16T09:27:53.012+0000 7f5257796700 1 mds.0.36 recovery set is 1
2023-11-16T09:27:54.028+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 43 from mon.2
2023-11-16T09:27:55.048+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 44 from mon.2
2023-11-16T09:27:55.048+0000 7f5257796700 1 mds.0.36 rejoin_joint_start
2023-11-16T09:27:56.064+0000 7f5257796700 1 mds.ceph-mgr1 Updating MDS map to version 45 from mon.2
2023-11-16T09:27:56.064+0000 7f5257796700 1 mds.0.36 cluster recovered.
3. Getting familiar with basic RadosGW usage
3.1 Introduction to the RadosGW object storage gateway
RadosGW is one way of providing object storage (OSS, Object Storage Service) access. The RADOS gateway, also called the Ceph Object Gateway, RadosGW or RGW, is a service that lets clients access a Ceph cluster through standard object storage APIs; it supports the AWS S3 and Swift APIs. Since Ceph 0.8 it has used the Civetweb web server (https://github.com/civetweb/civetweb) to answer API requests. Clients talk to RGW over HTTP/HTTPS using RESTful APIs, while RGW talks to the Ceph cluster through librados. RGW clients authenticate as RGW users via the S3 or Swift API, and the RGW gateway in turn authenticates to the Ceph cluster on the user's behalf via cephx.
Note: S3 was launched by Amazon in 2006; its full name is Simple Storage Service. S3 defined object storage and is its de facto standard; in a sense S3 is object storage and object storage is S3. It dominates the object storage market, and later object storage systems all imitate S3.
3.2 RadosGW storage characteristics
1. The object storage gateway stores data as objects; each object contains the data itself plus the object's metadata.
2. Objects are retrieved by Object ID. They are not accessed through a normal filesystem mount or a path-plus-filename operation, but only through the API or through third-party clients (which are themselves wrappers around the API).
3. Objects are not stored in a vertical directory tree but in a flat namespace. Amazon S3 calls this flat namespace a bucket, while Swift calls it a container. Neither buckets nor containers can be nested (a bucket cannot contain another bucket).
4. A bucket must be authorized before it can be accessed; one account can be granted access to multiple buckets, each with different permissions.
5. Easy horizontal scaling and fast data retrieval.
6. Client-side mounting is not supported, and the client must specify the object name when accessing it.
7. Not well suited to scenarios where files are modified or deleted very frequently.
Ceph uses buckets as storage containers to store object data and isolate users. Data lives in buckets, and user permissions are granted per bucket; different users can be given different permissions on different buckets to implement access control.
Bucket characteristics:
- A bucket is the container that stores objects; every object must belong to a bucket. Bucket attributes such as region, access permissions and lifecycle can be set and modified, and they apply to all objects in that bucket, so different buckets can be created to serve different management needs.
- The inside of a bucket is flat; there is no filesystem-style directory concept, and every object belongs directly to its bucket.
- Each user can own multiple buckets.
- A bucket name must be globally unique within the object storage service and cannot be changed after creation.
- There is no limit on the number of objects inside a bucket.
Bucket naming rules:
- Only lowercase letters, digits and hyphens (-) are allowed.
- The name must start and end with a lowercase letter or a digit.
- The length must be between 3 and 63 characters.
- The bucket name must not be in IP address format.
- The bucket name must be globally unique.
3.3 Deploy the RadosGW service
RadosGW architecture diagram
RadosGW logical diagram
3.3.1 Install the radosgw service
Deploy the ceph-mgr1 and ceph-mgr2 servers as a highly available RadosGW service.
#Install the radosgw service
root@ceph-mgr1:~# apt install radosgw
root@ceph-mgr2:~# apt install radosgw
#Check the service
root@ceph-mgr1:~# radosgw -v
ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable)
root@ceph-mgr2:~# radosgw -v
ceph version 16.2.14 (238ba602515df21ea7ffc75c88db29f9e5ef12c9) pacific (stable)
#On the deploy server, add the ceph-mgr1 and ceph-mgr2 nodes to the cluster as rgw services
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy rgw create ceph-mgr1
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy rgw create ceph-mgr2
#Output from the add operation; it shows the service listens on port 7480 by default
cephadmin@ceph-deploy:~/ceph-cluster$ ceph-deploy rgw create ceph-mgr2
[ceph_deploy.conf][DEBUG ] found configuration file at: /home/cephadmin/.cephdeploy.conf
[ceph_deploy.cli][INFO ] Invoked (2.1.0): /usr/local/bin/ceph-deploy rgw create ceph-mgr2
[ceph_deploy.cli][INFO ] ceph-deploy options:
[ceph_deploy.cli][INFO ] verbose : False
[ceph_deploy.cli][INFO ] quiet : False
[ceph_deploy.cli][INFO ] username : None
[ceph_deploy.cli][INFO ] overwrite_conf : False
[ceph_deploy.cli][INFO ] ceph_conf : None
[ceph_deploy.cli][INFO ] cluster : ceph
[ceph_deploy.cli][INFO ] subcommand : create
[ceph_deploy.cli][INFO ] cd_conf : <ceph_deploy.conf.cephdeploy.Conf object at 0x7fd61ec5a9a0>
[ceph_deploy.cli][INFO ] default_release : False
[ceph_deploy.cli][INFO ] func : <function rgw at 0x7fd61ec3bf70>
[ceph_deploy.cli][INFO ] rgw : [('ceph-mgr2', 'rgw.ceph-mgr2')]
[ceph_deploy.rgw][DEBUG ] Deploying rgw, cluster ceph hosts ceph-mgr2:rgw.ceph-mgr2
[ceph-mgr2][DEBUG ] connection detected need for sudo
[ceph-mgr2][DEBUG ] connected to host: ceph-mgr2
[ceph_deploy.rgw][INFO ] Distro info: ubuntu 20.04 focal
[ceph_deploy.rgw][DEBUG ] remote host will use systemd
[ceph_deploy.rgw][DEBUG ] deploying rgw bootstrap to ceph-mgr2
[ceph-mgr2][WARNIN] rgw keyring does not exist yet, creating one
[ceph-mgr2][INFO ] Running command: sudo ceph --cluster ceph --name client.bootstrap-rgw --keyring /var/lib/ceph/bootstrap-rgw/ceph.keyring auth get-or-create client.rgw.ceph-mgr2 osd allow rwx mon allow rw -o /var/lib/ceph/radosgw/ceph-rgw.ceph-mgr2/keyring
[ceph-mgr2][INFO ] Running command: sudo systemctl enable ceph-radosgw@rgw.ceph-mgr2
[ceph-mgr2][WARNIN] Created symlink /etc/systemd/system/ceph-radosgw.target.wants/ceph-radosgw@rgw.ceph-mgr2.service → /lib/systemd/system/ceph-radosgw@.service.
[ceph-mgr2][INFO ] Running command: sudo systemctl start ceph-radosgw@rgw.ceph-mgr2
[ceph-mgr2][INFO ] Running command: sudo systemctl enable ceph.target
[ceph_deploy.rgw][INFO ] The Ceph Object Gateway (RGW) is now running on host ceph-mgr2 and default port 7480
3.3.2 Verify the radosgw service state
cephadmin@ceph-deploy:~/ceph-cluster$ ceph -s
cluster:
id: 3586e7d1-9315-44e5-85bd-6bd3787ce574
health: HEALTH_OK
services:
mon: 3 daemons, quorum ceph-mon1,ceph-mon2,ceph-mon3 (age 8h)
mgr: ceph-mgr1(active, since 3w), standbys: ceph-mgr2
mds: 2/2 daemons up, 2 standby
osd: 20 osds: 20 up (since 8h), 20 in (since 8h)
rgw: 2 daemons active (2 hosts, 1 zones) # rgw service with 2 active daemons
data:
volumes: 1/1 healthy
pools: 9 pools, 289 pgs
objects: 300 objects, 143 MiB
usage: 6.0 GiB used, 1.9 TiB / 2.0 TiB avail
pgs: 289 active+clean
io:
client: 0 B/s rd, 0 B/s wr, 0 op/s rd, 0 op/s wr
3.3.3 Verify the radosgw processes
root@ceph-mgr1:~# ps -ef |grep radosgw
ceph 128715 1 0 10:00 ? 00:00:00 /usr/bin/radosgw -f --cluster ceph --name client.rgw.ceph-mgr1 --setuser ceph --setgroup ceph
root 129439 115285 0 10:03 pts/1 00:00:00 grep --color=auto radosgw
root@ceph-mgr2:~# ps -ef |grep radosgw
ceph 129800 1 0 10:00 ? 00:00:00 /usr/bin/radosgw -f --cluster ceph --name client.rgw.ceph-mgr2 --setuser ceph --setgroup ceph
root 130540 115646 0 10:03 pts/0 00:00:00 grep --color=auto radosgw
3.3.4 radosgw pool types
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool ls
device_health_metrics
mypool
myrbd1
cephfs-metadata
cephfs-data
.rgw.root
default.rgw.log
default.rgw.control
default.rgw.meta
#View the default radosgw pool information
cephadmin@ceph-deploy:~/ceph-cluster$ radosgw-admin zone get --rgw-zone=default --rgw-zonegroup=default
{
"id": "77719cc6-0e51-4696-85c9-bc60fbaaf3c0", #区域的唯一标识符
"name": "default", #默认区域的名称
"domain_root": "default.rgw.meta:root", #区域的根域名
"control_pool": "default.rgw.control", #系统控制池,在有数据更新是,通知其他RGW更新缓存
"gc_pool": "default.rgw.log:gc", #用于垃圾回收的存储池
"lc_pool": "default.rgw.log:lc", #用于存储日志的存储池
"log_pool": "default.rgw.log", #存储日志信息,用于记录各种log信息
"intent_log_pool": "default.rgw.log:intent",
"usage_log_pool": "default.rgw.log:usage",
"roles_pool": "default.rgw.meta:roles", #default.rgw.meta:元数据存储池,通过不同的名称空间分别存储不同的rados对象
"reshard_pool": "default.rgw.log:reshard",
"user_keys_pool": "default.rgw.meta:users.keys", #用户的密钥名称空间users.keys
"user_email_pool": "default.rgw.meta:users.email", #用户的email名称空间users.email
"user_swift_pool": "default.rgw.meta:users.swift", #用户的subuser的名称空间users.swift
"user_uid_pool": "default.rgw.meta:users.uid", #用户UID
"otp_pool": "default.rgw.otp",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": "default.rgw.buckets.index", #存放bucket到object的索引信息
"storage_classes": {
"STANDARD": {
"data_pool": "default.rgw.buckets.data" #存放对象的数据
}
},
"data_extra_pool": "default.rgw.buckets.non-ec", #数据的额外信息存储池
"index_type": 0
}
}
],
"realm_id": "",
"notif_pool": "default.rgw.log:notif"
}
# The default CRUSH rule is the replicated rule, i.e. three replicas (one primary plus two copies)
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool get default.rgw.meta crush_rule
crush_rule: replicated_rule
# Default replica count is 3
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool get default.rgw.meta size
size: 3
# Default pgp_num is 32
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool get default.rgw.meta pgp_num
pgp_num: 32
# Default pg_num is 32
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd pool get default.rgw.meta pg_num
pg_num: 32
3.3.5 radosgw pool functions
cephadmin@ceph-deploy:~/ceph-cluster$ ceph osd lspools
1 device_health_metrics
2 mypool
4 myrbd1
5 cephfs-metadata
6 cephfs-data
7 .rgw.root
8 default.rgw.log
9 default.rgw.control
10 default.rgw.meta
3.3.6 Verify radosgw zone information
cephadmin@ceph-deploy:~/ceph-cluster$ radosgw-admin zone get --rgw-zone=default
{
"id": "77719cc6-0e51-4696-85c9-bc60fbaaf3c0",
"name": "default",
"domain_root": "default.rgw.meta:root",
"control_pool": "default.rgw.control",
"gc_pool": "default.rgw.log:gc",
"lc_pool": "default.rgw.log:lc",
"log_pool": "default.rgw.log",
"intent_log_pool": "default.rgw.log:intent",
"usage_log_pool": "default.rgw.log:usage",
"roles_pool": "default.rgw.meta:roles",
"reshard_pool": "default.rgw.log:reshard",
"user_keys_pool": "default.rgw.meta:users.keys",
"user_email_pool": "default.rgw.meta:users.email",
"user_swift_pool": "default.rgw.meta:users.swift",
"user_uid_pool": "default.rgw.meta:users.uid",
"otp_pool": "default.rgw.otp",
"system_key": {
"access_key": "",
"secret_key": ""
},
"placement_pools": [
{
"key": "default-placement",
"val": {
"index_pool": "default.rgw.buckets.index",
"storage_classes": {
"STANDARD": {
"data_pool": "default.rgw.buckets.data"
}
},
"data_extra_pool": "default.rgw.buckets.non-ec",
"index_type": 0
}
}
],
"realm_id": "",
"notif_pool": "default.rgw.log:notif"
}
3.3.7 Access the radosgw service
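The original browser screenshot is omitted here; an equivalent command-line check (a sketch, assuming the default port 7480 has not yet been changed):
#An anonymous request returning an empty ListAllMyBucketsResult XML response means the gateway is up
cephadmin@ceph-deploy:~/ceph-cluster$ curl http://172.20.20.224:7480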
3.4 radosgw high availability
3.4.1 radosgw HTTP high availability
3.4.1.1 Customize the HTTP port
The configuration file can be edited on the ceph-deploy server and pushed out from there, or edited separately on each radosgw server; then restart the RGW services.
root@ceph-deploy:~# cat /etc/ceph/ceph.conf
...
[client.rgw.ceph-mgr1]
rgw_host = ceph-mgr1
rgw_frontends = civetweb port=9900
[client.rgw.ceph-mgr2]
rgw_host = ceph-mgr2
rgw_frontends = civetweb port=9900
#Send the file to the radosgw servers
root@ceph-deploy:~# scp /etc/ceph/ceph.conf 172.20.20.224:/etc/ceph/
root@ceph-deploy:~# scp /etc/ceph/ceph.conf 172.20.20.225:/etc/ceph/
#Restart the services
root@ceph-mgr1:~# systemctl restart ceph-radosgw@rgw.ceph-mgr1.service
root@ceph-mgr2:~# systemctl restart ceph-radosgw@rgw.ceph-mgr2.service
Verify
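The verification screenshot is omitted; a command-line sketch on an RGW node:
#Confirm radosgw now listens on the custom port 9900
root@ceph-mgr1:~# ss -ntpl | grep radosgw
root@ceph-mgr1:~# curl http://172.20.20.224:9900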
3.4.1.2 High-availability configuration
Use haproxy as a reverse proxy.
root@k8s-ha1:~# cat /etc/haproxy/haproxy.cfg
...
listen ceph-rgw-80
bind 172.20.20.192:80
mode tcp
server 172.20.20.224 172.20.20.224:9900 check inter 3s fall 3 rise 3
server 172.20.20.225 172.20.20.225:9900 check inter 3s fall 3 rise 3
root@k8s-ha1:~# systemctl restart haproxy.service
Test
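The test screenshot is omitted; a sketch from any host that can reach the VIP:
#Request the RGW service through the haproxy VIP on port 80
root@ceph-deploy:~# curl http://172.20.20.192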
3.4.2 radosgw HTTPS
Generate a self-signed certificate on the RGW nodes and configure radosgw to enable SSL.
3.4.2.1 Self-signed certificate
root@ceph-mgr1:~# mkdir -p /etc/ceph/certs
root@ceph-mgr1:~# cd /etc/ceph/certs/
root@ceph-mgr1:/etc/ceph/certs# openssl genrsa -out jia.key 2048
Generating RSA private key, 2048 bit long modulus (2 primes)
.............................................................+++++
..................................................................................................................+++++
e is 65537 (0x010001)
root@ceph-mgr1:/etc/ceph/certs# openssl req -new -x509 -key jia.key -out jia.crt -subj "/CN=rgw.jia.net"
root@ceph-mgr1:/etc/ceph/certs# cat jia.key jia.crt > jia.pem
root@ceph-mgr1:/etc/ceph/certs# tree
.
├── jia.crt
├── jia.key
└── jia.pem
0 directories, 3 files
#Send the certificates to the ceph-mgr2 node
root@ceph-mgr1:/etc/ceph/certs# scp -r /etc/ceph/certs 172.20.20.225:/etc/ceph/
3.4.2.2 Configure SSL
#Both RGW servers need this configuration
root@ceph-mgr1:/etc/ceph/certs# cat /etc/ceph/ceph.conf
...
[client.rgw.ceph-mgr1]
rgw_host = ceph-mgr1
rgw_frontends = "civetweb port=9900+9443s ssl_certificate=/etc/ceph/certs/jia.pem"
[client.rgw.ceph-mgr2]
rgw_host = ceph-mgr2
rgw_frontends = "civetweb port=9900+9443s ssl_certificate=/etc/ceph/certs/jia.pem"
#Restart the services
root@ceph-mgr1:~# systemctl restart ceph-radosgw@rgw.ceph-mgr1.service
root@ceph-mgr2:~# systemctl restart ceph-radosgw@rgw.ceph-mgr2.service
3.4.2.3 Verify
root@ceph-mgr1:/etc/ceph/certs# ss -ntpl |grep radosgw
LISTEN 0 4096 0.0.0.0:9443 0.0.0.0:* users:(("radosgw",pid=131182,fd=80))
LISTEN 0 4096 0.0.0.0:9900 0.0.0.0:* users:(("radosgw",pid=131182,fd=79))
root@ceph-mgr2:~# systemctl restart ceph-radosgw@rgw.ceph-mgr2.service
root@ceph-mgr2:~# ss -ntpl |grep radosgw
LISTEN 0 4096 0.0.0.0:9443 0.0.0.0:* users:(("radosgw",pid=132712,fd=80))
LISTEN 0 4096 0.0.0.0:9900 0.0.0.0:* users:(("radosgw",pid=132712,fd=79))
3.4.2.4 Verify access
Add the hosts entry for the domain on the local machine (the original screenshot was mistakenly taken against the IP rather than the domain name).
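A command-line equivalent (a sketch) that forces rgw.jia.net to resolve to an RGW node without editing the hosts file:
#Test HTTPS with the self-signed certificate; -k skips certificate verification
root@ceph-deploy:~# curl -k --resolve rgw.jia.net:9443:172.20.20.224 https://rgw.jia.net:9443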
3.4.3 radosgw high availability
Use the haproxy load balancer to reverse-proxy radosgw and provide high availability.
3.4.3.1 Add the domain name resolution on the local machine
3.4.3.2 Load balancer configuration
root@k8s-ha1:~# cat /etc/haproxy/haproxy.cfg
...
listen ceph-rgw-80
bind 172.20.20.192:80
mode tcp
server 172.20.20.224 172.20.20.224:9900 check inter 3s fall 3 rise 3
server 172.20.20.225 172.20.20.225:9900 check inter 3s fall 3 rise 3
listen ceph-rgw-443
bind 172.20.20.192:443
mode tcp
server 172.20.20.224 172.20.20.224:9443 check inter 3s fall 3 rise 3
server 172.20.20.225 172.20.20.225:9443 check inter 3s fall 3 rise 3
root@k8s-ha1:~# systemctl restart haproxy.service
3.4.3.3 Test access
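The browser screenshots are omitted; a command-line sketch against the VIP (assuming the hosts entry 172.20.20.192 rgw.jia.net is in place):
#HTTP via the haproxy listener on port 80 and HTTPS via port 443
root@ceph-deploy:~# curl http://rgw.jia.net
root@ceph-deploy:~# curl -k https://rgw.jia.net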
3.4.4 Add access logging
Add logging and other tuning options.
#Create the log directory and set ownership
root@ceph-mgr1:/etc/ceph/certs# mkdir /var/log/radosgw
root@ceph-mgr1:/etc/ceph/certs# chown ceph.ceph /var/log/radosgw
#Modify the configuration; only the ceph-mgr1 node was changed here
root@ceph-mgr1:/etc/ceph/certs# cat /etc/ceph/ceph.conf
...
[client.rgw.ceph-mgr1]
rgw_host = ceph-mgr1
rgw_frontends = "civetweb port=9900+9443s ssl_certificate=/etc/ceph/certs/jia.pem error_log_file=/var/log/radosgw/civetweb.error.log access_log_file=/var/log/radosgw/civetweb.access.log request_timeout_ms=30000 num_threads=200"
#Restart the service
root@ceph-mgr1:/etc/ceph/certs# systemctl restart ceph-radosgw@rgw.ceph-mgr1.service
#Test from the local server
root@ceph-mgr1:/etc/ceph/certs# curl -k https://172.20.20.224:9443
<?xml version="1.0" encoding="UTF-8"?><ListAllMyBucketsResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>anonymous</ID><DisplayName></DisplayName></Owner><Buckets></Buckets></ListAllMyBucketsResult>
#Check the log
root@ceph-mgr1:/etc/ceph/certs# tail /var/log/radosgw/civetweb.access.log
172.20.20.224 - - [16/Nov/2023:12:19:45 +0000] "GET / HTTP/1.1" 200 413 - curl/7.68.0
172.20.20.224 - - [16/Nov/2023:12:19:46 +0000] "GET / HTTP/1.1" 200 413 - curl/7.68.0
172.20.20.224 - - [16/Nov/2023:12:19:46 +0000] "GET / HTTP/1.1" 200 413 - curl/7.68.0
172.20.20.224 - - [16/Nov/2023:12:19:47 +0000] "GET / HTTP/1.1" 200 413 - curl/7.68.0
4. Using the s3cmd client and implementing a short-video service based on Nginx
4.1 RGW server configuration
Normally RGW1 (172.20.20.224) and RGW2 (172.20.20.225) are configured identically.
root@ceph-mgr1:~# cat /etc/ceph/ceph.conf
[global]
fsid = 3586e7d1-9315-44e5-85bd-6bd3787ce574
public_network = 172.20.20.0/24
cluster_network = 192.168.20.0/24
mon_initial_members = ceph-mon1,ceph-mon2,ceph-mon3
mon_host = 172.20.20.221,172.20.20.222,172.20.20.223
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
[client.rgw.ceph-mgr1]
rgw_host = ceph-mgr1
rgw_frontends = "civetweb port=9900"
[client.rgw.ceph-mgr2]
rgw_host = ceph-mgr2
rgw_frontends = "civetweb port=9900"
4.2 Create an RGW user
The user is used by the s3cmd client to connect to RGW.
cephadmin@ceph-deploy:~/ceph-cluster$ radosgw-admin user create --uid="user1" --display-name="user1"
{
"user_id": "user1",
"display_name": "user1",
"email": "",
"suspended": 0,
"max_buckets": 1000,
"subusers": [],
"keys": [
{
"user": "user1",
"access_key": "U0HSBD1R9R960PQ0Q00G",
"secret_key": "74avsQSWSANcej60Yq75hAINc2DF0iXHkjsWhLAH"
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"default_storage_class": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"temp_url_keys": [],
"type": "rgw",
"mfa_ids": []
}
Save the access_key and secret_key of user1; they can also be displayed later with the following command.
cephadmin@ceph-deploy:~/ceph-cluster$ radosgw-admin user --uid="user1" info
{
"user_id": "user1",
"display_name": "user1",
"email": "",
"suspended": 0,
"max_buckets": 1000,
"subusers": [],
"keys": [
{
"user": "user1",
"access_key": "U0HSBD1R9R960PQ0Q00G",
"secret_key": "74avsQSWSANcej60Yq75hAINc2DF0iXHkjsWhLAH"
}
],
"swift_keys": [],
"caps": [],
"op_mask": "read, write, delete",
"default_placement": "",
"default_storage_class": "",
"placement_tags": [],
"bucket_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"user_quota": {
"enabled": false,
"check_on_raw": false,
"max_size": -1,
"max_size_kb": 0,
"max_objects": -1
},
"temp_url_keys": [],
"type": "rgw",
"mfa_ids": []
}
4.3 Install the s3cmd client
s3cmd is a command-line tool that accesses the Ceph RGW to create buckets and to upload, download and manage data in object storage.
root@ceph-deploy:~# apt-cache madison s3cmd
root@ceph-deploy:~# apt install s3cmd
4.4 Configure the s3cmd client environment
#1. Add the domain name resolution on the s3cmd client
root@ceph-deploy:~# cat /etc/hosts
...
172.20.20.192 rgw.jia.net #load balancer address or RGW gateway address
#2. Run the interactive s3cmd configuration
root@ceph-deploy:~# s3cmd --configure
Enter new values or accept defaults in brackets with Enter.
Refer to user manual for detailed description of all options.
Access key and Secret key are your identifiers for Amazon S3. Leave them empty for using the env variables.
Access Key: U0HSBD1R9R960PQ0Q00G #enter the access key of the user1 account created earlier for connecting to the RGW gateway
Secret Key: 74avsQSWSANcej60Yq75hAINc2DF0iXHkjsWhLAH #enter the secret key of that user
Default Region [US]: #region; press Enter to accept the default
Use "s3.amazonaws.com" for S3 Endpoint and not modify it to the target Amazon S3.
S3 Endpoint [s3.amazonaws.com]: rgw.jia.net #RGW domain name; if connecting directly to RGW, set it to rgw.jia.net:9900
Use "%(bucket)s.s3.amazonaws.com" to the target Amazon S3. "%(bucket)s" and "%(location)s" vars can be used
if the target S3 system supports dns based buckets.
DNS-style bucket+hostname:port template for accessing a bucket [%(bucket)s.s3.amazonaws.com]: rgw.jia.net/%(bucket) #domain-name template for buckets
Encryption password is used to protect your files from reading
by unauthorized persons while in transfer to S3
Encryption password: 123456 #encryption password
Path to GPG program [/usr/bin/gpg]: #path to the gpg command, used to encrypt files; press Enter to accept the default
When using secure HTTPS protocol all communication with Amazon S3
servers is protected from 3rd party eavesdropping. This method is
slower than plain HTTP, and can only be proxied with Python 2.7 or newer
Use HTTPS protocol [Yes]: No #whether to use HTTPS; No is chosen here
On some networks all internet access must go through a HTTP proxy.
Try setting it here if you can't connect to S3 directly
HTTP Proxy server name: #whether to use a proxy; press Enter to skip
New settings: #final settings
Access Key: U0HSBD1R9R960PQ0Q00G
Secret Key: 74avsQSWSANcej60Yq75hAINc2DF0iXHkjsWhLAH
Default Region: US
S3 Endpoint: rgw.jia.net
DNS-style bucket+hostname:port template for accessing a bucket: rgw.jia.net/%(bucket)
Encryption password: 123456
Path to GPG program: /usr/bin/gpg
Use HTTPS protocol: False
HTTP Proxy server name:
HTTP Proxy server port: 0
Test access with supplied credentials? [Y/n] y #run the connectivity test
Please wait, attempting to list all buckets...
Success. Your access key and secret key worked fine :-)
Now verifying that encryption works...
Success. Encryption and decryption worked fine :-)
Save settings? [y/N] y #save the settings
Configuration saved to '/root/.s3cfg' #path of the saved configuration file
#3. Check the generated credentials file
root@ceph-deploy:~# cat /root/.s3cfg
[default]
access_key = U0HSBD1R9R960PQ0Q00G
...
host_base = rgw.jia.net
host_bucket = rgw.jia.net/%(bucket)
...
secret_key = 74avsQSWSANcej60Yq75hAINc2DF0iXHkjsWhLAH
send_chunk = 65536
server_side_encryption = False
...
4.5 Common s3cmd commands
root@ceph-deploy:~# s3cmd --help
...
Commands:
Make bucket #create a bucket
s3cmd mb s3://BUCKET
Remove bucket #delete a bucket; only an empty bucket can be deleted, so its contents must be removed first
s3cmd rb s3://BUCKET
List objects or buckets #list the objects in a bucket
s3cmd ls [s3://BUCKET[/PREFIX]]
List all object in all buckets #list all objects in all buckets
s3cmd la
Put file into bucket #upload files to a bucket
s3cmd put FILE [FILE...] s3://BUCKET[/PREFIX]
Get file from bucket #download a file from a bucket to the local machine
s3cmd get s3://BUCKET/OBJECT LOCAL_FILE
Delete file from bucket #delete a file
s3cmd del s3://BUCKET/OBJECT
Delete file from bucket (alias for del) #delete a file; alias of del
s3cmd rm s3://BUCKET/OBJECT
Restore file from Glacier storage #restore a file
s3cmd restore s3://BUCKET/OBJECT
Synchronize a directory tree to S3 (checks files freshness using size and md5 checksum, unless overridden by options, see below) #synchronize a directory tree
s3cmd sync LOCAL_DIR s3://BUCKET[/PREFIX] or s3://BUCKET[/PREFIX] LOCAL_DIR
Disk usage by buckets #show space usage
s3cmd du [s3://BUCKET[/PREFIX]]
Get various information about Buckets or Files #get detailed information about a bucket or file
s3cmd info s3://BUCKET[/OBJECT]
Copy object #copy an object
s3cmd cp s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]
Modify object metadata #modify object metadata
s3cmd modify s3://BUCKET1/OBJECT
Move object #move an object
s3cmd mv s3://BUCKET1/OBJECT1 s3://BUCKET2[/OBJECT2]
Modify Access control list for Bucket or Files
s3cmd setacl s3://BUCKET[/OBJECT]
Modify Bucket Policy
s3cmd setpolicy FILE s3://BUCKET
Delete Bucket Policy
s3cmd delpolicy s3://BUCKET
Modify Bucket CORS
s3cmd setcors FILE s3://BUCKET
Delete Bucket CORS
s3cmd delcors s3://BUCKET
Modify Bucket Requester Pays policy
s3cmd payer s3://BUCKET
Show multipart uploads
s3cmd multipart s3://BUCKET [Id]
Abort a multipart upload
s3cmd abortmp s3://BUCKET/OBJECT Id
List parts of a multipart upload
s3cmd listmp s3://BUCKET/OBJECT Id
Enable/disable bucket access logging
s3cmd accesslog s3://BUCKET
Sign arbitrary string using the secret key
s3cmd sign STRING-TO-SIGN
Sign an S3 URL to provide limited public access with expiry
s3cmd signurl s3://BUCKET/OBJECT <expiry_epoch|+expiry_offset>
Fix invalid file names in a bucket
s3cmd fixbucket s3://BUCKET[/PREFIX]
Create Website from bucket
s3cmd ws-create s3://BUCKET
Delete Website
s3cmd ws-delete s3://BUCKET
Info about Website
s3cmd ws-info s3://BUCKET
Set or delete expiration rule for the bucket
s3cmd expire s3://BUCKET
Upload a lifecycle policy for the bucket
s3cmd setlifecycle FILE s3://BUCKET
Get a lifecycle policy for the bucket
s3cmd getlifecycle s3://BUCKET
Remove a lifecycle policy for the bucket
s3cmd dellifecycle s3://BUCKET
List CloudFront distribution points
s3cmd cflist
Display CloudFront distribution point parameters
s3cmd cfinfo [cf://DIST_ID]
Create CloudFront distribution point
s3cmd cfcreate s3://BUCKET
Delete CloudFront distribution point
s3cmd cfdelete cf://DIST_ID
Change CloudFront distribution point parameters
s3cmd cfmodify cf://DIST_ID
Display CloudFront invalidation request(s) status
s3cmd cfinvalinfo cf://DIST_ID[/INVAL_ID]
For more information, updates and news, visit the s3cmd website:
http://s3tools.org
4.6 Testing uploads and downloads with s3cmd
- Create buckets
A bucket is the container that stores objects; before uploading any type of object, a bucket must be created.
root@ceph-deploy:~# s3cmd mb s3://mybucket
Bucket 's3://mybucket/' created
root@ceph-deploy:~# s3cmd mb s3://images
Bucket 's3://images/' created
- Upload files
#Upload a file to mybucket
root@ceph-deploy:~# s3cmd put /etc/passwd s3://mybucket
upload: '/etc/passwd' -> 's3://mybucket/passwd' [1 of 1]
1942 of 1942 100% in 3s 602.57 B/s done
#The / does not express a real directory hierarchy; it is only part of the object's address (key)
root@ceph-deploy:~# s3cmd put /var/log/syslog s3://images/log/
upload: '/var/log/syslog' -> 's3://images/log/syslog' [1 of 1]
117381 of 117381 100% in 0s 1727.24 kB/s done
#Verify the files in the buckets
root@ceph-deploy:~# s3cmd ls s3://mybucket
2023-11-17 02:34 1942 s3://mybucket/passwd
#First list the logical root of the bucket, s3://images
root@ceph-deploy:~# s3cmd ls s3://images
DIR s3://images/log/
root@ceph-deploy:~# s3cmd ls s3://images/log/
2023-11-17 02:35 117381 s3://images/log/syslog
- Download a file
#Download the file
root@ceph-deploy:~# s3cmd get s3://images/log/syslog /opt/
download: 's3://images/log/syslog' -> '/opt/syslog' [1 of 1]
117381 of 117381 100% in 0s 5.39 MB/s done
#Verify the file
root@ceph-deploy:~# ls /opt/syslog
/opt/syslog
- Delete a file
#List the files in the bucket
root@ceph-deploy:~# s3cmd ls s3://mybucket/
2023-11-17 02:34 1942 s3://mybucket/passwd
#Delete the file
root@ceph-deploy:~# s3cmd rm s3://mybucket/passwd
delete: 's3://mybucket/passwd'
#List again
root@ceph-deploy:~# s3cmd ls s3://mybucket/
4.7 Implementing a short-video service with Nginx
4.7.1 Upload video files to a bucket
#Create the bucket
root@ceph-deploy:~# s3cmd mb s3://video
Bucket 's3://video/' created
#Upload the video files
root@ceph-deploy:~# s3cmd put 1.chengdu.mp4 s3://video
upload: '1.chengdu.mp4' -> 's3://video/1.chengdu.mp4' [part 1 of 2, 15MB] [1 of 1]
15728640 of 15728640 100% in 7s 2.05 MB/s done
upload: '1.chengdu.mp4' -> 's3://video/1.chengdu.mp4' [part 2 of 2, 7MB] [1 of 1]
7899918 of 7899918 100% in 4s 1722.27 kB/s done
root@ceph-deploy:~# s3cmd put 4.zhangsan.mp4 s3://video
upload: '4.zhangsan.mp4' -> 's3://video/4.zhangsan.mp4' [1 of 1]
15061866 of 15061866 100% in 6s 2.36 MB/s done
#Grant anonymous users read-only access to the bucket
root@ceph-deploy:~# cat video-bucket-single-policy.json
{
"Version": "2012-10-17",
"Statement": [{
"Effect": "Allow" ,
"Principal": "*",
"Action": "s3:GetObject",
"Resource": [
"arn:aws:s3:::video/*"
]
}]
}
#Apply the policy
root@ceph-deploy:~# s3cmd setpolicy video-bucket-single-policy.json s3://video
s3://video/: Policy updated
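Optionally, the applied policy can be checked from the client (a sketch):
#Show bucket information, including the attached policy
root@ceph-deploy:~# s3cmd info s3://video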
#Configure haproxy
root@k8s-ha1:/etc/haproxy# cat /etc/haproxy/haproxy.cfg
...
listen ceph-rgw-80
bind 172.20.20.192:80
mode tcp
server 172.20.20.224 172.20.20.224:9900 check inter 3s fall 3 rise 3
server 172.20.20.225 172.20.20.225:9900 check inter 3s fall 3 rise 3
#Restart the service
root@k8s-ha1:/etc/haproxy# systemctl restart haproxy.service
Access from a browser
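The browser screenshot is omitted; an equivalent command-line sketch (a HEAD request so the video body is not downloaded; it relies on the anonymous-read policy above and on the haproxy listener being on port 80):
#Anonymous read of an uploaded object through haproxy
root@ceph-deploy:~# curl -I http://rgw.jia.net/video/1.chengdu.mp4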
4.7.2 Configure the Nginx reverse proxy
In this lab environment Nginx runs on the same server as haproxy; requests to www.jia.net for video content are forwarded to the video objects stored in Ceph.
root@k8s-ha1:~# cat /etc/nginx/nginx.conf
user root;
worker_processes auto;
pid /run/nginx.pid;
include /etc/nginx/modules-enabled/*.conf;
events {
worker_connections 768;
# multi_accept on;
}
http {
sendfile on;
tcp_nopush on;
tcp_nodelay on;
keepalive_timeout 65;
types_hash_max_size 2048;
include /etc/nginx/mime.types;
default_type application/octet-stream;
ssl_protocols TLSv1 TLSv1.1 TLSv1.2 TLSv1.3; # Dropping SSLv3, ref: POODLE
ssl_prefer_server_ciphers on;
access_log /var/log/nginx/access.log;
error_log /var/log/nginx/error.log;
gzip on;
#Port 8080 is used here because port 80 conflicts with another service
server {
listen 8080;
server_name www.jia.net;
location / {
root html;
index index.html index.htm;
}
location ~* \.(mp4|avi)$ { # requests ending in mp4 or avi are forwarded to 172.20.20.192 (haproxy)
proxy_pass http://172.20.20.192:9900; #during earlier haproxy testing the listener was changed from port 80 to 9900
}
}
}
#Restart
root@k8s-ha1:~# systemctl restart nginx
4.7.3 Access test
Remember to add the local hosts entry:
172.20.20.192 www.jia.net
Access
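A command-line sketch of the same test (the Host header stands in for the hosts entry, and -I avoids downloading the whole video):
#Request a video through Nginx on port 8080, which proxies it on to haproxy/RGW
root@ceph-deploy:~# curl -I -H "Host: www.jia.net" http://172.20.20.192:8080/video/1.chengdu.mp4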