OS environment: CentOS 7, kernel 3.10.0-514.26.2.el7.x86_64
Number of machines: five
Disks: four per machine; one is the system disk, the other three are reserved for Ceph
Naming scheme: ceph1 ceph2 ceph3 ceph4 ceph5; ceph1 is the monitor node
IP plan: 192.168.238.135 ceph1  # deployment/admin node, monitor node
192.168.238.136 ceph2
192.168.238.137 ceph3
192.168.238.138 ceph4  # client machine
192.168.238.139 ceph5  # used later to test adding and removing an OSD node
1. Prerequisites (run on all machines)
1.1 Set the hostname
# Set the hostname on each node separately
# vi /etc/hostname
# hostname
ceph1
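On CentOS 7 the same change can also be made non-interactively with hostnamectl; a minimal sketch (run the matching command on each node):
# hostnamectl set-hostname ceph1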
1.2 Configure name resolution
# On every node; afterwards test that the network is reachable
# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.238.135 ceph1
192.168.238.136 ceph2
192.168.238.137 ceph3
192.168.238.138 ceph4
192.168.238.139 ceph5
Test:
#ping ceph1
PING ceph1 (192.168.238.135) 56(84) bytes of data.
64 bytes from ceph1 (192.168.238.135): icmp_seq=1 ttl=64 time=0.059 ms
64 bytes from ceph1 (192.168.238.135): icmp_seq=2 ttl=64 time=0.030 ms
1.3 Firewall (disable it)
# systemctl stop firewalld
# systemctl disable firewalld
1.4 Disable SELINUX
# setenforce 0  # disable temporarily
# vim /etc/selinux/config
# cat /etc/selinux/config | grep disabled  # disable permanently
# disabled - No SELinux policy is loaded.
SELINUX=disabled
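The permanent change can also be made in one line instead of editing the file by hand; a sketch (assumes the stock SELINUX= line is present in /etc/selinux/config):
# sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config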
1.5 Install ssh
# yum install openssh-server
1.6 Install ntp
# yum install ntp -y  # install ntp
# crontab -e  # edit the crontab
# Add a scheduled time-sync job
30 2 * * * /usr/sbin/ntpdate ntp1.aliyun.com
# Restart crond
# systemctl restart crond
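To sync the clock once right away instead of waiting for the cron job (same command the job uses; assumes ntp1.aliyun.com is reachable):
# /usr/sbin/ntpdate ntp1.aliyun.com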
2. Create the ceph cluster
[root@ceph1 my-cluster]# ceph-deploy new ceph1
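If ceph-deploy has not been installed on ceph1 yet and the working directory does not exist, a minimal preparation sketch (assumes a yum repository providing ceph-deploy, e.g. the ceph-noarch repo or EPEL, is already configured):
# yum install -y ceph-deploy
# mkdir ~/my-cluster && cd ~/my-cluster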
2.1 Install the ceph binary packages on all nodes
[root@ceph1 my-cluster]# ceph-deploy install ceph1 ceph2 ceph3
When it finishes, check the ceph version
[root@ceph1 ~]# ceph -v
ceph version 10.2.9 (2ee413f77150c0f375ff6f10edd6c8f9c7d060d0)
2.2 Create the first ceph monitor on ceph1
[root@ceph1 ~]# cd my-cluster/  # note the directory this is run from
[root@ceph1 my-cluster]# ceph-deploy mon create-initial
After it succeeds, check the cluster status
[root@ceph1 my-cluster]# ceph -s
cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
health HEALTH_ERR  # the cluster is in an unhealthy state
no osds
monmap e1: 1 mons at {ceph1=192.168.238.140:6789/0}
election epoch 3, quorum 0 ceph1
osdmap e1: 0 osds: 0 up, 0 in
flags sortbitwise,require_jewel_osds
pgmap v2: 64 pgs, 1 pools, 0 bytes data, 0 objects
0 kB used, 0 kB / 0 kB avail
64 creating
2.3 Create OSDs on ceph1
2.3.1 List the available disks on ceph1
[root@ceph1 my-cluster]#ceph-deploy disk list ceph1
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (1.5.37): /usr/bin/ceph-deploy disk list ceph1
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username        : None
[ceph_deploy.cli][INFO  ]  verbose         : False
[ceph_deploy.cli][INFO  ]  overwrite_conf  : False
[ceph_deploy.cli][INFO  ]  subcommand      : list
[ceph_deploy.cli][INFO  ]  quiet           : False
[ceph_deploy.cli][INFO  ]  cd_conf         :
[ceph_deploy.cli][INFO  ]  cluster         : ceph
[ceph_deploy.cli][INFO  ]  func            :
[ceph_deploy.cli][INFO  ]  ceph_conf       : None
[ceph_deploy.cli][INFO  ]  default_release : False
[ceph_deploy.cli][INFO  ]  disk            : [('ceph1', None, None)]
[ceph1][DEBUG] connected to host: ceph1
[ceph1][DEBUG] detect platform information from remote host
[ceph1][DEBUG] detect machine type
[ceph1][DEBUG] find the location of an executable
[ceph_deploy.osd][INFO] Distro info: CentOS Linux 7.3.1611 Core
[ceph_deploy.osd][DEBUG] Listing disks on ceph1...
[ceph1][DEBUG] find the location of an executable
[ceph1][INFO] Running command: /usr/sbin/ceph-disk list
[ceph1][DEBUG] /dev/dm-0 other, xfs, mounted on /
[ceph1][DEBUG] /dev/dm-1 swap, swap
[ceph1][DEBUG] /dev/sda :
[ceph1][DEBUG ]  /dev/sda2 other, LVM2_member
[ceph1][DEBUG ]  /dev/sda1 other, xfs, mounted on /boot
[ceph1][DEBUG ] /dev/sdb :
[ceph1][DEBUG ] /dev/sdb other, unknown
[ceph1][DEBUG ] /dev/sdc other, unknown  # the three spare disks
[ceph1][DEBUG ] /dev/sdd other, unknown
[ceph1][DEBUG ] /dev/sr0 other, unknown
2.3.2 disk zap removes the existing partition table and disk contents (be very careful here)
[root@ceph1 my-cluster]# ceph-deploy disk zap ceph1:sdb ceph1:sdc ceph1:sdd
2.3.3 osd create prepares the disks: it formats each disk with xfs, then activates the first and second partitions as the data partition and the journal partition
[root@ceph1 my-cluster]# ceph-deploy osd create ceph1:sdb ceph1:sdc ceph1:sdd
Check the ceph status and the OSD count; at this stage the cluster is still in an unhealthy state.
3. Expand the ceph cluster
3.1 Increase the number of mons; note that the total number of mons must be odd
Edit ceph.conf on ceph1 and add:
public network = 192.168.238.0/24
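For reference, the [global] section of ceph.conf then looks roughly like the sketch below (fsid and monitor address are taken from the outputs shown later in these notes; your generated values will differ):
[global]
fsid = b014bf0d-43ea-424f-9358-a32ae5a5cd59
mon_initial_members = ceph1
mon_host = 192.168.238.140
public network = 192.168.238.0/24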
3.2 On ceph1, run ceph-deploy to create monitors on ceph2 and ceph3
[root@ceph1 my-cluster]#ceph-deploy mon create ceph2
[root@ceph1 my-cluster]#ceph-deploy mon create ceph3
Check the ceph cluster status:
[root@ceph1 my-cluster]# ceph -s
cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
health HEALTH_ERR
64 pgs are stuck inactive for more than 300 seconds
64 pgs degraded
64 pgs stuck degraded
64 pgs stuck inactive
64 pgs stuck unclean
64 pgs stuck undersized
64 pgs undersized
too few PGs per OSD (21 < min 30)
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e10: 3 osds: 3 up, 3 in
flags sortbitwise,require_jewel_osds
pgmap v18: 64 pgs, 1 pools, 0 bytes data, 0 objects
101504 kB used, 15227 MB / 15326 MB avail
64 undersized+degraded+peered
[root@ceph1 my-cluster]# ceph mon stat
e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}, election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
3.3 On ceph2 and ceph3, run the ceph-deploy disk list and disk zap commands, then run osd create to create the OSDs:
[root@ceph1 my-cluster]# ceph-deploy disk list ceph2 ceph3
[root@ceph1 my-cluster]# ceph-deploy disk zap ceph2:sdb ceph2:sdc ceph2:sdd
[root@ceph1 my-cluster]# ceph-deploy disk zap ceph3:sdb ceph3:sdc ceph3:sdd
[root@ceph1 my-cluster]# ceph-deploy osd create ceph2:sdb ceph2:sdc ceph2:sdd
[root@ceph1 my-cluster]# ceph-deploy osd create ceph3:sdb ceph3:sdc ceph3:sdd
3.4 After adding more OSDs, adjust the rbd pool's pg_num and pgp_num values so the cluster reaches the HEALTH_OK state
[root@ceph1 my-cluster]# ceph -s
cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
health HEALTH_WARN
too few PGs per OSD (21 < min 30)
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e43: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v97: 64 pgs, 1 pools, 0 bytes data, 0 objects
305 MB used, 45675 MB / 45980 MB avail
64 active+clean
[root@ceph1 my-cluster]# ceph osd pool set rbd pg_num 256
set pool 0 pg_num to 256
[root@ceph1 my-cluster]# ceph osd pool set rbd pgp_num 256
set pool 0 pgp_num to 256
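A commonly used rule of thumb for picking these values (not from the original notes): total PGs ≈ (number of OSDs × 100) / replica count, rounded to a power of two. With 9 OSDs and the default replica size of 3 this gives 9 × 100 / 3 = 300, so 256 (or 512) is a reasonable pg_num/pgp_num for the single rbd pool here.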
Check the cluster status:
[root@ceph1 my-cluster]# ceph -s
cluster b014bf0d-43ea-424f-9358-a32ae5a5cd59
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e47: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v111: 256 pgs, 1 pools, 0 bytes data, 0 objects
315 MB used, 45665 MB / 45980 MB avail
215 active+clean
41 activating
4. Useful commands
4.1 Check the ceph installation status
ceph -s or ceph status
4.2 Watch the cluster health
ceph -w
4.3 Check the ceph monitor quorum status
ceph quorum_status --format json-pretty
4.4 Dump the ceph monitor information
ceph mon dump
4.5 Check the cluster usage
ceph df
4.6 Check the ceph monitor, osd and pg status
ceph mon stat
ceph osd stat
ceph pg stat
4.7 Dump the PGs
ceph pg dump
4.8 List the ceph pools
ceph osd lspools
4.9 Check the OSD CRUSH map
ceph osd tree
4.10 List the cluster's authentication keys
ceph auth list
5. Deploy the ceph mds
The MDS is only needed by the CephFS filesystem; the other access methods (block storage and object storage) do not need it.
5.1 On ceph1, use ceph-deploy to deploy and configure an mds onto ceph2
[root@ceph1 my-cluster]# ceph-deploy --overwrite-conf mds create ceph2
5.2 Do the following so that CephFS can be accessed
[root@ceph1 my-cluster]#ssh ceph2 ceph mds stat
e2:, 1 up:standby
5.3 Create the data and metadata pools for the ceph filesystem
[root@ceph1 my-cluster]#ceph osd pool create cephfs_data 64 64
pool 'cephfs_data' created
[root@ceph1 my-cluster]#ceph osd pool create cephfs_metadata 64 64
pool 'cephfs_metadata' created
5.4 Finally, create the ceph filesystem. After this command runs, the MDS enters the active state and CephFS becomes available.
[root@ceph1 my-cluster]#ceph fs new cephfs cephfs_metadata cephfs_data
new fs with metadata pool 2 and data pool 1
Verify the mds and cephfs status
[root@ceph1 my-cluster]#ceph mds stat
e5: 1/1/1 up {0=ceph2=up:active}
[root@ceph1 my-cluster]# ceph fs ls
name: cephfs, metadata pool: cephfs_metadata, data pools: [cephfs_data ]
6. Mount the filesystem
6.1 Access CephFS through the kernel client:
6.1.1 Check the kernel version:
[root@ceph4 ~]# uname -r
3.10.0-514.26.2.el7.x86_64
6.1.2 Create a mount point
[root@ceph4 ~]# mkdir /mnt/cephfs
Get the client key (the client.admin keyring is used here)
[root@ceph1 my-cluster]# cat ceph.client.admin.keyring  # view the key
[client.admin]
key =AQBq2LFZkfx9LxAAT3k2LlmxoByWnbGulrmyNg==
caps mds = "allow *"
caps mon = "allow *"
caps osd = "allow *"
6.1.3 Mount test
This requires that ceph is also installed on the client.
[root@ceph1 my-cluster]# ceph-deploy admin ceph4  # push the config to ceph4; copies ceph.conf and ceph.client.admin.keyring into /etc/ceph on that machine
[root@ceph1 my-cluster]# ceph-deploy install ceph4  # run on ceph1
[root@ceph4 ~]# mount -t ceph ceph1:6789:/ /mnt/cephfs -o name=admin,secret=AQBq2LFZkfx9LxAAT3k2LlmxoByWnbGulrmyNg==
For better security, put the key in a file and reference the file instead:
[root@ceph4 ceph]# echo AQBq2LFZkfx9LxAAT3k2LlmxoByWnbGulrmyNg== > /etc/ceph/cephfskey
[root@ceph4 ceph]# mount -t ceph ceph1:6789:/ /mnt/cephfs -o name=admin,secretfile=/etc/ceph/cephfskey  # mount test
6.1.4 Configure mounting at boot
[root@ceph4 ceph]# echo "ceph1:6789:/ /mnt/cephfs ceph name=admin,secretfile=/etc/ceph/cephfskey,noatime 0 2" >> /etc/fstab
[root@ceph4 ~]# umount /mnt/cephfs
[root@ceph4 ~]# df
Filesystem             1K-blocks    Used Available Use% Mounted on
/dev/mapper/cl-root     18855936 1620700  17235236   9% /
devtmpfs                  231244       0    231244   0% /dev
tmpfs                     241920       0    241920   0% /dev/shm
tmpfs                     241920    4700    237220   2% /run
tmpfs                     241920       0    241920   0% /sys/fs/cgroup
/dev/sda1                1038336  187436    850900  19% /boot
tmpfs                      48384       0     48384   0% /run/user/0
[root@ceph4 ~]# mount -a
[root@ceph4 ~]# df
Filesystem              1K-blocks    Used Available Use% Mounted on
/dev/mapper/cl-root      18855936 1620700  17235236   9% /
devtmpfs                   231244       0    231244   0% /dev
tmpfs                      241920       0    241920   0% /dev/shm
tmpfs                      241920    4700    237220   2% /run
tmpfs                      241920       0    241920   0% /sys/fs/cgroup
/dev/sda1                 1038336  187436    850900  19% /boot
tmpfs                       48384       0     48384   0% /run/user/0
192.168.238.140:6789:/   47083520  331776  46751744   1% /mnt/cephfs
6.2 Access CephFS with the FUSE client
6.2.1 Install the Ceph FUSE package on ceph4
[root@ceph4 ~]# yum install -y ceph-fuse
6.2.2 On ceph1, push the configuration to ceph4
[root@ceph1 my-cluster]# ceph-deploy admin ceph4  # copies ceph.conf and ceph.client.admin.keyring into /etc/ceph on ceph4
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (1.5.37): /usr/bin/ceph-deploy admin ceph4
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username        : None
[ceph_deploy.cli][INFO  ]  verbose         : False
[ceph_deploy.cli][INFO  ]  overwrite_conf  : False
[ceph_deploy.cli][INFO  ]  quiet           : False
[ceph_deploy.cli][INFO  ]  cd_conf         :
[ceph_deploy.cli][INFO  ]  cluster         : ceph
[ceph_deploy.cli][INFO  ]  client          : ['ceph4']
[ceph_deploy.cli][INFO  ]  func            :
[ceph_deploy.cli][INFO  ]  ceph_conf       : None
[ceph_deploy.cli][INFO  ]  default_release : False
[ceph_deploy.admin][DEBUG ] Pushing admin keys and conf to ceph4
[ceph4][DEBUG ] connected to host: ceph4
[ceph4][DEBUG ] detect platform information from remote host
[ceph4][DEBUG ] detect machine type
[ceph4][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
6.2.3 Mount CephFS with the ceph-fuse client
[root@ceph4 ceph]# ceph-fuse /mnt/cephfs/    or
[root@ceph4 ~]# ceph-fuse -m ceph1:6789 /mnt/cephfs/    or:
[root@ceph4 ~]# ceph-fuse --keyring /etc/ceph/ceph.client.admin.keyring --name client.admin -m ceph1:6789 /mnt/cephfs
ceph-fuse[3285]: starting ceph client
2017-09-08 09:19:06.128647 7f0f90d90ec0 -1 init, newargv = 0x7f0f9b390780 newargc=11
ceph-fuse[3285]: starting fuse
Aborted
[root@ceph4 ~]# df
Filesystem            1K-blocks    Used Available Use% Mounted on
/dev/mapper/cl-root    18855936 1621360  17234576   9% /
devtmpfs                 231244       0    231244   0% /dev
tmpfs                    241920       0    241920   0% /dev/shm
tmpfs                    241920    4704    237216   2% /run
tmpfs                    241920       0    241920   0% /sys/fs/cgroup
/dev/sda1               1038336  187436    850900  19% /boot
tmpfs                     48384       0     48384   0% /run/user/0
ceph-fuse              47083520  331776  46751744   1% /mnt/cephfs
6.2.4 Configure automatic mounting at boot
[root@ceph4 ceph]# cat /etc/fstab  # edit fstab
# /etc/fstab
# Created by anaconda on Sun Sep 3 04:52:45 2017
#
# Accessible filesystems, by reference, are maintained under'/dev/disk'
# See man pages fstab(5), findfs(8), mount(8) and/or blkid(8) formore info
#
/dev/mapper/cl-root  /      xfs   defaults  0 0
UUID=e31495e0-b86f-4c4c-a321-322ed31b0906  /boot  xfs  defaults  0 0
/dev/mapper/cl-swap  swap   swap  defaults  0 0
#ceph1:6789:/ /mnt/cephfs ceph name=admin,secretfile=/etc/ceph/cephfskey,noatime 0 2
id=admin,keyring=ceph.client.admin.keyring  /mnt/cephfs  fuse.ceph  defaults  0 0  # fuse entry
6.3 Mount CephFS over NFS
6.3.1 Install the packages nfs-ganesha needs on ceph1
[root@ceph1 ceph]# yum install -y nfs-utils nfs-ganesha nfs-ganesha-fsal-ceph
6.3.2 Stop the firewall
6.3.3 Start the services NFS depends on
[root@ceph1 my-cluster]# systemctl start rpcbind
[root@ceph1 my-cluster]# systemctl enable rpcbind
[root@ceph1 my-cluster]# systemctl start rpc-statd.service
6.3.4 Edit the NFS-Ganesha configuration file /etc/ganesha/ganesha.conf and add the following content:
Export{
Export_ID=1;
Path="/";
Pseudo="/";
Access_Type=RW;
NFS_Protocols="3";
Squash=No_Root_Squash;
Transport_Protocols=TCP;
SecType="none";
FSAL {
Name=CEPH;
}
}
6.3.5 Start it:
[root@ceph1 ganesha]# systemctl start nfs-ganesha
[root@ceph1 ganesha]# systemctl enable nfs-ganesha  # start at boot
Created symlink from /etc/systemd/system/multi-user.target.wants/nfs-ganesha.service to /usr/lib/systemd/system/nfs-ganesha.service.
[root@ceph1 ganesha]# showmount -e
Export list for ceph1:
6.3.6 Client setup
Install the NFS client software
[root@ceph4 ceph]# yum install nfs-utils -y
Mount:
[root@ceph4 ceph]# mount -o rw,noatime ceph1:/ /mnt/cephfs/   (there is a known issue here: the mount ends up read-only, writes fail)
192.168.238.140:/ on /mnt/cephfs type nfs4 (rw,noatime,vers=4.0,rsize=1048576,wsize=1048576,namlen=255,hard,proto=tcp,timeo=600,retrans=2,sec=sys,clientaddr=192.168.238.138,local_lock=none,addr=192.168.238.140)
7. Ceph cluster maintenance
7.1 Add a mon node; the total number of mon nodes must be odd
[root@ceph1 ceph]# ceph-deploy mon create ceph2
[root@ceph1 ceph]# ceph mon stat
e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}, election epoch 8, quorum 0,1,2 ceph1,ceph3,ceph2
7.2 Add a ceph OSD node
7.2.1 Install the ceph software on the new node (see the sketch below)
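A minimal sketch following the same install step used for the other nodes earlier (assumes ceph5 is in /etc/hosts and reachable over SSH from ceph1):
[root@ceph1 my-cluster]# ceph-deploy install ceph5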
7.2.2 View the existing OSD nodes
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
7.2.3 List all of ceph5's disks
[root@ceph1 my-cluster]# ceph-deploy disk list ceph5
7.2.4 Add ceph5's disks to the ceph cluster
[root@ceph1 my-cluster]# ceph-deploy disk zap ceph5:sdb ceph5:sdc ceph5:sdd
[root@ceph1 my-cluster]# ceph-deploy osd create ceph5:sdb ceph5:sdc ceph5:sdd
[root@ceph1 my-cluster]# ceph-deploy admin ceph2 ceph3 ceph4 ceph5
7.2.5 After they join, you can see that ceph rebalances the data
Note: if an OSD shows as down after joining, check whether the firewall is blocking it
[root@ceph1 my-cluster]# ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e254: 12 osds: 12 up, 12 in
flags sortbitwise,require_jewel_osds
pgmap v750: 512 pgs, 1 pools, 0 bytes data, 0 objects
525 MB used, 60781 MB / 61307 MB avail
512 active+clean
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.05878 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5 0.01469     host ceph5
 9 0.00490         osd.9       up  1.00000          1.00000
10 0.00490         osd.10      up  1.00000          1.00000
11 0.00490         osd.11      up  1.00000          1.00000
7.3 Remove a ceph OSD node
7.3.1 View the current OSD nodes
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.05878 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5 0.01469     host ceph5
 9 0.00490         osd.9       up  1.00000          1.00000
10 0.00490         osd.10      up  1.00000          1.00000
11 0.00490         osd.11      up  1.00000          1.00000
7.3.2 Mark the OSDs out
[root@ceph1 ceph]# ceph osd out osd.9
marked out osd.9.
[root@ceph1 ceph]# ceph osd out osd.10
marked out osd.10
[root@ceph1 ceph]# ceph osd out osd.11
marked out osd.11
[root@ceph1 ceph]# ceph health
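Marking the OSDs out triggers data migration off of them; it can help to watch the recovery finish before continuing (not part of the original notes, just the watch command from section 4.2):
[root@ceph1 ceph]# ceph -w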
7.3.3 Stop the OSD services on ceph5 (three OSDs)
[root@ceph1 my-cluster]# ssh ceph5 systemctl stop ceph-osd@9.service
[root@ceph1 my-cluster]# ssh ceph5 systemctl stop ceph-osd@10.service
[root@ceph1 my-cluster]# ssh ceph5 systemctl stop ceph-osd@11.service
7.3.4 Check the OSD tree
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.05878 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5 0.01469     host ceph5
 9 0.00490         osd.9     down        0          1.00000
10 0.00490         osd.10    down        0          1.00000
11 0.00490         osd.11    down        0          1.00000
7.3.5 Remove the OSDs from the CRUSH map
[root@ceph1 my-cluster]# ceph osd crush remove osd.9
removed item id 9 name 'osd.9' from crush map
[root@ceph1 my-cluster]# ceph osd crush remove osd.10
removed item id 10 name 'osd.10' from crush map
[root@ceph1 my-cluster]# ceph osd crush remove osd.11
removed item id 11 name 'osd.11' from crush map
7.3.6 Check that the cluster is healthy again
[root@ceph1 my-cluster]# ceph health
HEALTH_OK
7.3.7 Delete the OSD authentication keys
[root@ceph1 my-cluster]# ceph auth del osd.9
updated
[root@ceph1 my-cluster]# ceph auth del osd.10
updated
[root@ceph1 my-cluster]# ceph auth del osd.11
updated
7.3.8 Delete the OSDs, then check the cluster status
[root@ceph1 my-cluster]# ceph osd rm osd.9
removed osd.9
[root@ceph1 my-cluster]# ceph osd rm osd.10
removed osd.10
[root@ceph1 my-cluster]# ceph osd rm osd.11
removed osd.11
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
-5       0     host ceph5  # the host bucket is still there
# You can see that ceph5's OSDs no longer exist: 9 osds, 9 up and 9 in
7.3.9 Remove ceph5 from the CRUSH map to clear out the last traces
[root@ceph1 my-cluster]# ceph osd crush remove ceph5  # remove ceph5
removed item id -5 name 'ceph5' from crush map
[root@ceph1 my-cluster]# ceph osd tree  # check the result
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000
7.4 Replace a failed disk in the ceph cluster
7.4.1 Check the cluster status
[root@ceph1 my-cluster]# ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 6, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e279: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v1013: 512 pgs, 1 pools, 0 bytes data, 0 objects
403 MB used, 45577 MB / 45980 MB avail
512 active+clean
7.4.2 Simulate a failure
Shut down ceph3 and pull one of its disks to simulate a failure
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8     down        0          1.00000  # this is now the failed disk
7.4.3 Stop the failed OSD
[root@ceph1 my-cluster]# ssh ceph3 systemctl stop ceph-osd@8.service
7.4.4 Remove the failed OSD
[root@ceph1 my-cluster]# ceph osd out osd.8  # mark the disk out
osd.8 is already out.
[root@ceph1 my-cluster]# ceph osd crush rm osd.8  # remove it from the ceph CRUSH map
removed item id 8 name 'osd.8' from crush map
[root@ceph1 my-cluster]# ceph auth del osd.8  # delete the osd key
updated
[root@ceph1 my-cluster]# ceph osd rm osd.8  # remove the osd from the cluster
removed osd.8
[root@ceph1 my-cluster]# ceph -s  # check the status
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_ERR
14 pgs are stuck inactive for more than 300 seconds
6 pgs degraded
14 pgs peering
6 pgs stuck degraded
14 pgs stuck inactive
105 pgs stuck unclean
6 pgs stuck undersized
6 pgs undersized
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 10, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e292: 8 osds: 8 up, 8 in
flags sortbitwise,require_jewel_osds
pgmap v1046: 512 pgs, 1 pools, 0 bytes data, 0 objects
280 MB used, 30373 MB / 30653 MB avail
407 active+clean
68 active
17 active+remapped
14 remapped+peering
6 active+undersized+degraded
[root@ceph1 my-cluster]# ceph osd tree  # check the status
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.03918 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.00980     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
7.4.5 Recovery
Shut down the virtual machine and add a new disk
[root@ceph3 ~]# fdisk -l  # the newly added disk is visible
Disk /dev/sdd: 10.7 GB, 10737418240 bytes, 20971520 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
I/O size (minimum/optimal): 512 bytes / 512 bytes
Check ceph3's disk information
[root@ceph1 my-cluster]# ceph-deploy disk list ceph3
[ceph_deploy.conf][DEBUG ] found configuration file at: /root/.cephdeploy.conf
[ceph_deploy.cli][INFO  ] Invoked (1.5.37): /usr/bin/ceph-deploy disk list ceph3
[ceph_deploy.cli][INFO  ] ceph-deploy options:
[ceph_deploy.cli][INFO  ]  username        : None
[ceph_deploy.cli][INFO  ]  verbose         : False
[ceph_deploy.cli][INFO  ]  overwrite_conf  : False
[ceph_deploy.cli][INFO  ]  subcommand      : list
[ceph_deploy.cli][INFO  ]  quiet           : False
[ceph_deploy.cli][INFO  ]  cd_conf         :
[ceph_deploy.cli][INFO  ]  cluster         : ceph
[ceph_deploy.cli][INFO  ]  func            :
[ceph_deploy.cli][INFO  ]  ceph_conf       : None
[ceph_deploy.cli][INFO  ]  default_release : False
[ceph_deploy.cli][INFO  ]  disk            : [('ceph3', None, None)]
[ceph3][DEBUG ] connected to host: ceph3
[ceph3][DEBUG ] detect platform information from remote host
[ceph3][DEBUG ] detect machine type
[ceph3][DEBUG ] find the location of an executable
[ceph_deploy.osd][INFO  ] Distro info: CentOS Linux 7.3.1611 Core
[ceph_deploy.osd][DEBUG ] Listing disks on ceph3...
[ceph3][DEBUG ] find the location of an executable
[ceph3][INFO  ] Running command: /usr/sbin/ceph-disk list
[ceph3][DEBUG ] /dev/dm-0 other, xfs, mounted on /
[ceph3][DEBUG ] /dev/dm-1 swap, swap
[ceph3][DEBUG ] /dev/sda :
[ceph3][DEBUG ]  /dev/sda2 other, LVM2_member
[ceph3][DEBUG ]  /dev/sda1 other, xfs, mounted on /boot
[ceph3][DEBUG ] /dev/sdb :
[ceph3][DEBUG ]  /dev/sdb2 ceph journal, for /dev/sdb1
[ceph3][DEBUG ]  /dev/sdb1 ceph data, active, cluster ceph, osd.6, journal /dev/sdb2
[ceph3][DEBUG ] /dev/sdc :
[ceph3][DEBUG ]  /dev/sdc2 ceph journal, for /dev/sdc1
[ceph3][DEBUG ]  /dev/sdc1 ceph data, active, cluster ceph, osd.7, journal /dev/sdc2
[ceph3][DEBUG ] /dev/sdd other, unknown  # the disk that was just swapped in
[ceph3][DEBUG ] /dev/sr0 other, iso9660
Run disk zap on sdd
[root@ceph1 my-cluster]# ceph-deploy disk zap ceph3:sdd
Create an OSD on the newly added disk
[root@ceph1 my-cluster]# ceph-deploy --overwrite-conf osd create ceph3:sdd
[root@ceph1 my-cluster]# ceph osd tree
ID WEIGHT  TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 0.04408 root default
-2 0.01469     host ceph1
 0 0.00490         osd.0       up  1.00000          1.00000
 1 0.00490         osd.1       up  1.00000          1.00000
 2 0.00490         osd.2       up  1.00000          1.00000
-3 0.01469     host ceph2
 3 0.00490         osd.3       up  1.00000          1.00000
 4 0.00490         osd.4       up  1.00000          1.00000
 5 0.00490         osd.5       up  1.00000          1.00000
-4 0.01469     host ceph3
 6 0.00490         osd.6       up  1.00000          1.00000
 7 0.00490         osd.7       up  1.00000          1.00000
 8 0.00490         osd.8       up  1.00000          1.00000  # back in
Before running the osd create:
[root@ceph1 ~]# ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 14, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e303: 8 osds: 8 up, 8 in
flags sortbitwise,require_jewel_osds
pgmap v1078: 512 pgs, 1 pools, 0 bytes data, 0 objects
346 MB used, 40525 MB / 40871 MB avail
512 active+clean
After running it:
[root@ceph1 my-cluster]# ceph -s
cluster 74683cbd-e82e-4264-b8bb-930424bc6a9b
health HEALTH_OK
monmap e3: 3 mons at {ceph1=192.168.238.140:6789/0,ceph2=192.168.238.142:6789/0,ceph3=192.168.238.141:6789/0}
election epoch 14, quorum 0,1,2 ceph1,ceph3,ceph2
osdmap e309: 9 osds: 9 up, 9 in
flags sortbitwise,require_jewel_osds
pgmap v1096: 512 pgs, 1 pools, 0 bytes data, 0 objects
384 MB used, 45596 MB / 45980 MB avail
512 active+clean
The disk was replaced successfully.
Troubleshooting:
1. ceph-deploy install fails
[ceph_mon][DEBUG ] Retrieving http://ceph.com/rpm-firefly/el6/noarch/ceph-release-1-0.el6.noarch.rpm
[ceph_mon][DEBUG ] Preparing...  ##################################################
[ceph_mon][WARNIN] file /etc/yum.repos.d/ceph.repo from install of ceph-release-1-0.el6.noarch conflicts with file from package ceph-release-1-0.el6.noarch
[ceph_mon][ERROR ] RuntimeError: command returned non-zero exit status: 1
[ceph_deploy][ERROR ] RuntimeError: Failed to execute command: rpm -Uvh --replacepkgs http://ceph.com/rpm-firefly/el6/noarch/ceph-release-1-0.el6.noarch.rpm
Solution:
yum -y remove ceph-release*
Then rerun the install; if the problem persists, remove the package again and retry.
2. deploy fails when public_network contains more than one network
Solution:
Add to ceph.conf:
public_network = 192.168.238.0/24
3. [root@ceph1 my-cluster]# ceph health
HEALTH_WARN clock skew detected on mon.ceph3,mon.ceph2; Monitor clock skew detected
Solution:
[root@ceph1 my-cluster]# /usr/sbin/ntpdate ntp1.aliyun.com  # sync the time
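To keep the clocks in sync permanently rather than fixing them once, the ntpd service installed in step 1.6 can be enabled on every node (a sketch, not from the original notes):
# systemctl enable ntpd
# systemctl start ntpd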
4. Errors when redeploying after the ceph.conf configuration file has been modified
[root@ceph1 my-cluster]# ceph-deploy admin ceph1 ceph2 ceph3
[ceph3][DEBUG ] write cluster configuration to /etc/ceph/{cluster}.conf
[ceph_deploy.admin][ERROR ] RuntimeError: config file /etc/ceph/ceph.conf exists with different content; use --overwrite-conf to overwrite
[ceph_deploy][ERROR ] GenericError: Failed to configure 3 admin hosts
Solution:
[root@ceph1 my-cluster]# ceph-deploy --overwrite-conf admin ceph1 ceph2 ceph3
5. [ceph3][ERROR ] RuntimeError: command returned non-zero exit status: 1
[ceph_deploy.osd][ERROR ] Failed to execute command: /usr/sbin/ceph-disk -v prepare --cluster ceph --fs-type xfs -- /dev/sdb1
[ceph_deploy][ERROR ] GenericError: Failed to create 3 OSDs
Solution: install the xfs packages on every node
[root@ceph1 my-cluster]# yum install xfs* -y
6. Fixing an OSD that is down
[root@ceph1 my-cluster]# ceph-deploy osd activate ceph5:/dev/sdc1
Also, whenever you deploy, make sure the firewall is off or the required ports are open.
Command to start an OSD:
systemctl start ceph-osd@1.service
Command to stop a failed OSD:
systemctl stop ceph-osd@1.service
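To check whether a given OSD daemon is actually running on its node (a generic systemd sketch, not from the original notes):
systemctl status ceph-osd@1.service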