Problem description
In a Ceph cluster whose OSDs use the BlueStore backend, hot-plug testing of OSD disks can cause device names to drift: a disk that was /dev/sdd before being pulled may come back as /dev/sde, for example. When that happens, the OSD service fails to start and the OSD cannot come back online.
Problem analysis
Inspecting the directory of the OSD that will not come up shows that the block.wal and block.db symlinks still point to the old device names, which is why the OSD fails to start.
(Use lvs -o lv_tags --noheadings | grep "ceph.osd_id={x}" to look up the wal, db, and journal devices recorded in the LVM tags for the affected OSD.)
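A quick way to confirm the drift is to compare what the symlinks currently resolve to with the devices recorded in the LVM tags. A minimal sketch, assuming osd.0 is the affected OSD:

# What the stale symlinks point at right now
readlink /var/lib/ceph/osd/ceph-0/block.db /var/lib/ceph/osd/ceph-0/block.wal
# What the LVM tags say the db/wal devices actually are
lvs -o lv_tags --noheadings | grep "ceph.osd_id=0" | tr ',' '\n' | grep -E 'ceph\.(db|wal)_device='

If the two disagree, the symlinks are stale and need to be rebuilt.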
Solution
Manually delete the stale block.wal and block.db symlinks under the OSD's directory, recreate them against the correct devices, fix their ownership (using chown -h so the link itself, not the target device, is changed), and restart the OSD. A condensed sketch follows; the full step-by-step session is shown under "Walkthrough" below.
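This is a sketch, not a turnkey script: the OSD id and the db/wal devices (here osd.0, /dev/sde2, and /dev/sde1, matching the session below) must first be read from the lvs tags.

OSD_ID=0                       # the OSD that is down
DB_DEV=/dev/sde2               # ceph.db_device from the lvs tags
WAL_DEV=/dev/sde1              # ceph.wal_device from the lvs tags
OSD_DIR=/var/lib/ceph/osd/ceph-${OSD_ID}

rm -f ${OSD_DIR}/block.db ${OSD_DIR}/block.wal    # drop the stale symlinks
ln -s ${DB_DEV} ${OSD_DIR}/block.db               # relink to the current devices
ln -s ${WAL_DEV} ${OSD_DIR}/block.wal
chown -h ceph:ceph ${OSD_DIR}/block.db ${OSD_DIR}/block.wal   # -h: change the link, not the target
systemctl reset-failed ceph-osd@${OSD_ID}.service # clear any previous failed state
systemctl restart ceph-osd@${OSD_ID}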
Walkthrough
# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 39.67140 root default
-7 14.42596 host node13
2 hdd 3.60649 osd.2 up 1.00000 1.00000
5 hdd 3.60649 osd.5 up 1.00000 1.00000
8 hdd 3.60649 osd.8 up 1.00000 1.00000
10 hdd 3.60649 osd.10 up 1.00000 1.00000
-5 14.42596 host node17
1 hdd 3.60649 osd.1 up 1.00000 1.00000
4 hdd 3.60649 osd.4 up 1.00000 1.00000
7 hdd 3.60649 osd.7 up 1.00000 1.00000
9 hdd 3.60649 osd.9 up 1.00000 1.00000
-3 10.81947 host node19
0 hdd 3.60649 osd.0 down 0 1.00000
3 hdd 3.60649 osd.3 up 1.00000 1.00000
6 hdd 3.60649 osd.6 up 1.00000 1.00000
# df
Filesystem 1K-blocks Used Available Use% Mounted on
devtmpfs 24421432 0 24421432 0% /dev
tmpfs 24438444 0 24438444 0% /dev/shm
tmpfs 24438444 17864 24420580 1% /run
tmpfs 24438444 0 24438444 0% /sys/fs/cgroup
/dev/mapper/centos-root 135728384 4911872 130816512 4% /
/dev/sda1 969452 191272 778180 20% /boot
/dev/mapper/centos-var_log 97609148 317364 97291784 1% /var/log
tmpfs 4887692 0 4887692 0% /run/user/0
tmpfs 24438444 48 24438396 1% /var/lib/ceph/osd/ceph-0
tmpfs 24438444 48 24438396 1% /var/lib/ceph/osd/ceph-3
tmpfs 24438444 48 24438396 1% /var/lib/ceph/osd/ceph-6
# lvs -o lv_tags --noheadings | grep "ceph.osd_id=0"
ceph.block_device=/dev/ceph-a4ace47b-42f3-42f5-8e26-27fde217c66a/osd-block-dd275e43-6f57-4a44-9364-a7ad9cf829c6,ceph.block_uuid=KRweeC-aPPB-IU3G-Knw1-emDZ-ES0x-r9qApe,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=c22f2485-2763-46c5-8353-3f3ee39df6d8,ceph.cluster_name=ceph,ceph.crush_device_class=None,ceph.db_device=/dev/sde2,ceph.db_uuid=bbc36d88-352e-4ff2-92d5-ac8666e3aa15,ceph.encrypted=0,ceph.osd_fsid=dd275e43-6f57-4a44-9364-a7ad9cf829c6,ceph.osd_id=0,ceph.type=block,ceph.vdo=0,ceph.wal_device=/dev/sde1,ceph.wal_uuid=ff20edf2-8447-40da-a70c-3875e0b5d41e
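Note that the tags record ceph.db_device=/dev/sde2 and ceph.wal_device=/dev/sde1, while the directory listing below still shows the symlinks pointing at /dev/sdd2 and /dev/sdd1. For scripting, the two fields can be extracted from the tag string, e.g. (a hypothetical helper, assuming exactly one LV matches the grep):

TAGS=$(lvs -o lv_tags --noheadings | grep "ceph.osd_id=0")
DB_DEV=$(echo "$TAGS" | tr ',' '\n' | sed -n 's/^.*ceph\.db_device=//p')
WAL_DEV=$(echo "$TAGS" | tr ',' '\n' | sed -n 's/^.*ceph\.wal_device=//p')
echo "db=$DB_DEV wal=$WAL_DEV"   # here: db=/dev/sde2 wal=/dev/sde1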
# cd /var/lib/ceph/osd/ceph-0/
# ll
total 48
-rw-r--r-- 1 ceph ceph 402 Dec 31 12:05 activate.monmap
lrwxrwxrwx 1 ceph ceph 93 Dec 31 12:05 block -> /dev/ceph-a4ace47b-42f3-42f5-8e26-27fde217c66a/osd-block-dd275e43-6f57-4a44-9364-a7ad9cf829c6
lrwxrwxrwx 1 ceph ceph 9 Dec 31 12:05 block.db -> /dev/sdd2
lrwxrwxrwx 1 ceph ceph 9 Dec 31 12:05 block.wal -> /dev/sdd1
-rw-r--r-- 1 ceph ceph 2 Dec 31 12:05 bluefs
-rw-r--r-- 1 ceph ceph 37 Dec 31 12:05 ceph_fsid
-rw-r--r-- 1 ceph ceph 37 Dec 31 12:05 fsid
-rw------- 1 ceph ceph 55 Dec 31 12:05 keyring
-rw-r--r-- 1 ceph ceph 8 Dec 31 12:05 kv_backend
-rw-r--r-- 1 ceph ceph 21 Dec 31 12:05 magic
-rw-r--r-- 1 ceph ceph 4 Dec 31 12:05 mkfs_done
-rw-r--r-- 1 ceph ceph 41 Dec 31 12:05 osd_key
-rw-r--r-- 1 ceph ceph 6 Dec 31 12:05 ready
-rw-r--r-- 1 ceph ceph 10 Dec 31 12:05 type
-rw-r--r-- 1 ceph ceph 2 Dec 31 12:05 whoami
# rm -rf block.db block.wal
# ln -s /dev/sde2 /var/lib/ceph/osd/ceph-0/block.db
# ln -s /dev/sde1 /var/lib/ceph/osd/ceph-0/block.wal
# ll
total 48
-rw-r--r-- 1 ceph ceph 402 Dec 31 12:05 activate.monmap
lrwxrwxrwx 1 ceph ceph 93 Dec 31 12:05 block -> /dev/ceph-a4ace47b-42f3-42f5-8e26-27fde217c66a/osd-block-dd275e43-6f57-4a44-9364-a7ad9cf829c6
lrwxrwxrwx 1 root root 9 Dec 31 15:04 block.db -> /dev/sde2
lrwxrwxrwx 1 root root 9 Dec 31 15:04 block.wal -> /dev/sde1
-rw-r--r-- 1 ceph ceph 2 Dec 31 12:05 bluefs
-rw-r--r-- 1 ceph ceph 37 Dec 31 12:05 ceph_fsid
-rw-r--r-- 1 ceph ceph 37 Dec 31 12:05 fsid
-rw------- 1 ceph ceph 55 Dec 31 12:05 keyring
-rw-r--r-- 1 ceph ceph 8 Dec 31 12:05 kv_backend
-rw-r--r-- 1 ceph ceph 21 Dec 31 12:05 magic
-rw-r--r-- 1 ceph ceph 4 Dec 31 12:05 mkfs_done
-rw-r--r-- 1 ceph ceph 41 Dec 31 12:05 osd_key
-rw-r--r-- 1 ceph ceph 6 Dec 31 12:05 ready
-rw-r--r-- 1 ceph ceph 10 Dec 31 12:05 type
-rw-r--r-- 1 ceph ceph 2 Dec 31 12:05 whoami
# chown -h ceph:ceph block.db
# chown -h ceph:ceph block.wal
# ll
total 48
-rw-r--r-- 1 ceph ceph 402 Dec 31 12:05 activate.monmap
lrwxrwxrwx 1 ceph ceph 93 Dec 31 12:05 block -> /dev/ceph-a4ace47b-42f3-42f5-8e26-27fde217c66a/osd-block-dd275e43-6f57-4a44-9364-a7ad9cf829c6
lrwxrwxrwx 1 ceph ceph 9 Dec 31 15:04 block.db -> /dev/sde2
lrwxrwxrwx 1 ceph ceph 9 Dec 31 15:04 block.wal -> /dev/sde1
-rw-r--r-- 1 ceph ceph 2 Dec 31 12:05 bluefs
-rw-r--r-- 1 ceph ceph 37 Dec 31 12:05 ceph_fsid
-rw-r--r-- 1 ceph ceph 37 Dec 31 12:05 fsid
-rw------- 1 ceph ceph 55 Dec 31 12:05 keyring
-rw-r--r-- 1 ceph ceph 8 Dec 31 12:05 kv_backend
-rw-r--r-- 1 ceph ceph 21 Dec 31 12:05 magic
-rw-r--r-- 1 ceph ceph 4 Dec 31 12:05 mkfs_done
-rw-r--r-- 1 ceph ceph 41 Dec 31 12:05 osd_key
-rw-r--r-- 1 ceph ceph 6 Dec 31 12:05 ready
-rw-r--r-- 1 ceph ceph 10 Dec 31 12:05 type
-rw-r--r-- 1 ceph ceph 2 Dec 31 12:05 whoami
# systemctl reset-failed ceph-osd@0.service
# systemctl restart ceph-osd@0
# systemctl status ceph-osd@0
● ceph-osd@0.service - Ceph object storage daemon osd.0
Loaded: loaded (/usr/lib/systemd/system/ceph-osd@.service; enabled-runtime; vendor preset: disabled)
Active: active (running) since Fri 2021-12-31 15:30:40 CST; 6s ago
Process: 590005 ExecStartPre=/usr/lib/ceph/ceph-osd-prestart.sh --cluster ${CLUSTER} --id %i (code=exited, status=0/SUCCESS)
Main PID: 590014 (ceph-osd)
CGroup: /system.slice/system-ceph\x2dosd.slice/ceph-osd@0.service
└─590014 /usr/bin/ceph-osd -f --cluster ceph --id 0 --setuser ceph --setgroup ceph
... ...
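If the service still fails to start after relinking, the daemon log usually shows which device could not be opened; a quick way to check (assuming systemd/journald, as on this host):

journalctl -u ceph-osd@0 -n 50 --no-pager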
# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
-1 39.67140 root default
-7 14.42596 host node13
2 hdd 3.60649 osd.2 up 1.00000 1.00000
5 hdd 3.60649 osd.5 up 1.00000 1.00000
8 hdd 3.60649 osd.8 up 1.00000 1.00000
10 hdd 3.60649 osd.10 up 1.00000 1.00000
-5 14.42596 host node17
1 hdd 3.60649 osd.1 up 1.00000 1.00000
4 hdd 3.60649 osd.4 up 1.00000 1.00000
7 hdd 3.60649 osd.7 up 1.00000 1.00000
9 hdd 3.60649 osd.9 up 1.00000 1.00000
-3 10.81947 host node19
0 hdd 3.60649 osd.0 up 1.00000 1.00000
3 hdd 3.60649 osd.3 up 1.00000 1.00000
6 hdd 3.60649 osd.6 up 1.00000 1.00000
# ceph -s
cluster:
id: c22f2485-2763-46c5-8353-3f3ee39df6d8
health: HEALTH_OK
services:
mon: 3 daemons, quorum node13,node17,node19
mgr: node17(active), standbys: node19, node13
osd: 11 osds: 11 up, 11 in
data:
pools: 0 pools, 0 pgs
objects: 0 objects, 0B
usage: 11.0GiB used, 39.7TiB / 39.7TiB avail
pgs:
At this point the OSD is back up and the cluster has returned to normal.
--End--