在1台CentOS机器上执行“ls”、“df”命令时,都会卡死。时间久一点,连“Ctrl”+“C”都不一定能退出,只能关掉远程连接,再重新连接登录。
执行命令:
[root@iZ23l20815kZ ~]# strace df -h execve("/bin/df", ["df", "-h"], [/* 24 vars */]) = 0 brk(0) = 0x1720000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fc7adcb4000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=31869, ...}) = 0 mmap(NULL, 31869, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fc7adcac000 close(3) = 0 open("/lib64/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0p\356\201\2535\0\0\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1926520, ...}) = 0 mmap(0x35ab800000, 3750152, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x35ab800000 mprotect(0x35ab98a000, 2097152, PROT_NONE) = 0 mmap(0x35abb8a000, 20480, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x18a000) = 0x35abb8a000 mmap(0x35abb8f000, 18696, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x35abb8f000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fc7adcab000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fc7adcaa000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fc7adca9000 arch_prctl(ARCH_SET_FS, 0x7fc7adcaa700) = 0 mprotect(0x35abb8a000, 16384, PROT_READ) = 0 mprotect(0x35ab21f000, 4096, PROT_READ) = 0 munmap(0x7fc7adcac000, 31869) = 0 brk(0) = 0x1720000 brk(0x1741000) = 0x1741000 open("/usr/lib/locale/locale-archive", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=99158576, ...}) = 0 mmap(NULL, 99158576, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fc7a7e18000 close(3) = 0 open("/etc/mtab", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=1065, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fc7adcb3000 read(3, "/dev/xvda1 / ext4 rw,barrier=0 0"..., 4096) = 1065 read(3, "", 4096) = 0 close(3) = 0 munmap(0x7fc7adcb3000, 4096) = 0 
statfs("/", {f_type="EXT2_SUPER_MAGIC", f_bsize=4096, f_blocks=10287952, f_bfree=8744666, f_bavail=8220404, f_files=2621440, f_ffree=2520273, f_fsid={-102338008, -1862172196}, f_namelen=255, f_frsize=4096}) = 0 open("/usr/share/locale/locale.alias", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=2512, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fc7adcb3000 read(3, "# Locale name alias data base.\n#"..., 4096) = 2512 read(3, "", 4096) = 0 close(3) = 0 munmap(0x7fc7adcb3000, 4096) = 0 open("/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en/LC_MESSAGES/coreutils.mo", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=435, ...}) = 0 mmap(NULL, 435, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fc7adcb3000 close(3) = 0 statfs("/sys", {f_type="SYSFS_MAGIC", f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 statfs("/dev/pts", {f_type="DEVPTS_SUPER_MAGIC", f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 statfs("/dev/shm", {f_type=0x1021994, f_bsize=4096, f_blocks=1007245, f_bfree=1007245, f_bavail=1007245, f_files=1007245, f_ffree=1007244, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 statfs("/proc/sys/fs/binfmt_misc", {f_type=0x42494e4d, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 
statfs("/proc/xen", {f_type=0xabba1974, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 statfs("/yqd", {f_type="EXT2_SUPER_MAGIC", f_bsize=4096, f_blocks=128980948, f_bfree=86073665, f_bavail=79520150, f_files=32768000, f_ffree=32409342, f_fsid={-316429259, 1252474724}, f_namelen=255, f_frsize=4096}) = 0 statfs("/var/lib/nfs/rpc_pipefs", {f_type=0x67596969, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 statfs("/proc/fs/nfsd", {f_type=0x6e667364, f_bsize=4096, f_blocks=0, f_bfree=0, f_bavail=0, f_files=0, f_ffree=0, f_fsid={0, 0}, f_namelen=255, f_frsize=4096}) = 0 statfs("/yqd/war", {f_type="NFS_SUPER_MAGIC", f_bsize=1048576, f_blocks=503838, f_bfree=228849, f_bavail=203233, f_files=32768000, f_ffree=32667496, f_fsid={0, 0}, f_namelen=255, f_frsize=1048576}) = 0 statfs("/yqd/logs/share/web3_43.110", ^C <unfinished ...>
发现df卡死在对 /yqd/logs/share/web3_43.110 的 statfs 调用上,该目录是从10.253.43.110服务器挂载过来的NFS目录。
检查挂载情况:
[root@iZ23l20815kZ ~]# mount
/dev/xvda1 on / type ext4 (rw,barrier=0)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
none on /proc/xen type xenfs (rw)
/dev/xvdb1 on /yqd type ext4 (rw)
sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
nfsd on /proc/fs/nfsd type nfsd (rw)
10.253.40.87:/yqd/war on /yqd/war type nfs (rw,nolock,nfsvers=3,vers=3,addr=10.253.40.87)
10.253.43.110:/yqd/logs/tomcat on /yqd/logs/share/web3_43.110 type nfs (rw,vers=4,addr=10.253.43.110,clientaddr=10.253.43.120)
10.253.40.103:/yqd/logs/tomcat on /yqd/logs/share/web2_40.103 type nfs (rw,vers=4,addr=10.253.40.103,clientaddr=10.253.43.120)
10.253.40.14:/yqd/logs/tomcat on /yqd/logs/share/web1_40.14 type nfs (rw,vers=4,addr=10.253.40.14,clientaddr=10.253.43.120)
10.253.43.150:/yqd/logs/tomcat on /yqd/logs/share/api3_43.150 type nfs (rw,vers=4,addr=10.253.43.150,clientaddr=10.253.43.120)
10.253.40.94:/yqd/logs/tomcat on /yqd/logs/share/api1_40.94 type nfs (rw,vers=4,addr=10.253.40.94,clientaddr=10.253.43.120)
//10.253.40.38/IMG1 on /mnt/data type cifs (rw)
先强制卸载10.253.43.150的挂载:这台机器在几天前已经下线关机,其挂载肯定已经失效。
[root@iZ23l20815kZ ~]# umount -lf /yqd/logs/share/api3_43.150
果然,还是会卡死:
[root@iZ23l20815kZ ~]# df -h
^C
强制卸载10.253.43.110:
[root@iZ23l20815kZ ~]# umount -lf /yqd/logs/share/web3_43.110
恢复正常:
[root@iZ23l20815kZ ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/xvda1 40G 5.9G 32G 16% /
tmpfs 3.9G 0 3.9G 0% /dev/shm
/dev/xvdb1 493G 164G 304G 36% /yqd
10.253.40.87:/yqd/war 493G 269G 199G 58% /yqd/war
10.253.40.103:/yqd/logs/tomcat 493G 280G 188G 60% /yqd/logs/share/web2_40.103
10.253.40.14:/yqd/logs/tomcat 493G 295G 173G 64% /yqd/logs/share/web1_40.14
10.253.40.94:/yqd/logs/tomcat 493G 204G 264G 44% /yqd/logs/share/api1_40.94
//10.253.40.38/IMG1 1000G 502G 499G 51% /mnt/data
检查挂载情况:
[root@iZ23l20815kZ ~]# mount
/dev/xvda1 on / type ext4 (rw,barrier=0)
proc on /proc type proc (rw)
sysfs on /sys type sysfs (rw)
devpts on /dev/pts type devpts (rw,gid=5,mode=620)
tmpfs on /dev/shm type tmpfs (rw)
none on /proc/sys/fs/binfmt_misc type binfmt_misc (rw)
none on /proc/xen type xenfs (rw)
/dev/xvdb1 on /yqd type ext4 (rw)
sunrpc on /var/lib/nfs/rpc_pipefs type rpc_pipefs (rw)
nfsd on /proc/fs/nfsd type nfsd (rw)
10.253.40.87:/yqd/war on /yqd/war type nfs (rw,nolock,nfsvers=3,vers=3,addr=10.253.40.87)
10.253.40.103:/yqd/logs/tomcat on /yqd/logs/share/web2_40.103 type nfs (rw,vers=4,addr=10.253.40.103,clientaddr=10.253.43.120)
10.253.40.14:/yqd/logs/tomcat on /yqd/logs/share/web1_40.14 type nfs (rw,vers=4,addr=10.253.40.14,clientaddr=10.253.43.120)
10.253.40.94:/yqd/logs/tomcat on /yqd/logs/share/api1_40.94 type nfs (rw,vers=4,addr=10.253.40.94,clientaddr=10.253.43.120)
//10.253.40.38/IMG1 on /mnt/data type cifs (rw)
问题原因:
该问题通常是由于系统内挂载了NFS共享分区,而NFS响应缓慢或文件较多,导致统计NFS分区相关信息耗时很久。
相应的解决办法:
耐心等待统计完成后返回
卸载NFS分区的挂载后重新统计
检查NFS分区访问情况
检查服务器端NFS服务是否开启
针对当前案例,原因是其中一台服务器(10.253.43.110)的tomcat服务去年就已经去掉,被挂载的目录下已经没有日志和目录,造成切换到该挂载目录时无法响应,执行df命令也没有响应。至于卸载另一台服务器(10.253.43.150)的挂载,是因为该服务器前几天已下线关机,导致其挂载状态发生变化。执行umount命令卸载这两处挂载后,问题解决。
参考文档:
https://blog.csdn.net/lufeisan/article/details/53150319——Linux df 命令长时间没有返回