[root@localhost caq]# xfs_db -c frag -r /dev/sdaw actual 20316, ideal 3544, fragmentation factor 82.56% Note, this number is largely meaningless. Files on this filesystem average 5.73 extents per file [root@localhost caq]# xfs_fsr /dev/sdaw /mnt/K4HUWARB start inode=0 [root@localhost caq]# [root@localhost caq]# [root@localhost caq]# xfs_db -c frag -r /dev/sdaw actual 4159, ideal 3544, fragmentation factor 14.79% Note, this number is largely meaningless. Files on this filesystem average 1.17 extents per file
在碎片整理之前,碎片化率为82.56%。一般来说,碎片化率高于25%则建议整理:一来连续的块可以提高读的性能,二来可以省出很多block。
除了xfs_fsr,还有很多有用的工具。
xfs_admin xfs_db xfs_freeze xfs_info xfs_logprint xfs_mkfile xfs_repair xfs_bmap xfsdump xfs_fsr xfsinvutil xfs_mdrestore xfs_ncheck xfsrestore xfs_copy xfs_estimate xfs_growfs xfs_io xfs_metadump xfs_quota xfs_rtcp
用的最多的是xfs_repair,xfs_check,xfs_db.
xfs_info的使用:
[root@localhost caq]# xfs_info /dev/sdaw meta-data=/dev/sdaw isize=256 agcount=4, agsize=122094720 blks = sectsz=512 attr=2 data = bsize=4096 blocks=488378646, imaxpct=5 = sunit=128 swidth=512 blks naming =version 2 bsize=4096 ascii-ci=0 log =internal bsize=4096 blocks=238472, version=2 = sectsz=512 sunit=8 blks, lazy-count=1 realtime =none extsz=4096 blocks=0, rtextents=0
可以看出,sdaw有4个AG,block个数为 488378646,每个AG管理的block数量为总数量/4, 扇区大小为512,
sunit=128、swidth=512 是条带参数,raid使用。
该挂载点的xfs的日志记录在内部,有些设备上面既有慢速的机械盘,又有快速的ssd甚至nvme盘,则可以把慢速设备的xfs日志放在nvme上,可以提高性能。
[root@localhost caq]# xfs_info -V xfs_info version 3.1.1
xfs_admin 的使用:
该工具能修改xfs文件系统参数
设置uuid xfs_admin -U uuid /dev/sdc
设置label xfs_admin -L label /dev/sdc
xfs_db工具能打印和修改未mount的xfs文件系统的元数据结构体:
xfs_db /dev/sdc
xfs_db> sb
xfs_db> p
magicnum = 0x58465342
blocksize = 4096
dblocks = 524288
修改:xfs_db -x -c 'sb 0' -c 'write dblocks 0' /dev/sdc
查看文件inode xfs_db> inode 131
xfs_db> p
core.magic = 0x494e
core.mode = 0100644
core.version = 2
打印文件系统剩余空间:
xfs_db> freesp from to extents blocks pct 1 1 129 129 0.00 8 15 3 24 0.00 64 127 131 14682 0.00 128 255 35 5613 0.00 256 511 3 892 0.00 512 1023 1 981 0.00 1024 2047 7 10178 0.00 2048 4095 3 8192 0.00 4096 8191 4 24022 0.00 8192 16383 14 176636 0.02 16384 32767 62 1223755 0.13 32768 65535 63 2769390 0.29 65536 131071 67 6069834 0.63 131072 262143 41 7339741 0.76 262144 524287 25 9600697 0.99 524288 1048575 21 14054653 1.46 1048576 2097151 2 2121340 0.22 16777216 30523648 32 922117670 95.50
打印原始16进制数据:
xfs_db> type text
xfs_db> p
00: 49 4e 81 ed 02 02 00 00 00 00 00 00 00 00 00 00 IN..............
10: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 01 ................
20: 54 a2 14 e5 01 bb 2a 29 54 a2 14 e5 02 72 45 2a T.......T....rE.
30: 54 a2 14 e5 02 72 45 2a 00 00 00 00 00 00 22 4c T....rE........L
40: 00 00 00 00 00 00 00 03 00 00 00 00 00 00 00 01 ................
…..
xfs通过分级来管理存储空间:扇区(sector)、块(block)、extent、AG(allocation group)。
其中:sector<block<extent<ag
typedef struct xfs_agf { /* * Common allocation group header information */ __be32 agf_magicnum; /* magic number == XFS_AGF_MAGIC */ __be32 agf_versionnum; /* header version == XFS_AGF_VERSION */ __be32 agf_seqno; /* sequence # starting from 0 */ __be32 agf_length; /* size in blocks of a.g. */ /* * Freespace information */ __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ __be32 agf_spare0; /* spare field */ __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ __be32 agf_spare1; /* spare field */ __be32 agf_flfirst; /* first freelist block's index */ __be32 agf_fllast; /* last freelist block's index */ __be32 agf_flcount; /* count of blocks in freelist */ __be32 agf_freeblks; /* total free blocks */ __be32 agf_longest; /* longest free space */ __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ uuid_t agf_uuid; /* uuid of filesystem */------------------------------以下字段在2.6.32版本中没看到,3.10对应的xfs模块有,中间版本没对比过 /* * reserve some contiguous space for future logged fields before we add * the unlogged fields. This makes the range logging via flags and * structure offsets much simpler. */ __be64 agf_spare64[16]; /* unlogged fields, written during buffer writeback. */ __be64 agf_lsn; /* last write sequence */ __be32 agf_crc; /* crc of agf sector */ __be32 agf_spare2; /* structure must be padded to 64 bit alignment */ } xfs_agf_t;
可以查看一个agf对比下:
[root@localhost caq]# xfs_db /dev/sdaw xfs_db> agf 0 xfs_db> p magicnum = 0x58414746--------------对比agf_magicnum字段 versionnum = 1---------------------对比agf_versionnum字段 seqno = 0--------------------------对比agf_seqno length = 30523648------------------对比agf_length bnoroot = 144----------------------agf_roots cntroot = 145 bnolevel = 1 cntlevel = 1 flfirst = 0 fllast = 3 flcount = 4 freeblks = 29067437----------------空闲的block数 longest = 28900480-----------------最大的空闲段长度 btreeblks = 0 uuid = 6c46ba7a-03b6-46bc-912c-7bac04aafe92 lsn = 0xd000484b0 crc = 0x37a41d5b (correct)----------crc字段
查看144这个block的信息:
xfs_db> fsblock 144 xfs_db> p 000: 41423342 00000028 ffffffff ffffffff 00000000 00000480 0000000d 000484b0 020: 6c46ba7a 03b646bc 912c7bac 04aafe92 00000000 8e0f24fb 00000004 00000077 040: 00000096 0000006a 000006a4 0000005c 00004107 000002f9 000049a4 0000005c 060: 00006a24 0000005c 00006f24 00001cdc 00008d00 00000100 00015400 00000500 080: 00015b00 00000f00 00016a01 0000007f 0001aa80 00002d00 00021880 00000100 0a0: 00031f80 00003380 00039300 00000500 00041800 00002380 00043b88 00000078 0c0: 0004b704 0000007c 0004f780 00002080 00059800 00000200 00079504 0000037c 0e0: 00081880 00000400 00096180 00000500 0009a680 00000400 000a6a80 00000100 100: 000aab80 00000c00 000ab880 00003400 000b6c80 00000200 000c6e80 00000300 120: 000d7480 00000200 000df088 000000f8 000e3184 0000007c 000f7200 00000f80 140: 00100180 00000200 00118580 00003f80 00164700 00000080 00164880 00003c00 160: 0016c480 00008000 00184480 00004000 0018c480 01b8fc80 0018c480 01b8fc80 180: 0018c480 01b8fc80 0018c480 01b8fc80 0018c480 01b8fc80 001a1e00 01b7a300 1a0: 001a2500 01b79c00 001a2b00 01b79600 001a3100 01b79000 001a3100 01b79000 1c0: 001a3900 01b78800 001a3900 01b78800 001a3b00 01b78600 001a4100 01b78000
这样直接打印16进制不直观,可以像printf指定格式一样,用type指定类型后再格式化打印:
xfs_db> fsblock 144 xfs_db> type bnobt-----------------指定打印的type类型 xfs_db> p magic = 0x41423342-----------------这个可以看到,和上面直接打印的magic是一样的,就是‘AB3B’(带crc的格式;不带crc的旧格式为‘ABTB’,即0x41425442) level = 0--------------------------表示叶子节点,1表示中间节点 numrecs = 40-----------------------本叶子节点记录的空闲extent个数,就是recs的数组中有效的元素个数 leftsib = null rightsib = null bno = 1152 lsn = 0xd000484b0 uuid = 6c46ba7a-03b6-46bc-912c-7bac04aafe92 owner = 0 crc = 0x8e0f24fb (correct) recs[1-40] = [startblock,blockcount] 1:[4,119] 2:[150,106] 3:[1700,92] 4:[16647,761] 5:[18852,92] 6:[27172,92] 7:[28452,7388] 8:[36096,256] 9:[87040,1280] 10:[88832,3840] 11:[92673,127] 12:[109184,11520] 13:[137344,256] 14:[204672,13184] 15:[234240,1280] 16:[268288,9088] 17:[277384,120] 18:[308996,124] 19:[325504,8320] 20:[366592,512] 21:[496900,892] 22:[530560,1024] 23:[614784,1280] 24:[632448,1024] 25:[682624,256] 26:[699264,3072] 27:[702592,13312] 28:[748672,512] 29:[814720,768] 30:[881792,512] 31:[913544,248] 32:[930180,124] 33:[1012224,3968] 34:[1048960,512] 35:[1148288,16256] 36:[1459968,128] 37:[1460352,15360] 38:[1492096,32768] 39:[1590400,16384] 40:[1623168,28900480]
查看xfs超级块的一些信息:
[root@localhost /]# xfs_db /dev/sdaw xfs_db> sb 0-----------------超级块 xfs_db> p magicnum = 0x58465342--------这个就是XFSB的ascii码 blocksize = 4096-------------逻辑块大小 dblocks = 976754646----------磁盘总块数,块数*块大小就是磁盘的空间了 rblocks = 0 rextents = 0 uuid = 6c46ba7a-03b6-46bc-912c-7bac04aafe92 logstart = 536870919 rootino = 1024---------------根节点inode号 rbmino = 1025 rsumino = 1026 rextsize = 1 agblocks = 30523648----------每个AG的块数量, agcount = 32-----------------32个AG,和AG块数乘起来就是块总数 rbmblocks = 0 logblocks = 476930-----------日志块数 versionnum = 0xbda5 sectsize = 4096 inodesize = 512 inopblock = 8----------------每个block可以存储的inode的个数,4096/512=8 fname = "\000\000\000\000\000\000\000\000\000\000\000\000"--------------文件系统的名称 blocklog = 12----------------2的12次方,这个可以和page的shift类比, sectlog = 12-----------------sector大小的log表示, inodelog = 9-----------------inode大小的log表示 inopblog = 3-----------------每个block可以存储的inode的log表示, agblklog = 25----------------每个ag可以管理的block个数的log表示,这个存在向上取整 rextslog = 0 inprogress = 0 imax_pct = 5 icount = 4480 ifree = 929 fdblocks = 931125411 frextents = 0 uquotino = null gquotino = null qflags = 0 flags = 0 shared_vn = 0 inoalignmt = 4 unit = 128 width = 512 dirblklog = 0 logsectlog = 12 logsectsize = 4096 logsunit = 4096 features2 = 0x18a bad_features2 = 0x18a features_compat = 0 features_ro_compat = 0 features_incompat = 0x1 features_log_incompat = 0 crc = 0xd0deb599 (correct) spino_align = 0 pquotino = null lsn = 0xd00048528 meta_uuid = 00000000-0000-0000-0000-000000000000
xfs的加载过程及常见维护的队列:
Xfs加载ko入口函数在init_xfs_fs,主要申请xfs内核模块使用的内存资源,注册xfs文件系统。
常驻内存工作队列说明:
[xfsalloc] 模块加载产生,用于文件申请extent的时候,调用__xfs_bmapi_allocate,分配extent。
[xfs_mru_cache] 模块加载产生,用于访问MRU (Most Recently Used) Cache的时候使用。
[xfslogd]模块加载产生,xfs_buf的IO结束调用 b_iodone_work的队列。
以下工作队列在mount阶段产生,此时会有指定的实例、挂载点、设备号等,因此队列名中带有设备名(如sdc):
[xfs-data/sdc]mount产生,数据IO的异步操作队列,directIO结束的时候调用io_work。
[xfs-conv/sdc] mount产生,数据IO的异步操作队列,bufIO结束时候调用io_work。
[xfs-cil/sdc] mount产生,push日志的工作队列,用于xfs_trans_commit将事务提交到日志里。
[xfsaild/sdc] mount产生, xfs_log_worker触发,将脏节点push到buf,再将buf下盘
比如检查fsblock:
参考资料:
http://xfs.org/
https://access.redhat.com/
中兴OS团队的相关GPL文档。