FIB统计信息文件fib_triestat

2023-12-09 16:18:57

如下fib_triestat文件的内容。

# cat /proc/net/fib_triestat 
Basic info: size of leaf: 48 bytes, size of tnode: 40 bytes.
Id 10:
        Aver depth:     1.66
        Max depth:      2
        Leaves:         3
        Prefixes:       3
        Internal nodes: 2
          2: 2
        Pointers: 8
Null ptrs: 4
Total size: 1  kB

Counters:
---------
gets = 2
backtracks = 0
semantic match passed = 0
semantic match miss = 0
null node hit= 0
skipped node resize = 0

fib_triestat处理

如下命名空间初始化函数fib_proc_init中注册了处理函数fib_triestat_seq_show。

int __net_init fib_proc_init(struct net *net)
{

    if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
            fib_triestat_seq_show, NULL))
        goto out2;

基本信息的输出，叶子节点的大小由宏LEAF_SIZE得到；中间节点的大小由宏TNODE_SIZE(0)得到，稍后介绍。

static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
    struct net *net = (struct net *)seq->private;
    unsigned int h;

    seq_printf(seq,
           "Basic info: size of leaf:"
           " %zd bytes, size of tnode: %zd bytes.\n",
           LEAF_SIZE, TNODE_SIZE(0));

之后，遍历哈希桶，以及其中的每个哈希链表，对于每个路由表显示相应的table、stats、usage等信息。

    rcu_read_lock();
    for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
        struct hlist_head *head = &net->ipv4.fib_table_hash[h];
        struct fib_table *tb;

        hlist_for_each_entry_rcu(tb, head, tb_hlist) {
            struct trie *t = (struct trie *) tb->tb_data;
            struct trie_stat stat;

            if (!t)
                continue;

            fib_table_print(seq, tb);

            trie_collect_stats(t, &stat);
            trie_show_stats(seq, &stat);
#ifdef CONFIG_IP_FIB_TRIE_STATS
            trie_show_usage(seq, t->stats);
#endif
        }

fib_triestat基本信息

叶子节点的大小为LEAF_SIZE，即宏TNODE_SIZE(1)；中间节点的大小为宏TNODE_SIZE(0)，两者相差了一个key_vector结构的大小。这里LEAF_SIZE为48，中间节点大小TNODE_SIZE(0)为40，结构key_vector的大小为8字节。

struct key_vector {
    t_key key;
    unsigned char pos;      /* 2log(KEYLENGTH) bits needed */
    unsigned char bits;     /* 2log(KEYLENGTH) bits needed */
    unsigned char slen;
    union {
        /* This list pointer if valid if (pos | bits) == 0 (LEAF) */
        struct hlist_head leaf;
        /* This array is valid if (pos | bits) > 0 (TNODE) */
        struct key_vector __rcu *tnode[0];
    };
};
struct tnode {
    struct rcu_head rcu;
    t_key empty_children;       /* KEYLENGTH bits needed */
    t_key full_children;        /* KEYLENGTH bits needed */
    struct key_vector __rcu *parent;
    struct key_vector kv[1];
#define tn_bits kv[0].bits
};

#define TNODE_SIZE(n)   offsetof(struct tnode, kv[0].tnode[n])
#define LEAF_SIZE   TNODE_SIZE(1)

表信息

对于本地和主路由表，打印文字Local：或者Main：字符串，而对于其它路由表，打印其ID号。

static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
{
    if (tb->tb_id == RT_TABLE_LOCAL)
        seq_puts(seq, "Local:\n");
    else if (tb->tb_id == RT_TABLE_MAIN)
        seq_puts(seq, "Main:\n");
    else
        seq_printf(seq, "Id %d:\n", tb->tb_id);
}

收集表信息

遍历路由表的trie树，统计叶子节点的数量，并计算总的深度totdepth，即所有叶子结点的深度之和。并且，计算叶子节点的最大深度maxdepth。最后，通过叶子结点的fa_alias链表，计算前缀的数量。

另外统计所有中间节点的数量，以及分别统计处理不同比特位宽的节点的数量，并且，统计所有中间节点的empty_children节点的数量之和。

static void trie_collect_stats(struct trie *t, struct trie_stat *s)
{
    struct key_vector *n;
    struct fib_trie_iter iter;

    memset(s, 0, sizeof(*s));

    rcu_read_lock();
    for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
        if (IS_LEAF(n)) {
            struct fib_alias *fa;

            s->leaves++;
            s->totdepth += iter.depth;
            if (iter.depth > s->maxdepth)
                s->maxdepth = iter.depth;

            hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
                ++s->prefixes;
        } else {
            s->tnodes++;
            if (n->bits < MAX_STAT_DEPTH)
                s->nodesizes[n->bits]++;
            s->nullpointers += tn_info(n)->empty_children;
        }

对于路由表trie的第一个节点，由函数fib_trie_get_first获取，参数t取值为路由表结构fib_table的子成员tb_data，其kv结构的成员tnode[0]即为第一个节点。此节点如果为叶子节点，将iter的深度depth设置为0，否则，对于中间节点将深度设置为1。

对于叶子，iter的tnode为其父节点的值；而对于中间节点，iter的tnode即为节点自身的值。

static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t)
{
    struct key_vector *n, *pn;

    if (!t) return NULL;

    pn = t->kv;
    n = rcu_dereference(pn->tnode[0]);
    if (!n)
        return NULL;

    if (IS_TNODE(n)) {
        iter->tnode = n;
        iter->index = 0;
        iter->depth = 1;
    } else {
        iter->tnode = pn;
        iter->index = 0;
        iter->depth = 0;
    }

    return n;

获取下一个节点由函数fib_trie_get_next实现。首先，取得当前遍历的节点索引值赋值给cindex，并将当前遍历节点赋值于pn。下一个节点不存在的条件为IS_TRIE为真，即当前节点的前缀长度已经达到32（KEYLENGTH长度）。

#define IS_TRIE(n)  ((n)->pos >= KEYLENGTH)

如果子节点索引值小于pn的子节点数量（child_length），获取其下一个子节点，如果新节点为叶子节点，保存新的索引值，下次遍历将由此继续。反之，对于中间节点，移动到trie的下一深度，下一次将遍历此中间节点的第一个子节点（pn的孙节点）。

如果当前索引值超过或者等于pn节点的子节点数量，表明遍历完成。找到pn节点的父节点，通过父节点找到其下一个子节点（pn节点的兄弟节点）。

static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
{
    unsigned long cindex = iter->index;
    struct key_vector *pn = iter->tnode;
    t_key pkey;

    while (!IS_TRIE(pn)) {
        while (cindex < child_length(pn)) {
            struct key_vector *n = get_child_rcu(pn, cindex++);

            if (!n) continue;

            if (IS_LEAF(n)) {
                iter->tnode = pn;
                iter->index = cindex;
            } else {
                /* push down one level */
                iter->tnode = n;
                iter->index = 0;
                ++iter->depth;
            }
            return n;
        }
        /* Current node exhausted, pop back up */
        pkey = pn->key;
        pn = node_parent_rcu(pn);
        cindex = get_index(pkey, pn) + 1;
        --iter->depth;
    }

    /* record root node so further searches know we are done */
    iter->tnode = pn;
    iter->index = 0;

    return NULL;

显示trie统计

首先，根据总的深度值，和叶子结点数量，计算平均深度值（avdepth）。函数trie_show_stats中，还显示最大深度，叶子节点数量，前缀数量。并且计算叶子结点、前缀、中间节点占用的空间大小等信息（bytes）。

static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
{
    unsigned int i, max, pointers, bytes, avdepth;

    if (stat->leaves)
        avdepth = stat->totdepth*100 / stat->leaves;
    else
        avdepth = 0;

    seq_printf(seq, "\tAver depth:     %u.%02d\n",
           avdepth / 100, avdepth % 100);
    seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);

    seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
    bytes = LEAF_SIZE * stat->leaves;

    seq_printf(seq, "\tPrefixes:       %u\n", stat->prefixes);
    bytes += sizeof(struct fib_alias) * stat->prefixes;

    seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
    bytes += TNODE_SIZE(0) * stat->tnodes;

最后，显示每个位宽段对应的节点数量，并计算中间节点相连所使用的指针的数量，以及其占用空间，与以上计算的空间bytes相加，得到路由trie占用的总空间。

    max = MAX_STAT_DEPTH;
    while (max > 0 && stat->nodesizes[max-1] == 0)
        max--;

    pointers = 0;
    for (i = 1; i < max; i++)
        if (stat->nodesizes[i] != 0) {
            seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
            pointers += (1<<i) * stat->nodesizes[i];
        }
    seq_putc(seq, '\n');
    seq_printf(seq, "\tPointers: %u\n", pointers);

    bytes += sizeof(struct key_vector *) * pointers;
    seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
    seq_printf(seq, "Total size: %u  kB\n", (bytes + 1023) / 1024);

trie利用率显示

遍历每处理器统计变量结构trie_use_stats，求总和，并进行显示。gets对应于路由查询操作的次数（fib_table_lookup）；backtrack对应于路由trie向下查找过程中，没有名字从而向树根回退的次数。semantic_match_passed对应于查询成功的计数。

semantic_match_miss对应于路由未找到的计数；null_node_hit对应路由trie树查找过程中遇到的空节点；resize_node_skipped对应于resize函数执行过程中空间未改变的节点的计数（inflate或halve）。

static void trie_show_usage(struct seq_file *seq, const struct trie_use_stats __percpu *stats)
{
    struct trie_use_stats s = { 0 };

    /* loop through all of the CPUs and gather up the stats */
    for_each_possible_cpu(cpu) {
        const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);

        s.gets += pcpu->gets;
        s.backtrack += pcpu->backtrack;
        s.semantic_match_passed += pcpu->semantic_match_passed;
        s.semantic_match_miss += pcpu->semantic_match_miss;
        s.null_node_hit += pcpu->null_node_hit;
        s.resize_node_skipped += pcpu->resize_node_skipped;
    }
    seq_printf(seq, "\nCounters:\n---------\n");
    seq_printf(seq, "gets = %u\n", s.gets);
    seq_printf(seq, "backtracks = %u\n", s.backtrack);
    seq_printf(seq, "semantic match passed = %u\n", s.semantic_match_passed);
    seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
    seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
    seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);

内核版本 5.10

码农公寓

fib_triestat处理

fib_triestat基本信息

表信息

收集表信息

显示trie统计

trie利用率显示

相关文章