如下fib_triestat文件的内容。
# cat /proc/net/fib_triestat
Basic info: size of leaf: 48 bytes, size of tnode: 40 bytes.
Id 10:
Aver depth: 1.66
Max depth: 2
Leaves: 3
Prefixes: 3
Internal nodes: 2
2: 2
Pointers: 8
Null ptrs: 4
Total size: 1 kB
Counters:
---------
gets = 2
backtracks = 0
semantic match passed = 0
semantic match miss = 0
null node hit= 0
skipped node resize = 0
fib_triestat处理
如下命名空间初始化函数fib_proc_init中注册了处理函数fib_triestat_seq_show。
int __net_init fib_proc_init(struct net *net)
{
if (!proc_create_net_single("fib_triestat", 0444, net->proc_net,
fib_triestat_seq_show, NULL))
goto out2;
基本信息的输出,叶子节点的大小由宏LEAF_SIZE得到;中间节点的大小由宏TNODE_SIZE(0)得到,稍后介绍。
static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
struct net *net = (struct net *)seq->private;
unsigned int h;
seq_printf(seq,
"Basic info: size of leaf:"
" %zd bytes, size of tnode: %zd bytes.\n",
LEAF_SIZE, TNODE_SIZE(0));
之后,遍历哈希桶,以及其中的每个哈希链表,对于每个路由表显示相应的table、stats、usage等信息。
rcu_read_lock();
for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
struct hlist_head *head = &net->ipv4.fib_table_hash[h];
struct fib_table *tb;
hlist_for_each_entry_rcu(tb, head, tb_hlist) {
struct trie *t = (struct trie *) tb->tb_data;
struct trie_stat stat;
if (!t)
continue;
fib_table_print(seq, tb);
trie_collect_stats(t, &stat);
trie_show_stats(seq, &stat);
#ifdef CONFIG_IP_FIB_TRIE_STATS
trie_show_usage(seq, t->stats);
#endif
}
fib_triestat基本信息
叶子节点的大小为LEAF_SIZE,即宏TNODE_SIZE(1);中间节点的大小为宏TNODE_SIZE(0),两者相差了一个key_vector结构的大小。这里LEAF_SIZE为48,中间节点大小TNODE_SIZE(0)为40,结构key_vector的大小为8字节。
struct key_vector {
t_key key;
unsigned char pos; /* 2log(KEYLENGTH) bits needed */
unsigned char bits; /* 2log(KEYLENGTH) bits needed */
unsigned char slen;
union {
/* This list pointer if valid if (pos | bits) == 0 (LEAF) */
struct hlist_head leaf;
/* This array is valid if (pos | bits) > 0 (TNODE) */
struct key_vector __rcu *tnode[0];
};
};
struct tnode {
struct rcu_head rcu;
t_key empty_children; /* KEYLENGTH bits needed */
t_key full_children; /* KEYLENGTH bits needed */
struct key_vector __rcu *parent;
struct key_vector kv[1];
#define tn_bits kv[0].bits
};
#define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n])
#define LEAF_SIZE TNODE_SIZE(1)
表信息
对于本地和主路由表,打印文字Local:或者Main:字符串,而对于其它路由表,打印其ID号。
static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
{
if (tb->tb_id == RT_TABLE_LOCAL)
seq_puts(seq, "Local:\n");
else if (tb->tb_id == RT_TABLE_MAIN)
seq_puts(seq, "Main:\n");
else
seq_printf(seq, "Id %d:\n", tb->tb_id);
}
收集表信息
遍历路由表的trie树,统计叶子节点的数量,并计算总的深度totdepth,即所有叶子结点的深度之和。并且,计算叶子节点的最大深度maxdepth。最后,通过叶子结点的fa_alias链表,计算前缀的数量。
另外统计所有中间节点的数量,以及分别统计处理不同比特位宽的节点的数量,并且,统计所有中间节点的empty_children节点的数量之和。
static void trie_collect_stats(struct trie *t, struct trie_stat *s)
{
struct key_vector *n;
struct fib_trie_iter iter;
memset(s, 0, sizeof(*s));
rcu_read_lock();
for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
if (IS_LEAF(n)) {
struct fib_alias *fa;
s->leaves++;
s->totdepth += iter.depth;
if (iter.depth > s->maxdepth)
s->maxdepth = iter.depth;
hlist_for_each_entry_rcu(fa, &n->leaf, fa_list)
++s->prefixes;
} else {
s->tnodes++;
if (n->bits < MAX_STAT_DEPTH)
s->nodesizes[n->bits]++;
s->nullpointers += tn_info(n)->empty_children;
}
对于路由表trie的第一个节点,由函数fib_trie_get_first获取,参数t取值为路由表结构fib_table的子成员tb_data,其kv结构的成员tnode[0]即为第一个节点。此节点如果为叶子节点,将iter的深度depth设置为0,否则,对于中间节点将深度设置为1。
对于叶子,iter的tnode为其父节点的值;而对于中间节点,iter的tnode即为节点自身的值。
static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t)
{
struct key_vector *n, *pn;
if (!t) return NULL;
pn = t->kv;
n = rcu_dereference(pn->tnode[0]);
if (!n)
return NULL;
if (IS_TNODE(n)) {
iter->tnode = n;
iter->index = 0;
iter->depth = 1;
} else {
iter->tnode = pn;
iter->index = 0;
iter->depth = 0;
}
return n;
获取下一个节点由函数fib_trie_get_next实现。首先,取得当前遍历的节点索引值赋值给cindex,并将当前遍历节点赋值于pn。下一个节点不存在的条件为IS_TRIE为真,即当前节点的前缀长度已经达到32(KEYLENGTH长度)。
#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
如果子节点索引值小于pn的子节点数量(child_length),获取其下一个子节点,如果新节点为叶子节点,保存新的索引值,下次遍历将由此继续。反之,对于中间节点,移动到trie的下一深度,下一次将遍历此中间节点的第一个子节点(pn的孙节点)。
如果当前索引值超过或者等于pn节点的子节点数量,表明遍历完成。找到pn节点的父节点,通过父节点找到其下一个子节点(pn节点的兄弟节点)。
static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter)
{
unsigned long cindex = iter->index;
struct key_vector *pn = iter->tnode;
t_key pkey;
while (!IS_TRIE(pn)) {
while (cindex < child_length(pn)) {
struct key_vector *n = get_child_rcu(pn, cindex++);
if (!n) continue;
if (IS_LEAF(n)) {
iter->tnode = pn;
iter->index = cindex;
} else {
/* push down one level */
iter->tnode = n;
iter->index = 0;
++iter->depth;
}
return n;
}
/* Current node exhausted, pop back up */
pkey = pn->key;
pn = node_parent_rcu(pn);
cindex = get_index(pkey, pn) + 1;
--iter->depth;
}
/* record root node so further searches know we are done */
iter->tnode = pn;
iter->index = 0;
return NULL;
显示trie统计
首先,根据总的深度值,和叶子结点数量,计算平均深度值(avdepth)。函数trie_show_stats中,还显示最大深度,叶子节点数量,前缀数量。并且计算叶子结点、前缀、中间节点占用的空间大小等信息(bytes)。
static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
{
unsigned int i, max, pointers, bytes, avdepth;
if (stat->leaves)
avdepth = stat->totdepth*100 / stat->leaves;
else
avdepth = 0;
seq_printf(seq, "\tAver depth: %u.%02d\n",
avdepth / 100, avdepth % 100);
seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
bytes = LEAF_SIZE * stat->leaves;
seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
bytes += sizeof(struct fib_alias) * stat->prefixes;
seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
bytes += TNODE_SIZE(0) * stat->tnodes;
最后,显示每个位宽段对应的节点数量,并计算中间节点相连所使用的指针的数量,以及其占用空间,与以上计算的空间bytes相加,得到路由trie占用的总空间。
max = MAX_STAT_DEPTH;
while (max > 0 && stat->nodesizes[max-1] == 0)
max--;
pointers = 0;
for (i = 1; i < max; i++)
if (stat->nodesizes[i] != 0) {
seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
pointers += (1<<i) * stat->nodesizes[i];
}
seq_putc(seq, '\n');
seq_printf(seq, "\tPointers: %u\n", pointers);
bytes += sizeof(struct key_vector *) * pointers;
seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
trie利用率显示
遍历每处理器统计变量结构trie_use_stats,求总和,并进行显示。gets对应于路由查询操作的次数(fib_table_lookup);backtrack对应于路由trie向下查找过程中,没有名字从而向树根回退的次数。semantic_match_passed对应于查询成功的计数。
semantic_match_miss对应于路由未找到的计数;null_node_hit对应路由trie树查找过程中遇到的空节点;resize_node_skipped对应于resize函数执行过程中空间未改变的节点的计数(inflate或halve)。
static void trie_show_usage(struct seq_file *seq, const struct trie_use_stats __percpu *stats)
{
struct trie_use_stats s = { 0 };
/* loop through all of the CPUs and gather up the stats */
for_each_possible_cpu(cpu) {
const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
s.gets += pcpu->gets;
s.backtrack += pcpu->backtrack;
s.semantic_match_passed += pcpu->semantic_match_passed;
s.semantic_match_miss += pcpu->semantic_match_miss;
s.null_node_hit += pcpu->null_node_hit;
s.resize_node_skipped += pcpu->resize_node_skipped;
}
seq_printf(seq, "\nCounters:\n---------\n");
seq_printf(seq, "gets = %u\n", s.gets);
seq_printf(seq, "backtracks = %u\n", s.backtrack);
seq_printf(seq, "semantic match passed = %u\n", s.semantic_match_passed);
seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
内核版本 5.10