/* Representation of a radix tree as implemented in this file, that contains
* the strings "foo", "foobar" and "footer" after the insertion of each
* word. When the node represents a key inside the radix tree, we write it
* between [], otherwise it is written between ().
在这个文件中实现的基树的表示,假如包含字符串"foo", "foobar" 和 "footer",它们相继插入。
当节点表示基树中的一个键,我们写在[]中间,否则(如果不是键)我们写在()中间
* This is the vanilla representation:
示例如下
* (f) ""
* \
* (o) "f"
* \
* (o) "fo"
* \
* [t b] "foo" 这是一个键
* / \
* "foot" (e) (a) "foob"
* / \
* "foote" (r) (r) "fooba"
* / \
* "footer" [] 键 [] "foobar" 键
*
* However, this implementation implements a very common optimization where
* successive nodes having a single child are "compressed" into the node
* itself as a string of characters, each representing a next-level child,
* and only the link to the node representing the last character node is
* provided inside the representation. So the above representation is turend
* into:
然而,这个实现是一个非常普通的优化,把连续只有一个孩子的节点压缩成一个拥有字符串的一个节点,
每个字符代表一个子节点,而且只有表示最后字符节点的链接被提供表示(这里的意思就是只有最后一个节点的字符有链接),
所以上述表示可以转化如下
* ["foo"] "" 压缩节点,只要最后一个节点有链接,中间的全部去掉了
* |
* [t b] "foo"
* / \
* "foot" ("er") ("ar") "foob" 压缩节点
* / \
* "footer" [] [] "foobar"
*
* However this optimization makes the implementation a bit more complex.
* For instance if a key "first" is added in the above radix tree, a
* "node splitting" operation is needed, since the "foo" prefix is no longer
* composed of nodes having a single child one after the other. This is the
* above tree and the resulting node splitting after this event happens:
然而这个优化使得实现更加复杂。举例如下,如果一个键"first"被添加在上述的基树中,需要一个分裂节点操作,
因为"foo"这个前缀不在是一个拥有单子节点相连的压缩节点了,下面是上述基树插入键"first"后的结果
* (f) ""
* /
* (i o) "f" 这里分裂节点
* / \
* "firs" ("rst") (o) "fo"
* / \
* "first" [] [t b] "foo"
* / \
* "foot" ("er") ("ar") "foob"
* / \
* "footer" [] [] "foobar"
*
* Similarly after deletion, if a new chain of nodes having a single child
* is created (the chain must also not include nodes that represent keys),
* it must be compressed back into a single node.
删除也类似,如果具有单个子节点的新节点链被创建了(这个链不能包含代表键的节点),
那么必须被压缩成一个单独的节点
*/
#define RAX_NODE_MAX_SIZE ((1<<29)-1)
typedef struct raxNode {
uint32_t iskey:1; /* Does this node contain a key? */ 是否为键
uint32_t isnull:1; /* Associated value is NULL (don't store it). */ 是否有值
uint32_t iscompr:1; /* Node is compressed. */ 是否压缩节点
uint32_t size:29; /* Number of children, or compressed string len. */ 子节点数或 压缩节点长度
/* Data layout is as follows: 数据按照如下格式排列
* If node is not compressed we have 'size' bytes, one for each children
* character, and 'size' raxNode pointers, point to each child node.
* Note how the character is not stored in the children but in the
* edge of the parents:
如果是一个拥有size字节大小并且不压缩的节点,每个字节代表一个子字符和size个节点指针,指向每个子节点。
注意到字符保存在父节点的边缘而不是子节点。
* [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
节点头 不压缩 具体字符 字符对应的指针(指向子节点) 值(如果有的话)
* if node is compressed (iscompr bit is 1) the node has 1 children.
* In that case the 'size' bytes of the string stored immediately at
* the start of the data section, represent a sequence of successive
* nodes linked one after the other, for which only the last one in
* the sequence is actually represented as a node, and pointed to by
* the current compressed node.
如果节点是压缩的(iscompr值为1),表示该节点只有一个孩子。在这种情况下size字节大小的字符串保存
在数据段开始的地方,表示一些列前后连接的节点,只有序列最后一个节点实际代表一个节点,并且由当前节点指向
* [header iscompr=1][xyz][z-ptr](value-ptr?)
节点头 压缩 具体字符 最后字符对应的指针(指向子节点) 值(如果有的话)
* Both compressed and not compressed nodes can represent a key
* with associated data in the radix tree at any level (not just terminal
* nodes).
压缩节点和非压缩节点能够代表基树中任何层级(不仅仅是终节点)的一个关联值的键
* If the node has an associated key (iskey=1) and is not NULL
* (isnull=0), then after the raxNode pointers poiting to the
* children, an additional value pointer is present (as you can see
* in the representation above as "value-ptr" field).
如果节点是一个关联的键而且非空,那么在节点指针指向孩子之后,需要增加一个值的指针表示
(就如你缩减在上述表示的域value-ptr)
*/
unsigned char data[]; 数据,字符串
} raxNode;
基树定义如下,只需要三个元素,一个是指向头部的头指针,一个是节点数量,一个是元素数量
typedef struct rax {
raxNode *head;
uint64_t numele;
uint64_t numnodes;
} rax;
***********************************************************************************
/* ------------------------- raxStack functions --------------------------
* The raxStack is a simple stack of pointers that is capable of switching
* from using a stack-allocated array to dynamic heap once a given number of
* items are reached. It is used in order to retain the list of parent nodes
* while walking the radix tree in order to implement certain operations that
* need to navigate the tree upward.
* ------------------------------------------------------------------------- */
#define RAX_STACK_STATIC_ITEMS 32
基树栈相关函数
基树栈是一个简单的保存指针的堆栈,默认是一个已经实现分配好的堆数组,
如果到了了这个数组的最大值,那么会重新分配堆内存。
这个堆栈被用来保存在遍历基树过程中的父节点。
从而实现特定类型的操作,不需要再次向上遍历基树
/* Initialize the stack. */ 初始化栈
static inline void raxStackInit(raxStack *ts) {
ts->stack = ts->static_items; 初始使用实现分配好的静态栈
ts->items = 0; 无数据
ts->maxitems = RAX_STACK_STATIC_ITEMS; 32
ts->oom = 0;
}
/* Push an item into the stack, returns 1 on success, 0 on out of memory. */
压入一个元素到栈,成功返回1,OOM(失败)返回0
static inline int raxStackPush(raxStack *ts, void *ptr) {
if (ts->items == ts->maxitems) { 判断是否已经达到最大容量值
if (ts->stack == ts->static_items) { 是否是初始的默认静态分配栈
ts->stack = rax_malloc(sizeof(void*)*ts->maxitems*2); 是的话容量变成2倍,新开辟空间
if (ts->stack == NULL) { 分配内存失败
ts->stack = ts->static_items; 仍然使用静态栈
ts->oom = 1; 提示OOM错误
errno = ENOMEM;
return 0;
}
memcpy(ts->stack,ts->static_items,sizeof(void*)*ts->maxitems);
成功情况下,迁移数据到新栈
} else { 使用的已经不是初始化的静态栈了,表示已经扩容过了,在原地址(上次扩容的地方)上扩容
void **newalloc = rax_realloc(ts->stack,sizeof(void*)*ts->maxitems*2);
if (newalloc == NULL) { 分配内存失败
ts->oom = 1;
errno = ENOMEM;
return 0;
}
ts->stack = newalloc; 成功用新栈指针,这里数据无需迁移
}
ts->maxitems *= 2; 成功的情况下,容量都变成原来的2倍
}
ts->stack[ts->items] = ptr; 将新值插入
ts->items++; 元素多1
return 1;
}
/* Pop an item from the stack, the function returns NULL if there are no
* items to pop. */
从栈中弹出一个元素,这个函数返回空如果栈中已经没有元素可以弹出了。
static inline void *raxStackPop(raxStack *ts) {
if (ts->items == 0) return NULL; 没有元素可以弹出了
ts->items--; 还有元素,先减去1
return ts->stack[ts->items]; 弹出元素
}
/* Return the stack item at the top of the stack without actually consuming
* it. */
返回栈中最上面的元素,但是实际上不消费(即不弹出,只是看看,不减少)
static inline void *raxStackPeek(raxStack *ts) {
if (ts->items == 0) return NULL; 无元素可看
return ts->stack[ts->items-1]; 最上面的元素拿出来看看
}
/* Free the stack in case we used heap allocation. */
释放栈如果我们使用了堆内存的分配
static inline void raxStackFree(raxStack *ts) {
if (ts->stack != ts->static_items) rax_free(ts->stack);
}
/* ----------------------------------------------------------------------------
* Radix tree implementation
* --------------------------------------------------------------------------*/
基树的实现
***********************************************************************************
/* Return the padding needed in the characters section of a node having size
* 'nodesize'. The padding is needed to store the child pointers to aligned
* addresses. Note that we add 4 to the node size because the node has a four
* bytes header. */
返回一个具有nodesize大小节点的填充字符字节数。填充的字节数是为了保存子指针对齐(这里按照8字节对齐,和内存地址相关)
注意到这里需要加4个字节的大小,因为每个节点由4个字节的头部。
就是如下4个字段占用4个字节:
uint32_t iskey:1; /* Does this node contain a key? */
uint32_t isnull:1; /* Associated value is NULL (don't store it). */
uint32_t iscompr:1; /* Node is compressed. */
uint32_t size:29; /* Number of children, or compressed string len. */
#define raxPadding(nodesize) ((sizeof(void*)-((nodesize+4) % sizeof(void*))) & (sizeof(void*)-1))
***********************************************************************************
/* Return the current total size of the node. Note that the second line
* computes the padding after the string of characters, needed in order to
* save pointers to aligned addresses. */
返回当前节点总的长度。注意到第二行计算字符串后添加的字符个数,需要按照保存对齐长度的指针所需
#define raxNodeCurrentLength(n) ( \
sizeof(raxNode)+(n)->size+ \
raxPadding((n)->size)+ \
((n)->iscompr ? sizeof(raxNode*) : sizeof(raxNode*)*(n)->size)+ \
(((n)->iskey && !(n)->isnull)*sizeof(void*)) \
)
***********************************************************************************
/* Return the pointer to the last child pointer in a node. For the compressed
* nodes this is the only child pointer. */
返回节点指向最后一个孩子指针的指针,对于压缩节点来说这是唯一的一个子指针。
#define raxNodeLastChildPtr(n) ((raxNode**) ( \
((char*)(n)) + \ 用字节指针相加
raxNodeCurrentLength(n) - \ 当前节点的总长度(按字节计算)
sizeof(raxNode*) - \ 减去最后一个指针节点的长度
(((n)->iskey && !(n)->isnull) ? sizeof(void*) : 0) \ 如果有值的话,需要减去指向值的指针长度
))
***********************************************************************************
/* Return the pointer to the first child pointer. */
返回指向第一个子指针的指针
#define raxNodeFirstChildPtr(n) ((raxNode**) ( \
(n)->data + \ 数据
(n)->size + \ 计算长度以及填充,结束了就是第一个子指针开始的地址
raxPadding((n)->size)))
***********************************************************************************