B树(B-Tree)是一种自平衡的树数据结构,它允许高效地进行查找、插入、删除等操作,广泛应用于数据库和文件系统。B树的每个节点可以包含多个键,并且有多个子节点。B树的关键特点包括:
- 每个节点可以有多个子节点(度数
m
)。 - 每个节点存储至少
ceil(m/2) - 1
个键,最多m - 1
个键。 - 根节点可以存储少于
ceil(m/2) - 1
个键。 - 所有叶子节点处于同一层次上。
案例分析:使用 Python 实现 B 树
Python 实现 B 树
class BTreeNode:
def __init__(self, t, leaf=False):
self.t = t # 最小度数
self.leaf = leaf # 是否是叶节点
self.keys = [] # 节点的键
self.children = [] # 节点的子节点
class BTree:
def __init__(self, t):
self.root = BTreeNode(t, True)
self.t = t # 最小度数
def search(self, k, node=None):
if node is None:
node = self.root
# 找到第一个大于或等于k的键
i = 0
while i < len(node.keys) and k > node.keys[i]:
i += 1
if i < len(node.keys) and node.keys[i] == k:
return node, i
if node.leaf:
return None
return self.search(k, node.children[i])
def insert(self, k):
root = self.root
if len(root.keys) == (2 * self.t) - 1:
new_root = BTreeNode(self.t, False)
new_root.children.append(self.root)
self.split_child(new_root, 0)
self.root = new_root
self.insert_non_full(new_root, k)
else:
self.insert_non_full(root, k)
def insert_non_full(self, node, k):
i = len(node.keys) - 1
if node.leaf:
node.keys.append(None)
while i >= 0 and k < node.keys[i]:
node.keys[i + 1] = node.keys[i]
i -= 1
node.keys[i + 1] = k
else:
while i >= 0 and k < node.keys[i]:
i -= 1
i += 1
if len(node.children[i].keys) == (2 * self.t) - 1:
self.split_child(node, i)
if k > node.keys[i]:
i += 1
self.insert_non_full(node.children[i], k)
def split_child(self, parent, i):
t = self.t
new_node = BTreeNode(t, parent.children[i].leaf)
node_to_split = parent.children[i]
parent.keys.insert(i, node_to_split.keys[t - 1])
parent.children.insert(i + 1, new_node)
new_node.keys = node_to_split.keys[t:t + t - 1]
node_to_split.keys = node_to_split.keys[0:t - 1]
if not node_to_split.leaf:
new_node.children = node_to_split.children[t:t + t]
node_to_split.children = node_to_split.children[0:t]
def traverse(self, node=None):
if node is None:
node = self.root
i = 0
for i in range(len(node.keys)):
if not node.leaf:
self.traverse(node.children[i])
print(node.keys[i], end=" ")
if not node.leaf:
self.traverse(node.children[i + 1])
def delete(self, k):
self._delete(self.root, k)
if len(self.root.keys) == 0:
if not self.root.leaf:
self.root = self.root.children[0]
else:
self.root = None
def _delete(self, node, k):
t = self.t
i = 0
while i < len(node.keys) and k > node.keys[i]:
i += 1
if i < len(node.keys) and node.keys[i] == k:
if node.leaf:
node.keys.pop(i)
else:
self._delete_non_leaf(node, i)
else:
if node.leaf:
print(f"Key {k} not found in the tree")
return
flag = (i == len(node.keys))
if len(node.children[i].keys) < t:
self._fill(node, i)
if flag and i > len(node.keys):
self._delete(node.children[i - 1], k)
else:
self._delete(node.children[i], k)
def _delete_non_leaf(self, node, i):
k = node.keys[i]
t = self.t
if len(node.children[i].keys) >= t:
pred = self._get_pred(node, i)
node.keys[i] = pred
self._delete(node.children[i], pred)
elif len(node.children[i + 1].keys) >= t:
succ = self._get_succ(node, i)
node.keys[i] = succ
self._delete(node.children[i + 1], succ)
else:
self._merge(node, i)
self._delete(node.children[i], k)
def _get_pred(self, node, i):
curr = node.children[i]
while not curr.leaf:
curr = curr.children[len(curr.keys)]
return curr.keys[len(curr.keys) - 1]
def _get_succ(self, node, i):
curr = node.children[i + 1]
while not curr.leaf:
curr = curr.children[0]
return curr.keys[0]
def _fill(self, node, i):
t = self.t
if i != 0 and len(node.children[i - 1].keys) >= t:
self._borrow_from_prev(node, i)
elif i != len(node.children) - 1 and len(node.children[i + 1].keys) >= t:
self._borrow_from_next(node, i)
else:
if i != len(node.children) - 1:
self._merge(node, i)
else:
self._merge(node, i - 1)
def _borrow_from_prev(self, node, i):
child = node.children[i]
sibling = node.children[i - 1]
child.keys.insert(0, node.keys[i - 1])
if not child.leaf:
child.children.insert(0, sibling.children.pop())
node.keys[i - 1] = sibling.keys.pop()
def _borrow_from_next(self, node, i):
child = node.children[i]
sibling = node.children[i + 1]
child.keys.append(node.keys[i])
if not child.leaf:
child.children.append(sibling.children.pop(0))
node.keys[i] = sibling.keys.pop(0)
def _merge(self, node, i):
t = self.t
child = node.children[i]
sibling = node.children[i + 1]
child.keys.append(node.keys.pop(i))
child.keys.extend(sibling.keys)
if not child.leaf:
child.children.extend(sibling.children)
node.children.pop(i + 1)
# 示例使用
btree = BTree(3)
elements = [8, 9, 10, 11, 15, 20, 17, 25, 30, 35, 40, 45]
for elem in elements:
btree.insert(elem)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nDeleting 15...")
btree.delete(15)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nDeleting 30...")
btree.delete(30)
print("Traversal of the B-Tree:")
btree.traverse()
解释与总结
在以上代码中,我们实现了一个简单的B树,支持查找、插入和删除操作。以下是每个部分的解释:
-
BTreeNode类:
- 表示B树的节点,包含
keys
和children
列表。
- 表示B树的节点,包含
-
BTree类:
- 管理整个B树结构。
-
search
:查找键是否存在。 -
insert
:插入键。 -
split_child
:当节点满时拆分节点。 -
delete
:删除键。 -
traverse
:遍历树的所有节点。
-
示例代码:
- 初始化一个B树实例,并进行一系列的插入、删除和遍历操作,展示B树的完整功能。
结论
通过这个简单的实现,我们可以看到B树在查找、插入和删除操作上具有高效性,并且能够保持平衡。它适用于需要频繁读写数据的场景,如数据库和文件系统。
继续深入探讨 B 树的应用和功能增强,我们可以实现以下功能:
- 查找功能:添加在树中查找某个键的详细方法。
- 打印功能:打印 B 树结构,方便理解树的结构。
- 增强的删除功能:实现删除任意节点的完整逻辑。
增强版 B 树代码
class BTreeNode:
def __init__(self, t, leaf=False):
self.t = t # 最小度数
self.leaf = leaf # 是否是叶节点
self.keys = [] # 节点的键
self.children = [] # 节点的子节点
def is_full(self):
return len(self.keys) == (2 * self.t) - 1
class BTree:
def __init__(self, t):
self.root = BTreeNode(t, True)
self.t = t # 最小度数
def search(self, k, node=None):
if node is None:
node = self.root
# 找到第一个大于或等于k的键
i = 0
while i < len(node.keys) and k > node.keys[i]:
i += 1
if i < len(node.keys) and node.keys[i] == k:
return node, i
if node.leaf:
return None
return self.search(k, node.children[i])
def insert(self, k):
root = self.root
if root.is_full():
new_root = BTreeNode(self.t, False)
new_root.children.append(self.root)
self.split_child(new_root, 0)
self.root = new_root
self.insert_non_full(new_root, k)
else:
self.insert_non_full(root, k)
def insert_non_full(self, node, k):
i = len(node.keys) - 1
if node.leaf:
node.keys.append(None)
while i >= 0 and k < node.keys[i]:
node.keys[i + 1] = node.keys[i]
i -= 1
node.keys[i + 1] = k
else:
while i >= 0 and k < node.keys[i]:
i -= 1
i += 1
if node.children[i].is_full():
self.split_child(node, i)
if k > node.keys[i]:
i += 1
self.insert_non_full(node.children[i], k)
def split_child(self, parent, i):
t = self.t
new_node = BTreeNode(t, parent.children[i].leaf)
node_to_split = parent.children[i]
parent.keys.insert(i, node_to_split.keys[t - 1])
parent.children.insert(i + 1, new_node)
new_node.keys = node_to_split.keys[t:t + t - 1]
node_to_split.keys = node_to_split.keys[0:t - 1]
if not node_to_split.leaf:
new_node.children = node_to_split.children[t:t + t]
node_to_split.children = node_to_split.children[0:t]
def traverse(self, node=None, depth=0):
if node is None:
node = self.root
print(" " * 4 * depth + "Node:", node.keys)
i = 0
for i in range(len(node.keys)):
if not node.leaf:
self.traverse(node.children[i], depth + 1)
if not node.leaf:
self.traverse(node.children[i + 1], depth + 1)
def delete(self, k):
self._delete(self.root, k)
if len(self.root.keys) == 0:
if not self.root.leaf:
self.root = self.root.children[0]
else:
self.root = None
def _delete(self, node, k):
t = self.t
i = 0
while i < len(node.keys) and k > node.keys[i]:
i += 1
if i < len(node.keys) and node.keys[i] == k:
if node.leaf:
node.keys.pop(i)
else:
self._delete_non_leaf(node, i)
else:
if node.leaf:
print(f"Key {k} not found in the tree")
return
flag = (i == len(node.keys))
if len(node.children[i].keys) < t:
self._fill(node, i)
if flag and i > len(node.keys):
self._delete(node.children[i - 1], k)
else:
self._delete(node.children[i], k)
def _delete_non_leaf(self, node, i):
k = node.keys[i]
t = self.t
if len(node.children[i].keys) >= t:
pred = self._get_pred(node, i)
node.keys[i] = pred
self._delete(node.children[i], pred)
elif len(node.children[i + 1].keys) >= t:
succ = self._get_succ(node, i)
node.keys[i] = succ
self._delete(node.children[i + 1], succ)
else:
self._merge(node, i)
self._delete(node.children[i], k)
def _get_pred(self, node, i):
curr = node.children[i]
while not curr.leaf:
curr = curr.children[len(curr.keys)]
return curr.keys[len(curr.keys) - 1]
def _get_succ(self, node, i):
curr = node.children[i + 1]
while not curr.leaf:
curr = curr.children[0]
return curr.keys[0]
def _fill(self, node, i):
t = self.t
if i != 0 and len(node.children[i - 1].keys) >= t:
self._borrow_from_prev(node, i)
elif i != len(node.children) - 1 and len(node.children[i + 1].keys) >= t:
self._borrow_from_next(node, i)
else:
if i != len(node.children) - 1:
self._merge(node, i)
else:
self._merge(node, i - 1)
def _borrow_from_prev(self, node, i):
child = node.children[i]
sibling = node.children[i - 1]
child.keys.insert(0, node.keys[i - 1])
if not child.leaf:
child.children.insert(0, sibling.children.pop())
node.keys[i - 1] = sibling.keys.pop()
def _borrow_from_next(self, node, i):
child = node.children[i]
sibling = node.children[i + 1]
child.keys.append(node.keys[i])
if not child.leaf:
child.children.append(sibling.children.pop(0))
node.keys[i] = sibling.keys.pop(0)
def _merge(self, node, i):
t = self.t
child = node.children[i]
sibling = node.children[i + 1]
child.keys.append(node.keys.pop(i))
child.keys.extend(sibling.keys)
if not child.leaf:
child.children.extend(sibling.children)
node.children.pop(i + 1)
# 示例使用
btree = BTree(3)
elements = [8, 9, 10, 11, 15, 20, 17, 25, 30, 35, 40, 45]
for elem in elements:
btree.insert(elem)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nDeleting 15...")
btree.delete(15)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nDeleting 30...")
btree.delete(30)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nSearching for 20...")
result = btree.search(20)
if result:
node, idx = result
print(f"Found key 20 in node with keys: {node.keys}")
else:
print("Key 20 not found in the B-Tree.")
解释与总结
-
增强的打印功能:
-
traverse
:增强了打印逻辑,包含深度信息以可视化树结构。
-
-
搜索功能:
-
search
:用于在树中查找键的位置。
-
-
完整的删除功能:
-
_delete_non_leaf
:删除非叶节点的键。 -
_get_pred
和_get_succ
:获取前驱和后继键。 -
_fill
、_borrow_from_prev
、_borrow_from_next
和_merge
:在删除操作中保持 B 树平衡。
-
结论
通过完整的B树实现,包含搜索、插入、删除和遍历功能,使得 B 树在数据库和文件系统等高效查找领域具备广泛的应用。
让我们继续增强和完善 B 树的功能,并增加更多的示例应用。以下是进一步的增强:
- 打印更详细的 B 树结构:可视化显示每个节点的层次和子节点的键。
-
修改树的最小度数(
t
):允许动态调整最小度数。 - 统计功能:包括计算树的高度和统计总节点数、总键数等。
增强版 B 树代码
class BTreeNode:
def __init__(self, t, leaf=False):
self.t = t # 最小度数
self.leaf = leaf # 是否是叶节点
self.keys = [] # 节点的键
self.children = [] # 节点的子节点
def is_full(self):
return len(self.keys) == (2 * self.t) - 1
class BTree:
def __init__(self, t):
self.root = BTreeNode(t, True)
self.t = t # 最小度数
def search(self, k, node=None):
if node is None:
node = self.root
# 找到第一个大于或等于k的键
i = 0
while i < len(node.keys) and k > node.keys[i]:
i += 1
if i < len(node.keys) and node.keys[i] == k:
return node, i
if node.leaf:
return None
return self.search(k, node.children[i])
def insert(self, k):
root = self.root
if root.is_full():
new_root = BTreeNode(self.t, False)
new_root.children.append(self.root)
self.split_child(new_root, 0)
self.root = new_root
self.insert_non_full(new_root, k)
else:
self.insert_non_full(root, k)
def insert_non_full(self, node, k):
i = len(node.keys) - 1
if node.leaf:
node.keys.append(None)
while i >= 0 and k < node.keys[i]:
node.keys[i + 1] = node.keys[i]
i -= 1
node.keys[i + 1] = k
else:
while i >= 0 and k < node.keys[i]:
i -= 1
i += 1
if node.children[i].is_full():
self.split_child(node, i)
if k > node.keys[i]:
i += 1
self.insert_non_full(node.children[i], k)
def split_child(self, parent, i):
t = self.t
new_node = BTreeNode(t, parent.children[i].leaf)
node_to_split = parent.children[i]
parent.keys.insert(i, node_to_split.keys[t - 1])
parent.children.insert(i + 1, new_node)
new_node.keys = node_to_split.keys[t:t + t - 1]
node_to_split.keys = node_to_split.keys[0:t - 1]
if not node_to_split.leaf:
new_node.children = node_to_split.children[t:t + t]
node_to_split.children = node_to_split.children[0:t]
def traverse(self, node=None, depth=0):
if node is None:
node = self.root
print(" " * 4 * depth + f"Level {depth}: {node.keys}")
i = 0
for i in range(len(node.keys)):
if not node.leaf:
self.traverse(node.children[i], depth + 1)
if not node.leaf:
self.traverse(node.children[i + 1], depth + 1)
def delete(self, k):
self._delete(self.root, k)
if len(self.root.keys) == 0:
if not self.root.leaf:
self.root = self.root.children[0]
else:
self.root = None
def _delete(self, node, k):
t = self.t
i = 0
while i < len(node.keys) and k > node.keys[i]:
i += 1
if i < len(node.keys) and node.keys[i] == k:
if node.leaf:
node.keys.pop(i)
else:
self._delete_non_leaf(node, i)
else:
if node.leaf:
print(f"Key {k} not found in the tree")
return
flag = (i == len(node.keys))
if len(node.children[i].keys) < t:
self._fill(node, i)
if flag and i > len(node.keys):
self._delete(node.children[i - 1], k)
else:
self._delete(node.children[i], k)
def _delete_non_leaf(self, node, i):
k = node.keys[i]
t = self.t
if len(node.children[i].keys) >= t:
pred = self._get_pred(node, i)
node.keys[i] = pred
self._delete(node.children[i], pred)
elif len(node.children[i + 1].keys) >= t:
succ = self._get_succ(node, i)
node.keys[i] = succ
self._delete(node.children[i + 1], succ)
else:
self._merge(node, i)
self._delete(node.children[i], k)
def _get_pred(self, node, i):
curr = node.children[i]
while not curr.leaf:
curr = curr.children[len(curr.keys)]
return curr.keys[len(curr.keys) - 1]
def _get_succ(self, node, i):
curr = node.children[i + 1]
while not curr.leaf:
curr = curr.children[0]
return curr.keys[0]
def _fill(self, node, i):
t = self.t
if i != 0 and len(node.children[i - 1].keys) >= t:
self._borrow_from_prev(node, i)
elif i != len(node.children) - 1 and len(node.children[i + 1].keys) >= t:
self._borrow_from_next(node, i)
else:
if i != len(node.children) - 1:
self._merge(node, i)
else:
self._merge(node, i - 1)
def _borrow_from_prev(self, node, i):
child = node.children[i]
sibling = node.children[i - 1]
child.keys.insert(0, node.keys[i - 1])
if not child.leaf:
child.children.insert(0, sibling.children.pop())
node.keys[i - 1] = sibling.keys.pop()
def _borrow_from_next(self, node, i):
child = node.children[i]
sibling = node.children[i + 1]
child.keys.append(node.keys[i])
if not child.leaf:
child.children.append(sibling.children.pop(0))
node.keys[i] = sibling.keys.pop(0)
def _merge(self, node, i):
t = self.t
child = node.children[i]
sibling = node.children[i + 1]
child.keys.append(node.keys.pop(i))
child.keys.extend(sibling.keys)
if not child.leaf:
child.children.extend(sibling.children)
node.children.pop(i + 1)
def get_height(self, node=None):
if node is None:
node = self.root
if node.leaf:
return 1
return 1 + self.get_height(node.children[0])
def count_nodes_and_keys(self, node=None):
if node is None:
node = self.root
node_count = 1
key_count = len(node.keys)
if not node.leaf:
for child in node.children:
child_node_count, child_key_count = self.count_nodes_and_keys(child)
node_count += child_node_count
key_count += child_key_count
return node_count, key_count
# 示例使用
btree = BTree(3)
elements = [8, 9, 10, 11, 15, 20, 17, 25, 30, 35, 40, 45]
for elem in elements:
btree.insert(elem)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nDeleting 15...")
btree.delete(15)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nDeleting 30...")
btree.delete(30)
print("Traversal of the B-Tree:")
btree.traverse()
print("\n\nSearching for 20...")
result = btree.search(20)
if result:
node, idx = result
print(f"Found key 20 in node with keys: {node.keys}")
else:
print("Key 20 not found in the B-Tree.")
print("\n\nStatistics:")
height = btree.get_height()
nodes, keys = btree.count_nodes_and_keys()
print(f"Height of the B-Tree: {height}")
print(f"Total nodes in the B-Tree: {nodes}")
print(f"Total keys in the B-Tree: {keys}")
解释与总结
-
B 树层次打印:
-
traverse
方法打印每一层的键值。
-
-
统计功能:
-
get_height
:计算树的高度。 -
count_nodes_and_keys
:计算节点和键的总数。
-
-
完整的插入与删除功能:
-
insert
和delete
操作保持树的平衡性。
-
结论
通过实现增强版的 B 树功能,我们可以看到这是一种高效的多路搜索树结构,适合用于数据库索引和文件系统管理。利用这些功能增强,B 树的应用前景更加广阔。