B-树
一棵m阶的B-树,或为空树,或为满足下列特性的m叉树:
(1) 树中每个结点至多有m棵孩子结点(即至多有m-1个关键字)。
(2) 若根节点不是叶子结点,至少有两棵子树。
(3) 除根结点之外的所有非终端结点至少有[m/2]棵子树。
(4) 所有叶子结点都在同一层上,即B树是所有结点的平衡因子均等于0的多路查找树。
(5) 所有非终端结点中包含下列信息数据(n,A0,K1,A1,K2,A2,...Kn,An)。
#define MAXM 10
typedef int KeyType;
const int m = 5;
int min = 2;
typedef struct BTNode
{
int keynum;
KeyType keys[MAXM];
struct BTNode *parent;
struct BTNode *child[MAXM];
void (*insertKey)(struct BTNode *node,KeyType k);
void (*removeKey)(struct BTNode *node,KeyType k);
int (*childrenCount)(struct BTNode *node);
struct BTNode * (* addChild)(struct BTNode *node, struct BTNode *child);
struct BTNode * (* insertChild)(struct BTNode *node, struct BTNode *child, int pos);
bool (*removeChild)(struct BTNode *node, struct BTNode *child);
int (*indexOf)(struct BTNode *mySelf, struct BTNode *node);
}BTNode;
void insertKey(BTNode *node,KeyType k);
void removeKey(BTNode *node,KeyType k);
int getChildrenCount(BTNode *node);
BTNode* addChild(BTNode *node, BTNode *child);
BTNode * insertChild(BTNode *node, BTNode *child, int pos);
bool removeChild(BTNode *node, BTNode *child);
int indexOf(BTNode *mySelf,BTNode *node);
int binarySearch(BTNode *root,KeyType key);
void initBTNode(BTNode *node)
{
node->keynum = 0;
for(int i = 0; i < MAXM; i++)
{
node->keys[i] = -1;
}
node->insertKey = insertKey;
node->removeKey = removeKey;
node->parent = NULL;
for(int i = 0; i < MAXM; i++)
{
node->child[i] = NULL;
}
node->childrenCount = getChildrenCount;
node->addChild = addChild;
node->removeChild = removeChild;
node->indexOf = indexOf;
}
void insertKey(BTNode *node,KeyType k)
{
if(node->keynum == 0)
{
node->keys[0] = k;
return;
}
int left = 0, right = node->keynum - 1, mid = (left + right) / 2;
while (left <= right) {
mid = (left + right) / 2;
if(k < node->keys[mid])
right = mid - 1;
else if (k > node->keys[mid])
left = mid + 1;
}
int insertPos = k < node->keys[mid] ? mid : mid + 1;
for (int i = node->keynum - 1; i >= insertPos; i--) {
node->keys[i + 1] = node->keys[i];
}
node->keys[insertPos] = k;
node->keynum++;
}
void removeKey(BTNode *node,KeyType k)
{
int keyIndex = binarySearch(node, k);
for (int j = keyIndex; j < node->keynum; j++) {
node->keys[j] = node->keys[j+1];
}
node->keys[node->keynum - 1] = -1;
node->keynum--;
}
int getChildrenCount(BTNode *node)
{
int i = 0;
for(;i < MAXM; i++)
{
if(node->child[i] == NULL)
break;
}
return i;
}
BTNode* addChild(BTNode *node, BTNode *child)
{
int i = 0;
for(;i < MAXM; i++)
{
if(node->child[i] == NULL)
break;
}
node->child[i] = child;
return node;
}
BTNode* insertChild(BTNode *node, BTNode *child, int pos)
{
for (int i = node->keynum - 1; i >= pos; i--) {
node->child[i + 1] = node->child[i];
}
node->child[pos] = child;
return node;
}
bool removeChild(BTNode *node, BTNode *child)
{
int i = 0;
for (; i<node->keynum + 1; i++) {
if(child == node->child[i])
break;
}
if(i == node->keynum + 1)
return false;
for (int j = i; j < node->keynum; j++) {
node->child[j] = node->child[j+1];
}
node->child[node->keynum] = NULL;
return true;
}
int indexOf(BTNode *mySelf,BTNode *node)
{
int i = 0, count = mySelf->childrenCount(mySelf);
for (; i < count; i++) {
if(mySelf->child[i] == node)
break;
}
i = i == count ? -1 : i;
return i;
}
1. 查找
int binarySearch(BTNode *root,KeyType key)
{
int left = 0, right = root->keynum - 1, mid = (left + right) / 2;
while (left <= right) {
mid = (left + right)/2;
if(key < root->keys[mid])
right = mid - 1;
else if (key > root->keys[mid])
left = mid + 1;
else
break;
}
mid = (left <= right) ? mid : -mid;
return mid;
}
BTNode* searchNode(BTNode *root,KeyType key)
{
if(root == NULL)
return NULL;
int index = binarySearch(root,key);
if(index > 0){
return root;
}else{
return searchNode(root->child[-index-1], key);
}
}
2. 插入
对于新元素的插入,都是发生在叶子节点上的。所有的插入操作都是从根节点开始,搜索这棵树,并找到该元素应该被插入的节点。将新元素插入到该节点需要如下步骤:
- 如果该节点上的元素数未满,则将新元素插入到该节点,并保持节点中元素的顺序。
- 如果该节点上的元素已满,则需要将该节点平均地分裂成两个节点:
从该节点中的元素和新元素先出一个中位数
小于中位数的元素放到左边节点,大于中位数的元素放到右边节点,中位数做为分隔值。
分隔值被插入到父节点中(增加了树的高度),这可能会导致父节点的分裂,分裂父节点时又可能会使它的父节点分裂,以此类推。如果分裂一直上升到根节点,那么就创建一个新的根节点,它有一个分隔值和两个子节点。(这就是根节点并不像内部节点一样有最少子节点数量限制的原因)
下图是一个5阶B树,我们通过顺序插入1到17,来观察节点的分裂过程。
void insertNodeKey(BTNode *node,KeyType key);
void insert(BTNode **root,KeyType key)
{
if(*root == NULL){
BTNode *node = (BTNode *)malloc(sizeof(BTNode));
initBTNode(node);
node->insertKey(node,key);
*root = node;
return;
}
insertNodeKey(*root, key);
}
void addKeys(BTNode *node,BTNode *fromNode,int begin, int end)
{
for (int i = begin; i < end; i++) {
node->keys[i - begin] = fromNode->keys[i];
}
node->keynum = end - begin;
}
void addChildren(BTNode *node,BTNode *fromNode,int begin, int end)
{
for (int i = begin; i < end; i++) {
node->child[i-begin] = fromNode->child[i];
fromNode->child[i]->parent = node;
}
}
void split(BTNode *node)
{
int mid = node->keynum/2;
//分离值
KeyType sepKey = node->keys[mid];
//分离后的左节点
BTNode *leftNode = (BTNode *)malloc(sizeof(BTNode));
initBTNode(leftNode);
//添加key
addKeys(leftNode, node, 0, mid);
BTNode *rightNode = (BTNode *)malloc(sizeof(BTNode));
initBTNode(rightNode);
addKeys(rightNode, node, mid + 1, node->keynum);
//分离子结点
if(node->childrenCount(node) > 0)
{
addChildren(leftNode, node, 0, mid);
addChildren(rightNode, node, mid + 1, node->keynum);
}
//当前节点为根结点
BTNode *parent = node->parent;
if(parent == NULL){
BTNode *newRoot = (BTNode *)malloc(sizeof(BTNode));
newRoot->insertKey(newRoot,sepKey);
leftNode->parent = newRoot;
rightNode->parent = newRoot;
newRoot->addChild(newRoot,leftNode)->addChild(newRoot,rightNode);
}else{
parent->insertKey(parent,sepKey);
leftNode->parent = parent;
rightNode->parent = parent;
int pos = binarySearch(parent, sepKey);
parent->removeChild(parent,node);
parent->insertChild(parent,leftNode,pos)->insertChild(parent,rightNode,pos+1);
if(parent->keynum > m - 1)
split(parent);
}
}
void insertNodeKey(BTNode *node,KeyType key)
{
//当前节点为叶子节点
if(node->childrenCount == 0)
{
//如果当前结点key未满,直接添加
if(node->keynum < m - 1)
{
node->insertKey(node,key);
return;
}
// 如果key已满,分裂结点
node->insertKey(node,key);
split(node);
return;
}
//当前节点为内部节点的时候,需要查找相应的子结点,直到找到相应的叶子结点时,才插入
int index = binarySearch(node, key);
if(index < 0)
insertNodeKey(node->child[-index-1],key);
}
3. 删除
B树的删除就复杂了许多,可分为下面几种情况:
- 删除叶子结点的元素:
(1) 搜索要删除的元素
(2) 如果它在叶子节点上,直接将其删除
(3) 如果删除后产生了下溢出(键数小于最小值 [m/2]-1),则向其兄弟节点借元素。如果兄弟结点借,即将其父节点元素下移至当前节点,将兄弟节点中元素上移至父节点(若是左节点,上移最大元素;若是右节点,上移最小元素)
(4) 若兄弟节点也达到下限,即不借,则合并兄弟节点与分割键(父结点分割键下移到当前结点删除的位置,然后和兄弟结点合并)。
下图是一个5阶B树,我们通过删除15、14、17、5四个键,来观察删除过程(基本涵盖所有情况)。
- 删除内部结点:
内部节点中元素为其左右子节点的分割值,需要从左子树中找最大的key或右子树中找最小的key来代替被删除的key元素。从而转化为删除叶子结点上的元素
下面以3阶的B-树删除内部结点为例进行演示:
void afterDeleteHandle(BTNode **root,BTNode *node,int deleteIndex);
void merge(BTNode **root,BTNode *node, BTNode *brotherNode, int parentKeyIndex);
void leftRotate(BTNode *node, int nodeIndex, BTNode *rightNode);
void rightRotate(BTNode *node, int nodeIndex, BTNode *leftNode);
BTNode* findLeftChildMaxKeyNode(BTNode *node,int index)
{
BTNode *maxNode = node->child[index];
BTNode *lastNode = maxNode;
int maxIndex = -1;
while (lastNode) {
maxIndex = maxNode->childrenCount(maxNode) - 1;
lastNode = maxNode->child[maxIndex];
if(lastNode)
maxNode = lastNode;
}
return maxNode;
}
BTNode* findRightChildMinKeyNode(BTNode *node,int index)
{
BTNode *minNode = node->child[index + 1];
BTNode *firstNode = minNode;
while (firstNode) {
firstNode = minNode->child[0];
if(firstNode)
minNode = firstNode;
}
return minNode;
}
void delete(BTNode **root,KeyType key)
{
BTNode *node = searchNode(*root, key);
if(!node)
return;
//删除节点
int keyIndex = binarySearch(node, key);
node->removeKey(node, key);
if(node->keynum < min){
afterDeleteHandle(root,node, keyIndex);
}
}
void afterDeleteHandle(BTNode **root,BTNode *node,int deleteIndex)
{
//如果是内部结点 转化为删除叶子结点
//要么向左子树中最大的叶子结点的最大key借
//要么向右子树中最小的叶子结点的最小key借
if(node->childrenCount > 0 && deleteIndex > 0){
BTNode *leftChildMaxNode = findLeftChildMaxKeyNode(node, deleteIndex);
if(leftChildMaxNode){
int maxIndex = leftChildMaxNode->keynum - 1;
KeyType maxKey = leftChildMaxNode->keys[maxIndex];
node->insertKey(node,maxKey);
leftChildMaxNode->removeKey(leftChildMaxNode,maxKey);
if(leftChildMaxNode->keynum < min){
afterDeleteHandle(root, leftChildMaxNode, maxIndex);
}
}else{
BTNode *rightChildMinNode = findRightChildMinKeyNode(node, deleteIndex);
KeyType minKey = rightChildMinNode->keys[0];
node->insertKey(node,minKey);
rightChildMinNode->removeKey(rightChildMinNode,minKey);
if(rightChildMinNode->keynum < min){
afterDeleteHandle(root, rightChildMinNode, 0);
}
}
}
//删除叶子结点 或继续向兄弟结点借
BTNode *parentNode = node->parent;
//当前结点在父结点中的位置
int nodeIndex = parentNode->indexOf(parentNode,node);
//左兄弟结点
BTNode *leftNode = nodeIndex > 0 ? parentNode->child[nodeIndex-1] : NULL;
//右兄弟节点
BTNode *rightNode = nodeIndex == parentNode->childrenCount(parentNode) - 1 ? NULL : parentNode->child[nodeIndex + 1];
int leftKeyCount = leftNode == NULL ? 0 : leftNode->keynum;
int rightKeyCount = rightNode == NULL ? 0 : rightNode->keynum;
int maxCount = MAX(leftKeyCount, rightKeyCount);
// 左右兄弟节点元素数均达到限定值,合并
if(maxCount <= min){
if(leftNode){
//与左兄弟节点合并
merge(root,node, leftNode, nodeIndex - 1);
}else if (rightNode){
//与右兄弟节点合并
merge(root,node, rightNode, nodeIndex);
}
return;
}
//向最富裕的兄弟结点借
if(maxCount == leftKeyCount){
//向左节点借-> 右旋
rightRotate(node, nodeIndex, leftNode);
}else{
//向右节点借-> 左旋
leftRotate(node, nodeIndex, rightNode);
}
}
void merge(BTNode **root,BTNode *node, BTNode *brotherNode, int parentKeyIndex)
{
BTNode *parentNode = node->parent;
BTNode *newNode = (BTNode *)malloc(sizeof(BTNode));
initBTNode(newNode);
KeyType parentKey = parentNode->keys[parentKeyIndex];
addKeys(newNode, node, 0, node->keynum);
addKeys(newNode, brotherNode, 0, brotherNode->keynum);
newNode->insertKey(newNode,parentKey);//向父结点借
addChildren(newNode, node, 0, node->childrenCount(node));
addChildren(newNode, brotherNode, 0, brotherNode->childrenCount(brotherNode));
parentNode->removeKey(parentNode,parentKey);
parentNode->removeChild(parentNode,node);
parentNode->removeChild(parentNode,brotherNode);
parentNode->addChild(parentNode,newNode);
newNode->parent = parentNode;
//合并之后,若父结点的元素小于限定数 继续调整
if(parentNode == *root && parentNode->keynum == 0)
{
*root = newNode;
free(parentNode);
return;
}
if(parentNode->keynum < min){
afterDeleteHandle(root, parentNode, -1);
}
}
/**
* 左旋转
* (1)将父节点的中间值元素删除,并添加到当前节点中
* (2)将右兄弟节点中最小元素删除,并添加到父节点中
*
* @param node 当前节点
* @param nodeIndex 中间值索引
* @param rightNode 右节点
*/
void leftRotate(BTNode *node, int nodeIndex, BTNode *rightNode)
{
BTNode *parentNode = node->parent;
KeyType key = parentNode->keys[nodeIndex];
parentNode->removeKey(parentNode,key);
node->insertKey(node,key);
KeyType rightKey = rightNode->keys[0];
parentNode->insertKey(parentNode,rightKey);
rightNode->removeKey(rightNode,rightKey);
BTNode *rightFirstChild = rightNode->child[0];
rightNode->removeChild(rightNode,rightFirstChild);
node->addChild(node,rightFirstChild);
}
/**
* 右旋转
* (1)将父节点的中间值元素删除,并添加到当前节点中
* (2)将左兄弟节点中最大元素删除,并添加到父节点中
*
* @param node 当前节点
* @param nodeIndex 中间值索引
* @param leftNode 左节点
*/
void rightRotate(BTNode *node, int nodeIndex, BTNode *leftNode)
{
BTNode *parentNode = node->parent;
KeyType key = parentNode->keys[nodeIndex - 1];
parentNode->removeKey(parentNode,key);
node->insertKey(node,key);
KeyType leftKey = leftNode->keys[leftNode->keynum - 1];
parentNode->insertKey(parentNode,leftKey);
leftNode->removeKey(leftNode,leftKey);
int leftLast = leftNode->childrenCount(leftNode) - 1;
BTNode *leftLastChild = leftNode->child[leftLast];
leftNode->removeChild(leftNode,leftLastChild);
node->addChild(node,leftLastChild);
}