六、B-树

B-树

一棵m阶的B-树,或为空树,或为满足下列特性的m叉树:
(1) 树中每个结点至多有m棵孩子结点(即至多有m-1个关键字)。
(2) 若根节点不是叶子结点,至少有两棵子树。
(3) 除根结点之外的所有非终端结点至少有[m/2]棵子树。
(4) 所有叶子结点都在同一层上,即B树是所有结点的平衡因子均等于0的多路查找树。
(5) 所有非终端结点中包含下列信息数据(n,A0,K1,A1,K2,A2,...Kn,An)。

#define MAXM 10
typedef int KeyType;
const int m = 5;
int min = 2;
typedef struct BTNode
{
    int keynum;
    KeyType keys[MAXM];
    struct BTNode *parent;
    struct BTNode *child[MAXM];
    
    void (*insertKey)(struct BTNode *node,KeyType k);
    void (*removeKey)(struct BTNode *node,KeyType k);
    int (*childrenCount)(struct BTNode *node);
    struct BTNode * (* addChild)(struct BTNode *node, struct BTNode *child);
    struct BTNode * (* insertChild)(struct BTNode *node, struct BTNode *child, int pos);
    bool (*removeChild)(struct BTNode *node, struct BTNode *child);
    int (*indexOf)(struct BTNode *mySelf, struct BTNode *node);
}BTNode;

void insertKey(BTNode *node,KeyType k);
void removeKey(BTNode *node,KeyType k);
int getChildrenCount(BTNode *node);
BTNode* addChild(BTNode *node, BTNode *child);
BTNode * insertChild(BTNode *node, BTNode *child, int pos);
bool removeChild(BTNode *node, BTNode *child);
int indexOf(BTNode *mySelf,BTNode *node);
int binarySearch(BTNode *root,KeyType key);

void initBTNode(BTNode *node)
{
    node->keynum = 0;
    for(int i = 0; i < MAXM; i++)
    {
        node->keys[i] = -1;
    }
    node->insertKey = insertKey;
    node->removeKey = removeKey;
    node->parent = NULL;
    for(int i = 0; i < MAXM; i++)
    {
        node->child[i] = NULL;
    }
    
    node->childrenCount = getChildrenCount;
    node->addChild = addChild;
    node->removeChild = removeChild;
    node->indexOf = indexOf;
}

void insertKey(BTNode *node,KeyType k)
{
    if(node->keynum == 0)
    {
        node->keys[0] = k;
        return;
    }
    
    int left = 0, right = node->keynum - 1, mid = (left + right) / 2;
    while (left <= right) {
        mid = (left + right) / 2;
        if(k < node->keys[mid])
            right = mid - 1;
        else if (k > node->keys[mid])
            left = mid + 1;
    }
    
    int insertPos = k < node->keys[mid] ? mid : mid + 1;
    
    for (int i = node->keynum - 1; i >= insertPos; i--) {
        node->keys[i + 1] = node->keys[i];
    }
    node->keys[insertPos] = k;
    node->keynum++;
}

void removeKey(BTNode *node,KeyType k)
{
    int keyIndex = binarySearch(node, k);
    for (int j = keyIndex; j < node->keynum; j++) {
        node->keys[j] = node->keys[j+1];
    }
    
    node->keys[node->keynum - 1] = -1;
    node->keynum--;
}
int getChildrenCount(BTNode *node)
{
    int i = 0;
    for(;i < MAXM; i++)
    {
        if(node->child[i] == NULL)
            break;
    }
    
    return i;
}

BTNode* addChild(BTNode *node, BTNode *child)
{
    int i = 0;
    for(;i < MAXM; i++)
    {
        if(node->child[i] == NULL)
            break;
    }
    
    node->child[i] = child;
    
    return node;
}
BTNode* insertChild(BTNode *node, BTNode *child, int pos)
{
    for (int i = node->keynum - 1; i >= pos; i--) {
        node->child[i + 1] = node->child[i];
    }
    node->child[pos] = child;
    return  node;
}
bool removeChild(BTNode *node, BTNode *child)
{
    int i = 0;
    for (; i<node->keynum + 1; i++) {
        if(child == node->child[i])
            break;
    }
    if(i == node->keynum + 1)
        return false;
    for (int j = i; j < node->keynum; j++) {
        node->child[j] = node->child[j+1];
    }
    node->child[node->keynum] = NULL;
    return true;
}

int indexOf(BTNode *mySelf,BTNode *node)
{
    int i = 0, count = mySelf->childrenCount(mySelf);
    for (; i < count; i++) {
        if(mySelf->child[i] == node)
            break;
    }
    i = i == count ? -1 : i;
    return i;
}
1. 查找
int binarySearch(BTNode *root,KeyType key)
{
    int left = 0, right = root->keynum - 1, mid = (left + right) / 2;
    while (left <= right) {
        mid = (left + right)/2;
        if(key < root->keys[mid])
            right = mid - 1;
        else if (key > root->keys[mid])
            left = mid + 1;
        else
            break;
    }
    mid = (left <= right) ? mid : -mid;
    return mid;
}
BTNode* searchNode(BTNode *root,KeyType key)
{
    if(root == NULL)
        return NULL;
    
    int index = binarySearch(root,key);
    if(index > 0){
        return root;
    }else{
        return searchNode(root->child[-index-1], key);
    }
}
2. 插入

对于新元素的插入,都是发生在叶子节点上的。所有的插入操作都是从根节点开始,搜索这棵树,并找到该元素应该被插入的节点。将新元素插入到该节点需要如下步骤:

  • 如果该节点上的元素数未满,则将新元素插入到该节点,并保持节点中元素的顺序。
  • 如果该节点上的元素已满,则需要将该节点平均地分裂成两个节点:
       从该节点中的元素和新元素先出一个中位数
       小于中位数的元素放到左边节点,大于中位数的元素放到右边节点,中位数做为分隔值。
       分隔值被插入到父节点中(增加了树的高度),这可能会导致父节点的分裂,分裂父节点时又可能会使它的父节点分裂,以此类推。如果分裂一直上升到根节点,那么就创建一个新的根节点,它有一个分隔值和两个子节点。(这就是根节点并不像内部节点一样有最少子节点数量限制的原因)

下图是一个5阶B树,我们通过顺序插入1到17,来观察节点的分裂过程。


void insertNodeKey(BTNode *node,KeyType key);
void insert(BTNode **root,KeyType key)
{
    if(*root == NULL){
        BTNode *node = (BTNode *)malloc(sizeof(BTNode));
        initBTNode(node);
        node->insertKey(node,key);
        *root = node;
        return;
    }
    insertNodeKey(*root, key);
}
void addKeys(BTNode *node,BTNode *fromNode,int begin, int end)
{
    for (int i = begin; i < end; i++) {
        node->keys[i - begin] = fromNode->keys[i];
    }
    node->keynum = end - begin;
}
void addChildren(BTNode *node,BTNode *fromNode,int begin, int end)
{
    for (int i = begin; i < end; i++) {
        node->child[i-begin] = fromNode->child[i];
        fromNode->child[i]->parent = node;
    }
}
void split(BTNode *node)
{
    int mid = node->keynum/2;
    //分离值
    KeyType sepKey = node->keys[mid];
    //分离后的左节点
    BTNode *leftNode = (BTNode *)malloc(sizeof(BTNode));
    initBTNode(leftNode);
    //添加key
    addKeys(leftNode, node, 0, mid);
    
    BTNode *rightNode = (BTNode *)malloc(sizeof(BTNode));
    initBTNode(rightNode);
    addKeys(rightNode, node, mid + 1, node->keynum);
    
    //分离子结点
    if(node->childrenCount(node) > 0)
    {
        addChildren(leftNode, node, 0, mid);
        addChildren(rightNode, node, mid + 1, node->keynum);
    }
    
    //当前节点为根结点
    BTNode *parent = node->parent;
    if(parent == NULL){
        BTNode *newRoot = (BTNode *)malloc(sizeof(BTNode));
        newRoot->insertKey(newRoot,sepKey);
        leftNode->parent = newRoot;
        rightNode->parent = newRoot;
        newRoot->addChild(newRoot,leftNode)->addChild(newRoot,rightNode);
    }else{
        parent->insertKey(parent,sepKey);
        leftNode->parent = parent;
        rightNode->parent = parent;
        int pos = binarySearch(parent, sepKey);
        parent->removeChild(parent,node);
        parent->insertChild(parent,leftNode,pos)->insertChild(parent,rightNode,pos+1);
        if(parent->keynum > m - 1)
            split(parent);
    }
}
void insertNodeKey(BTNode *node,KeyType key)
{
    //当前节点为叶子节点
    if(node->childrenCount == 0)
    {
        //如果当前结点key未满,直接添加
        if(node->keynum < m - 1)
        {
            node->insertKey(node,key);
            return;
        }
        // 如果key已满,分裂结点
        node->insertKey(node,key);
        split(node);
        return;
    }
    
    //当前节点为内部节点的时候,需要查找相应的子结点,直到找到相应的叶子结点时,才插入
    int index = binarySearch(node, key);
    if(index < 0)
        insertNodeKey(node->child[-index-1],key);
}

3. 删除

B树的删除就复杂了许多,可分为下面几种情况:

  • 删除叶子结点的元素:
    (1) 搜索要删除的元素
    (2) 如果它在叶子节点上,直接将其删除
    (3) 如果删除后产生了下溢出(键数小于最小值 [m/2]-1),则向其兄弟节点借元素。如果兄弟结点借,即将其父节点元素下移至当前节点,将兄弟节点中元素上移至父节点(若是左节点,上移最大元素;若是右节点,上移最小元素)
    (4) 若兄弟节点也达到下限,即不借,则合并兄弟节点与分割键(父结点分割键下移到当前结点删除的位置,然后和兄弟结点合并)。

下图是一个5阶B树,我们通过删除15、14、17、5四个键,来观察删除过程(基本涵盖所有情况)。


  • 删除内部结点:
    内部节点中元素为其左右子节点的分割值,需要从左子树中找最大的key或右子树中找最小的key来代替被删除的key元素。从而转化为删除叶子结点上的元素

下面以3阶的B-树删除内部结点为例进行演示:


void afterDeleteHandle(BTNode **root,BTNode *node,int deleteIndex);
void merge(BTNode **root,BTNode *node, BTNode *brotherNode, int parentKeyIndex);
void leftRotate(BTNode *node, int nodeIndex, BTNode *rightNode);
void rightRotate(BTNode *node, int nodeIndex, BTNode *leftNode);

BTNode* findLeftChildMaxKeyNode(BTNode *node,int index)
{
    
    BTNode *maxNode = node->child[index];
    BTNode *lastNode = maxNode;
    int maxIndex = -1;
    while (lastNode) {
        maxIndex = maxNode->childrenCount(maxNode) - 1;
        lastNode = maxNode->child[maxIndex];
        if(lastNode)
            maxNode = lastNode;
    }
    return maxNode;
}

BTNode* findRightChildMinKeyNode(BTNode *node,int index)
{
    BTNode *minNode = node->child[index + 1];
    BTNode *firstNode = minNode;
    while (firstNode) {
        firstNode = minNode->child[0];
        if(firstNode)
            minNode = firstNode;
    }
    return minNode;
}
void delete(BTNode **root,KeyType key)
{
    BTNode *node = searchNode(*root, key);
    if(!node)
        return;
    
    //删除节点
    int keyIndex = binarySearch(node, key);
    node->removeKey(node, key);
    if(node->keynum < min){
        afterDeleteHandle(root,node, keyIndex);
    }
    
}

void afterDeleteHandle(BTNode **root,BTNode *node,int deleteIndex)
{
    //如果是内部结点 转化为删除叶子结点
    //要么向左子树中最大的叶子结点的最大key借
    //要么向右子树中最小的叶子结点的最小key借
    if(node->childrenCount > 0 && deleteIndex > 0){
        
        BTNode *leftChildMaxNode = findLeftChildMaxKeyNode(node, deleteIndex);
        if(leftChildMaxNode){
            int maxIndex = leftChildMaxNode->keynum - 1;
            KeyType maxKey = leftChildMaxNode->keys[maxIndex];
            node->insertKey(node,maxKey);
            leftChildMaxNode->removeKey(leftChildMaxNode,maxKey);
            if(leftChildMaxNode->keynum < min){
                afterDeleteHandle(root, leftChildMaxNode, maxIndex);
            }
        }else{
            BTNode *rightChildMinNode = findRightChildMinKeyNode(node, deleteIndex);
            KeyType minKey = rightChildMinNode->keys[0];
            node->insertKey(node,minKey);
            rightChildMinNode->removeKey(rightChildMinNode,minKey);
            if(rightChildMinNode->keynum < min){
                afterDeleteHandle(root, rightChildMinNode, 0);
            }
        }
       
    }
    
    //删除叶子结点 或继续向兄弟结点借
    BTNode *parentNode = node->parent;
    //当前结点在父结点中的位置
    int nodeIndex = parentNode->indexOf(parentNode,node);
    //左兄弟结点
    BTNode *leftNode = nodeIndex > 0 ? parentNode->child[nodeIndex-1] : NULL;
    //右兄弟节点
    BTNode *rightNode = nodeIndex == parentNode->childrenCount(parentNode) - 1 ? NULL : parentNode->child[nodeIndex + 1];
    
    int leftKeyCount = leftNode == NULL ? 0 : leftNode->keynum;
    int rightKeyCount = rightNode == NULL ? 0 : rightNode->keynum;
    int maxCount = MAX(leftKeyCount, rightKeyCount);
    
    // 左右兄弟节点元素数均达到限定值,合并
    if(maxCount <= min){
        if(leftNode){
            //与左兄弟节点合并
            merge(root,node, leftNode, nodeIndex - 1);
        }else if (rightNode){
            //与右兄弟节点合并
            merge(root,node, rightNode, nodeIndex);
        }
        return;
    }
    
    //向最富裕的兄弟结点借
    if(maxCount == leftKeyCount){
        //向左节点借-> 右旋
        rightRotate(node, nodeIndex, leftNode);
    }else{
        //向右节点借-> 左旋
        leftRotate(node, nodeIndex, rightNode);
    }
}



void merge(BTNode **root,BTNode *node, BTNode *brotherNode, int parentKeyIndex)
{
    BTNode *parentNode = node->parent;
    BTNode *newNode = (BTNode *)malloc(sizeof(BTNode));
    initBTNode(newNode);
    
    KeyType parentKey = parentNode->keys[parentKeyIndex];
    addKeys(newNode, node, 0, node->keynum);
    addKeys(newNode, brotherNode, 0, brotherNode->keynum);
    newNode->insertKey(newNode,parentKey);//向父结点借
    addChildren(newNode, node, 0, node->childrenCount(node));
    addChildren(newNode, brotherNode, 0, brotherNode->childrenCount(brotherNode));
    
    
    parentNode->removeKey(parentNode,parentKey);
    parentNode->removeChild(parentNode,node);
    parentNode->removeChild(parentNode,brotherNode);
    
    parentNode->addChild(parentNode,newNode);
    newNode->parent = parentNode;
    
    //合并之后,若父结点的元素小于限定数 继续调整
    if(parentNode == *root && parentNode->keynum == 0)
    {
        *root = newNode;
        free(parentNode);
        return;
    }
    if(parentNode->keynum < min){
        afterDeleteHandle(root, parentNode, -1);
    }
    
}

/**
* 左旋转
* (1)将父节点的中间值元素删除,并添加到当前节点中
* (2)将右兄弟节点中最小元素删除,并添加到父节点中
*
* @param node      当前节点
* @param nodeIndex 中间值索引
* @param rightNode 右节点
*/
void leftRotate(BTNode *node, int nodeIndex, BTNode *rightNode)
{
    BTNode *parentNode = node->parent;
    KeyType key = parentNode->keys[nodeIndex];
    parentNode->removeKey(parentNode,key);
    node->insertKey(node,key);
    
    KeyType rightKey = rightNode->keys[0];
    parentNode->insertKey(parentNode,rightKey);
    
    rightNode->removeKey(rightNode,rightKey);
    BTNode *rightFirstChild = rightNode->child[0];
    rightNode->removeChild(rightNode,rightFirstChild);
    node->addChild(node,rightFirstChild);
}


/**
* 右旋转
* (1)将父节点的中间值元素删除,并添加到当前节点中
* (2)将左兄弟节点中最大元素删除,并添加到父节点中
*
* @param node      当前节点
* @param nodeIndex 中间值索引
* @param leftNode  左节点
*/

void rightRotate(BTNode *node, int nodeIndex, BTNode *leftNode)
{
    BTNode *parentNode = node->parent;
    KeyType key = parentNode->keys[nodeIndex - 1];
    
    parentNode->removeKey(parentNode,key);
    node->insertKey(node,key);
    
    KeyType leftKey = leftNode->keys[leftNode->keynum - 1];
    parentNode->insertKey(parentNode,leftKey);
    
    leftNode->removeKey(leftNode,leftKey);
    int leftLast = leftNode->childrenCount(leftNode) - 1;
    BTNode *leftLastChild = leftNode->child[leftLast];
    leftNode->removeChild(leftNode,leftLastChild);
    node->addChild(node,leftLastChild);
}
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容