数据结构堆(Heap)是一种二叉树的变种,它通过元素交换上浮,保证堆的根节点永远是元素集合中的最大/最小元素。
优先队列是堆的一种应用,使用者可以不断向优先队列中加入新的元素,总是可以以O(1)的时间复杂度取出其中的最大/最小值。
利用两个优先队列可以实现O(1)时间复杂度取中位数。两个优先队列分别是最大堆和最小堆,添加的元素加入大堆或者小堆中,同时需要满足大堆元素个数等于小堆或者仅多一个。由此,从大堆和小堆的根元素可以求出中位数。
下面我们用C来一步步实现上面所述的问题。
1.定义实现堆数据结构
首先使用动态数组来管理堆的数据,定义堆的类型(大堆或者小堆)。在连续存储的数组中,堆的根节点位于arr[0],左右子节点分别存储在arr[1]和arr[2]中,由此实现一组取父节点和子节点索引的函数。
#include "vector.h"
#include <stdbool.h>
#define MIN_HEAP 0
#define MAX_HEAP 1
#define INIT_HEAP_SIZE 10
typedef struct heap_t{
vector v;
int type;//max heap or min heap
cmp_func cmp; //cmp func ptr
}heap_t;
static int parent(int i){
return (i-1)/2;
}
static int left(int i){
return 2*i + 1;
}
static int right(int i){
return 2*i + 2;
}
void init_heap(heap_t *h, int type){
h->type = type;
init_vec(&h->v, INIT_HEAP_SIZE);
if(type == MIN_HEAP)
h->cmp = less_val_cmp;
else
h->cmp = more_val_cmp;
}
接着实现函数维持堆的特性,将新加的元素添加在数组末尾,检查它与它父节点的顺序,若顺序不对,交换两个节点的值,由此直至检查了所有新建节点的父系节点。
void insert_heap(heap_t *h, void *item){
push_back_vec(&h->v, item);
int i = h->v.len - 1; //append new element in back
//swap element with its parent if they are not in order
while(i > 0 && !h->cmp(h->v.items[parent(i)], h->v.items[i])){
swap(&h->v.items[parent(i)], &h->v.items[i]);
i = parent(i);
}
}
从堆中移除元素时也需要保存堆的特性,其思路为,若元素的左右节点中存在顺序不对的元素,交换节点元素。
static void heapify(heap_t *h, int i){
int l = left(i),
r = right(i);
int smallest = i;
//check if child element is not in order with i
if(l < h->v.len && h->cmp(h->v.items[l], h->v.items[i]))
smallest = l;
else if(r < h->v.len && h->cmp(h->v.items[r], h->v.items[i]))
smallest = r;
//recursively heapify until in order
if(smallest != i){
swap(&h->v.items[i], &h->v.items[smallest]);
heapify(h, smallest);
}
}
void *extract_end(heap_t *h){
void *ret = get_end(h);//get root element
if(!ret){
return NULL;
}
pop_front_vec(&h->v);//remove root
heapify(h, 0);//re-heapify
return ret;
}
若只是查看堆根节点
void *extract_end(heap_t *h){
void *ret = get_end(h);//get root element
if(!ret){
return NULL;
}
pop_front_vec(&h->v);//remove root
heapify(h, 0);//re-heapify
return ret;
}
最后添加测试用例来验证代码
void heap_test(){
INIT_HEAP(h);
insert_heap(&h, (void *)3);
insert_heap(&h, (void *)2);
insert_heap(&h, (void *)1);
printf("min %d\n", (int)extract_end(&h));
printf("min %d\n", (int)extract_end(&h));
insert_heap(&h, (void *)1);
printf("min %d\n", (int)extract_end(&h));
printf("min %d\n", (int)extract_end(&h));
FREE_HEAP(h);
}
2. 优先队列实现O(1)时间查找中位数
定义中位数查找器数据结构,查找器中包含了两个优先队列,分别包含数据元素的左半部分和右半部分,左队列的根为最大根,右队列的根为最小根,因此利用两个队列的根节点信息足以计算出中位数(若数据个数为奇数,中位数为左根,否则为左右根的平均数)。
typedef struct median_finder_t{
heap_t left, right;
}median_finder_t;
void init_median_finder(median_finder_t *mf);
void add_num(median_finder_t *mf, int num);
float find_median(median_finder_t *mf);
void reset_median_finder();
void init_median_finder(median_finder_t *mf){
init_heap(&mf->left, MAX_HEAP);
init_heap(&mf->right, MIN_HEAP);
}
定义函数用于加入新元素,新加元素的过程需要保证左队列长于右队列,但长度差距不超过2。
void add_num(median_finder_t *mf, int num){
heap_t *left = &mf->left,
*right = &mf->right;
//add num and make sure left heap is longer
if(total_heap(left) == 0 || num < (int)get_end(left)){
insert_heap(left, (void *)(long)num);
}else{
insert_heap(right, (void *)(long)num);
}
//balance left and right heap, is left heap is shorter
// move element from right to left
//if left heap has 2 more elements than right
// move from left to right
if(total_heap(left) < total_heap(right)){
insert_heap(left, get_end(right));
extract_end(right);
}else if (total_heap(left) - total_heap(right) >= 2){
insert_heap(right, get_end(left));
extract_end(left);
}
}
从左右队列中计算中位数的方法是显而易见的,
float find_median(median_finder_t *mf){
heap_t *left = &mf->left,
*right = &mf->right;
if(get_end(left) == NULL)
return 0.0;
//odd
if(total_heap(left) > total_heap(right)){
return (float)(long)get_end(left);
}else{ //even
return ((float)(long)get_end(left) +
(float)(long)get_end(right))/2;
}
}
添加一个简单的测试用例来验证程序的有效
void median_finder_test(){
median_finder_t mf;
init_median_finder(&mf);
add_num(&mf, 1);
add_num(&mf, 2);
printf("median %f\n", find_median(&mf));
add_num(&mf, 3);
printf("median %f\n", find_median(&mf));
}```
median 1.500000
median 2.000000
代码清单:[https://github.com/KevinACoder/Learning/blob/master/ds/heap.c](https://github.com/KevinACoder/Learning/blob/master/ds/heap.c)