简介
堆是一种完全二叉树,有最大堆和最小堆两种。
- 最大堆:每个节点的值都大于或等于其子节点的值,如:
- 最小堆:和最大堆相反,每个节点的值都小于或等于其子节点的值。
堆的特性
堆是一种完全二叉树,用数组存储时,下标为 i 的节点与其父节点、子节点的下标满足以下关系:
- 父节点下标:
parent = (i - 1) // 2  # 整除取整
- 左节点下标:
left = 2 * i + 1
- 右节点下标:
right = 2 * i + 2
如:节点60的父节点下标是1,本身节点下标是3,左下标7,右下标8
堆的表示
堆可以用数组表示,如
[10,7,2,5,1]
堆的python实现
class Array(object):
    """Fixed-size array backed by a Python list.

    All ``size`` slots are allocated when the array is created and start
    out as ``None``. The array never grows or shrinks.
    """

    def __init__(self, size=32):
        """Allocate ``size`` slots, each initialised to ``None``.

        Raises:
            ValueError: if ``size`` is negative.
        """
        # ``[None] * -1`` silently yields an empty list while ``__len__``
        # would report -1, which ``len()`` rejects; fail early instead.
        if size < 0:
            raise ValueError('size must be non-negative')
        self._size = size
        self._items = [None] * size

    def __getitem__(self, index):
        """Return the value stored at ``index``."""
        return self._items[index]

    def __setitem__(self, index, value):
        """Store ``value`` at ``index``."""
        self._items[index] = value

    def __len__(self):
        """Return the fixed capacity given at construction time."""
        return self._size

    def clear(self, value=None):
        """Overwrite every slot with ``value`` (``None`` by default)."""
        # Slice assignment refills the existing list in place.
        self._items[:] = [value] * len(self._items)

    def __iter__(self):
        """Iterate over every slot in index order, including unset ones."""
        return iter(self._items)
class MaxHeap(object):
    """Fixed-capacity binary max-heap stored in an ``Array``.

    The element at index ``i`` has its parent at ``(i - 1) // 2`` and its
    children at ``2 * i + 1`` and ``2 * i + 2``. Only the first
    ``self._count`` slots of the backing array belong to the heap.
    """

    def __init__(self, maxsize=None):
        """Create an empty heap that can hold up to ``maxsize`` items.

        When ``maxsize`` is ``None`` the backing ``Array`` is created with
        its own default capacity and ``self.maxsize`` is set to that value.
        """
        if maxsize is None:
            # Array(None) cannot be built, and comparing the item count
            # with None fails, so fall back to Array's default capacity.
            self._elements = Array()
            maxsize = len(self._elements)
        else:
            self._elements = Array(maxsize)
        self.maxsize = maxsize
        self._count = 0

    def __len__(self):
        """Return the number of items currently stored in the heap."""
        return self._count

    def add(self, value):
        """Insert ``value`` into the heap.

        Raises:
            Exception: with message ``'full'`` when the heap is at capacity.
        """
        if self._count >= self.maxsize:
            raise Exception('full')
        # Append at the end, then restore the heap property upwards.
        self._elements[self._count] = value
        self._count += 1
        self._siftup(self._count - 1)

    def _siftup(self, ndx):
        """Move the item at ``ndx`` up while it is greater than its parent."""
        if ndx <= 0:
            return
        # Integer division keeps the index exact (no float round trip).
        parent = (ndx - 1) // 2
        if self._elements[ndx] > self._elements[parent]:
            self._elements[ndx], self._elements[parent] = (
                self._elements[parent], self._elements[ndx])
            self._siftup(parent)  # keep going recursively

    def extract(self):
        """Remove and return the largest item.

        Raises:
            Exception: with message ``'empty'`` when the heap has no items.
        """
        if self._count <= 0:
            raise Exception('empty')
        value = self._elements[0]  # remember the root value
        self._count -= 1
        last = self._count
        # Move the last item to the root, then sift it down.
        self._elements[0] = self._elements[last]
        # Clear the vacated slot so no stale reference is kept alive.
        self._elements[last] = None
        self._siftdown(0)
        return value

    def _siftdown(self, ndx):
        """Move the item at ``ndx`` down while a child is greater than it."""
        left = 2 * ndx + 1
        right = left + 1
        # Find the largest of the node and its children. Each child index
        # is checked against ``self._count`` before the slot is read, so
        # slots outside the live heap are never compared.
        largest = ndx
        if (left < self._count and
                self._elements[left] > self._elements[largest]):
            largest = left
        if (right < self._count and
                self._elements[right] > self._elements[largest]):
            largest = right
        if largest != ndx:
            self._elements[ndx], self._elements[largest] = (
                self._elements[largest], self._elements[ndx])
            self._siftdown(largest)  # keep going recursively
def test_maxheap():
    """Fill a MaxHeap with 0..n-1 and check items come out largest first."""
    n = 5
    h = MaxHeap(n)
    for i in range(n):
        h.add(i)
    # Every added item must be counted.
    assert len(h) == n
    for i in reversed(range(n)):
        assert i == h.extract()
    # Extracting everything must leave the heap empty.
    assert len(h) == 0
def heapsort_reverse(array):
    """Return the items of ``array`` as a new list in descending order.

    Every item is added to a ``MaxHeap`` sized to hold the whole input and
    then extracted one by one, largest first. The input is not modified.
    """
    # Materialise the input once so any iterable is accepted, not only
    # objects that support len().
    items = list(array)
    length = len(items)
    maxheap = MaxHeap(length)
    for item in items:
        maxheap.add(item)
    return [maxheap.extract() for _ in range(length)]
def test_heapsort_reverse():
    """Compare heapsort_reverse with sorted(..., reverse=True) on shuffled 0..9."""
    import random

    values = list(range(10))
    random.shuffle(values)
    result = heapsort_reverse(values)
    expected = sorted(values, reverse=True)
    assert result == expected
def heapsort_use_heapq(iterable):
    """Return the items of ``iterable`` as a new list in ascending order.

    The items are copied into a list, turned into a min-heap with
    ``heapq.heapify`` and popped smallest first. The input is not modified.
    """
    from heapq import heapify, heappop

    items = list(iterable)
    # One heapify call builds the heap in linear time, instead of pushing
    # the items one at a time.
    heapify(items)
    # range(len(items)) is evaluated once, before any item is popped.
    return [heappop(items) for _ in range(len(items))]
def test_heapsort_use_heapq():
    """Compare heapsort_use_heapq with sorted() on shuffled 0..9."""
    import random

    values = list(range(10))
    random.shuffle(values)
    result = heapsort_use_heapq(values)
    expected = sorted(values)
    assert result == expected
- Array 实现一个数组,用于填充堆数据
- 实现一个最大堆MaxHeap
- add方法用于增加堆节点时,每次增加后会调用_siftup进行调整。
- _siftup 以新增节点的下标(即当前最后一个元素的下标)作为参数,把该节点的值与父节点的值比较,若大于父节点则交换,并继续向上比较。
- extract提取最大值,并_siftdown进行重整,维持堆特性。
- _siftdown进行比较,交换,递归之。
python heapq模块
python自带的内置heapq模块,用于实现堆的相关操作(实现的是最小堆)。