1. 最优二叉树(赫夫曼树)
带权路径长度(WPL):设二叉树有n个叶子结点,每个叶子结点带有权值WK,从根结点到每个叶子结点的长度为Lk,则每个叶子结点的带权路径长度之和就是: WPL = ∑WkLk
最优二叉树(赫夫曼树): WPL最小的二叉树,每次把权值最小的两棵二叉树合并。
赫夫曼树的特点:
- 没有度为1的结点;
- n个叶子结点的赫夫曼树共有2n-1个节点;
- 赫夫曼树的任意非叶子结点的左右子树交换后仍为赫夫曼树。
- 对同一组权值{w1,w2,......,wn}, 是否存在不同构的两棵赫夫曼树呢?
对一组权值{1,2,3,3}, 不同构的两棵哈夫曼树:
构建赫夫曼树
typedef struct TreeNode *HuffmanTree;
struct TreeNode{
int weight;
HuffmanTree left,right;
};
构建赫夫曼树
HuffmanTree Huffman(MinHeap H)
{
int i;
HuffmanTree T;
buildMinHeap(H);
for (i = 1; i < H->size; i++) {
T = malloc(sizeof(struct TreeNode));
T->left = deleteMin(H);
T->right = deleteMin(H);
T->weight = T->left->weight + T->right->weight;
insert(H, T);
}
T = deleteMin(H);
return T;
}
- 构建赫夫曼树的整体时间复杂度为 T = O(N logN)
2. 赫夫曼编码
为了让编码尽可能的短,我们进行不等长编码。如下:
a: 1 e: 0 s: 10 t: 11
1011是什么字符串的编码?
aeaa: 1011
aet: 1011
st: 1011
因此这么编码会产生二义性
如何避免二义性呢?
前缀编码:任何字符的编码都不是另一个字符编码的前缀。
我们用二叉树进行编码:
(1) 左右分支:0、1
(2) 字符只在叶子结点上
四个字符的频率:a:4, u:1, x:2, z:1
-
用等长编码方式:
-
用赫夫曼树进行编码方式:
- 可以看出编码长度乘以频率之和后者较小。而且没有二义性。
赫夫曼编码代码:
typedef struct TreeNode *HuffmanTree;
typedef struct TreeNode HTNode;
struct TreeNode{
unsigned int weight;
unsigned int left,right, parent;
bool minFlag;
};
typedef char** HuffmanCode;
void selectMins(HuffmanTree HT,int end, int *s1, int *s2)
{
HTNode *minp1 = NULL;
HTNode *minp2 = NULL;
for (int i = 1; i <= end; i++) {
HTNode *node = &HT[i];
if(node->minFlag)
continue;
if(minp1 == NULL){
minp1 = node;
*s1 = i;
}else{
minp2 = node;
*s2 = i;
break;
}
}
if(minp1->weight > minp2->weight)
{
HTNode* tmp = minp1;
minp1 = minp2;
minp2 = tmp;
int tmpI = *s1;
*s1 = *s2;
*s2 = tmpI;
}
for (int i = 1; i <= end; ++i) {
HTNode *node = &HT[i];
if(node->minFlag)
continue;
if(node->weight < minp1->weight){
minp1 = node;
*s1 = i;
}else if (node->weight < minp2->weight && node != minp1){
minp2 = node;
*s2 = i;
}
}
minp1->minFlag = true;
minp2->minFlag = true;
}
void HuffmanCoding(int *w, int n)
{
if(n <= 1) return;
int m = 2 * n - 1;
HuffmanTree HT = (HuffmanTree)malloc((m+1)*sizeof(HTNode));
HuffmanTree p;
int i, s1, s2;
int *wp = w;
for(p = HT + 1, i = 1; i<=n; ++i, ++p, ++wp)
{
p->weight = *wp;
p->left = 0;
p->right = 0;
p->parent = 0;
p->minFlag = false;
}
for (; i <= m; ++i, ++p) {
p->weight = 0;
p->left = 0;
p->right = 0;
p->parent = 0;
p->minFlag = false;
}
for (i = n + 1; i <= m; ++i) {
selectMins(HT, i-1, &s1, &s2);
HT[s1].parent = i;
HT[s2].parent = i;
HT[i].left = s1;
HT[i].right = s2;
HT[i].weight = HT[s1].weight + HT[s2].weight;
HT[i].minFlag = false;
}
for(int j = 1; j <= m; j++)
{
printf("w:%u, left:%u, right:%u, parent:%u\n", HT[j].weight,HT[j].left,HT[j].right,HT[j].parent);
}
HuffmanCode HC = (HuffmanCode)malloc((n+1)*sizeof(char*));
char *cd = (char *)malloc(n * sizeof(char));
cd[n-1] = '\0';
int start , c, f;
for (int i = 1; i <= n; ++i) {
start = n - 1;
for (c = i, f = HT[i].parent; f!=0; c = f, f = HT[f].parent)
{
if(HT[f].left == c) cd[--start] = '0';
else cd[--start] = '1';
}
HC[i] = (char *)malloc((n - start) *sizeof(char));
strcpy(HC[i], &cd[start]);
printf("w:%u %s \n",w[i-1],HC[i]);
free(HC[i]);
}
free(cd);
free(HC);
}