一、实验名称:Huffman Coding
二、实验目的:
- 熟练掌握哈夫曼树的数据结构,结构的特点;
- 能够实现哈夫曼树的基本操作:如构造,插入等
- 利用最小堆降低哈夫曼树的时间复杂度。
- 熟练掌握最小堆的数据结构,结构的特点;
- 能够实现最小堆的基本操作:如构造,插入,删除等
三、实验内容:
-
Determine the data structures for a binary Huffman tree
//采取树的结构构造哈夫曼树 //采取数组的结构构造最小堆 typedef struct TreeNode*Huffman; typedef struct TreeNode** heap; struct TreeNode { int data; Huffman left; Huffman right; };
-
Implement the algorithm of creating a binary Huffman tree for a given postive integer sequence
输入一段序列,利用该序列数组构造最小堆,以便于哈夫曼树的构造。
//利用序列构造最小堆 void BuildMinHeap(heap h,int size); //利用最小堆构造哈夫曼树 Huffman BuildHuffman(heap h,int size);
-
Design and implement a Huffman coding prototype
//采用深搜的方法打印所有的哈夫曼编码 void getHuffmanCode(Huffman h,char* codeStore,int* index);
四. 程序清单(较为详细的注释):
#include <stdio.h>
#include <stdlib.h>
typedef struct TreeNode*Huffman;
typedef struct TreeNode** heap;
struct TreeNode
{
int data;
Huffman left;
Huffman right;
};
//向下过滤,在子树都为最小堆的前提下,找到节点的合适位置,是以该节点为根节点的树是一个最小堆
void FixDown(heap h,int size,int index){
int parent;
int child;
Huffman temp=h[index];
for(parent=index;parent*2<=size;parent=child){
child=parent*2;
if(child+1<=size&&h[child+1]->data<h[child]->data)
child++;
if(h[child]->data<temp->data){
h[parent]=h[child];
}
else break;
}
h[parent]=temp;
}
//采用递归的思想,从下往上建立最小堆,保证了目前所建树的子树都为最小堆。
void BuildMinHeap(heap h,int size){
for(int i=size/2;i>=1;i--){
FixDown(h,size,i);
}
}
//返回并删除最小堆的最小值(即头节点),保持树仍为最小堆
/*将最后一个节点挪动到头节点处,并size--,
因为此时子树均为最小堆,直接向下过滤即可保证该树仍为最小堆*/
Huffman PopMinHeap(heap h,int* size){
Huffman result=h[1];
h[1]=h[(*size)--];
//向下过滤
FixDown(h,*size,1);
return result;
}
//插入节点值,保持树仍为最小堆
/**/
void InsertMinHeap(heap h,int* size,Huffman node){
h[++(*size)]=node;
//向上过滤
int child=*size;
int parent;
for(;child>1;child=parent){
parent=child/2;
if(h[parent]->data>node->data){
h[child]=h[parent];
}
else
{
break;
}
}
h[child]=node;
}
//建立哈夫曼树
Huffman BuildHuffman(heap h,int size){
Huffman node=(Huffman)malloc(sizeof(struct TreeNode));
Huffman fristNode,lastNode;
int time=size-1;
for(int i=0;i<time;i++){//进行size-1次合并
fristNode=PopMinHeap(h,&size);
lastNode=PopMinHeap(h,&size);
Huffman node=(Huffman)malloc(sizeof(struct TreeNode));
node->data=fristNode->data+lastNode->data;
node->left=fristNode;
node->right=lastNode;
InsertMinHeap(h,&size,node);
}
return PopMinHeap(h,&size);
}
//以“结点值(左子树)(右子树)“的形式打印树,证明它是哈夫曼树
void print(Huffman h){
if(h){
printf("%d(",h->data);
print(h->left);
printf(")(");
print(h->right);
printf(")");
}
}
//释放哈弗曼树空间
void deleteHuff(Huffman h){
if(h){
deleteHuff(h->left);
deleteHuff(h->right);
free(h);
}
}
//采用深搜的方法打印所有的哈夫曼编码
void getHuffmanCode(Huffman h,char* codeStore,int* index){
if(!h)return;
if(h->left==NULL&&h->right==NULL){
printf("\n%d's HuffmanCode:",h->data);
codeStore[*index]='\0';
printf("%s",codeStore);
}
else{
codeStore[(*index)++]='0';
getHuffmanCode(h->left,codeStore,index);
--(*index);//回溯
codeStore[(*index)++]='1';
getHuffmanCode(h->right,codeStore,index);
--(*index);//回溯
}
}
int main() {
int n;//输入序列的个数;
printf("Please enter the number of sequences:\n");
scanf("%d",&n);
heap MinHeap=(heap)malloc(sizeof(struct TreeNode*)*(2*n));//保证堆有足够的空间
//建立最小堆
printf("Please enter the sequence one by one:\n");
for(int i=1;i<=n;i++){
MinHeap[i]=(Huffman)malloc(sizeof(struct TreeNode));
scanf("%d",&MinHeap[i]->data);
MinHeap[i]->left=NULL;
MinHeap[i]->right=NULL;
}
BuildMinHeap(MinHeap,n);
// for(int i=1;i<=n;i++){
// printf("%d ",MinHeap[i]->data);
// }
//建立哈夫曼树
Huffman huff=BuildHuffman(MinHeap,n);
//打印哈夫曼树,格式为“父结点(左子树)(右子树)"以证明其为哈夫曼树
printf("Print Huffman Tree\n");
print(huff);
//输出所有原输入数的哈夫曼编码
printf("\nOutput the Huffman code of all original input numbers");
char* codeStore=(char*)malloc(sizeof(char)*n);
int index=0;
getHuffmanCode(huff,codeStore,&index);
free(codeStore);
//内存释放工作
free(MinHeap);
deleteHuff(huff);
return 0;
}#include <stdio.h>
#include <stdlib.h>
//在二叉树数据结构基础上添加高度成员.
struct ALVnode
{
int val;
struct ALVnode* left;
struct ALVnode* right;
int high;
};
typedef struct ALVnode* ALV;
int getHigh(ALV tree){
if(!tree) return 0;
int i=getHigh(tree->left);
int j=getHigh(tree->right);
return i>j?i+1:j+1;
}
/*将root与其左节点做左单旋(右转),因为root和其左节点的树高发生改变
,于是需要更新两者的高度,将根结点指向新的根节点(即左节点)并返回*/
ALV LeftLeft(ALV root){
ALV temp=root;
root=root->left;
temp->left=root->right;
root->right=temp;
//更新树高
temp->high=getHigh(temp);
root->high=getHigh(root);
return root;
}
/*将root与其右节点做右单旋(左转),因为root和其左节点的树高发生改变
,于是需要更新两者的高度,将根结点指向新的根节点(即右节点)并返回*/
ALV RightRight(ALV root){
ALV temp=root;
root=root->right;
temp->right=root->left;
root->left=temp;
//更新树高
temp->high=getHigh(temp);
root->high=getHigh(root);
return root;
}
/*将root的左子树进行右单旋可以使插入结点转移到左子树的左子树上,此时再进行root整体的左单旋*/
ALV LeftRight(ALV root){
root->left=RightRight(root->left);
root=LeftLeft(root);
return root;
}
/*将root的右节点进行左单旋可以使插入结点转移到右子树的右子树上,此时再进行root整体的右单旋*/
ALV RightLeft(ALV root){
root->right=LeftLeft(root->right);
root=RightRight(root);
return root;
}
ALV insert(ALV root,int val){
if(root==NULL){//此时val是第一个数
struct ALVnode*newNode=(struct ALVnode*)malloc(sizeof(struct ALVnode));
newNode->val=val;
newNode->high=1;
newNode->left=NULL;
newNode->right=NULL;
root=newNode;//root指向新节点
}
else{
//数值大于root的值插入到右边,小于插入到左边,等于插入无效(无法插入)
if(val<root->val){
root->left=insert(root->left,val);
if((root->right==NULL&&root->left->high==2)||(root->right!=NULL&&root->left->high-root->right->high==2)){
if(val<root->left->val)
root=LeftLeft(root);//左单旋
else
{
root=LeftRight(root);//左右双旋
}
}
}
else if(val>root->val){
root->right=insert(root->right,val);
if((root->left==NULL&&root->right->high==2)||(root->left!=NULL&&root->left->high-root->right->high==-2)){
if(val<root->right->val)
root=RightLeft(root);//右左双旋
else
{
root=RightRight(root);//右单旋
}
}
}
//更新树高
root->high=getHigh(root);
}
return root;
}
/*回收树的内存*/
void clear(ALV root){
if(!root)return;
clear(root->left);
clear(root->right);
free(root);
}
//以“结点值(左子树)(右子树)“的形式打印树,证明它是AVL树
void print(ALV root){
if(!root)return;
printf("%d",root->val);
printf("(");
print(root->left);
printf(")");
printf("(");
print(root->right);
printf(")");
}
int main(){
int num;//要插入的结点数目
ALV head=NULL;
scanf("%d",&num);
int temp;//用来存储要插入的数
for(int i=0;i<num;i++){
scanf("%d",&temp);
head=insert(head,temp);
}
//以“结点值(左子树)(右子树)“的形式打印树,证明它是AVL树
print(head);
clear(head);
return 0;
}
五、测试结果:
输入输出格式说明:
-
输入格式:
按照显示提示先输入序列个数;
依次输入序列元素; -
输出格式:
以“结点值(左子树)(右子树)“的形式打印树,证明它是AVL树 -
测试样例1:
输入:
5 5 4 3 2 1
输出:
Please enter the number of sequences: 5 Please enter the sequence one by one: 5 4 3 2 1 Print Huffman Tree 15(6(3()())(3(1()())(2()())))(9(4()())(5()())) Output the Huffman code of all original input numbers 3's HuffmanCode:00 1's HuffmanCode:010 2's HuffmanCode:011 4's HuffmanCode:10
-
测试样例2:
输入:
9 33 22 11 44 55 6 43 22 11
输出:
Please enter the number of sequences: 9 Please enter the sequence one by one: 33 22 11 44 55 6 43 22 11 Print Huffman Tree 247(99(44()())(55()()))(148(61(28(11()())(17(6()())(11()())))(33()()))(87(43()())(44(22()())(22()())))) Output the Huffman code of all original input numbers 44's HuffmanCode:00 55's HuffmanCode:01 11's HuffmanCode:1000 6's HuffmanCode:10010 11's HuffmanCode:10011 33's HuffmanCode:101 43's HuffmanCode:110 22's HuffmanCode:1110 22's HuffmanCode:1111
-
测试样例3:
输入:
7 88 70 61 96 120 90 65
输出:
Please enter the number of sequences: 7 Please enter the sequence one by one: 88 70 61 96 120 90 65 Print Huffman Tree 590(246(120()())(126(61()())(65()())))(344(158(70()())(88()()))(186(90()())(96()()))) Output the Huffman code of all original input numbers 120's HuffmanCode:00 61's HuffmanCode:010 65's HuffmanCode:011 70's HuffmanCode:100 88's HuffmanCode:101 90's HuffmanCode:110 96's HuffmanCode:111
-
测试样例4:
输入:
0
输出:
-
测试样例5:(随机数样例)
输入:
100 1610 1280 299 53 1760 1677 149 1769 1325 1991 721 1451 2168 465 824 1517 1162 944 1782 69 1188 1112 16 148 1418 1429 1675 1232 2184 775 761 1714 13 623 1399 971 112 1299 1778 1819 1199 1463 924 603 356 1462 230 1828 320 2218 1429 24 1697 936 640 1462 1510 150 114 1101 206 194 357 1895 999 2082 15 320 2051 228 1325 216 741 1768 69 1311 1019 150 2190 824 210 790 788 2144 1039 1154 269 83 1521 491 1238 539 2007 1054 433 1186 855 1450 833 2049
输出:
Please enter the number of sequences: 100 Please enter the sequence one by one: 1610 1280 299 53 1760 1677 149 1769 1325 1991 721 1451 2168 465 824 1517 1162 944 1782 69 1188 1112 16 148 1418 1429 1675 1232 2184 775 761 1714 13 623 1399 971 112 1299 1778 1819 1199 1463 924 603 356 1462 230 1828 320 2218 1429 24 1697 936 640 1462 1510 150 114 1101 206 194 357 1895 999 2082 15 320 2051 228 1325 216 741 1768 69 1311 1019 150 2190 824 210 790 788 2144 1039 1154 269 83 1521 491 1238 539 2007 1054 433 1186 855 1450 833 2049 Print Huffman Tree 102441(42410(19313(9208(4484(2218()())(2266(1112()())(1154()())))(4724(2348(1162()())(1186()()))(2376(1188(585(286(138(69()())(69()()))(148()()))(299(149()())(150()())))(603()()))(1188()()))))(10105(4911(2431(1199()())(1232()()))(2480(1238()())(1242(619(299()())(320()()))(623()()))))(5194(2579(1280()())(1299()()))(2615(1304(640()())(664(320()())(344(150()())(194()()))))(1311()())))))(23097(11209(5467(2650(1325()())(1325()()))(2817(1399()())(1418()())))(5742(2858(1429()())(1429()()))(2884(1434(713(356()())(357()()))(721()()))(1450()()))))(11888(5838(2913(1451()())(1462()()))(2925(1462()())(1463()())))(6050(3012(1502(741()())(761()()))(1510()()))(3038(1517()())(1521()()))))))(60031(27380(13175(6438(3173(1563(775()())(788()()))(1610()()))(3265(1614(790()())(824()()))(1651(824()())(827(401(195(83()())(112()()))(206()()))(426(210()())(216()()))))))(6737(3352(1675()())(1677()()))(3385(1688(833()())(855()()))(1697()()))))(14205(7011(3474(1714()())(1760()()))(3537(1768()())(1769()())))(7194(3560(1778()())(1782()()))(3634(1815(891(433()())(458(228()())(230()())))(924()()))(1819()())))))(32651(15595(7530(3708(1828()())(1880(936()())(944()())))(3822(1895()())(1927(956(465()())(491()()))(971()()))))(8065(3998(1991()())(2007()()))(4067(2018(999()())(1019()()))(2049()()))))(17056(8359(4133(2051()())(2082()()))(4226(2082(1039()())(1043(504(235(114()())(121(53()())(68(28(13()())(15()()))(40(16()())(24()())))))(269()()))(539()())))(2144()())))(8697(4323(2155(1054()())(1101()()))(2168()()))(4374(2184()())(2190()())))))) Output the Huffman code of all original input numbers 2218's HuffmanCode:00000 1112's HuffmanCode:000010 1154's HuffmanCode:000011 1162's HuffmanCode:000100 1186's HuffmanCode:000101 69's HuffmanCode:0001100000 69's HuffmanCode:0001100001 148's HuffmanCode:000110001 149's HuffmanCode:000110010 150's HuffmanCode:000110011 603's HuffmanCode:0001101 1188's HuffmanCode:000111 1199's HuffmanCode:001000 1232's HuffmanCode:001001 1238's HuffmanCode:001010 299's HuffmanCode:00101100 320's HuffmanCode:00101101 623's HuffmanCode:0010111 1280's HuffmanCode:001100 1299's HuffmanCode:001101 640's HuffmanCode:0011100 320's HuffmanCode:00111010 150's HuffmanCode:001110110 194's HuffmanCode:001110111 1311's HuffmanCode:001111 1325's HuffmanCode:010000 1325's HuffmanCode:010001 1399's HuffmanCode:010010 1418's HuffmanCode:010011 1429's HuffmanCode:010100 1429's HuffmanCode:010101 356's HuffmanCode:01011000 357's HuffmanCode:01011001 721's HuffmanCode:0101101 1450's HuffmanCode:010111 1451's HuffmanCode:011000 1462's HuffmanCode:011001 1462's HuffmanCode:011010 1463's HuffmanCode:011011 741's HuffmanCode:0111000 761's HuffmanCode:0111001 1510's HuffmanCode:011101 1517's HuffmanCode:011110 1521's HuffmanCode:011111 775's HuffmanCode:1000000 788's HuffmanCode:1000001 1610's HuffmanCode:100001 790's HuffmanCode:1000100 824's HuffmanCode:1000101 824's HuffmanCode:1000110 83's HuffmanCode:1000111000 112's HuffmanCode:1000111001 206's HuffmanCode:100011101 210's HuffmanCode:100011110 216's HuffmanCode:100011111 1675's HuffmanCode:100100 1677's HuffmanCode:100101 833's HuffmanCode:1001100 855's HuffmanCode:1001101 1697's HuffmanCode:100111 1714's HuffmanCode:101000 1760's HuffmanCode:101001 1768's HuffmanCode:101010 1769's HuffmanCode:101011 1778's HuffmanCode:101100 1782's HuffmanCode:101101 433's HuffmanCode:10111000 228's HuffmanCode:101110010 230's HuffmanCode:101110011 924's HuffmanCode:1011101 1819's HuffmanCode:101111 1828's HuffmanCode:110000 936's HuffmanCode:1100010 944's HuffmanCode:1100011 1895's HuffmanCode:110010 465's HuffmanCode:11001100 491's HuffmanCode:11001101 971's HuffmanCode:1100111 1991's HuffmanCode:110100 2007's HuffmanCode:110101 999's HuffmanCode:1101100 1019's HuffmanCode:1101101 2049's HuffmanCode:110111 2051's HuffmanCode:111000 2082's HuffmanCode:111001 1039's HuffmanCode:1110100 114's HuffmanCode:1110101000 53's HuffmanCode:11101010010 13's HuffmanCode:1110101001100 15's HuffmanCode:1110101001101 16's HuffmanCode:1110101001110 24's HuffmanCode:1110101001111 269's HuffmanCode:111010101 539's HuffmanCode:11101011 2144's HuffmanCode:111011 1054's HuffmanCode:1111000 1101's HuffmanCode:1111001 2168's HuffmanCode:111101 2184's HuffmanCode:111110 2190's HuffmanCode:111111
六、算法分析:
该过程从包含其所表示符号的概率的叶节点开始。然后,该过程获取最小两个节点(最小概率),并创建一个将这两个节点作为子节点的新内部节点。新节点的权重设置为子节点权重的总和。然后,我们在新的内部节点和其余节点(即,排除两个叶节点)上再次应用该过程,重复此过程,直到仅剩下一个节点为止,这是霍夫曼树的根。
该构造算法使用优先级队列(最小堆),其中概率最低的节点被赋予最高优先级:
- 为每个符号创建一个叶节点,并将其添加到优先级队列。
- 当队列中有多个节点时:
- 从队列中删除优先级最高(概率最低)的两个节点
- 创建一个新的内部节点,并将这两个节点作为子节点,并且其概率等于两个节点的概率之和。
- 将新节点添加到队列中。
- 其余节点是根节点,树已完成。
由于高效的优先级队列数据结构每次插入需要O(log n)时间,并且具有n个叶子的树具有2 n -1个节点,因此该算法以O(n log n)时间进行操作,算法的空间复杂度为哈弗曼树的内存和存储哈夫曼树节点的最小堆。