Huffman Coding 哈夫曼树

一、实验名称:Huffman Coding

二、实验目的:

  1. 熟练掌握哈夫曼树的数据结构,结构的特点;
  2. 能够实现哈夫曼树的基本操作:如构造,插入等
  3. 利用最小堆降低哈夫曼树的时间复杂度。
  4. 熟练掌握最小堆的数据结构,结构的特点;
  5. 能够实现最小堆的基本操作:如构造,插入,删除等

三、实验内容:

  1. Determine the data structures for a binary Huffman tree

    //采取树的结构构造哈夫曼树
    //采取数组的结构构造最小堆
    typedef struct TreeNode*Huffman;
    typedef  struct TreeNode** heap;
    struct TreeNode
    {
        int data;
        Huffman left;
        Huffman right;
    };
    
  2. Implement the algorithm of creating a binary Huffman tree for a given postive integer sequence

    输入一段序列,利用该序列数组构造最小堆,以便于哈夫曼树的构造。

    //利用序列构造最小堆
    void BuildMinHeap(heap h,int size);
    //利用最小堆构造哈夫曼树
    Huffman BuildHuffman(heap h,int size);
    
  3. Design and implement a Huffman coding prototype

    //采用深搜的方法打印所有的哈夫曼编码
    void getHuffmanCode(Huffman h,char* codeStore,int* index);
    

四. 程序清单(较为详细的注释):

#include <stdio.h>
#include <stdlib.h>
typedef struct TreeNode*Huffman;
typedef  struct TreeNode** heap;
struct TreeNode
{
    int data;
    Huffman left;
    Huffman right;
};
//向下过滤,在子树都为最小堆的前提下,找到节点的合适位置,是以该节点为根节点的树是一个最小堆
void FixDown(heap h,int size,int index){
    int parent;
    int child;
    Huffman temp=h[index];
    for(parent=index;parent*2<=size;parent=child){
        child=parent*2;
        if(child+1<=size&&h[child+1]->data<h[child]->data)
            child++;
        if(h[child]->data<temp->data){
            h[parent]=h[child];
        }
        else break;
    }
    h[parent]=temp;
}
//采用递归的思想,从下往上建立最小堆,保证了目前所建树的子树都为最小堆。
void BuildMinHeap(heap h,int size){
    for(int i=size/2;i>=1;i--){
        FixDown(h,size,i);
    }
}
//返回并删除最小堆的最小值(即头节点),保持树仍为最小堆
/*将最后一个节点挪动到头节点处,并size--,
因为此时子树均为最小堆,直接向下过滤即可保证该树仍为最小堆*/
Huffman PopMinHeap(heap h,int* size){
    Huffman result=h[1];
    h[1]=h[(*size)--];
    //向下过滤
    FixDown(h,*size,1);
    return result;
}
//插入节点值,保持树仍为最小堆
/**/
void InsertMinHeap(heap h,int* size,Huffman node){
    h[++(*size)]=node;
    //向上过滤
    int child=*size;
    int parent;
    for(;child>1;child=parent){
        parent=child/2;
        if(h[parent]->data>node->data){
            h[child]=h[parent];
        }
        else
        {
            break;
        }
        
    }
    h[child]=node;
}
//建立哈夫曼树
Huffman BuildHuffman(heap h,int size){
    Huffman node=(Huffman)malloc(sizeof(struct TreeNode));
    Huffman fristNode,lastNode;
    int time=size-1;
    for(int i=0;i<time;i++){//进行size-1次合并
        fristNode=PopMinHeap(h,&size);
        lastNode=PopMinHeap(h,&size);
        Huffman node=(Huffman)malloc(sizeof(struct TreeNode));
        node->data=fristNode->data+lastNode->data;
        node->left=fristNode;
        node->right=lastNode;
        InsertMinHeap(h,&size,node);
    }
    return PopMinHeap(h,&size);
    
}
//以“结点值(左子树)(右子树)“的形式打印树,证明它是哈夫曼树
void print(Huffman h){
    if(h){
        printf("%d(",h->data);
        print(h->left);
        printf(")(");
        print(h->right);
        printf(")");
    }
}
//释放哈弗曼树空间
void deleteHuff(Huffman h){
    if(h){
        deleteHuff(h->left);
        deleteHuff(h->right);
        free(h);
    }
}
//采用深搜的方法打印所有的哈夫曼编码
void getHuffmanCode(Huffman h,char* codeStore,int* index){
    if(!h)return;
    if(h->left==NULL&&h->right==NULL){
        printf("\n%d's HuffmanCode:",h->data);
        codeStore[*index]='\0';
        printf("%s",codeStore);
    }
    else{
        codeStore[(*index)++]='0';
        getHuffmanCode(h->left,codeStore,index);
        --(*index);//回溯
        codeStore[(*index)++]='1';
        getHuffmanCode(h->right,codeStore,index);
        --(*index);//回溯
    }
}
int main() {
    int n;//输入序列的个数;
    printf("Please enter the number of sequences:\n");
    scanf("%d",&n);
    heap MinHeap=(heap)malloc(sizeof(struct TreeNode*)*(2*n));//保证堆有足够的空间
    //建立最小堆
    printf("Please enter the sequence one by one:\n");
    for(int i=1;i<=n;i++){
        MinHeap[i]=(Huffman)malloc(sizeof(struct TreeNode));
        scanf("%d",&MinHeap[i]->data);
        MinHeap[i]->left=NULL;
        MinHeap[i]->right=NULL;
    }
    BuildMinHeap(MinHeap,n);
    // for(int i=1;i<=n;i++){
    //     printf("%d ",MinHeap[i]->data);
    // }
    //建立哈夫曼树
    Huffman huff=BuildHuffman(MinHeap,n);
    //打印哈夫曼树,格式为“父结点(左子树)(右子树)"以证明其为哈夫曼树
    printf("Print Huffman Tree\n");
    print(huff);
    //输出所有原输入数的哈夫曼编码
    printf("\nOutput the Huffman code of all original input numbers");
    char* codeStore=(char*)malloc(sizeof(char)*n);
    int index=0;
    getHuffmanCode(huff,codeStore,&index);
    free(codeStore);
    //内存释放工作
    free(MinHeap);
    deleteHuff(huff);
    return 0;
}#include <stdio.h>
#include <stdlib.h>
//在二叉树数据结构基础上添加高度成员.
struct ALVnode
{
    int val;
    struct ALVnode* left;
    struct ALVnode* right;
    int high;
};
typedef struct ALVnode* ALV;
int getHigh(ALV tree){
    if(!tree) return 0;
    int i=getHigh(tree->left);
    int j=getHigh(tree->right);
    return i>j?i+1:j+1;
}
/*将root与其左节点做左单旋(右转),因为root和其左节点的树高发生改变
,于是需要更新两者的高度,将根结点指向新的根节点(即左节点)并返回*/
ALV LeftLeft(ALV root){
    ALV temp=root;
    root=root->left;
    temp->left=root->right;
    root->right=temp;
    //更新树高
    temp->high=getHigh(temp);
    root->high=getHigh(root);
    return root;
}
/*将root与其右节点做右单旋(左转),因为root和其左节点的树高发生改变
,于是需要更新两者的高度,将根结点指向新的根节点(即右节点)并返回*/
ALV RightRight(ALV root){
    ALV temp=root;
    root=root->right;
    temp->right=root->left;
    root->left=temp;
    //更新树高
    temp->high=getHigh(temp);
    root->high=getHigh(root);
    return root;
}
/*将root的左子树进行右单旋可以使插入结点转移到左子树的左子树上,此时再进行root整体的左单旋*/
ALV LeftRight(ALV root){
    root->left=RightRight(root->left);
    root=LeftLeft(root);
    return root;
}
/*将root的右节点进行左单旋可以使插入结点转移到右子树的右子树上,此时再进行root整体的右单旋*/
ALV RightLeft(ALV root){
    root->right=LeftLeft(root->right);
    root=RightRight(root);
    return root;
}
ALV insert(ALV root,int val){
    if(root==NULL){//此时val是第一个数
        struct ALVnode*newNode=(struct ALVnode*)malloc(sizeof(struct ALVnode));
        newNode->val=val;
        newNode->high=1;
        newNode->left=NULL;
        newNode->right=NULL;
        root=newNode;//root指向新节点
    }
    else{
        //数值大于root的值插入到右边,小于插入到左边,等于插入无效(无法插入)
        if(val<root->val){
            root->left=insert(root->left,val);
            if((root->right==NULL&&root->left->high==2)||(root->right!=NULL&&root->left->high-root->right->high==2)){
                if(val<root->left->val)
                    root=LeftLeft(root);//左单旋
                else
                {
                    root=LeftRight(root);//左右双旋
                }
                
            }
        
        }
        else if(val>root->val){
            root->right=insert(root->right,val);
            if((root->left==NULL&&root->right->high==2)||(root->left!=NULL&&root->left->high-root->right->high==-2)){
                if(val<root->right->val)
                    root=RightLeft(root);//右左双旋
                else
                {
                    root=RightRight(root);//右单旋
                }
                
            }
        }
        //更新树高
        root->high=getHigh(root);
    }
    return root;
}
/*回收树的内存*/
void clear(ALV root){
    if(!root)return;
    clear(root->left);
    clear(root->right);
    free(root);
}
//以“结点值(左子树)(右子树)“的形式打印树,证明它是AVL树
void print(ALV root){
    if(!root)return;
    printf("%d",root->val);
    printf("(");
    print(root->left);
    printf(")");
    printf("(");
    print(root->right);
    printf(")");
}
int main(){
    int num;//要插入的结点数目
    ALV head=NULL;
    scanf("%d",&num);
    int temp;//用来存储要插入的数
    for(int  i=0;i<num;i++){
        scanf("%d",&temp);
        head=insert(head,temp);
    }
    //以“结点值(左子树)(右子树)“的形式打印树,证明它是AVL树
    print(head);
    clear(head);
    return 0;
}

五、测试结果:

输入输出格式说明:

  • 输入格式:
    按照显示提示先输入序列个数;
    依次输入序列元素;

  • 输出格式:
    以“结点值(左子树)(右子树)“的形式打印树,证明它是AVL树

  • 测试样例1:

    输入:

    5
    5 4 3 2 1
    

    输出:

    Please enter the number of sequences:
    5
    Please enter the sequence one by one:
    5 4 3 2 1
    Print Huffman Tree
    15(6(3()())(3(1()())(2()())))(9(4()())(5()()))
    Output the Huffman code of all original input numbers
    3's HuffmanCode:00
    1's HuffmanCode:010
    2's HuffmanCode:011
    4's HuffmanCode:10
    
  • 测试样例2:

    输入:

    9
    33 22 11 44 55 6 43 22 11
    

    输出:

    Please enter the number of sequences:
    9 
    Please enter the sequence one by one:
    33 22 11 44 55 6 43 22 11
    Print Huffman Tree
    247(99(44()())(55()()))(148(61(28(11()())(17(6()())(11()())))(33()()))(87(43()())(44(22()())(22()()))))
    Output the Huffman code of all original input numbers
    44's HuffmanCode:00
    55's HuffmanCode:01
    11's HuffmanCode:1000
    6's HuffmanCode:10010
    11's HuffmanCode:10011
    33's HuffmanCode:101
    43's HuffmanCode:110
    22's HuffmanCode:1110
    22's HuffmanCode:1111
    
  • 测试样例3:

    输入:

    7
    88 70 61 96 120 90 65
    

    输出:

    Please enter the number of sequences:
    7
    Please enter the sequence one by one:
    88 70 61 96 120 90 65
    Print Huffman Tree
    590(246(120()())(126(61()())(65()())))(344(158(70()())(88()()))(186(90()())(96()())))
    Output the Huffman code of all original input numbers
    120's HuffmanCode:00
    61's HuffmanCode:010
    65's HuffmanCode:011
    70's HuffmanCode:100
    88's HuffmanCode:101
    90's HuffmanCode:110
    96's HuffmanCode:111
    
  • 测试样例4:

    输入:

    0
    

    输出:

  • 测试样例5:(随机数样例)

    输入:

    100
    1610 1280 299 53 1760 1677 149 1769 1325 1991 721 1451 2168 465 824 1517 1162 944 1782 69 1188 1112 16 148 1418 1429 1675 1232 2184 775 761 1714 13 623 1399 971 112 1299 1778 1819 1199 1463 924 603 356 1462 230 1828 320 2218 1429 24 1697 936 640 1462 1510 150 114 1101 206 194 357 1895 999 2082 15 320 2051 228 1325 216 741 1768 69 1311 1019 150 2190 824 210 790 788 2144 1039 1154 269 83 1521 491 1238 539 2007 1054 433 1186 855 1450 833 2049
    

    输出:

    Please enter the number of sequences:
    100
    Please enter the sequence one by one:
    1610 1280 299 53 1760 1677 149 1769 1325 1991 721 1451 2168 465 824 1517 1162 944 1782 69 1188 1112 16 148 1418 1429 1675 1232 2184 775 761 1714 13 623 1399 971 112 1299 1778 1819 1199 1463 924 603 356 1462 230 1828 320 2218 1429 24 1697 936 640 1462 1510 150 114 1101 206 194 357 1895 999 2082 15 320 2051 228 1325 216
    741 1768 69 1311 1019 150 2190 824 210 790 788 2144 1039 1154 269 83 1521 491 1238 539 2007 1054 433 1186 855 1450 833 2049
    Print Huffman Tree
    102441(42410(19313(9208(4484(2218()())(2266(1112()())(1154()())))(4724(2348(1162()())(1186()()))(2376(1188(585(286(138(69()())(69()()))(148()()))(299(149()())(150()())))(603()()))(1188()()))))(10105(4911(2431(1199()())(1232()()))(2480(1238()())(1242(619(299()())(320()()))(623()()))))(5194(2579(1280()())(1299()()))(2615(1304(640()())(664(320()())(344(150()())(194()()))))(1311()())))))(23097(11209(5467(2650(1325()())(1325()()))(2817(1399()())(1418()())))(5742(2858(1429()())(1429()()))(2884(1434(713(356()())(357()()))(721()()))(1450()()))))(11888(5838(2913(1451()())(1462()()))(2925(1462()())(1463()())))(6050(3012(1502(741()())(761()()))(1510()()))(3038(1517()())(1521()()))))))(60031(27380(13175(6438(3173(1563(775()())(788()()))(1610()()))(3265(1614(790()())(824()()))(1651(824()())(827(401(195(83()())(112()()))(206()()))(426(210()())(216()()))))))(6737(3352(1675()())(1677()()))(3385(1688(833()())(855()()))(1697()()))))(14205(7011(3474(1714()())(1760()()))(3537(1768()())(1769()())))(7194(3560(1778()())(1782()()))(3634(1815(891(433()())(458(228()())(230()())))(924()()))(1819()())))))(32651(15595(7530(3708(1828()())(1880(936()())(944()())))(3822(1895()())(1927(956(465()())(491()()))(971()()))))(8065(3998(1991()())(2007()()))(4067(2018(999()())(1019()()))(2049()()))))(17056(8359(4133(2051()())(2082()()))(4226(2082(1039()())(1043(504(235(114()())(121(53()())(68(28(13()())(15()()))(40(16()())(24()())))))(269()()))(539()())))(2144()())))(8697(4323(2155(1054()())(1101()()))(2168()()))(4374(2184()())(2190()()))))))
    Output the Huffman code of all original input numbers
    2218's HuffmanCode:00000
    1112's HuffmanCode:000010
    1154's HuffmanCode:000011
    1162's HuffmanCode:000100
    1186's HuffmanCode:000101
    69's HuffmanCode:0001100000
    69's HuffmanCode:0001100001
    148's HuffmanCode:000110001
    149's HuffmanCode:000110010
    150's HuffmanCode:000110011
    603's HuffmanCode:0001101
    1188's HuffmanCode:000111
    1199's HuffmanCode:001000
    1232's HuffmanCode:001001
    1238's HuffmanCode:001010
    299's HuffmanCode:00101100
    320's HuffmanCode:00101101
    623's HuffmanCode:0010111
    1280's HuffmanCode:001100
    1299's HuffmanCode:001101
    640's HuffmanCode:0011100
    320's HuffmanCode:00111010
    150's HuffmanCode:001110110
    194's HuffmanCode:001110111
    1311's HuffmanCode:001111
    1325's HuffmanCode:010000
    1325's HuffmanCode:010001
    1399's HuffmanCode:010010
    1418's HuffmanCode:010011
    1429's HuffmanCode:010100
    1429's HuffmanCode:010101
    356's HuffmanCode:01011000
    357's HuffmanCode:01011001
    721's HuffmanCode:0101101
    1450's HuffmanCode:010111
    1451's HuffmanCode:011000
    1462's HuffmanCode:011001
    1462's HuffmanCode:011010
    1463's HuffmanCode:011011
    741's HuffmanCode:0111000
    761's HuffmanCode:0111001
    1510's HuffmanCode:011101
    1517's HuffmanCode:011110
    1521's HuffmanCode:011111
    775's HuffmanCode:1000000
    788's HuffmanCode:1000001
    1610's HuffmanCode:100001
    790's HuffmanCode:1000100
    824's HuffmanCode:1000101
    824's HuffmanCode:1000110
    83's HuffmanCode:1000111000
    112's HuffmanCode:1000111001
    206's HuffmanCode:100011101
    210's HuffmanCode:100011110
    216's HuffmanCode:100011111
    1675's HuffmanCode:100100
    1677's HuffmanCode:100101
    833's HuffmanCode:1001100
    855's HuffmanCode:1001101
    1697's HuffmanCode:100111
    1714's HuffmanCode:101000
    1760's HuffmanCode:101001
    1768's HuffmanCode:101010
    1769's HuffmanCode:101011
    1778's HuffmanCode:101100
    1782's HuffmanCode:101101
    433's HuffmanCode:10111000
    228's HuffmanCode:101110010
    230's HuffmanCode:101110011
    924's HuffmanCode:1011101
    1819's HuffmanCode:101111
    1828's HuffmanCode:110000
    936's HuffmanCode:1100010
    944's HuffmanCode:1100011
    1895's HuffmanCode:110010
    465's HuffmanCode:11001100
    491's HuffmanCode:11001101
    971's HuffmanCode:1100111
    1991's HuffmanCode:110100
    2007's HuffmanCode:110101
    999's HuffmanCode:1101100
    1019's HuffmanCode:1101101
    2049's HuffmanCode:110111
    2051's HuffmanCode:111000
    2082's HuffmanCode:111001
    1039's HuffmanCode:1110100
    114's HuffmanCode:1110101000
    53's HuffmanCode:11101010010
    13's HuffmanCode:1110101001100
    15's HuffmanCode:1110101001101
    16's HuffmanCode:1110101001110
    24's HuffmanCode:1110101001111
    269's HuffmanCode:111010101
    539's HuffmanCode:11101011
    2144's HuffmanCode:111011
    1054's HuffmanCode:1111000
    1101's HuffmanCode:1111001
    2168's HuffmanCode:111101
    2184's HuffmanCode:111110
    2190's HuffmanCode:111111
    

六、算法分析:

该过程从包含其所表示符号的概率的叶节点开始。然后,该过程获取最小两个节点(最小概率),并创建一个将这两个节点作为子节点的新内部节点。新节点的权重设置为子节点权重的总和。然后,我们在新的内部节点和其余节点(即,排除两个叶节点)上再次应用该过程,重复此过程,直到仅剩下一个节点为止,这是霍夫曼树的根。

该构造算法使用优先级队列(最小堆),其中概率最低的节点被赋予最高优先级:

  1. 为每个符号创建一个叶节点,并将其添加到优先级队列。
  2. 当队列中有多个节点时:
    1. 从队列中删除优先级最高(概率最低)的两个节点
    2. 创建一个新的内部节点,并将这两个节点作为子节点,并且其概率等于两个节点的概率之和。
    3. 将新节点添加到队列中。
  3. 其余节点是根节点,树已完成。

由于高效的优先级队列数据结构每次插入需要O(log n)时间,并且具有n个叶子的树具有2 n -1个节点,因此该算法以O(n log n)时间进行操作,算法的空间复杂度为哈弗曼树的内存和存储哈夫曼树节点的最小堆。

上一篇:实验三 串及应用


下一篇:【hust数据结构】huffman树及编码/解码实验