05-树9 Huffman Codes (30 分)

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters ‘a‘, ‘x‘, ‘u‘ and ‘z‘ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a‘=0, ‘x‘=10, ‘u‘=110, ‘z‘=111}, or in another way as {‘a‘=1, ‘x‘=01, ‘u‘=001, ‘z‘=000}, both compress the string into 14 bits. Another set of code can be given as {‘a‘=0, ‘x‘=11, ‘u‘=100, ‘z‘=101}, but {‘a‘=0, ‘x‘=01, ‘u‘=011, ‘z‘=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2N63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

 

c[1] f[1] c[2] f[2] ... c[N] f[N]
 

where c[i] is a character chosen from {‘0‘ - ‘9‘, ‘a‘ - ‘z‘, ‘A‘ - ‘Z‘, ‘_‘}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]
 

where c[i] is the i-th character and code[i] is an non-empty string of no more than 63 ‘0‘s and ‘1‘s.

Output Specification:

For each test case, print in each line either "Yes" if the student‘s submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
 
结尾无空行

Sample Output:

Yes
Yes
No
No
 
结尾无空行
提交代码:
//构建哈夫曼树求最小长度 
//构建最小堆
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MIN 0

//保存编码和频率
typedef struct CodeFrequency* CF;
struct CodeFrequency{
    char code;
    unsigned long frequency;
};

typedef char DataType;
typedef struct HTreeNode* HuffmanTree;
struct HTreeNode{
    DataType data;
    int weight;
    HuffmanTree left;
    HuffmanTree right;
};

typedef HuffmanTree ElemType;
typedef struct HeapStruct *MinHeap;
struct HeapStruct{
    ElemType* data;
    int size;//大小
    int capacity;//容量
};

MinHeap CreateHeap(int size){
    MinHeap H = (MinHeap)malloc(sizeof(struct HeapStruct));
    H->data = (ElemType*)malloc((size+1)*sizeof(ElemType));
    H->size = 0;
    H->capacity = size;
    H->data[0] = (ElemType)malloc(sizeof(struct HTreeNode));
    H->data[0]->weight = MIN;
    return H;
}
int IsFull(MinHeap H){
    if(H && H->size == H->capacity){
        return 1;
    }
    return 0;
}

void Insert(MinHeap H, ElemType item){
    int i;
    if(IsFull(H)){
        return;
    }
    i = ++H->size;
    for( ; H->data[i/2]->weight > item->weight; i/=2){
        H->data[i] = H->data[i/2];
    }
    H->data[i] = item;
}

int IsEmpty(MinHeap H){
    if(H->size == 0){
        return 1;
    }
    return 0;
}


ElemType DeleteMin(MinHeap H){
    if(IsEmpty(H)){
        return NULL;
    }
    ElemType min, X;
    min = H->data[1];
    X = H->data[H->size--];
    int parent, child;
    for(parent = 1; parent * 2 <= H->size; parent = child){
        child = parent*2;
        if((child != H->size) && (H->data[child]->weight >= H->data[child+1]->weight))
            child++;  /* child指向左右子结点的较小者 */
        if( X->weight <= H->data[child]->weight ){
            break; /* 找到了合适位置 */
        } 
        else  /* 下滤X */
            H->data[parent] = H->data[child];
    }
    H->data[parent] = X;
    return min;
}

HuffmanTree GetHuffmanTree(MinHeap H){
    int i;
    HuffmanTree T;
    while(H->size > 1){
        T = (HuffmanTree)malloc(sizeof(struct HTreeNode));
        memset(T, 0, sizeof(struct HTreeNode));
        T->left = DeleteMin(H);
        T->right = DeleteMin(H);
        T->weight = T->left->weight + T->right->weight;
        Insert(H, T);
    }
    T = DeleteMin(H);
    return T;
}

//在最小堆的基础上构Haffman树
HuffmanTree BuildHuffmanTree(MinHeap H, int N, CF* cf){
    *cf = (CF)malloc(N*sizeof(struct CodeFrequency));
    memset(*cf, 0, N*sizeof(struct CodeFrequency));
    for(int i = 0; i < N; ++i){
        ElemType item = (ElemType)malloc(sizeof(struct HTreeNode));
        item->left = NULL;
        item->right = NULL;
        if(i == 0){
            scanf("%c %d",&item->data, &item->weight);
        }
        else{
            scanf(" %c %d",&item->data, &item->weight);
        }
        (*cf)[i].code = item->data;
        (*cf)[i].frequency = item->weight;
        Insert(H, item);
    }
    return GetHuffmanTree(H);
}

//中序遍历哈夫曼树求得编码的最小长度
void InOrder(HuffmanTree root, int pathLen, int *totalLen){
    if(root->left == NULL && root->right == NULL){
        (*totalLen) += pathLen*root->weight;
        return;
    }
    if(root->left){
        InOrder(root->left, pathLen+1, totalLen);
    }
    if(root->right){
        InOrder(root->right, pathLen+1, totalLen);
    }
}

typedef struct TreeNode* Tree;
struct TreeNode{
    Tree left;
    Tree right;
};

//插入树,传入根结点root,字符ch,编码s
Tree TreeInsert(Tree root, char ch, char* code, int* flag){
    //如果最后一个结点不是新分配出来的就不符合前缀码的特征
    if(root != NULL && *code == \0)
    {
        *flag = 0;
        return root;
    }
    if(root == NULL){
            root = (Tree)malloc(sizeof(struct TreeNode));
            root->left = NULL;
            root->right = NULL;
    }
    //编码0代表左子结点,编码1代表右子节点
    if(*code == 0){
        root->left = TreeInsert(root->left, ch, code + 1, flag);
    }
    else if(*code == 1){
        root->right = TreeInsert(root->right, ch, code + 1, flag);
    }
    return root;
}

//检查一个学生的答案
int SingleCheck(int N, unsigned long totalLen, CF *cf){
    Tree root = NULL;
    int flag = 1;
    unsigned long len = 0;
    for(int i = 0; i < N; ++i){
        char ch;
        char code[64];
        scanf("\n");
        scanf("%c", &ch);
        scanf("%s", code);
        if(flag){
            root = TreeInsert(root, ch, code, &flag);
        }
        len += strlen(code)*((*cf)[i].frequency);
    }
    if(totalLen != len){
        flag = 0;
    }
    return flag;
}

//检查答案
void CheckAnswers(int N, unsigned long totalLen, CF* cf){
    int M;
    scanf("%d", &M);
    for(int i = 0; i < M; ++i){
        if(SingleCheck(N, totalLen, cf))
        {
            printf("Yes\n");
        }
        else{
            printf("No\n");
        }
    }
}

//解题思路
//1.判断编码是不是最小长度(保证长度最小)
//2.判断编码是不是前缀码(保证唯一性)
int main(){
    int N;
    CF cf = NULL;
    scanf("%d\n", &N);
    MinHeap H = CreateHeap(N);
    //构建HaffmanTreee 求得最小长度
    HuffmanTree huffman = BuildHuffmanTree(H, N, &cf);
    //中序遍历求得最小长度
    int totalLen = 0;
    InOrder(huffman, 0, &totalLen);
    //检查答案
    CheckAnswers(N, totalLen, &cf);
    
    return 0;
}

 

 

05-树9 Huffman Codes (30 分)

上一篇:打印堆栈dump_stack


下一篇:大数据&人工智能的“淘宝平台”来了:突破围城,连接生态