In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters ‘a‘, ‘x‘, ‘u‘ and ‘z‘ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a‘=0, ‘x‘=10, ‘u‘=110, ‘z‘=111}, or in another way as {‘a‘=1, ‘x‘=01, ‘u‘=001, ‘z‘=000}, both compress the string into 14 bits. Another set of code can be given as {‘a‘=0, ‘x‘=11, ‘u‘=100, ‘z‘=101}, but {‘a‘=0, ‘x‘=01, ‘u‘=011, ‘z‘=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i]
is a character chosen from {‘0‘ - ‘9‘, ‘a‘ - ‘z‘, ‘A‘ - ‘Z‘, ‘_‘}, and f[i]
is the frequency of c[i]
and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i]
is the i
-th character and code[i]
is an non-empty string of no more than 63 ‘0‘s and ‘1‘s.
Output Specification:
For each test case, print in each line either "Yes" if the student‘s submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
//构建哈夫曼树求最小长度 //构建最小堆 #include <stdio.h> #include <stdlib.h> #include <string.h> #define MIN 0 //保存编码和频率 typedef struct CodeFrequency* CF; struct CodeFrequency{ char code; unsigned long frequency; }; typedef char DataType; typedef struct HTreeNode* HuffmanTree; struct HTreeNode{ DataType data; int weight; HuffmanTree left; HuffmanTree right; }; typedef HuffmanTree ElemType; typedef struct HeapStruct *MinHeap; struct HeapStruct{ ElemType* data; int size;//大小 int capacity;//容量 }; MinHeap CreateHeap(int size){ MinHeap H = (MinHeap)malloc(sizeof(struct HeapStruct)); H->data = (ElemType*)malloc((size+1)*sizeof(ElemType)); H->size = 0; H->capacity = size; H->data[0] = (ElemType)malloc(sizeof(struct HTreeNode)); H->data[0]->weight = MIN; return H; } int IsFull(MinHeap H){ if(H && H->size == H->capacity){ return 1; } return 0; } void Insert(MinHeap H, ElemType item){ int i; if(IsFull(H)){ return; } i = ++H->size; for( ; H->data[i/2]->weight > item->weight; i/=2){ H->data[i] = H->data[i/2]; } H->data[i] = item; } int IsEmpty(MinHeap H){ if(H->size == 0){ return 1; } return 0; } ElemType DeleteMin(MinHeap H){ if(IsEmpty(H)){ return NULL; } ElemType min, X; min = H->data[1]; X = H->data[H->size--]; int parent, child; for(parent = 1; parent * 2 <= H->size; parent = child){ child = parent*2; if((child != H->size) && (H->data[child]->weight >= H->data[child+1]->weight)) child++; /* child指向左右子结点的较小者 */ if( X->weight <= H->data[child]->weight ){ break; /* 找到了合适位置 */ } else /* 下滤X */ H->data[parent] = H->data[child]; } H->data[parent] = X; return min; } HuffmanTree GetHuffmanTree(MinHeap H){ int i; HuffmanTree T; while(H->size > 1){ T = (HuffmanTree)malloc(sizeof(struct HTreeNode)); memset(T, 0, sizeof(struct HTreeNode)); T->left = DeleteMin(H); T->right = DeleteMin(H); T->weight = T->left->weight + T->right->weight; Insert(H, T); } T = DeleteMin(H); return T; } //在最小堆的基础上构Haffman树 HuffmanTree BuildHuffmanTree(MinHeap H, int N, CF* cf){ *cf = (CF)malloc(N*sizeof(struct CodeFrequency)); memset(*cf, 0, N*sizeof(struct CodeFrequency)); for(int i = 0; i < N; ++i){ ElemType item = (ElemType)malloc(sizeof(struct HTreeNode)); item->left = NULL; item->right = NULL; if(i == 0){ scanf("%c %d",&item->data, &item->weight); } else{ scanf(" %c %d",&item->data, &item->weight); } (*cf)[i].code = item->data; (*cf)[i].frequency = item->weight; Insert(H, item); } return GetHuffmanTree(H); } //中序遍历哈夫曼树求得编码的最小长度 void InOrder(HuffmanTree root, int pathLen, int *totalLen){ if(root->left == NULL && root->right == NULL){ (*totalLen) += pathLen*root->weight; return; } if(root->left){ InOrder(root->left, pathLen+1, totalLen); } if(root->right){ InOrder(root->right, pathLen+1, totalLen); } } typedef struct TreeNode* Tree; struct TreeNode{ Tree left; Tree right; }; //插入树,传入根结点root,字符ch,编码s Tree TreeInsert(Tree root, char ch, char* code, int* flag){ //如果最后一个结点不是新分配出来的就不符合前缀码的特征 if(root != NULL && *code == ‘\0‘) { *flag = 0; return root; } if(root == NULL){ root = (Tree)malloc(sizeof(struct TreeNode)); root->left = NULL; root->right = NULL; } //编码0代表左子结点,编码1代表右子节点 if(*code == ‘0‘){ root->left = TreeInsert(root->left, ch, code + 1, flag); } else if(*code == ‘1‘){ root->right = TreeInsert(root->right, ch, code + 1, flag); } return root; } //检查一个学生的答案 int SingleCheck(int N, unsigned long totalLen, CF *cf){ Tree root = NULL; int flag = 1; unsigned long len = 0; for(int i = 0; i < N; ++i){ char ch; char code[64]; scanf("\n"); scanf("%c", &ch); scanf("%s", code); if(flag){ root = TreeInsert(root, ch, code, &flag); } len += strlen(code)*((*cf)[i].frequency); } if(totalLen != len){ flag = 0; } return flag; } //检查答案 void CheckAnswers(int N, unsigned long totalLen, CF* cf){ int M; scanf("%d", &M); for(int i = 0; i < M; ++i){ if(SingleCheck(N, totalLen, cf)) { printf("Yes\n"); } else{ printf("No\n"); } } } //解题思路 //1.判断编码是不是最小长度(保证长度最小) //2.判断编码是不是前缀码(保证唯一性) int main(){ int N; CF cf = NULL; scanf("%d\n", &N); MinHeap H = CreateHeap(N); //构建HaffmanTreee 求得最小长度 HuffmanTree huffman = BuildHuffmanTree(H, N, &cf); //中序遍历求得最小长度 int totalLen = 0; InOrder(huffman, 0, &totalLen); //检查答案 CheckAnswers(N, totalLen, &cf); return 0; }