参考了本文:http://www.cnblogs.com/xulb597/archive/2012/07/05/2578562.html
Trie.hpp
#ifndef TRIE #define TRIE #include "LinkedList.hpp" #include <stdlib.h> #include <iostream> #include <string> #define BRANCH_SIZE 60 #define START_CHAR ‘A‘ #define INDEX(x) x - START_CHAR class Trie { public: Trie() { rootNode = new Trie_Node(); memset(nodeMap, NULL, sizeof(nodeMap)); } ~Trie(){} void insert(const char *data, const int i) { bool flag_start = false; Trie_Node *location = rootNode; while (*data) { int index = INDEX(*data); if(index < 0) { data++; continue; } if(location->next[index] == NULL) { location->next[index] = getNode(index); } if(!flag_start) location->isStart = flag_start = true; location = location->next[index]; location->indexList->add(index); data++; } location->isEnd = true; } int match(char *data) { Trie_Node *location = rootNode; while (*data && location) { location = location->next[INDEX(*data)]; data++; } return (location != NULL && location->isEnd); } void match(char *data, int* indexMap) { int index = INDEX(*data); Trie_Node *location = rootNode; while (*data && location) { location = location->next[INDEX(*data)]; data++; } if(location != NULL) fillIndexArray(indexMap, location); } void fuzzy_match(char *data, int* indexMap) { int index = INDEX(*data); Trie_Node *location = nodeMap[index]; if(location) data++; // consume on character while (*data && location) { location = location->next[INDEX(*data)]; data++; /*if(location != NULL) fillPriority(indexMap, location);*/ } if(location != NULL) fillIndexArray(indexMap, location); } private: struct Trie_Node { bool isStart, isCapital, isEnd; Trie_Node *next[BRANCH_SIZE]; LinkedList<int> *indexList; Trie_Node():isEnd(false) { memset(next, NULL, sizeof(next)); indexList = new LinkedList<int>(); }; }; Trie_Node *rootNode; // // a map to hold all created Trie_Node. // Trie_Node *nodeMap[BRANCH_SIZE]; // // get a trie node from map. // index: (char - ‘a‘) // Trie_Node *getNode(int index) { //return new Trie_Node(); Trie_Node *tempNode = nodeMap[index]; if(tempNode == NULL) { tempNode = new Trie_Node(); nodeMap[index] = tempNode; } return tempNode; } void fillIndexArray(int* indexMap, Trie_Node *trieNode) { LinkedList<int> *list = trieNode->indexList; int confidence = trieNode->isEnd; Node<int> *node = list->getRoot(); while (node) { indexMap[node->index] = confidence + 1; node = node->next; } } /*void fillPriority(int* indexMap, Trie_Node *trieNode) { LinkedList<int> *list = trieNode->indexList; int confidence = 0; Node<int> *node = list->getRoot(); while (node) { indexMap[node->index] = confidence + 1; node = node->next; } }*/ }; #endif // TRIE
LinkedList.hpp
#ifndef LINKEDLIST #define LINKEDLIST #include <stdlib.h> #include <iostream> template <class T> struct Node { T value; int index; Node *next; Node(T _value, int _index) { value = _value; index = _index; } }; template <class T> class LinkedList { public: int length; LinkedList() { length = 0; root = new Node<T>(NULL, 0); current = root; }; ~LinkedList() { }; void add(T value) { if(length == 0) { root->value = value; root->index = 0; } else { current->next = new Node<T>(value, current->index + 1); current = current->next; } length++; current->next = NULL; }; Node<T> getAt(int index) { Node<T> *node = root; while (node) { if(node->index == index) return node; node = node->next; } return NULL; } Node<T> *getRoot() { return root; } private: Node<T> *root,*current; }; #endif // LINKEDLIST
main.cpp
#include <stdlib.h> #include <iostream> #include <string> #include <fstream> #include <sys/stat.h> #include "trie.hpp" using namespace std; unsigned long get_file_size(const char *path) { unsigned long filesize = -1; struct stat statbuff; if(stat(path, &statbuff) < 0){ return filesize; }else{ filesize = statbuff.st_size; } return filesize; } void readChars(const char* path, char** buff) { long length = get_file_size(path); // 取得文件大小 if(length == -1){ cerr << "content file is invalid!" << endl; system("quit"); return; } //int length = ftell(f); FILE *f = fopen(path, "r"); *buff = new char[length]; fread(*buff, sizeof(char), length, f); fclose(f); } void getInlineChars(char* buff) { char *items = "In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address "; strcpy(buff, items); } void readInlineChars(char* source, char** buff) { *buff = new char[strlen(source)]; strcpy(*buff, source); } void main(char* argv){ Trie t; char* cpy = new char[]; //readChars("contents.txt", &cpy); readInlineChars("abcd AbCd ABCD", &cpy); char *tk = strtok(cpy, " "); int index = 0; t.insert(tk, index++); while (tk = strtok(NULL, " ")) { t.insert(tk, index++); } int *indexMap = new int[index]; t.fuzzy_match("ad", indexMap); for (int i = 0; i < index; i++) { int confidence = indexMap[i]; if(confidence > 0) printf("%i : %i\n",i, confidence); } delete indexMap; system("pause"); }
另一个main.cpp
#include <stdlib.h> #include <iostream> #include <string> #include <fstream> #include <sys/stat.h> #include "trie.hpp" using namespace std; unsigned long get_file_size(const char *path) { unsigned long filesize = -1; struct stat statbuff; if(stat(path, &statbuff) < 0){ return filesize; }else{ filesize = statbuff.st_size; } return filesize; } void readChars(const char* path, char** buff) { long length = get_file_size(path); // 取得文件大小 if(length == -1){ cerr << "content file is invalid!" << endl; system("quit"); return; } //int length = ftell(f); FILE *f = fopen(path, "r"); *buff = new char[length]; fread(*buff, sizeof(char), length, f); fclose(f); } void getInlineChars(char* buff) { char *items = "In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address In the example shown keys are listed in the nodes and values below them Each complete English word has an arbitrary integer value associated with it A trie can be seen as adeterministic finite automaton although the symbol on each edge is often implicit in the order of the branches It is not necessary for keys to be explicitly stored in nodes In the figure words are shown only to illustrate how the trie works Though tries are most commonly keyed by character strings they don‘t need to be The same algorithms can easily be adapted to serve similar functions of ordered lists of any construct eg permutations on a list of digits or shapes In particular a bitwise trie is keyed on the individual bits making up a short fixed size of bits such as an integer number or memory address "; strcpy(buff, items); } void readInlineChars(char* source, char** buff) { *buff = new char[strlen(source)]; strcpy(*buff, source); } void main(char* argv){ Trie t; char* cpy = new char[]; readChars("contents.txt", &cpy); printf("indexing...\n"); //readInlineChars("abcd AbCd ABCD", &cpy); char *tk = strtok(cpy, " "); int index = 0; t.insert(tk, index++); while (tk = strtok(NULL, " ")) { t.insert(tk, index++); } printf("%i words have been indexed.\n", index); label_enter: printf("Please input search keyword:"); string input; cin >> input; const char *chars = input.data(); printf("matching : [%s] ...", chars); int *indexMap = new int[index]; t.fuzzy_match(chars, indexMap); label_change: printf("done.\nPlease input the threshold:"); int threshold = 0; cin >> threshold; printf("calculating the size of matched result (confidence >= %i)...\n", threshold); int count = 0; for (int i = 0; i < index; i++) { int confidence = indexMap[i]; if(confidence > 0) count++; } printf("There are %s words matched. You can now enter the number to continue.\n1.I want to see them all.\n2.Change thresgold\n3.Change keyword.\nq.Exit"); goto label_menu; label_display: for (int i = 0; i < index; i++) { int confidence = indexMap[i]; if(confidence > 0) printf("%i : %i\n",i, confidence); } label_menu: char choose = 0; cin >> choose; switch (choose) { case ‘1‘: goto label_display; break; case ‘2‘: goto label_change; break; case ‘3‘: goto label_enter; break; case ‘q‘: break; default: break; } delete indexMap; system("pause"); }