数据结构-字典树
什么是字典树字典树又称Trie,单词查找树,典型用于统计,查找大量字符串,典型应用有通讯录,前缀搜索,搜索自动补全,打字补全等。如下图所示,树中存储单词cat,dog,deer, pan, panda
BST搜索树的时间复杂度为O(logn), trie的时间复杂度为O(w),w为单词长度
源代码基于TreeMap的存储方案
public class Trie {
private class Node{
public int use;//字符使用次数
public int weight;//某个单词的权重
public TreeMap<Character,Node> next;//映射
public Node(){
next = new TreeMap<>();
}
}
private Node root;//根
private int size;//字典树总单词数
public Trie(){
root = new Node();
size = 0;
}
/**
* 判断某个节点是否是某个单词的结束
* @param node
* @return
*/
private boolean isWord(Node node){
if(null==node)
return false;
return node.weight!=0;
}
// 获得Trie中存储的单词数量
public int size(){
return size;
}
/**
* 添加一个单词
* @param word
*/
public void add(String word){
char[] cs = checkWord(word);
add(cs,cs.length);
}
/**
* 添加一个单词
* @param word 单词
* @param weight 权重
*/
public void add(String word, int weight){
if(0==weight)
throw new RuntimeException("weight must not zero");
char[] cs = checkWord(word);
add(cs,weight);
}
private void add(char[] cs, int weight){
Node cur = root;
for(char c:cs){
if(null==cur.next.get(c)){
cur.next.put(c,new Node());
}
cur = cur.next.get(c);
cur.use++;
}
if(!isWord(cur)){
++size;
cur.weight = weight;
}
}
/**
* 带.的匹配
* @param pattern
* @return
*/
public boolean match(String pattern){
char[] cs = checkWord(pattern);
return match(root,cs,0);
}
private boolean match(Node node,char[] cs,int index){
if(index==cs.length){
return isWord(node);
}
char c = cs[index];
if('.'!=c){
Node cur = node.next.get(c);
if(null==cur)
return false;
return match(cur,cs,index+1);
}else{
for(char ch:node.next.keySet()){
if(match(node.next.get(ch),cs,index+1)){
return true;
}
}
return false;
}
}
/**
* 是否包含某个单词
* @param word
* @return
*/
public boolean contains(String word){
char[]cs = checkWord(word);
Node cur = root;
for(char c:cs){
if(null==cur.next.get(c)){
return false;
}
cur = cur.next.get(c);
}
return isWord(cur);
}
/**
* 查询是否在Trie中有单词以prefix为前缀
* @param prefix
* @return
*/
public boolean isPrefix(String prefix){
char[]cs = checkWord(prefix);
Node cur = root;
for(char c:cs){
if(null==cur.next.get(c)){
return false;
}
cur = cur.next.get(c);
}
return true;
}
private char[] checkWord(String word){
if(null==word||word.isEmpty())
throw new RuntimeException("word must not null or empty");
return word.toCharArray();
}
/**
* 某个单词的权重
* @param word
* @return
*/
public int getWeight(String word){
char[]cs = checkWord(word);
Node cur = root;
for(char c:cs){
if(cur.next.get(c)==null)
throw new RuntimeException("could not find word:"+word);
cur = cur.next.get(c);
}
if(!isWord(cur)){
throw new RuntimeException("could not find word:"+word);
}
return cur.weight;
}
/**
* 找出以prefix为前缀的所有单词的权重和
* @param prefix
* @return
*/
public int sumWeight(String prefix){
char[]cs = checkWord(prefix);
Node cur = root;
for(char c:cs){
if(cur.next.get(c)==null){
return 0;
}
cur = cur.next.get(c);
}
return sumWeight(cur);
}
private int sumWeight(Node node){
int ans = node.weight;
for(char c:node.next.keySet()){
ans+= sumWeight(node.next.get(c));
}
return ans;
}
/**
*
* @return 所有单词
*/
public java.util.Set<String> getWords(){
java.util.Set<String> words = new TreeSet<String>();
getWords(root,words,"");
return words;
}
private void getWords(Node node, Set<String> words,String word) {
if(isWord(node)){
words.add(word);
}
for(char c:node.next.keySet()){
getWords(node.next.get(c),words,word+c);
}
}
/**
* 删除某个单词
* @param word
* @return
*/
public boolean delete(String word){
if(!contains(word)){
throw new RuntimeException("word is not exists");
}
char[] cs = checkWord(word);
Node cur = root;
for(char c:cs){
//如果某个节点只使用了一次,而且只有一个字节点,直接删除即可
if(cur.next.get(c).use==1&&cur.next.size()==1){
cur.next.remove(c);
cur = null;
break;
}
//否则置为下一个节点
cur = cur.next.get(c);
cur.use--;
}
//如果走完了,是一个单词的话,置为不是单词
if(isWord(cur)){
cur.weight = 0;
}
size--;
return true;
}
}
测试代码
public static void main(String[] args) {
Trie trie = new Trie();
trie.add("hello",1);
trie.add("helloooo");
System.out.println(trie.match("he..o"));
trie.add("cat",1);
trie.add("dog",1);
trie.add("deer",1);
trie.add("pan",1);
trie.add("panda",1);
System.out.println(trie.getWords());
System.out.println(trie.getWeight("hello"));
// System.out.println(trie.getWeight("hel"));
System.out.println(trie.sumWeight("hel"));
trie.delete("pan");
//trie.delete("panda");
System.out.println(trie.getWords());
}
基于数组的存储方式
public class Trie {
private class Node{
public Node[] nodes;
public boolean isWord;
public Node(){
nodes = new Node[26];
}
public Node get(char c){
return nodes[c-'a'];
}
public boolean containsKey(char c){
return get(c)!=null;
}
public void put(char c ,Node node){
nodes[c-'a'] = node;
}
}
private int size;
private Node root;
public Trie(){
root = new Node();
}
public void add(String word){
char[] cs = checkWord(word);
Node cur = root;
for(char c:cs){
if(!cur.containsKey(c)){
cur.put(c,new Node());
}
cur = cur.get(c);
}
if(!cur.isWord){
cur.isWord = true;
++size;
}
}
private char[] checkWord(String word){
if(null==word||word.isEmpty())
throw new RuntimeException("word must not null or empty");
char[]cs = word.toCharArray();
for(char c:cs){
if(!(c>='a'&&c<='z')){
throw new RuntimeException("字符必须a~z");
}
}
return cs;
}
private char[] checkPattern(String pattern){
if(null==pattern||pattern.isEmpty())
throw new RuntimeException("pattern must not null or empty");
char[]cs = pattern.toCharArray();
for(char c:cs){
if('.'==c)
continue;
if(!(c>='a'&&c<='z')){
throw new RuntimeException("字符必须a~z");
}
}
return cs;
}
public boolean contains(String word){
char[] cs = checkWord(word);
Node cur = root;
for(char c:cs){
if(!cur.containsKey(c))
return false;
cur = cur.get(c);
}
return cur.isWord;
}
//pattern可以写.匹配一个字符
public boolean match(String pattern){
char[] cs = checkPattern(pattern);
return match(root,cs,0);
}
private boolean match(Node node, char[] cs, int index) {
if(null==node)
return false;
if(index==cs.length){
return node.isWord;
}
if(cs[index]=='.'){
for(Node n:node.nodes){
if(match(n,cs,index+1)){
return true;
}
}
return false;
}else{
if(!node.containsKey(cs[index])){
return false;
}
return match(node.get(cs[index]),cs,index+1);
}
}
// 查询是否在Trie中有单词以prefix为前缀
public boolean isPrefix(String prefix){
char[] cs = checkWord(prefix);
Node cur = root;
for(char c:cs){
if(!cur.containsKey(c))
return false;
cur = cur.get(c);
}
return true;
}
public int size() {
return size;
}
public static void main(String[] args) {
Trie trie = new Trie();
trie.add("hello");
System.out.println(trie.contains("hello"));
System.out.println(trie.match("he.o"));
System.out.println(trie.isPrefix("he"));
}
}
部分算法解析
match算法
private boolean match(Node node,char[] cs,int index){
if(index==cs.length){
return isWord(node);
}
char c = cs[index];
//不是. 就直接match下一个字符
if('.'!=c){
Node cur = node.next.get(c);
if(null==cur)
return false;
return match(cur,cs,index+1);
}else{
//是.的话需要匹配所有可能的路径,有一个满足即可
for(char ch:node.next.keySet()){
if(match(node.next.get(ch),cs,index+1)){
return true;
}
}
return false;
}
}
delete算法
public boolean delete(String word){
if(!contains(word)){
throw new RuntimeException("word is not exists");
}
char[] cs = checkWord(word);
Node cur = root;
for(char c:cs){
//如果某个节点只使用了一次,而且只有一个字节点,直接删除即可
if(cur.next.get(c).use==1&&cur.next.size()==1){
cur.next.remove(c);
cur = null;
break;
}
//否则置为下一个节点
cur = cur.next.get(c);
cur.use--;
}
//如果走完了,是一个单词的话,置为不是单词
if(isWord(cur)){
cur.weight = 0;
}
size--;
return true;
}
case1: 删除cat,从根节点出发,遍历到c的时候发现c只被使用了一次,直接从根节点的TreeMap移除掉key为c即可
case2: 删除dog, 从根节点出发,遍历到d的时候发现d被使用了两次,不可以删除d,当前节点置为下一个节点o,并且把d的使用次数减一,发现o的使用次数为1,直接从d的TreeMap移除掉key为o即可
case3: 删除pan, 从根节点出发,遍历到p的时候发现p被使用了两次, p的use–, cur=p, 发现a被使用了两次,a的use–, cur=a, 发现n被使用了两次, n的use–, cur=n, 循环执行完毕后发现n节点是pan的尾节点,把cur的weight改为0,标识pan不是单词
case4: 删除panda,p,a,n的use依次减一,然后直接从n的TreeMap移除掉key为d即可
参考慕课网liuyubobo玩转数据结构