Java词频统计

/**
 * Word-frequency demo: splits a fixed English sample into tokens, counts
 * them with a Guava {@code Multiset}, and prints the totals followed by one
 * {@code word:count} line per distinct word.
 */
public class WordCount {

    /** The sample text whose words are counted. */
    private static final String SAMPLE_TEXT = "Regular expressions are used in search engines, search and replace dialogs of word processors and text editors, in text processing utilities such as sed and AWK and in lexical analysis. Many programming languages provide regex capabilities, built-in, or via libraries.";

    /** Token delimiter: a single space, a comma, or a literal period. */
    private static final String DELIMITER_REGEX = " |,|\\.";

    /**
     * Runs the demo and writes the results to standard output.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        // Tokens that must not be counted. The empty string matters in
        // practice: splitting on one delimiter at a time yields "" wherever
        // two delimiters are adjacent (for example ", ").
        List<String> ignoredTokens = Arrays.asList("", ",", ".");

        String[] tokens = SAMPLE_TEXT.split(DELIMITER_REGEX);
        System.out.println(Arrays.toString(tokens));

        // Fixed-size list view over the token array; printed as well.
        List<String> tokenList = Arrays.asList(tokens);
        System.out.println(tokenList);

        // Count every token first, then drop the ignored ones from the tally.
        Multiset<String> frequencies = HashMultiset.create();
        frequencies.addAll(tokenList);
        frequencies.removeAll(ignoredTokens);

        printReport(frequencies);
    }

    /**
     * Prints the overall size of the multiset, the number of distinct
     * elements, and then each distinct element with its occurrence count.
     *
     * @param frequencies the tallied words to report
     */
    private static void printReport(Multiset<String> frequencies) {
        int totalWords = frequencies.size();
        int distinctWords = frequencies.elementSet().size();
        System.out.println("word count:" + totalWords);
        System.out.println("unique word count:" + distinctWords);

        for (String word : frequencies.elementSet()) {
            int occurrences = frequencies.count(word);
            System.out.println(word + ":" + occurrences);
        }
    }
}
上一篇:WPF学习系列之八(形状,画刷和变换)


下一篇:PAT甲级题解分类byZlc