Java版统计文件中的每个单词出现次数

关于正则表达式中 Pattern 和 Matcher 的用法,请参见转载博客:http://www.cnblogs.com/haodawang/p/5967219.html

代码实现:

 import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints a report.
 *
 * <p>The report lists every distinct word with its count in sorted order,
 * followed by the total number of words and the number of distinct words.
 * Words are compared case-insensitively (the text is lower-cased first).
 */
public class CountWorks {

    /** File that is read when no path is given on the command line. */
    private static final String DEFAULT_FILE = "short.txt";

    /**
     * A word is a run of ASCII letters that may contain inner apostrophes
     * ("don't"). Leading, trailing and stand-alone apostrophes are not part
     * of a word, so {@code 'quoted'} and {@code quoted} are the same word.
     * The pattern is applied to lower-cased text, hence only {@code a-z}.
     */
    private static final Pattern WORD = Pattern.compile("[a-z]+(?:'[a-z]+)*");

    /**
     * Reads the input file, counts its words and prints the report to
     * standard output.
     *
     * @param args optional; {@code args[0]} is the path of the file to read.
     *             When absent, {@code short.txt} in the working directory is
     *             used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;

        String text;
        try {
            text = readText(path);
        } catch (IOException e) {
            // Without the file there is nothing to count: report and stop
            // instead of continuing with a null reader.
            System.err.println("Cannot read " + path + ": " + e.getMessage());
            System.exit(1);
            return;
        }

        Map<String, Integer> counts = countWords(text);

        int total = 0;
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            System.out.printf("%s: %s\n", entry.getKey(), entry.getValue());
            total += entry.getValue();
        }
        System.out.println();
        System.out.println("total words : " + total);
        System.out.println("different words : " + counts.size());
    }

    /**
     * Reads the whole file into one string.
     *
     * <p>{@code readLine()} strips the line terminator, so a newline is
     * appended after every line; otherwise the last word of one line would
     * be glued to the first word of the next.
     *
     * @param path path of the file to read
     * @return the file content with every line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(String path) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append('\n');
            }
            return text.toString();
        } finally {
            // Close the reader even when reading fails part-way.
            reader.close();
        }
    }

    /**
     * Counts the occurrences of every word in {@code text}.
     *
     * @param text the text to scan
     * @return a map from lower-cased word to its number of occurrences,
     *         sorted by word
     */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new TreeMap<String, Integer>();
        // Locale.ROOT keeps lower-casing independent of the default locale
        // (e.g. "I" must become "i", not the Turkish dotless variant).
        Matcher matcher = WORD.matcher(text.toLowerCase(java.util.Locale.ROOT));
        while (matcher.find()) {
            String word = matcher.group();
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }
}
上一篇:Linux(以centos7为例)下自动挂载NTFS硬盘


下一篇:动态绑定CheckBoxList,并默认勾选多选框