week1词频统计

使用java完成对txt格式的英文短片进行字符提取及统计。

package nenu.softWareProject;

import java.io.*;
import java.util.*;

public class Test2 {
    String filename;
    //字符计数
    public static int count(String filename,String target) throws FileNotFoundException, IOException {
        FileReader fr=new FileReader(filename);
        BufferedReader br=new BufferedReader(fr);
        StringBuilder strb=new StringBuilder();
        while(true){
            String line=br.readLine();
            if(line==null){
                break;
            }
            strb.append(line);
        }
        String result=strb.toString();
        int count=0;
        int index=0;
        while(true){
             index=result.indexOf(target,index+1);
             if(index>0){
                 count++;
             }else{
                 break;
             }
             
        }
        System.out.println(target+","+count);
        br.close();
        return count;
    }

public static void main(String[] args) throws FileNotFoundException, IOException {
        FileInputStream fis = new FileInputStream("E:\\artical.txt");// 要读的文件路径
        InputStreamReader isr = new InputStreamReader(fis);// 字符流
        BufferedReader infile = new BufferedReader(isr);    // 缓冲
        
        String string;
        String file = null;
        while ((string = infile.readLine()) != null) {
            file += string;
        }
        String words[];
        file = file.toLowerCase();
        file = file.replaceAll("[^A-Za-z]", " ");
        file = file.replaceAll("\\s+", " ");
        words = file.split("\\s+");
        String filepath= "E:/artical.txt";
        Map<String, Integer> hashMap = new HashMap<String, Integer>();
        for (int i = 0; i < words.length; i++) {
            String key = words[i];
            if (hashMap.get(key) != null) {
                int value = ((Integer) hashMap.get(key)).intValue();
                value++;
                hashMap.put(key, new Integer(value));
            } else {
                hashMap.put(key, new Integer(1));
            }
            Test2.count(filepath,key);
            
        }
        
        
    }
}

代码运行结果:

never,0
give,1
up,1
never,0
lose,1
hope,1
always,2
have,2
faith,1
it,3
allows,1
you,5
to,1
cope,1
trying,1
times,1
will,4
pass,2
as,3
they,1
always,2
do,1
just,1
have,2
patience,1
your,2
dreams,1
will,4
come,1
true,1
so,1
put,1
on,1
a,17
smile,1
you,5
ll,6
live,1
through,1
your,2
pain,1
know,1
it,3
will,4
pass,2
and,1
strength,1
you,5
will,4
gain,1

代码还存在问题,没有排序,输出有重复字符,改进中

上一篇:Bash 使用技巧大补贴


下一篇:Heritrix 3.1.0 源码解析(三十七)