JAVA实验--统计文章中单词的个数并排序

2022-11-09 16:06:11

分析：

1）要统计单词的个数，按我自己对“单词”的理解：每当读到一个非字母的字符时，就把它前面连续的那一串字母归结为一个单词

2）对于最后要统计每个单词出现的个数这个问题，我认为用map比较合适，因为map中有键-值的关系，可以把单词（字符串）设置为键，把出现的个数（整型）设置为值，这样就能够建立起一一对应的关系，不用再判断所在的位置

根据上面自己的理解，今天我写了以下的一部分代码，对哈利波特第一集的这部分文章进行了单词的统计的测试，测试的结果相对良好，没有问题。

package pipei;

//洪鼎淇 20173627 信1705-3

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

//哈利波特单词统计

/**
 * Counts how often each word occurs in a text and reports the most frequent ones.
 *
 * <p>A word is a maximal run of ASCII letters (see {@link #isWord(char)}); every
 * other character acts as a separator. Counting is case-sensitive.
 */
public class Pipei {

    /** Maps each word found so far to the number of times it occurred. */
    public Map<String,Integer> map1=new HashMap<String,Integer>();

    /**
     * Counts the words of the input text, prints the most frequent ones and
     * writes the same report to the output file.
     *
     * @param arg command-line arguments (unused)
     */
    public static void main(String arg[]) {
        final int MAXNUM=10; // how many of the most frequent words are reported

        Pipei pipei=new Pipei();
        try {
            pipei.daoru();
        } catch (IOException e) {
            // Best effort, as before: report the failure and continue with whatever was counted.
            e.printStackTrace();
        }

        System.out.println("英文单词的出现情况如下:");

        StringBuilder report=new StringBuilder();
        int rank=1;
        for (Map.Entry<String,Integer> entry : topWords(pipei.map1,MAXNUM)) {
            String line="出现次数第"+rank+"多的单词为:"+entry.getKey()+"\t\t\t出现次数: "+entry.getValue();
            System.out.println(line);
            report.append(line).append("\r\n");
            rank++;
        }

        try {
            pipei.daochu(report.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns up to {@code limit} entries of {@code counts}, highest count first.
     * The sort is stable, so words with equal counts keep the map's iteration order.
     * The map itself is not modified.
     */
    private static List<Map.Entry<String,Integer>> topWords(Map<String,Integer> counts,int limit) {
        List<Map.Entry<String,Integer>> entries=
                new ArrayList<Map.Entry<String,Integer>>(counts.entrySet());
        Collections.sort(entries,new Comparator<Map.Entry<String,Integer>>() {
            @Override
            public int compare(Map.Entry<String,Integer> left,Map.Entry<String,Integer> right) {
                return right.getValue().compareTo(left.getValue());
            }
        });
        int size=Math.max(0,Math.min(limit,entries.size()));
        return entries.subList(0,size);
    }

    /**
     * Reads the UTF-8 encoded input file and adds its word counts to {@link #map1}.
     *
     * @throws IOException if the file cannot be opened or read
     */
    public void daoru() throws IOException
    {
        File source=new File("1.Harry Potter and the Sorcerer's Stone.txt");
        // try-with-resources closes both streams even when reading fails half-way.
        try (FileInputStream in=new FileInputStream(source);
             Reader reader=new BufferedReader(new InputStreamReader(in,"UTF-8"))) {
            daoru(reader);
        }
    }

    /**
     * Reads {@code reader} to its end and adds the word counts to {@link #map1}.
     * The reader is not closed; that stays the caller's responsibility.
     *
     * @param reader character source to tokenize
     * @throws IOException if reading fails
     */
    public void daoru(Reader reader) throws IOException
    {
        StringBuilder word=new StringBuilder();
        int ch;
        // read() returns -1 only at end of input; ready() merely says whether a read would block.
        while ((ch=reader.read())!=-1) {
            if (isWord((char) ch)) {
                word.append((char) ch);
            } else {
                addWord(word);
            }
        }
        addWord(word); // the text may end in the middle of a word
    }

    /**
     * Counts the pending word and clears the buffer. An empty buffer (two
     * separators in a row) is ignored so that "" never becomes a map key.
     */
    private void addWord(StringBuilder word) {
        if (word.length()==0) {
            return;
        }
        String key=word.toString();
        Integer seen=map1.get(key);
        map1.put(key,seen==null ? 1 : seen+1);
        word.setLength(0);
    }

    /**
     * Writes {@code txt} to the output file in UTF-8, replacing any previous content.
     *
     * @param txt the report text to store
     * @throws IOException if the file cannot be created or written
     */
    public void daochu(String txt) throws IOException
    {
        try (FileOutputStream out=new FileOutputStream(new File("tongji.txt"));
             OutputStreamWriter writer=new OutputStreamWriter(out,"UTF-8")) {
            writer.append(txt);
        }
    }

    /**
     * Tells whether {@code a} can be part of a word.
     *
     * @param a the character to test
     * @return {@code true} for the ASCII letters a-z and A-Z, {@code false} otherwise
     */
    public boolean isWord(char a)
    {
        return (a>='a'&&a<='z')||(a>='A'&&a<='Z');
    }

}

测试截图：

这是单词统计结果的截图。对于其中出现 s 的情况分析如下：s 其实来自缩写，文中经常出现 It's 这样的写法，撇号会把它拆成 It 和 s 两部分。为了避免这种情况，我对代码中的某一部分进行了修改(修改部分如下图)，将 It's 看成一个整体

在isWord函数判断字符是否为字母的这一部分，将单引号也当作单词的合法字符放进去

修改完之后的测试如下：

搞定！！

码农公寓

相关文章