myFirstBlog

朴素贝叶斯算法

公式

myFirstBlog

解释(例子)

给出如下训练集:

编号,色泽,根蒂,敲声,纹理,脐部,触感,好瓜  
1,青绿,蜷缩,浊响,清晰,凹陷,硬滑,是  
2,乌黑,蜷缩,沉闷,清晰,凹陷,硬滑,是  
3,乌黑,蜷缩,浊响,清晰,凹陷,硬滑,是  
4,青绿,蜷缩,沉闷,清晰,凹陷,硬滑,是  
5,浅白,蜷缩,浊响,清晰,凹陷,硬滑,是  
6,青绿,稍蜷,浊响,清晰,稍凹,软粘,是  
7,乌黑,稍蜷,浊响,稍糊,稍凹,软粘,是  
8,乌黑,稍蜷,浊响,清晰,稍凹,硬滑,是  
9,乌黑,稍蜷,沉闷,稍糊,稍凹,硬滑,否  
10,青绿,硬挺,清脆,清晰,平坦,软粘,否  
11,浅白,硬挺,清脆,模糊,平坦,硬滑,否  
12,浅白,蜷缩,浊响,模糊,平坦,软粘,否  
13,青绿,稍蜷,浊响,稍糊,凹陷,硬滑,否  
14,浅白,稍蜷,沉闷,稍糊,凹陷,硬滑,否  
15,乌黑,稍蜷,浊响,清晰,稍凹,软粘,否  
16,浅白,蜷缩,浊响,模糊,平坦,硬滑,否  
17,青绿,蜷缩,沉闷,稍糊,稍凹,硬滑,否  

判断有如下特征的瓜是否好瓜:

青绿,稍蜷,浊响,清晰,凹陷,硬滑

分析

将特征和规律代入贝叶斯公式中:

P(好|青绿,稍蜷,浊响,清晰,凹陷,硬滑)
= P(好)*P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|好) / (P(好)*P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|好) + P(否)*P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|否))
= P(好)*P(青绿|好)*P(稍蜷|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好) / (P(好)*P(青绿|好)*P(稍蜷|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好) + P(否)*P(青绿|否)*P(稍蜷|否)*P(浊响|否)*P(清晰|否)*P(凹陷|否)*P(硬滑|否))

朴素贝叶斯假设:在类别(好/否)给定的条件下,各特征之间相互独立,故:
P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|好) = P(青绿|好)*P(稍蜷|好)*P(浊响|好)*P(清晰|好)*P(凹陷|好)*P(硬滑|好)
P(青绿,稍蜷,浊响,清晰,凹陷,硬滑|否) = P(青绿|否)*P(稍蜷|否)*P(浊响|否)*P(清晰|否)*P(凹陷|否)*P(硬滑|否)

代码实现:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.io.*;
import java.util.Scanner;

/**
 * Naive Bayes classifier for a small categorical data set.
 *
 * <p>Each training row holds six feature values followed by a class label
 * ({@code "是"} or {@code "否"}); the leading id column of the input file is dropped
 * while reading. For a query x = (x1..x6) the posterior of class c is
 *
 * <pre>
 *   P(c | x) = P(c) * prod_i P(xi | c)  /  sum_c' ( P(c') * prod_i P(xi | c') )
 * </pre>
 *
 * All probabilities are plain relative frequencies; no smoothing is applied, so a
 * feature value never seen together with a class makes that class's product zero.
 */
public class Test {
    /** Default training file: {@code <user.dir>/src/sources/data.txt}, built with the platform separator. */
    static String filePath = System.getProperty("user.dir")
            + File.separator + "src" + File.separator + "sources" + File.separator + "data.txt";
    /** Training rows most recently loaded by {@link #readTable(String)} (features first, label last). */
    static ArrayList<ArrayList<String>> data = new ArrayList<ArrayList<String>>();

    /** Number of feature columns in a training row and in a query. */
    private static final int FEATURE_COUNT = 6;
    /** Index of the class label inside a training row (directly after the features). */
    private static final int LABEL_INDEX = 6;
    /** Label of the positive class in the training data. */
    private static final String GOOD_LABEL = "是";
    /** Label of the negative class in the training data. */
    private static final String BAD_LABEL = "否";

    /**
     * Reads a comma-separated file into a table and stores it in {@link #data}.
     *
     * <p>The first column of every line is discarded, the remaining fields are trimmed,
     * and blank lines are skipped. On an I/O error the stack trace and a message are
     * printed and the rows read so far are kept (best effort); no exception is rethrown.
     * The file is decoded with the platform default charset.
     *
     * @param filePath path of the file to read
     * @return the rows read, one list of fields per non-blank line; also assigned to {@link #data}
     */
    public ArrayList<ArrayList<String>> readTable(String filePath) {
        ArrayList<ArrayList<String>> table = new ArrayList<ArrayList<String>>();
        // try-with-resources closes the reader chain even when readLine() fails half-way.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(filePath))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.trim().isEmpty()) {
                    // A blank line would become an empty row and break the column lookups later.
                    continue;
                }
                String[] fields = line.split(",");
                ArrayList<String> row = new ArrayList<String>();
                // Start at 1: column 0 is skipped.
                for (int i = 1; i < fields.length; i++) {
                    // Trim so that stray spaces around a value do not defeat equals().
                    row.add(fields[i].trim());
                }
                table.add(row);
            }
        } catch (IOException e) {
            e.printStackTrace();
            // Message kept as-is; note that it is printed for every I/O failure, not only a missing file.
            System.out.println("文件不存在!");
        }
        data = table;
        return table;
    }

    /**
     * Computes the denominator of Bayes' formula (the evidence) for a query:
     * the sum over both classes of {@code P(c) * prod_i P(xi | c)}.
     *
     * @param str the six feature values of the query, in column order
     * @return the evidence; {@code 0.0} when no training row supports the query
     */
    public double denominator(String[] str) {
        return moleculeIsCold(GOOD_LABEL, str) + moleculeIsCold(BAD_LABEL, str);
    }

    /**
     * Computes the numerator of Bayes' formula for one class:
     * {@code P(hao) * prod_i P(strs[i] | hao)}, estimated by relative frequencies in {@link #data}.
     *
     * @param hao  the class label to evaluate (for example {@code "是"} or {@code "否"})
     * @param strs the six feature values of the query, in column order
     * @return the joint estimate; {@code 0.0} when the class has no training rows
     */
    public double moleculeIsCold(String hao, String[] strs) {
        int totalRows = 0;
        int classRows = 0;
        int[] matches = new int[FEATURE_COUNT];
        for (ArrayList<String> row : data) {
            if (row.size() <= LABEL_INDEX) {
                // Row has no label column; it cannot contribute to any count.
                continue;
            }
            totalRows++;
            if (!row.get(LABEL_INDEX).equals(hao)) {
                continue;
            }
            classRows++;
            for (int j = 0; j < FEATURE_COUNT; j++) {
                if (row.get(j).equals(strs[j])) {
                    matches[j]++;
                }
            }
        }
        if (classRows == 0) {
            // Avoids 0/0 = NaN for an empty table or a class that never occurs.
            return 0.0;
        }
        double result = classRows / (double) totalRows;
        for (int j = 0; j < FEATURE_COUNT; j++) {
            result *= matches[j] / (double) classRows;
        }
        return result;
    }

    /**
     * Compares the posterior probabilities of both classes for a query, prints them
     * together with the verdict, and returns the verdict.
     *
     * @param strs the six feature values of the query, in column order
     * @return {@code "好瓜"} when the positive class is strictly more probable, otherwise {@code "差瓜"}
     */
    public String compared(String[] strs) {
        double goodJoint = moleculeIsCold(GOOD_LABEL, strs);
        double badJoint = moleculeIsCold(BAD_LABEL, strs);
        double evidence = goodJoint + badJoint;
        double d1 = 0.0;
        double d2 = 0.0;
        if (evidence > 0) {
            d1 = goodJoint / evidence;
            d2 = badJoint / evidence;
        }
        // With zero evidence both posteriors stay 0.0 and the tie falls to the negative verdict.
        String str = d1 > d2 ? "好瓜" : "差瓜";
        System.out.println("好瓜的概率:" + d1);
        System.out.println("差瓜的概率:" + d2);
        System.out.println(str);
        return str;
    }

    /**
     * Reads six whitespace-separated feature values from standard input, loads the
     * training file at {@link #filePath}, and prints the classification.
     *
     * <p>Sample input: 青绿 稍蜷 浊响 清晰 凹陷 硬滑
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] strs = new String[FEATURE_COUNT];
        try (Scanner input = new Scanner(System.in)) {
            for (int i = 0; i < FEATURE_COUNT; i++) {
                strs[i] = input.next();
            }
        }
        Test ba = new Test();
        ba.readTable(filePath);
        ba.compared(strs);
    }
}

运行结果:

myFirstBlog

上一篇:Java的Object类常用API


下一篇:hashCode 和 equals 的区别