package main
import (
"fmt"
"math"
)
type BloomFilter struct {
MLength int64 //过滤器长度
MArr []int64 //过滤器数组
NCount int64 //插入元素个数
FalseRate float64 //误报率
KHash int //hash函数个数
}
//数学公式
// k = ln2 * m /n
// m = - n log p / (log2 )^2
func main() {
bf := NewBloomFilter(0.0001, 100)
fmt.Println(bf)
bf.set([]byte("zhuang"))
bf.set([]byte("jing"))
bf.set([]byte("peng"))
fmt.Println(bf.check([]byte("aa")))
fmt.Println(bf)
}
func NewBloomFilter(falseRate float64, size int64) *BloomFilter {
m, k := getFilterParam(falseRate, size)
return &BloomFilter{
MLength: m,
MArr: make([]int64, m),
NCount: size,
FalseRate: falseRate,
KHash: k,
}
}
func getFilterParam(falseRate float64, size int64) (int64, int) {
m := -1 * math.Log(falseRate) * float64(size) / (math.Ln2 * math.Ln2)
k := m * math.Ln2 / float64(size)
return int64(m), int(k)
}
func (bf *BloomFilter) set(data []byte) {
for i := 0; i < bf.KHash; i++ {
index := bf.hashFun(data, i)
bf.MArr[index] = 1
}
}
func (bf *BloomFilter) check(data []byte) bool {
for i := 0; i < bf.KHash; i++ {
index := bf.hashFun(data, i)
if bf.MArr[index] == 0 {
return false
}
}
return true
}
func (bf *BloomFilter) hashFun(data []byte, count int) int64 {
hash := int64(5381)
for i := 0; i < len(data); i++ {
hash = hash*33 + int64(data[i]) + int64(count)
}
res := hash % (bf.MLength - 1)
return int64(math.Abs(float64(res)))
}