[原创]大数据:布隆过滤器C#版简单实现。

    public class BloomFilter
{
public BitArray _BloomArray;
public Int64 BloomArryLength { get; }
public Int64 DataArrayLeng { get; }
public Int64 BitIndexCount { get; } /// <summary>
/// 初始化
/// </summary>
/// <param name="BloomArryLength">布隆数组的大小</param>
/// <param name="DataArrayLeng">数据的长度</param>
/// <param name="bitIndexCount">hash数</param>
public BloomFilter(int BloomArryLength,int DataArrayLeng,int bitIndexCount)
{
_BloomArray = new BitArray(BloomArryLength);
this.BloomArryLength = BloomArryLength;
this.DataArrayLeng = DataArrayLeng;
this.BitIndexCount = bitIndexCount;
} public void Add(string str)
{
var hashCode = GetHashCode(str);
Random random = new Random(hashCode);
for (int i = ; i < BitIndexCount; i++)
{
var c = random.Next((int)(this.BloomArryLength - ));
_BloomArray[c] = true;
}
} public bool isExist(string str)
{
var hashCode = GetHashCode(str);
Random random = new Random(hashCode);
for (int i = ; i < BitIndexCount; i++)
{
if(!_BloomArray[random.Next((int)(this.BloomArryLength - ))])
{
return false;
}
}
return true;
} public int GetHashCode(object value)
{
return value.GetHashCode();
} public double getFalsePositiveProbability()
{
// (1 - e^(-k * n / m)) ^ k
return Math.Pow(( - Math.Exp(-BitIndexCount * (double)DataArrayLeng / BloomArryLength)),
BitIndexCount);
}
}
        static void Main(string[] args)
{
Bloom_Filter.BloomFilter bloom = new Bloom_Filter.BloomFilter(, , );//五千万条数据 for (int i = ; i < bloom.DataArrayLeng; i++)//五千万条数据
{
bloom.Add(i.ToString());
}
do
{
var c = Console.ReadLine();
if (c == "e")
break;
Stopwatch sw = new Stopwatch();
sw.Start();
var temp=bloom.isExist(c);
sw.Stop();
Console.WriteLine($"查找:{c}\n结果:{temp}\n总耗时:{sw.ElapsedTicks}\n错误概率:{bloom.getFalsePositiveProbability()}");
} while (true);
}

结果:使用内存27MB,查找结果一般在100毫秒以内。

[原创]大数据:布隆过滤器C#版简单实现。

上一篇:Java中有两种实现多线程的方式以及两种方式之间的区别


下一篇:iOS开发UI篇—iOS开发中三种简单的动画设置