在PHP中,对海量数据做近似统计时,常用的概率数据结构有布隆过滤器(Bloom Filter,用于判断元素是否存在)和 Count-Min Sketch(用于估计元素出现的频次);如果要统计基数(即不同元素的个数),通常使用 HyperLogLog。以下是使用 Count-Min Sketch 算法估计元素频次的一个简单示例:
/**
 * Count-Min Sketch: a fixed-size table of counters used to estimate how many
 * times an item has been added, without storing the items themselves.
 *
 * The table has `rows` x `columns` counters. Every row uses its own salted
 * hash to map an item to exactly one counter in that row. Adding an item bumps
 * one counter per row; the estimate is the minimum of those counters. Hash
 * collisions can only inflate a counter, so the estimate is never lower than
 * the true count (for non-negative increments) but may be higher.
 */
class CountMinSketch
{
    /** @var int Number of rows, i.e. number of salted hash functions. */
    private $rows;

    /** @var int Number of counters in each row. */
    private $columns;

    /** @var int[][] Counter table indexed as [row][column]. */
    private $values;

    /**
     * @param int $rows    Number of rows (hash functions); must be >= 1.
     * @param int $columns Number of counters per row; must be >= 1.
     *
     * @throws \InvalidArgumentException If either dimension is smaller than 1.
     */
    public function __construct($rows, $columns)
    {
        $rows = (int) $rows;
        $columns = (int) $columns;

        // A zero-sized table would lead to a modulo-by-zero when hashing.
        if ($rows < 1 || $columns < 1) {
            throw new \InvalidArgumentException('Rows and columns must both be at least 1.');
        }

        $this->rows = $rows;
        $this->columns = $columns;
        $this->values = array_fill(0, $rows, array_fill(0, $columns, 0));
    }

    /**
     * Records $count additional occurrences of $item.
     *
     * @param mixed $item  Item to count; it is cast to string before hashing.
     * @param int   $count Number of occurrences to add; must be >= 0.
     *
     * @throws \InvalidArgumentException If $count is negative, because the
     *                                   minimum-based estimate is only an upper
     *                                   bound when counters never decrease.
     */
    public function increment($item, $count = 1)
    {
        $count = (int) $count;
        if ($count < 0) {
            throw new \InvalidArgumentException('Count must not be negative.');
        }

        // One counter per row is incremented.
        foreach ($this->generateHashes($item) as $row => $column) {
            $this->values[$row][$column] += $count;
        }
    }

    /**
     * Returns the estimated number of occurrences recorded for $item.
     *
     * @param mixed $item Item to look up; it is cast to string before hashing.
     *
     * @return int Smallest counter among the item's cells (0 if never added
     *             and none of its cells were hit by other items).
     */
    public function estimate($item)
    {
        $min = PHP_INT_MAX;
        foreach ($this->generateHashes($item) as $row => $column) {
            $min = min($min, $this->values[$row][$column]);
        }

        return $min;
    }

    /**
     * Maps an item to one column index per row.
     *
     * Each row salts the item with its row number before applying FNV-1a, so
     * the rows behave like different hash functions.
     *
     * @param mixed $item Item to hash; it is cast to string.
     *
     * @return int[] Column index for each row, keyed by row number.
     */
    private function generateHashes($item)
    {
        $key = (string) $item;
        $columnsByRow = [];

        for ($row = 0; $row < $this->rows; $row++) {
            $digest = hash('fnv1a32', $row . ':' . $key);

            // hash() returns hex text, so it must be converted before doing
            // arithmetic. Only the first 7 hex digits (28 bits) are used so the
            // value is a non-negative int even where PHP integers are 32-bit.
            $columnsByRow[$row] = hexdec(substr($digest, 0, 7)) % $this->columns;
        }

        return $columnsByRow;
    }
}
// Usage example
// The two constructor arguments are the row and column counts; tune them as needed.
$sketch = new CountMinSketch(1024, 2048);

// Record each item together with the amount passed to increment().
foreach (array("item1" => 1, "item2" => 2) as $item => $count) {
    $sketch->increment($item, $count);
}

// Print the estimate for each item.
echo "Estimated count for item1: ", $sketch->estimate("item1"), "\n";
echo "Estimated count for item2: ", $sketch->estimate("item2"), "\n";