1pub(crate) const FNV_OFFSET: u64 = 0xcbf29ce484222325;
19pub(crate) const FNV_PRIME: u64 = 0x100000001b3;
20
21#[cfg(feature = "serde")]
22use serde::{Deserialize, Serialize};
23
24#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
25pub struct HyperLogLog {
26 p: u32,
27 m: u32,
28 pub(crate) registers: Vec<u8>,
29 alpha: f64,
30}
31
32impl HyperLogLog {
33 pub fn new(precision: u32) -> Self {
36 let p = precision.clamp(4, 18);
37 let m = 1u32 << p;
38 let alpha = alpha_m(m);
39 Self {
40 p,
41 m,
42 registers: vec![0u8; m as usize],
43 alpha,
44 }
45 }
46
47 pub fn precision(&self) -> u32 {
48 self.p
49 }
50 pub fn register_count(&self) -> u32 {
51 self.m
52 }
53
54 pub fn add(&mut self, key: &str) {
56 let h = fnv1a64(key.as_bytes());
57 let idx = (h >> (64 - self.p)) as usize;
58 let w = (h << self.p) | (1u64 << (self.p - 1));
61 let r = (w.leading_zeros() + 1) as u8;
62 if r > self.registers[idx] {
63 self.registers[idx] = r;
64 }
65 }
66
67 pub fn estimate(&self) -> f64 {
69 let m = self.m as f64;
70 let sum: f64 = self.registers.iter().map(|&r| 2f64.powi(-(r as i32))).sum();
72 let raw = self.alpha * m * m / sum;
73
74 let zeros = self.registers.iter().filter(|&&r| r == 0).count();
76 if zeros > 0 && raw <= 2.5 * m {
77 -m * (zeros as f64 / m).ln()
78 } else {
79 raw
80 }
81 }
82
83 pub fn merge(&mut self, other: &Self) -> Result<(), &'static str> {
85 if self.p != other.p {
86 return Err("precision mismatch");
87 }
88 for (a, b) in self.registers.iter_mut().zip(other.registers.iter()) {
89 if *b > *a {
90 *a = *b;
91 }
92 }
93 Ok(())
94 }
95}
96
97impl HyperLogLog {
98 #[inline]
102 pub(crate) fn registers(&self) -> &[u8] {
103 &self.registers
104 }
105}
106
107pub(crate) fn alpha_m(m: u32) -> f64 {
108 match m {
109 16 => 0.673,
110 32 => 0.697,
111 64 => 0.709,
112 _ => 0.7213 / (1.0 + 1.079 / m as f64),
113 }
114}
115
116pub(crate) fn fnv1a64(bytes: &[u8]) -> u64 {
117 let mut h = FNV_OFFSET;
118 for &b in bytes {
119 h ^= b as u64;
120 h = h.wrapping_mul(FNV_PRIME);
121 }
122 h ^= h >> 30;
126 h = h.wrapping_mul(0xbf58476d1ce4e5b9);
127 h ^= h >> 27;
128 h = h.wrapping_mul(0x94d049bb133111eb);
129 h ^= h >> 31;
130 h
131}
132
133#[cfg(feature = "harness")]
134pub mod recipe;
135
136#[cfg(any(feature = "sparse", feature = "union-intersect"))]
139pub mod features;
140
141#[cfg(feature = "sparse")]
142pub use features::sparse::SparseHyperLogLog;
143#[cfg(feature = "union-intersect")]
144pub use features::union_intersect::{estimate_intersect, estimate_union};