1pub(crate) const FNV_OFFSET: u64 = 0xcbf29ce484222325;
19pub(crate) const FNV_PRIME: u64 = 0x100000001b3;
20
21#[cfg(feature = "serde")]
22use serde::{Deserialize, Serialize};
23
24#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
25pub struct HyperLogLog {
26 p: u32,
27 m: u32,
28 pub(crate) registers: Vec<u8>,
29 alpha: f64,
30}
31
32impl HyperLogLog {
33 pub fn new(precision: u32) -> Self {
36 let p = precision.clamp(4, 18);
37 let m = 1u32 << p;
38 let alpha = alpha_m(m);
39 Self {
40 p,
41 m,
42 registers: vec![0u8; m as usize],
43 alpha,
44 }
45 }
46
47 pub fn precision(&self) -> u32 {
48 self.p
49 }
50 pub fn register_count(&self) -> u32 {
51 self.m
52 }
53
54 pub fn add(&mut self, key: &str) {
56 let h = fnv1a64(key.as_bytes());
57 let idx = (h >> (64 - self.p)) as usize;
58 let w = (h << self.p) | (1u64 << (self.p - 1));
61 let r = (w.leading_zeros() + 1) as u8;
62 if r > self.registers[idx] {
63 self.registers[idx] = r;
64 }
65 }
66
67 pub fn estimate(&self) -> f64 {
69 let m = self.m as f64;
70 let sum: f64 = self.registers.iter().map(|&r| 2f64.powi(-(r as i32))).sum();
72 let raw = self.alpha * m * m / sum;
73
74 let zeros = self.registers.iter().filter(|&&r| r == 0).count();
76 if zeros > 0 && raw <= 2.5 * m {
77 -m * (zeros as f64 / m).ln()
78 } else {
79 raw
80 }
81 }
82
83 pub fn merge(&mut self, other: &Self) -> Result<(), &'static str> {
85 if self.p != other.p {
86 return Err("precision mismatch");
87 }
88 for (a, b) in self.registers.iter_mut().zip(other.registers.iter()) {
89 if *b > *a {
90 *a = *b;
91 }
92 }
93 Ok(())
94 }
95}
96
97impl HyperLogLog {
98 #[inline]
102 #[allow(dead_code)] pub(crate) fn registers(&self) -> &[u8] {
104 &self.registers
105 }
106}
107
108pub(crate) fn alpha_m(m: u32) -> f64 {
109 match m {
110 16 => 0.673,
111 32 => 0.697,
112 64 => 0.709,
113 _ => 0.7213 / (1.0 + 1.079 / m as f64),
114 }
115}
116
117pub(crate) fn fnv1a64(bytes: &[u8]) -> u64 {
118 let mut h = FNV_OFFSET;
119 for &b in bytes {
120 h ^= b as u64;
121 h = h.wrapping_mul(FNV_PRIME);
122 }
123 h ^= h >> 30;
127 h = h.wrapping_mul(0xbf58476d1ce4e5b9);
128 h ^= h >> 27;
129 h = h.wrapping_mul(0x94d049bb133111eb);
130 h ^= h >> 31;
131 h
132}
133
134#[cfg(feature = "harness")]
135pub mod recipe;
136
137#[cfg(any(feature = "sparse", feature = "union-intersect"))]
140pub mod features;
141
142#[cfg(feature = "sparse")]
143pub use features::sparse::SparseHyperLogLog;
144#[cfg(feature = "union-intersect")]
145pub use features::union_intersect::{estimate_intersect, estimate_union};