nodedb_types/approx/
hll.rs1#[derive(Debug)]
9pub struct HyperLogLog {
10 registers: Vec<u8>,
11 precision: u8,
12}
13
14impl HyperLogLog {
15 pub fn new() -> Self {
16 Self::with_precision(14)
17 }
18
19 pub fn with_precision(p: u8) -> Self {
20 let p = p.clamp(4, 18);
21 let m = 1usize << p;
22 Self {
23 registers: vec![0u8; m],
24 precision: p,
25 }
26 }
27
28 pub fn add(&mut self, value: u64) {
29 let hash = splitmix64(value);
30 let m = self.registers.len();
31 let idx = (hash as usize) & (m - 1);
32 let remaining = hash >> self.precision;
33 let leading_zeros = if remaining == 0 {
34 (64 - self.precision) + 1
35 } else {
36 (remaining.leading_zeros() as u8).saturating_sub(self.precision) + 1
37 };
38 if leading_zeros > self.registers[idx] {
39 self.registers[idx] = leading_zeros;
40 }
41 }
42
43 pub fn add_batch(&mut self, values: &[u64]) {
44 for &v in values {
45 self.add(v);
46 }
47 }
48
49 pub fn add_f64_batch(&mut self, values: &[f64]) {
50 for &v in values {
51 self.add(v.to_bits());
52 }
53 }
54
55 pub fn estimate(&self) -> f64 {
56 let m = self.registers.len() as f64;
57 let alpha = match self.registers.len() {
58 16 => 0.673,
59 32 => 0.697,
60 64 => 0.709,
61 _ => 0.7213 / (1.0 + 1.079 / m),
62 };
63
64 let sum: f64 = self
65 .registers
66 .iter()
67 .map(|&r| 2.0f64.powi(-(r as i32)))
68 .sum();
69 let raw_estimate = alpha * m * m / sum;
70
71 if raw_estimate <= 2.5 * m {
72 let zeros = self.registers.iter().filter(|&&r| r == 0).count() as f64;
73 if zeros > 0.0 {
74 return m * (m / zeros).ln();
75 }
76 }
77
78 raw_estimate
79 }
80
81 pub fn merge(&mut self, other: &HyperLogLog) {
82 for (i, &r) in other.registers.iter().enumerate() {
83 if i < self.registers.len() && r > self.registers[i] {
84 self.registers[i] = r;
85 }
86 }
87 }
88
89 pub fn memory_bytes(&self) -> usize {
90 self.registers.len()
91 }
92
93 pub fn registers(&self) -> &[u8] {
95 &self.registers
96 }
97
98 pub fn from_registers(data: &[u8]) -> Self {
100 let precision = match data.len() {
101 16 => 4,
102 32 => 5,
103 64 => 6,
104 128 => 7,
105 256 => 8,
106 512 => 9,
107 1024 => 10,
108 2048 => 11,
109 4096 => 12,
110 8192 => 13,
111 16384 => 14,
112 32768 => 15,
113 65536 => 16,
114 131072 => 17,
115 262144 => 18,
116 _ => 14,
117 };
118 Self {
119 registers: data.to_vec(),
120 precision,
121 }
122 }
123}
124
125impl Default for HyperLogLog {
126 fn default() -> Self {
127 Self::new()
128 }
129}
130
131fn splitmix64(mut x: u64) -> u64 {
133 x = x.wrapping_add(0x9e3779b97f4a7c15);
134 x = (x ^ (x >> 30)).wrapping_mul(0xbf58476d1ce4e5b9);
135 x = (x ^ (x >> 27)).wrapping_mul(0x94d049bb133111eb);
136 x ^ (x >> 31)
137}
138
139#[cfg(test)]
140mod tests {
141 use super::*;
142
143 #[test]
144 fn hll_empty() {
145 let hll = HyperLogLog::new();
146 assert!(hll.estimate() < 1.0);
147 }
148
149 #[test]
150 fn hll_small_cardinality() {
151 let mut hll = HyperLogLog::new();
152 for i in 0..100u64 {
153 hll.add(i);
154 }
155 let est = hll.estimate();
156 assert!((90.0..110.0).contains(&est), "expected ~100, got {est:.0}");
157 }
158
159 #[test]
160 fn hll_large_cardinality() {
161 let mut hll = HyperLogLog::new();
162 for i in 0..100_000u64 {
163 hll.add(i);
164 }
165 let est = hll.estimate();
166 let error = (est - 100_000.0).abs() / 100_000.0;
167 assert!(error < 0.05, "expected <5% error, got {error:.3}");
168 }
169
170 #[test]
171 fn hll_duplicates_ignored() {
172 let mut hll = HyperLogLog::new();
173 for _ in 0..10_000 {
174 hll.add(42);
175 }
176 let est = hll.estimate();
177 assert!(est < 5.0, "expected ~1, got {est:.0}");
178 }
179
180 #[test]
181 fn hll_merge() {
182 let mut a = HyperLogLog::new();
183 let mut b = HyperLogLog::new();
184 for i in 0..5000u64 {
185 a.add(i);
186 }
187 for i in 3000..8000u64 {
188 b.add(i);
189 }
190 a.merge(&b);
191 let est = a.estimate();
192 let error = (est - 8000.0).abs() / 8000.0;
193 assert!(error < 0.05, "expected ~8000, got {est:.0}");
194 }
195
196 #[test]
197 fn hll_f64_batch() {
198 let mut hll = HyperLogLog::new();
199 let values: Vec<f64> = (0..1000).map(|i| i as f64 * 0.1).collect();
200 hll.add_f64_batch(&values);
201 let est = hll.estimate();
202 assert!(
203 (900.0..1100.0).contains(&est),
204 "expected ~1000, got {est:.0}"
205 );
206 }
207
208 #[test]
209 fn hll_memory() {
210 let hll = HyperLogLog::new();
211 assert_eq!(hll.memory_bytes(), 16384);
212 }
213}