Skip to main content

oxihuman_core/
hyperloglog_v2.rs

1// Copyright (C) 2026 COOLJAPAN OU (Team KitaSan)
2// SPDX-License-Identifier: Apache-2.0
3#![allow(dead_code)]
4
5//! HyperLogLog v2 — probabilistic cardinality estimation.
6
7const HLL_SEED: u64 = 0x9e37_79b9_7f4a_7c15;
8const ALPHA_16: f64 = 0.673;
9const ALPHA_32: f64 = 0.697;
10const ALPHA_64: f64 = 0.709;
11
12/// HyperLogLog cardinality estimator with `2^b` registers.
13pub struct HyperLogLogV2 {
14    registers: Vec<u8>,
15    b: u8,
16    m: usize,
17}
18
19impl HyperLogLogV2 {
20    /// Create a new HLL with `b` bits of precision (b in 4..=16).
21    pub fn new(b: u8) -> Self {
22        let b = b.clamp(4, 16);
23        let m = 1usize << b;
24        HyperLogLogV2 {
25            registers: vec![0u8; m],
26            b,
27            m,
28        }
29    }
30
31    fn hash_item(&self, item: u64) -> u64 {
32        item.wrapping_mul(HLL_SEED)
33            .rotate_left(27)
34            .wrapping_add(HLL_SEED >> 17)
35    }
36
37    fn leading_zeros_plus_one(val: u64, bits: u8) -> u8 {
38        let shifted = val << bits;
39        shifted.leading_zeros() as u8 + 1
40    }
41
42    /// Add an element to the estimator.
43    pub fn add(&mut self, item: u64) {
44        let h = self.hash_item(item);
45        let reg_idx = (h >> (64 - self.b)) as usize;
46        let w = h << self.b;
47        let rho = w.leading_zeros() as u8 + 1;
48        if rho > self.registers[reg_idx] {
49            self.registers[reg_idx] = rho;
50        }
51    }
52
53    /// Estimate the cardinality.
54    pub fn count(&self) -> u64 {
55        let m = self.m as f64;
56        let alpha = match self.m {
57            16 => ALPHA_16,
58            32 => ALPHA_32,
59            64 => ALPHA_64,
60            _ => 0.7213 / (1.0 + 1.079 / m),
61        };
62        let z: f64 = self
63            .registers
64            .iter()
65            .map(|&r| 2.0f64.powi(-(r as i32)))
66            .sum();
67        let raw = alpha * m * m / z;
68        /* small range correction: use linear counting when estimate is small */
69        let zeros = self.registers.iter().filter(|&&r| r == 0).count();
70        if raw < 2.5 * m && zeros > 0 {
71            (m * (m / zeros as f64).ln()).round() as u64
72        } else {
73            raw.round() as u64
74        }
75    }
76
77    /// Precision bits.
78    pub fn precision(&self) -> u8 {
79        self.b
80    }
81
82    /// Number of registers.
83    pub fn num_registers(&self) -> usize {
84        self.m
85    }
86
87    /// Merge another HLL into this one (must have same precision).
88    pub fn merge(&mut self, other: &HyperLogLogV2) {
89        if other.b == self.b {
90            for (r, &o) in self.registers.iter_mut().zip(other.registers.iter()) {
91                if o > *r {
92                    *r = o;
93                }
94            }
95        }
96    }
97}
98
99#[cfg(test)]
100mod tests {
101    use super::*;
102
103    #[test]
104    fn test_empty_cardinality() {
105        let hll = HyperLogLogV2::new(8);
106        assert_eq!(hll.count(), 0 /* no elements added */);
107    }
108
109    #[test]
110    fn test_single_element() {
111        let mut hll = HyperLogLogV2::new(8);
112        hll.add(1234);
113        assert!(hll.count() >= 1 /* at least one element counted */);
114    }
115
116    #[test]
117    fn test_many_elements_approximate() {
118        let mut hll = HyperLogLogV2::new(12);
119        for i in 0u64..1000 {
120            hll.add(i);
121        }
122        let est = hll.count();
123        /* estimate should be within 20% of 1000 */
124        assert!((800..=1200).contains(&est));
125    }
126
127    #[test]
128    fn test_precision_clamp() {
129        let hll = HyperLogLogV2::new(2);
130        assert_eq!(hll.precision(), 4 /* clamped to minimum 4 */);
131    }
132
133    #[test]
134    fn test_num_registers() {
135        let hll = HyperLogLogV2::new(8);
136        assert_eq!(hll.num_registers(), 256 /* 2^8 registers */);
137    }
138
139    #[test]
140    fn test_merge_same_precision() {
141        let mut a = HyperLogLogV2::new(8);
142        let mut b = HyperLogLogV2::new(8);
143        for i in 0u64..500 {
144            a.add(i);
145        }
146        for i in 500u64..1000 {
147            b.add(i);
148        }
149        a.merge(&b);
150        let est = a.count();
151        assert!(est >= 700 /* merged estimate should approach 1000 */);
152    }
153
154    #[test]
155    fn test_merge_different_precision_ignored() {
156        let mut a = HyperLogLogV2::new(8);
157        let b = HyperLogLogV2::new(10);
158        /* merging different precision is a no-op */
159        let before = a.count();
160        a.merge(&b);
161        assert_eq!(a.count(), before);
162    }
163
164    #[test]
165    fn test_high_precision() {
166        let hll = HyperLogLogV2::new(16);
167        assert_eq!(hll.num_registers(), 65536 /* 2^16 registers */);
168    }
169
170    #[test]
171    fn test_duplicate_inserts() {
172        let mut hll = HyperLogLogV2::new(8);
173        for _ in 0..100 {
174            hll.add(42 /* same element 100 times */);
175        }
176        let est = hll.count();
177        /* estimate should be close to 1 */
178        assert!(est <= 10);
179    }
180}