Skip to main content

card_est_array/impls/
hyper_log_log.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use super::DefaultEstimator;
9use crate::traits::Word;
10use crate::traits::{EstimationLogic, MergeEstimationLogic, SliceEstimationLogic};
11use num_primitive::{PrimitiveNumber, PrimitiveNumberAs};
12use std::hash::*;
13use std::num::NonZeroUsize;
14use std::{borrow::Borrow, f64::consts::LN_2, fmt};
15
16/// Error returned by [`HyperLogLogBuilder::build`] when the configuration is
17/// invalid.
18#[derive(Debug, Clone)]
19pub enum HyperLogLogError {
20    /// The register size derived from `num_elements` exceeds the maximum
21    /// supported by the hash type.
22    RegisterSizeTooLarge {
23        register_size: usize,
24        num_elements: usize,
25        hash_bits: u32,
26    },
27    /// The estimator size in bits is not divisible by the bit width of the
28    /// backend word type `W`, so registers cannot be packed exactly into whole
29    /// words.
30    UnalignedBackend {
31        est_size_in_bits: usize,
32        word_bits: usize,
33        min_alignment: String,
34        min_log2_num_regs: usize,
35    },
36}
37
38impl fmt::Display for HyperLogLogError {
39    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40        match self {
41            Self::RegisterSizeTooLarge {
42                register_size,
43                num_elements,
44                hash_bits,
45            } => write!(
46                f,
47                "register size {} (derived from num_elements = {}) exceeds the maximum of 6 \
48                supported with a {}-bit hash type; reduce num_elements or use a hash with more bits",
49                register_size, num_elements, hash_bits,
50            ),
51            Self::UnalignedBackend {
52                est_size_in_bits,
53                word_bits,
54                min_alignment,
55                min_log2_num_regs,
56            } => write!(
57                f,
58                "estimator size ({} bits) is not divisible by the word size ({} bits), \
59                so register backends cannot be aligned; use {} or a smaller unsigned integer type \
60                as the word type, or increase log2_num_regs (possibly by reducing \
61                the relative standard deviation) to at least {}",
62                est_size_in_bits, word_bits, min_alignment, min_log2_num_regs,
63            ),
64        }
65    }
66}
67
68impl std::error::Error for HyperLogLogError {}
69
70/// The type returned by the hash function.
71type HashResult = u64;
72
73/// LogLog-β polynomial coefficients for `log2_num_regs` in [4 . . 18].
74///
75/// Indexed as `LOGLOG_BETA[log2_num_regs - 4]`. Each row holds β₀, β₁, …,
76/// β₇ used by [`beta_horner`].
77const LOGLOG_BETA: [[f64; 8]; 15] = [
78    // log2_num_regs =  4
79    [
80        -0.582581413904517,
81        -1.935_300_357_560_05,
82        11.079323758035073,
83        -22.131357446444323,
84        22.505391846630037,
85        -12.000723834917984,
86        3.220579408194167,
87        -0.342225302271235,
88    ],
89    // log2_num_regs =  5
90    [
91        -0.7518999460733967,
92        -0.959_003_007_774_876,
93        5.599_737_132_214_161,
94        -8.209_763_699_976_552,
95        6.509_125_489_447_204,
96        -2.683_029_373_432_373,
97        0.5612891113138221,
98        -0.0463331622196545,
99    ],
100    // log2_num_regs =  6
101    [
102        29.825_790_096_961_963,
103        -31.328_708_333_772_592,
104        -10.594_252_303_658_228,
105        -11.572_012_568_909_962,
106        3.818_875_437_390_749,
107        -2.416_013_032_853_081,
108        0.4542208940970826,
109        -0.0575155452020420,
110    ],
111    // log2_num_regs =  7
112    [
113        2.810_292_129_082_006,
114        -3.9780498518175995,
115        1.3162680041351582,
116        -3.925_248_633_580_59,
117        2.008_083_575_394_647,
118        -0.7527151937556955,
119        0.1265569894242751,
120        -0.0109946438726240,
121    ],
122    // log2_num_regs =  8
123    [
124        1.006_335_448_875_505_2,
125        -2.005_806_664_051_124,
126        1.643_697_493_665_141_2,
127        -2.705_608_099_405_661_7,
128        1.392_099_802_442_226,
129        -0.464_703_742_721_831_9,
130        0.07384282377269775,
131        -0.00578554885254223,
132    ],
133    // log2_num_regs =  9
134    [
135        -0.09415657458167959,
136        -0.781_309_759_245_505_3,
137        1.715_149_467_507_124_6,
138        -1.737_112_504_065_163_4,
139        0.864_415_084_890_489_2,
140        -0.23819027465047218,
141        0.03343448400269076,
142        -0.00207858528178157,
143    ],
144    // log2_num_regs =  10
145    [
146        -0.25935400670790054,
147        -0.525_983_019_998_058_1,
148        1.489_330_349_258_768_4,
149        -1.296_427_140_849_935_7,
150        0.622_847_562_172_216_2,
151        -0.156_723_267_702_510_4,
152        0.02054415903878563,
153        -0.00112488483925502,
154    ],
155    // log2_num_regs =  11
156    [
157        -4.32325553856025e-01,
158        -1.08450736399632e-01,
159        6.09156550741120e-01,
160        -1.65687801845180e-02,
161        -7.95829341087617e-02,
162        4.71830602102918e-02,
163        -7.81372902346934e-03,
164        5.84268708489995e-04,
165    ],
166    // log2_num_regs =  12
167    [
168        -3.84979202588598e-01,
169        1.83162233114364e-01,
170        1.30396688841854e-01,
171        7.04838927629266e-02,
172        -8.95893971464453e-03,
173        1.13010036741605e-02,
174        -1.94285569591290e-03,
175        2.25435774024964e-04,
176    ],
177    // log2_num_regs =  13
178    [
179        -0.41655270946462997,
180        -0.22146677040685156,
181        0.38862131236999947,
182        0.453_409_797_460_623_7,
183        -0.36264738324476375,
184        0.12304650053558529,
185        -0.017_015_403_845_555_1,
186        0.00102750367080838,
187    ],
188    // log2_num_regs =  14
189    [
190        -3.71009760230692e-01,
191        9.78811941207509e-03,
192        1.85796293324165e-01,
193        2.03015527328432e-01,
194        -1.16710521803686e-01,
195        4.31106699492820e-02,
196        -5.99583540511831e-03,
197        4.49704299509437e-04,
198    ],
199    // log2_num_regs = 15
200    [
201        -0.38215145543875273,
202        -0.890_694_005_360_908_4,
203        0.376_023_357_746_788_7,
204        0.993_359_774_406_823_8,
205        -0.655_774_416_383_189_6,
206        0.183_323_421_297_036_1,
207        -0.02241529633062872,
208        0.00121399789330194,
209    ],
210    // log2_num_regs = 16
211    [
212        -0.373_318_766_437_530_6,
213        -1.417_040_774_481_23,
214        0.40729184796612533,
215        1.561_520_339_065_841_6,
216        -0.992_422_335_342_861_3,
217        0.260_646_813_994_830_9,
218        -0.03053811369682807,
219        0.00155770210179105,
220    ],
221    // log2_num_regs = 17
222    [
223        -0.36775502299404605,
224        0.538_314_223_513_779_7,
225        0.769_702_892_787_679_2,
226        0.550_025_835_864_505_6,
227        -0.745_755_882_611_469_4,
228        0.257_118_357_858_219_5,
229        -0.03437902606864149,
230        0.00185949146371616,
231    ],
232    // log2_num_regs = 18
233    [
234        -0.364_796_233_259_605_4,
235        0.997_304_123_286_350_3,
236        1.553_543_862_300_812_2,
237        1.259_326_771_980_289_2,
238        -1.533_259_482_091_101_6,
239        0.478_010_422_000_565_9,
240        -0.05951025172951174,
241        0.00291076804642205,
242    ],
243];
244
245/// Computes the LogLog-β bias correction using Horner's method.
246///
247/// The method appears in the paper from Jason Qin, Denys Kim & Yumei Tung,
248/// “[LogLog-Beta and More: A New Algorithm for Cardinality Estimation Based on
249/// LogLog Counting](https://arxiv.org/pdf/1612.02284)”, 2016.
250///
251/// # Panics
252///
253/// If `precision` is not in [4 . . 18].
254pub fn beta_horner(z: f64, precision: usize) -> f64 {
255    let beta = LOGLOG_BETA[precision - 4];
256    let zl = (z + 1.0).ln();
257    let mut res = 0.0;
258    for i in (1..8).rev() {
259        res = res * zl + beta[i];
260    }
261    res * zl + beta[0] * z
262}
263
264/// Applies the post-loop correction to the harmonic mean and zero count.
265///
266/// This function is `#[inline(never)]` so that the register-iteration loop
267/// in [`estimate`](EstimationLogic::estimate) compiles identically for both
268/// `BETA=true` and `BETA=false`, preventing the post-loop formula from
269/// influencing loop optimizations (vectorization, unrolling, scheduling).
270#[inline(never)]
271fn apply_correction<const BETA: bool>(
272    harmonic_mean: f64,
273    zeroes: usize,
274    num_regs: usize,
275    log2_num_regs: usize,
276    alpha_m_m: f64,
277) -> f64 {
278    if BETA && zeroes != 0 && log2_num_regs <= 18 {
279        // LogLog-β: a single formula that replaces both the raw HyperLogLog
280        // estimate and the linear-counting small-range correction.
281        let m = num_regs as f64;
282        let z = zeroes as f64;
283        let beta = beta_horner(z, log2_num_regs);
284        alpha_m_m * (m - z) / (m * (harmonic_mean + beta))
285    } else {
286        // Classic HyperLogLog raw estimate with linear-counting correction.
287        let mut estimate = alpha_m_m / harmonic_mean;
288        if zeroes != 0 && estimate < 2.5 * num_regs as f64 {
289            let m = num_regs as f64;
290            estimate = m * (m / zeroes as f64).ln();
291        }
292        estimate
293    }
294}
295
296/// Estimator logic implementing the HyperLogLog algorithm.
297///
298/// Instances are created through [`HyperLogLogBuilder`]:
299///
300/// ```
301/// # use card_est_array::impls::HyperLogLogBuilder;
302/// // Default: LogLog-β correction enabled, usize backend
303/// let logic = HyperLogLogBuilder::new(1_000_000)
304///     .log2_num_regs(8)
305///     .build::<String>().unwrap();
306///
307/// // Disable LogLog-β, use classic HLL + linear-counting fallback
308/// let logic = HyperLogLogBuilder::new(1_000_000)
309///     .log2_num_regs(8)
310///     .beta::<false>()
311///     .build::<String>().unwrap();
312/// ```
313///
314/// # Type parameters
315///
316/// - `T`: the type of elements to count (must implement [`Hash`]).
317///
318/// - `H`: the [`BuildHasher`] used to hash elements.
319///
320/// - `W`: the unsigned word type for the register backend (see below).
321///
322/// - `BETA`: when `true` (the default), the [LogLog-β](beta_horner) bias
323///   correction is used during estimation. This provides better accuracy across
324///   the full cardinality range through a single formula, eliminating the need
325///   for a separate linear-counting correction. The cost is roughly 20ns per
326///   estimate call when some registers are still zero; when all registers are
327///   populated, the correction is skipped and performance is identical to the
328///   classic formula. Set to `false` via [`HyperLogLogBuilder::beta`] to use the
329///   original HyperLogLog formula instead.
330///
331/// # Backend alignment
332///
333/// `W` must be able to represent exactly the backend of an estimator. While
334/// usually `usize` will work (and it is the default type chosen by
335/// [`HyperLogLogBuilder::new`]), with odd register sizes and a small number
336/// of registers it might be necessary to select a smaller type, resulting in
337/// slower merges. For example, using 16 5-bit registers one needs to use
338/// `u16`, whereas for 16 6-bit registers `u32` will be sufficient.
339///
340/// Formally, `W::BITS` must divide `(1 << log2_num_regs) * register_size`
341/// (using [`HyperLogLog::register_size(num_elements)`](HyperLogLog::register_size)).
342/// [`HyperLogLogBuilder::min_log2_num_regs`] returns the minimum value for
343/// `log2_num_regs` that satisfies this property.
344#[derive(Debug, PartialEq)]
345pub struct HyperLogLog<T, H, W = usize, const BETA: bool = true> {
346    build_hasher: H,
347    register_size: usize,
348    num_regs_minus_1: HashResult,
349    log2_num_regs: usize,
350    sentinel_mask: HashResult,
351    num_regs: usize,
352    pub(super) words_per_estimator: usize,
353    alpha_m_m: f64,
354    msb_mask: Box<[W]>,
355    lsb_mask: Box<[W]>,
356    _marker: std::marker::PhantomData<T>,
357}
358
359// We implement Clone manually because we do not want to require that T is
360// Clone.
361impl<T, H: Clone, W: Clone, const BETA: bool> Clone for HyperLogLog<T, H, W, BETA> {
362    fn clone(&self) -> Self {
363        Self {
364            build_hasher: self.build_hasher.clone(),
365            register_size: self.register_size,
366            num_regs_minus_1: self.num_regs_minus_1,
367            log2_num_regs: self.log2_num_regs,
368            sentinel_mask: self.sentinel_mask,
369            num_regs: self.num_regs,
370            words_per_estimator: self.words_per_estimator,
371            alpha_m_m: self.alpha_m_m,
372            msb_mask: self.msb_mask.clone(),
373            lsb_mask: self.lsb_mask.clone(),
374            _marker: std::marker::PhantomData,
375        }
376    }
377}
378
379impl<T, H: Clone, W: Word, const BETA: bool> HyperLogLog<T, H, W, BETA> {
380    /// Returns the value contained in a register of a given backend.
381    #[inline(always)]
382    fn get_register_unchecked(&self, backend: impl AsRef<[W]>, index: usize) -> W {
383        let backend = backend.as_ref();
384        let bits = W::BITS as usize;
385        let bit_width = self.register_size;
386        let mask = W::MAX >> (bits - bit_width);
387        let pos = index * bit_width;
388        let word_index = pos / bits;
389        let bit_index = pos % bits;
390
391        if bit_index + bit_width <= bits {
392            (unsafe { *backend.get_unchecked(word_index) } >> bit_index) & mask
393        } else {
394            ((unsafe { *backend.get_unchecked(word_index) } >> bit_index)
395                | (unsafe { *backend.get_unchecked(word_index + 1) } << (bits - bit_index)))
396                & mask
397        }
398    }
399
400    /// Sets the value contained in a register of a given backend.
401    #[inline(always)]
402    fn set_register_unchecked(&self, mut backend: impl AsMut<[W]>, index: usize, new_value: W) {
403        let backend = backend.as_mut();
404        let bits = W::BITS as usize;
405        let bit_width = self.register_size;
406        let mask = W::MAX >> (bits - bit_width);
407        let pos = index * bit_width;
408        let word_index = pos / bits;
409        let bit_index = pos % bits;
410
411        if bit_index + bit_width <= bits {
412            let mut word = unsafe { *backend.get_unchecked_mut(word_index) };
413            word &= !(mask << bit_index);
414            word |= new_value << bit_index;
415            unsafe { *backend.get_unchecked_mut(word_index) = word };
416        } else {
417            let mut word = unsafe { *backend.get_unchecked_mut(word_index) };
418            word &= (W::ONE << bit_index) - W::ONE;
419            word |= new_value << bit_index;
420            unsafe { *backend.get_unchecked_mut(word_index) = word };
421
422            let mut word = unsafe { *backend.get_unchecked_mut(word_index + 1) };
423            word &= !(mask >> (bits - bit_index));
424            word |= new_value >> (bits - bit_index);
425            unsafe { *backend.get_unchecked_mut(word_index + 1) = word };
426        }
427    }
428}
429
430impl<T: Hash, H: BuildHasher + Clone, W: Word, const BETA: bool> SliceEstimationLogic<W>
431    for HyperLogLog<T, H, W, BETA>
432where
433    u32: PrimitiveNumberAs<W>,
434{
435    #[inline(always)]
436    fn backend_len(&self) -> usize {
437        self.words_per_estimator
438    }
439}
440
441impl<T: Hash, H: BuildHasher + Clone, W: Word, const BETA: bool> EstimationLogic
442    for HyperLogLog<T, H, W, BETA>
443where
444    u32: PrimitiveNumberAs<W>,
445{
446    type Item = T;
447    type Backend = [W];
448    type Estimator<'a>
449        = DefaultEstimator<Self, &'a Self, Box<[W]>>
450    where
451        T: 'a,
452        W: 'a,
453        H: 'a;
454
455    fn new_estimator(&self) -> Self::Estimator<'_> {
456        Self::Estimator::new(
457            self,
458            vec![W::ZERO; self.words_per_estimator].into_boxed_slice(),
459        )
460    }
461
462    fn add(&self, backend: &mut Self::Backend, element: impl Borrow<T>) {
463        let hash = self.build_hasher.hash_one(element.borrow());
464        let register = (hash & self.num_regs_minus_1) as usize;
465        // The number of trailing zeroes is certainly expressible in
466        // a variable of type W
467        let r = ((hash >> self.log2_num_regs) | self.sentinel_mask)
468            .trailing_zeros()
469            .as_to();
470
471        debug_assert!(r < (W::ONE << self.register_size) - W::ONE);
472        debug_assert!(register < self.num_regs);
473
474        let current_value = self.get_register_unchecked(&*backend, register);
475        let candidate_value = r + W::ONE;
476        let new_value = std::cmp::max(current_value, candidate_value);
477        if current_value != new_value {
478            self.set_register_unchecked(backend, register, new_value);
479        }
480    }
481
482    fn estimate(&self, backend: &[W]) -> f64 {
483        let mut harmonic_mean = 0.0;
484        let mut zeroes = 0usize;
485
486        for i in 0..self.num_regs {
487            let value: usize = self.get_register_unchecked(backend, i).as_to();
488            if value == 0 {
489                zeroes += 1;
490            }
491            // 2⁻ᵛ via IEEE 754: exponent = 1023 − v, zero mantissa.
492            debug_assert!(value <= 1023);
493            harmonic_mean += f64::from_bits((1023 - value as u64) << 52);
494        }
495
496        apply_correction::<BETA>(
497            harmonic_mean,
498            zeroes,
499            self.num_regs,
500            self.log2_num_regs,
501            self.alpha_m_m,
502        )
503    }
504
505    #[inline(always)]
506    fn clear(&self, backend: &mut [W]) {
507        backend.fill(W::ZERO);
508    }
509
510    #[inline(always)]
511    fn set(&self, dst: &mut [W], src: &[W]) {
512        debug_assert_eq!(dst.len(), src.len());
513        dst.copy_from_slice(src);
514    }
515}
516
517/// Helper for merge operations with [`HyperLogLog`] logic.
518pub struct HyperLogLogHelper<W> {
519    acc: Vec<W>,
520    mask: Vec<W>,
521}
522
523impl<T: Hash, H: BuildHasher + Clone, W: Word, const BETA: bool> MergeEstimationLogic
524    for HyperLogLog<T, H, W, BETA>
525where
526    u32: PrimitiveNumberAs<W>,
527{
528    type Helper = HyperLogLogHelper<W>;
529
530    fn new_helper(&self) -> Self::Helper {
531        HyperLogLogHelper {
532            acc: vec![W::ZERO; self.words_per_estimator],
533            mask: vec![W::ZERO; self.words_per_estimator],
534        }
535    }
536
537    #[inline(always)]
538    fn merge_with_helper(&self, dst: &mut [W], src: &[W], helper: &mut Self::Helper) {
539        merge_hyperloglog_bitwise(
540            dst,
541            src,
542            self.msb_mask.as_ref(),
543            self.lsb_mask.as_ref(),
544            &mut helper.acc,
545            &mut helper.mask,
546            self.register_size,
547        );
548    }
549}
550
551impl<T, H, W, const BETA: bool> std::fmt::Display for HyperLogLog<T, H, W, BETA> {
552    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
553        write!(
554            f,
555            "HyperLogLog with relative standard deviation: {}% ({} registers/estimator, {} bits/register, {} bytes/estimator)",
556            100.0 * HyperLogLog::rel_std(self.log2_num_regs),
557            self.num_regs,
558            self.register_size,
559            (self.num_regs * self.register_size) / 8
560        )
561    }
562}
563
564/// Builder for [`HyperLogLog`] cardinality-estimator logic.
565///
566/// The builder lets you configure:
567/// - the upper bound on the number of distinct elements
568///   ([`new`](Self::new) / [`num_elements`](Self::num_elements));
569/// - the number of registers, either directly
570///   ([`log2_num_regs`](Self::log2_num_regs)) or via a target relative
571///   standard deviation ([`rsd`](Self::rsd));
572/// - the backend word type ([`word_type`](Self::word_type));
573/// - the hash function ([`build_hasher`](Self::build_hasher));
574/// - whether [LogLog-β bias correction](beta_horner) is enabled
575///   ([`beta`](Self::beta)).
576///
577/// Call [`build`](Self::build) to obtain the configured [`HyperLogLog`]
578/// logic.
579#[derive(Debug, Clone)]
580pub struct HyperLogLogBuilder<H, W = usize, const BETA: bool = true> {
581    build_hasher: H,
582    log2_num_regs: usize,
583    num_elements: usize,
584    _marker: std::marker::PhantomData<W>,
585}
586
587impl HyperLogLogBuilder<BuildHasherDefault<DefaultHasher>> {
588    /// Creates a new builder for a [`HyperLogLog`] logic with the default word
589    /// type (the fixed-size equivalent of `usize`).
590    ///
591    /// # Panics
592    ///
593    /// If `n` is zero.
594    pub const fn new(num_elements: usize) -> Self {
595        assert!(
596            num_elements > 0,
597            "the upper bound on the number of distinct elements must be positive"
598        );
599        Self {
600            build_hasher: BuildHasherDefault::new(),
601            log2_num_regs: 4,
602            num_elements,
603            _marker: std::marker::PhantomData,
604        }
605    }
606}
607
608fn min_alignment(bits: usize) -> String {
609    if bits % 128 == 0 {
610        "u128"
611    } else if bits % 64 == 0 {
612        "u64"
613    } else if bits % 32 == 0 {
614        "u32"
615    } else if bits % 16 == 0 {
616        "u16"
617    } else {
618        "u8"
619    }
620    .to_string()
621}
622
623impl HyperLogLog<(), (), (), true> {
624    /// Returns the logarithm of the number of registers per estimator that are
625    /// necessary to attain a given relative standard deviation.
626    ///
627    /// # Arguments
628    /// * `rsd`: the relative standard deviation to be attained.
629    pub fn log2_num_of_registers(rsd: f64) -> usize {
630        ((1.106 / rsd).powi(2)).log2().ceil() as usize
631    }
632
633    /// Returns the relative standard deviation corresponding to a given number
634    /// of registers per estimator.
635    ///
636    /// # Arguments
637    ///
638    /// * `log2_num_regs`: the logarithm of the number of registers per
639    ///   estimator.
640    pub fn rel_std(log2_num_regs: usize) -> f64 {
641        let tmp = match log2_num_regs {
642            4 => 1.106,
643            5 => 1.070,
644            6 => 1.054,
645            7 => 1.046,
646            _ => 1.04,
647        };
648        tmp / ((1 << log2_num_regs) as f64).sqrt()
649    }
650
651    /// Returns the register size in bits, given an upper bound on the number of
652    /// distinct elements.
653    ///
654    /// # Arguments
655    /// * `num_elements`: an upper bound on the number of distinct elements.
656    pub fn register_size(num_elements: usize) -> usize {
657        std::cmp::max(
658            5,
659            (((num_elements as f64).ln() / LN_2) / LN_2).ln().ceil() as usize,
660        )
661    }
662}
663
664impl<H, W: Word, const BETA: bool> HyperLogLogBuilder<H, W, BETA> {
665    /// Sets the desired relative standard deviation.
666    ///
667    /// This is a high-level alternative to [`Self::log2_num_regs`]. Calling one
668    /// after the other invalidates the work done by the first one.
669    ///
670    /// # Arguments
671    /// * `rsd`: the relative standard deviation to be attained.
672    ///
673    /// # Panics
674    ///
675    /// If the resulting number of registers is less than 16 (i.e., `rsd` is
676    /// too large).
677    pub fn rsd(self, rsd: f64) -> Self {
678        self.log2_num_regs(HyperLogLog::log2_num_of_registers(rsd))
679    }
680
681    /// Sets the base-2 logarithm of the number of registers.
682    ///
683    /// This is a low-level alternative to [`Self::rsd`]. Calling one after the
684    /// other invalidates the work done by the first one.
685    ///
686    /// # Arguments
687    /// * `log2_num_regs`: the logarithm of the number of registers per
688    ///   estimator.
689    ///
690    /// # Panics
691    ///
692    /// If `log2_num_regs` is less than 4.
693    pub const fn log2_num_regs(mut self, log2_num_regs: usize) -> Self {
694        assert!(
695            log2_num_regs >= 4,
696            "the logarithm of the number of registers per estimator should be at least 4"
697        );
698        self.log2_num_regs = log2_num_regs;
699        self
700    }
701
702    /// Returns the minimum value allowed for [`Self::log2_num_regs`] given the
703    /// current value of [`Self::num_elements`].
704    pub fn min_log2_num_regs(&self) -> usize {
705        let register_size = HyperLogLog::register_size(self.num_elements);
706        let register_size = NonZeroUsize::try_from(register_size).expect("register_size is zero");
707        let min_num_regs = W::BITS / highest_power_of_2_dividing(register_size);
708        assert_eq!(min_num_regs, min_num_regs.next_power_of_two());
709        min_num_regs.trailing_zeros() as usize // log2(min_num_regs)
710    }
711
712    /// Sets the type `W` to use to represent backends.
713    ///
714    /// Note that the returned builder will have a different type if `W2` is
715    /// different from `W`.
716    ///
717    /// See the [`logic documentation`](HyperLogLog) for the limitations on the
718    /// choice of `W2`.
719    pub fn word_type<W2>(self) -> HyperLogLogBuilder<H, W2, BETA> {
720        HyperLogLogBuilder {
721            num_elements: self.num_elements,
722            build_hasher: self.build_hasher,
723            log2_num_regs: self.log2_num_regs,
724            _marker: std::marker::PhantomData,
725        }
726    }
727
728    /// Enables or disables the [LogLog-β bias correction](beta_horner) in
729    /// the estimate.
730    ///
731    /// When enabled (the default), the estimate uses the LogLog-β formula for
732    /// precisions 4–18, which provides better accuracy across the full
733    /// cardinality range without a separate linear-counting correction, at the
734    /// cost of roughly 20ns per estimate call when some registers are still
735    /// zero. When all registers are populated, the correction is skipped and
736    /// performance is identical to the classic formula.
737    ///
738    /// When disabled, the classic HyperLogLog formula with linear-counting
739    /// fallback is used.
740    ///
741    /// ```
742    /// # use card_est_array::impls::HyperLogLogBuilder;
743    /// // Disable LogLog-β correction
744    /// let logic = HyperLogLogBuilder::new(1_000_000)
745    ///     .log2_num_regs(8)
746    ///     .beta::<false>()
747    ///     .build::<usize>().unwrap();
748    /// ```
749    pub fn beta<const BETA2: bool>(self) -> HyperLogLogBuilder<H, W, BETA2> {
750        HyperLogLogBuilder {
751            num_elements: self.num_elements,
752            build_hasher: self.build_hasher,
753            log2_num_regs: self.log2_num_regs,
754            _marker: std::marker::PhantomData,
755        }
756    }
757
758    /// Sets the upper bound on the number of elements.
759    ///
760    /// # Panics
761    ///
762    /// If `n` is zero.
763    pub const fn num_elements(mut self, num_elements: usize) -> Self {
764        assert!(
765            num_elements > 0,
766            "the upper bound on the number of distinct elements must be positive"
767        );
768        self.num_elements = num_elements;
769        self
770    }
771
772    /// Sets the [`BuildHasher`] to use.
773    ///
774    /// Using this method you can select a specific hasher based on one or more
775    /// seeds.
776    pub fn build_hasher<H2>(self, build_hasher: H2) -> HyperLogLogBuilder<H2, W, BETA> {
777        HyperLogLogBuilder {
778            num_elements: self.num_elements,
779            log2_num_regs: self.log2_num_regs,
780            build_hasher,
781            _marker: std::marker::PhantomData,
782        }
783    }
784
785    /// Builds the logic.
786    ///
787    /// The type of objects the estimators keep track of is defined here by `T`,
788    /// but it is usually inferred by the compiler.
789    ///
790    /// # Errors
791    ///
792    /// Returns [`HyperLogLogError::RegisterSizeTooLarge`] if the register size
793    /// derived from `num_elements` exceeds the maximum supported by the hash
794    /// type.
795    ///
796    /// Returns [`HyperLogLogError::UnalignedBackend`] if the estimator size in
797    /// bits is not divisible by the bit width of `W`.
798    pub fn build<T>(self) -> Result<HyperLogLog<T, H, W, BETA>, HyperLogLogError> {
799        let bits = W::BITS as usize;
800        let log2_num_regs = self.log2_num_regs;
801        let num_elements = self.num_elements;
802        let number_of_registers = 1 << log2_num_regs;
803        let register_size = HyperLogLog::register_size(num_elements);
804        if register_size > 6 {
805            return Err(HyperLogLogError::RegisterSizeTooLarge {
806                register_size,
807                num_elements,
808                hash_bits: HashResult::BITS,
809            });
810        }
811        let sentinel_mask = 1 << ((1 << register_size) - 2);
812        let alpha = match log2_num_regs {
813            4 => 0.673,
814            5 => 0.697,
815            6 => 0.709,
816            _ => 0.7213 / (1.0 + 1.079 / number_of_registers as f64),
817        };
818        let num_regs_minus_1 = (number_of_registers - 1) as HashResult;
819
820        let est_size_in_bits = number_of_registers * register_size;
821
822        // This ensures estimators are always aligned to W
823        if est_size_in_bits % bits != 0 {
824            return Err(HyperLogLogError::UnalignedBackend {
825                est_size_in_bits,
826                word_bits: bits,
827                min_alignment: min_alignment(est_size_in_bits),
828                min_log2_num_regs: self.min_log2_num_regs(),
829            });
830        }
831        let est_size_in_words = est_size_in_bits / bits;
832
833        let msb_mask = build_register_mask(
834            est_size_in_words,
835            register_size,
836            W::ONE << (register_size - 1),
837        );
838        let lsb_mask = build_register_mask(est_size_in_words, register_size, W::ONE);
839
840        Ok(HyperLogLog {
841            num_regs: number_of_registers,
842            num_regs_minus_1,
843            log2_num_regs,
844            register_size,
845            alpha_m_m: alpha * (number_of_registers as f64).powi(2),
846            sentinel_mask,
847            build_hasher: self.build_hasher,
848            msb_mask,
849            lsb_mask,
850            words_per_estimator: est_size_in_words,
851            _marker: std::marker::PhantomData,
852        })
853    }
854}
855
856/// Builds a mask of `num_words` words by repeating a `register_size`-bit
857/// pattern across all register positions.
858fn build_register_mask<W: Word>(num_words: usize, register_size: usize, pattern: W) -> Box<[W]> {
859    let bits = W::BITS as usize;
860    let total_bits = num_words * bits;
861    let mut result = vec![W::ZERO; num_words];
862    let mut bit_pos = 0;
863    while bit_pos < total_bits {
864        let word_index = bit_pos / bits;
865        let bit_index = bit_pos % bits;
866        result[word_index] |= pattern << bit_index;
867        if bit_index + register_size > bits && word_index + 1 < num_words {
868            result[word_index + 1] |= pattern >> (bits - bit_index);
869        }
870        bit_pos += register_size;
871    }
872    result.into_boxed_slice()
873}
874
875/// Performs a multiple precision subtraction, leaving the result in the first operand.
876/// The operands MUST have the same length.
877///
878/// # Arguments
879/// * `x`: the first operand. This will contain the final result.
880/// * `y`: the second operand that will be subtracted from `x`.
881#[inline(always)]
882pub(super) fn subtract<W: Word>(x: &mut [W], y: &[W]) {
883    debug_assert_eq!(x.len(), y.len());
884    let mut borrow = false;
885
886    for (x_word, &y) in x.iter_mut().zip(y.iter()) {
887        let mut x = *x_word;
888        if !borrow {
889            borrow = x < y;
890        } else if x != W::ZERO {
891            x = x.wrapping_sub(W::ONE);
892            borrow = x < y;
893        } else {
894            x = x.wrapping_sub(W::ONE);
895        }
896        *x_word = x.wrapping_sub(y);
897    }
898}
899
900fn merge_hyperloglog_bitwise<W: Word>(
901    mut x: impl AsMut<[W]>,
902    y: impl AsRef<[W]>,
903    msb_mask: impl AsRef<[W]>,
904    lsb_mask: impl AsRef<[W]>,
905    acc: &mut Vec<W>,
906    mask: &mut Vec<W>,
907    register_size: usize,
908) {
909    let x = x.as_mut();
910    let y = y.as_ref();
911    let msb_mask = msb_mask.as_ref();
912    let lsb_mask = lsb_mask.as_ref();
913
914    debug_assert_eq!(x.len(), y.len());
915    debug_assert_eq!(x.len(), msb_mask.len());
916    debug_assert_eq!(x.len(), lsb_mask.len());
917
918    let register_size_minus_1 = register_size - 1;
919    let num_words_minus_1 = x.len() - 1;
920    let shift_register_size_minus_1 = W::BITS as usize - register_size_minus_1;
921
922    acc.clear();
923    mask.clear();
924
925    /* We work in two phases. Let H_r (msb_mask) be the mask with the
926     * highest bit of each register (of size r) set, and L_r (lsb_mask)
927     * be the mask with the lowest bit of each register set.
928     * We describe the algorithm on a single word.
929     *
930     * In the first phase we perform an unsigned strict register-by-register
931     * comparison of x and y, using the formula
932     *
933     * z = ((((y | H_r) - (x & !H_r)) | (y ^ x)) ^ (y | !x)) & H_r
934     *
935     * Then, we generate a register-by-register mask of all ones or
936     * all zeroes, depending on the result of the comparison, using the
937     * formula
938     *
939     * (((z >> r-1 | H_r) - L_r) | H_r) ^ z
940     *
941     * At that point, it is trivial to select from x and y the right values.
942     */
943
944    // We load y | H_r into the accumulator.
945    acc.extend(
946        y.iter()
947            .zip(msb_mask)
948            .map(|(&y_word, &msb_word)| y_word | msb_word),
949    );
950
951    // We load x & !H_r into mask as temporary storage.
952    mask.extend(
953        x.iter()
954            .zip(msb_mask)
955            .map(|(&x_word, &msb_word)| x_word & !msb_word),
956    );
957
958    // We subtract x & !H_r, using mask as temporary storage
959    subtract(acc, mask);
960
961    // We OR with y ^ x, XOR with (y | !x), and finally AND with H_r.
962    acc.iter_mut()
963        .zip(x.iter())
964        .zip(y.iter())
965        .zip(msb_mask.iter())
966        .for_each(|(((acc_word, &x_word), &y_word), &msb_word)| {
967            *acc_word = ((*acc_word | (y_word ^ x_word)) ^ (y_word | !x_word)) & msb_word
968        });
969
970    // We shift by register_size - 1 places and put the result into mask.
971    {
972        let (mask_last, mask_slice) = mask.split_last_mut().unwrap();
973        let (&msb_last, msb_slice) = msb_mask.split_last().unwrap();
974        mask_slice
975            .iter_mut()
976            .zip(acc[0..num_words_minus_1].iter())
977            .zip(acc[1..].iter())
978            .zip(msb_slice.iter())
979            .rev()
980            .for_each(|(((mask_word, &acc_word), &next_acc_word), &msb_word)| {
981                // W is always unsigned so the shift is always with a 0
982                *mask_word = (acc_word >> register_size_minus_1)
983                    | (next_acc_word << shift_register_size_minus_1)
984                    | msb_word
985            });
986        *mask_last = (acc[num_words_minus_1] >> register_size_minus_1) | msb_last;
987    }
988
989    // We subtract L_r from mask.
990    subtract(mask, lsb_mask);
991
992    // We OR with H_r and XOR with the accumulator.
993    mask.iter_mut()
994        .zip(msb_mask.iter())
995        .zip(acc.iter())
996        .for_each(|((mask_word, &msb_word), &acc_word)| {
997            *mask_word = (*mask_word | msb_word) ^ acc_word
998        });
999
1000    // Finally, we use mask to select the right bits from x and y and store the result.
1001    x.iter_mut()
1002        .zip(y.iter())
1003        .zip(mask.iter())
1004        .for_each(|((x_word, &y_word), &mask_word)| {
1005            *x_word = *x_word ^ ((*x_word ^ y_word) & mask_word);
1006        });
1007}
1008
1009fn highest_power_of_2_dividing(n: NonZeroUsize) -> u32 {
1010    1 << n.trailing_zeros()
1011}
1012
1013#[test]
1014fn test_highest_power_of_2_dividing() {
1015    let powers_of_2: Vec<_> = (1..=20)
1016        .map(|n| highest_power_of_2_dividing(n.try_into().unwrap()))
1017        .collect();
1018    assert_eq!(
1019        powers_of_2,
1020        vec![1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 1, 2, 1, 16, 1, 2, 1, 4]
1021    );
1022}