Skip to main content

card_est_array/impls/
hyper_log_log.rs

1/*
2 * SPDX-FileCopyrightText: 2024 Matteo Dell'Acqua
3 * SPDX-FileCopyrightText: 2025 Sebastiano Vigna
4 *
5 * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
6 */
7
8use num_primitive::{PrimitiveNumber, PrimitiveNumberAs};
9use std::hash::*;
10use std::num::NonZeroUsize;
11use std::{borrow::Borrow, f64::consts::LN_2};
12
13use crate::traits::Word;
14
15use crate::traits::{EstimationLogic, MergeEstimationLogic, SliceEstimationLogic};
16
17use super::DefaultEstimator;
18
19/// The type returned by the hash function.
20type HashResult = u64;
21
22/// Estimator logic implementing the HyperLogLog algorithm.
23///
24/// Instances are built using [`HyperLogLogBuilder`], which provides convenient
25/// ways to set the internal parameters.
26///
27/// Note that `T` can be any type satisfying the [`Hash`] trait. The parameter
28/// `H` makes it possible to select a hashing algorithm, and `W` is the unsigned
29/// type used to store backends.
30///
31/// An important constraint is that `W` must be able to represent exactly the
32/// backend of an estimator. While usually `usize` will work (and it is the default
33/// type chosen by [`new`](HyperLogLogBuilder::new)), with odd register sizes
34/// and a small number of registers it might be necessary to select a smaller
35/// type, resulting in slower merges. For example, using 16 5-bit registers one
36/// needs to use `u16`, whereas for 16 6-bit registers `u32` will be sufficient.
37///
38/// Formally, this means that `W::BITS` must divide
39/// `(1 << log_2_num_registers) * register_size` (using
40/// [`HyperLogLog::register_size(num_elements)`](HyperLogLog::register_size)).
41/// [`HyperLogLogBuilder::min_log_2_num_reg`] returns the minimum value for
42/// `log_2_num_registers` that satisfies this property.
43#[derive(Debug, PartialEq)]
44pub struct HyperLogLog<T, H, W = usize> {
45    build_hasher: H,
46    register_size: usize,
47    num_registers_minus_1: HashResult,
48    log_2_num_registers: usize,
49    sentinel_mask: HashResult,
50    num_registers: usize,
51    pub(super) words_per_estimator: usize,
52    alpha_m_m: f64,
53    msb_mask: Box<[W]>,
54    lsb_mask: Box<[W]>,
55    _marker: std::marker::PhantomData<T>,
56}
57
58// We implement Clone manually because we do not want to require that T is
59// Clone.
60impl<T, H: Clone, W: Clone> Clone for HyperLogLog<T, H, W> {
61    fn clone(&self) -> Self {
62        Self {
63            build_hasher: self.build_hasher.clone(),
64            register_size: self.register_size,
65            num_registers_minus_1: self.num_registers_minus_1,
66            log_2_num_registers: self.log_2_num_registers,
67            sentinel_mask: self.sentinel_mask,
68            num_registers: self.num_registers,
69            words_per_estimator: self.words_per_estimator,
70            alpha_m_m: self.alpha_m_m,
71            msb_mask: self.msb_mask.clone(),
72            lsb_mask: self.lsb_mask.clone(),
73            _marker: std::marker::PhantomData,
74        }
75    }
76}
77
78impl<T, H: Clone, W: Word> HyperLogLog<T, H, W> {
79    /// Returns the value contained in a register of a given backend.
80    #[inline(always)]
81    fn get_register_unchecked(&self, backend: impl AsRef<[W]>, index: usize) -> W {
82        let backend = backend.as_ref();
83        let bits = W::BITS as usize;
84        let bit_width = self.register_size;
85        let mask = W::MAX >> (bits - bit_width);
86        let pos = index * bit_width;
87        let word_index = pos / bits;
88        let bit_index = pos % bits;
89
90        if bit_index + bit_width <= bits {
91            (unsafe { *backend.get_unchecked(word_index) } >> bit_index) & mask
92        } else {
93            ((unsafe { *backend.get_unchecked(word_index) } >> bit_index)
94                | (unsafe { *backend.get_unchecked(word_index + 1) } << (bits - bit_index)))
95                & mask
96        }
97    }
98
99    /// Sets the value contained in a register of a given backend.
100    #[inline(always)]
101    fn set_register_unchecked(&self, mut backend: impl AsMut<[W]>, index: usize, new_value: W) {
102        let backend = backend.as_mut();
103        let bits = W::BITS as usize;
104        let bit_width = self.register_size;
105        let mask = W::MAX >> (bits - bit_width);
106        let pos = index * bit_width;
107        let word_index = pos / bits;
108        let bit_index = pos % bits;
109
110        if bit_index + bit_width <= bits {
111            let mut word = unsafe { *backend.get_unchecked_mut(word_index) };
112            word &= !(mask << bit_index);
113            word |= new_value << bit_index;
114            unsafe { *backend.get_unchecked_mut(word_index) = word };
115        } else {
116            let mut word = unsafe { *backend.get_unchecked_mut(word_index) };
117            word &= (W::ONE << bit_index) - W::ONE;
118            word |= new_value << bit_index;
119            unsafe { *backend.get_unchecked_mut(word_index) = word };
120
121            let mut word = unsafe { *backend.get_unchecked_mut(word_index + 1) };
122            word &= !(mask >> (bits - bit_index));
123            word |= new_value >> (bits - bit_index);
124            unsafe { *backend.get_unchecked_mut(word_index + 1) = word };
125        }
126    }
127}
128
129impl<T: Hash, H: BuildHasher + Clone, W: Word> SliceEstimationLogic<W> for HyperLogLog<T, H, W>
130where
131    u32: PrimitiveNumberAs<W>,
132{
133    #[inline(always)]
134    fn backend_len(&self) -> usize {
135        self.words_per_estimator
136    }
137}
138
139impl<T: Hash, H: BuildHasher + Clone, W: Word> EstimationLogic for HyperLogLog<T, H, W>
140where
141    u32: PrimitiveNumberAs<W>,
142{
143    type Item = T;
144    type Backend = [W];
145    type Estimator<'a>
146        = DefaultEstimator<Self, &'a Self, Box<[W]>>
147    where
148        T: 'a,
149        W: 'a,
150        H: 'a;
151
152    fn new_estimator(&self) -> Self::Estimator<'_> {
153        Self::Estimator::new(
154            self,
155            vec![W::ZERO; self.words_per_estimator].into_boxed_slice(),
156        )
157    }
158
159    fn add(&self, backend: &mut Self::Backend, element: impl Borrow<T>) {
160        let x = self.build_hasher.hash_one(element.borrow());
161        let j = x & self.num_registers_minus_1;
162        // The number of trailing zeroes is certainly expressible in
163        // a variable of type W
164        let r = ((x >> self.log_2_num_registers) | self.sentinel_mask)
165            .trailing_zeros()
166            .as_to();
167        let register = j as usize;
168
169        debug_assert!(r < (W::ONE << self.register_size) - W::ONE);
170        debug_assert!(register < self.num_registers);
171
172        let current_value = self.get_register_unchecked(&*backend, register);
173        let candidate_value = r + W::ONE;
174        let new_value = std::cmp::max(current_value, candidate_value);
175        if current_value != new_value {
176            self.set_register_unchecked(backend, register, new_value);
177        }
178    }
179
180    fn estimate(&self, backend: &[W]) -> f64 {
181        let mut harmonic_mean = 0.0;
182        let mut zeroes = 0;
183
184        for i in 0..self.num_registers {
185            // Registers are at most 8 bits wide
186            let value: u32 = self.get_register_unchecked(backend, i).as_to();
187            if value == 0 {
188                zeroes += 1;
189            }
190            harmonic_mean += 1.0 / (1_u64 << value) as f64;
191        }
192
193        let mut estimate = self.alpha_m_m / harmonic_mean;
194        if zeroes != 0 && estimate < 2.5 * self.num_registers as f64 {
195            estimate = self.num_registers as f64 * (self.num_registers as f64 / zeroes as f64).ln();
196        }
197        estimate
198    }
199
200    #[inline(always)]
201    fn clear(&self, backend: &mut [W]) {
202        backend.fill(W::ZERO);
203    }
204
205    #[inline(always)]
206    fn set(&self, dst: &mut [W], src: &[W]) {
207        debug_assert_eq!(dst.len(), src.len());
208        dst.copy_from_slice(src);
209    }
210}
211
212/// Helper for merge operations with [`HyperLogLog`] logic.
213pub struct HyperLogLogHelper<W> {
214    acc: Vec<W>,
215    mask: Vec<W>,
216}
217
218impl<T: Hash, H: BuildHasher + Clone, W: Word> MergeEstimationLogic for HyperLogLog<T, H, W>
219where
220    u32: PrimitiveNumberAs<W>,
221{
222    type Helper = HyperLogLogHelper<W>;
223
224    fn new_helper(&self) -> Self::Helper {
225        HyperLogLogHelper {
226            acc: vec![W::ZERO; self.words_per_estimator],
227            mask: vec![W::ZERO; self.words_per_estimator],
228        }
229    }
230
231    #[inline(always)]
232    fn merge_with_helper(&self, dst: &mut [W], src: &[W], helper: &mut Self::Helper) {
233        merge_hyperloglog_bitwise(
234            dst,
235            src,
236            self.msb_mask.as_ref(),
237            self.lsb_mask.as_ref(),
238            &mut helper.acc,
239            &mut helper.mask,
240            self.register_size,
241        );
242    }
243}
244
245/// Builds a [`HyperLogLog`] cardinality-estimator logic.
246#[derive(Debug, Clone)]
247pub struct HyperLogLogBuilder<H, W = usize> {
248    build_hasher: H,
249    log_2_num_registers: usize,
250    num_elements: usize,
251    _marker: std::marker::PhantomData<W>,
252}
253
254impl HyperLogLogBuilder<BuildHasherDefault<DefaultHasher>> {
255    /// Creates a new builder for a [`HyperLogLog`] logic with the default word
256    /// type (the fixed-size equivalent of `usize`).
257    ///
258    /// # Panics
259    ///
260    /// If `n` is zero.
261    pub const fn new(num_elements: usize) -> Self {
262        assert!(
263            num_elements > 0,
264            "the upper bound on the number of distinct elements must be positive"
265        );
266        Self {
267            build_hasher: BuildHasherDefault::new(),
268            log_2_num_registers: 4,
269            num_elements,
270            _marker: std::marker::PhantomData,
271        }
272    }
273}
274
275fn min_alignment(bits: usize) -> String {
276    if bits % 128 == 0 {
277        "u128"
278    } else if bits % 64 == 0 {
279        "u64"
280    } else if bits % 32 == 0 {
281        "u32"
282    } else if bits % 16 == 0 {
283        "u16"
284    } else {
285        "u8"
286    }
287    .to_string()
288}
289
290impl HyperLogLog<(), (), ()> {
291    /// Returns the logarithm of the number of registers per estimator that are
292    /// necessary to attain a given relative standard deviation.
293    ///
294    /// # Arguments
295    /// * `rsd`: the relative standard deviation to be attained.
296    pub fn log_2_num_of_registers(rsd: f64) -> usize {
297        ((1.106 / rsd).powi(2)).log2().ceil() as usize
298    }
299
300    /// Returns the relative standard deviation corresponding to a given number
301    /// of registers per estimator.
302    ///
303    /// # Arguments
304    ///
305    /// * `log_2_num_registers`: the logarithm of the number of registers per
306    ///   estimator.
307    pub fn rel_std(log_2_num_registers: usize) -> f64 {
308        let tmp = match log_2_num_registers {
309            4 => 1.106,
310            5 => 1.070,
311            6 => 1.054,
312            7 => 1.046,
313            _ => 1.04,
314        };
315        tmp / ((1 << log_2_num_registers) as f64).sqrt()
316    }
317
318    /// Returns the register size in bits, given an upper bound on the number of
319    /// distinct elements.
320    ///
321    /// # Arguments
322    /// * `num_elements`: an upper bound on the number of distinct elements.
323    pub fn register_size(num_elements: usize) -> usize {
324        std::cmp::max(
325            5,
326            (((num_elements as f64).ln() / LN_2) / LN_2).ln().ceil() as usize,
327        )
328    }
329}
330
331impl<H, W: Word> HyperLogLogBuilder<H, W> {
332    /// Sets the desired relative standard deviation.
333    ///
334    /// ## Note
335    ///
336    /// This is a high-level alternative to [`Self::log_2_num_reg`]. Calling one
337    /// after the other invalidates the work done by the first one.
338    ///
339    /// # Arguments
340    /// * `rsd`: the relative standard deviation to be attained.
341    ///
342    /// # Panics
343    ///
344    /// If the resulting number of registers is less than 16 (i.e., `rsd` is
345    /// too large).
346    pub fn rsd(self, rsd: f64) -> Self {
347        self.log_2_num_reg(HyperLogLog::log_2_num_of_registers(rsd))
348    }
349
350    /// Sets the base-2 logarithm of the number of registers.
351    ///
352    /// ## Note
353    /// This is a low-level alternative to [`Self::rsd`]. Calling one after the
354    /// other invalidates the work done by the first one.
355    ///
356    /// # Arguments
357    /// * `log_2_num_registers`: the logarithm of the number of registers per
358    ///   estimator.
359    ///
360    /// # Panics
361    ///
362    /// If `log_2_num_registers` is less than 4.
363    pub const fn log_2_num_reg(mut self, log_2_num_registers: usize) -> Self {
364        assert!(
365            log_2_num_registers >= 4,
366            "the logarithm of the number of registers per estimator should be at least 4"
367        );
368        self.log_2_num_registers = log_2_num_registers;
369        self
370    }
371
372    /// Returns the minimum value allowed for [`Self::log_2_num_reg`] given the current value of
373    /// [`Self::num_elements`].
374    pub fn min_log_2_num_reg(&self) -> usize {
375        let register_size = HyperLogLog::register_size(self.num_elements);
376        let register_size = NonZeroUsize::try_from(register_size).expect("register_size is zero");
377        let min_num_regs = W::BITS / highest_power_of_2_dividing(register_size);
378        assert_eq!(min_num_regs, min_num_regs.next_power_of_two());
379        min_num_regs.trailing_zeros() as usize // log2(min_num_regs)
380    }
381
382    /// Sets the type `W` to use to represent backends.
383    ///
384    /// Note that the returned builder will have a different type if `W2` is
385    /// different from `W`.
386    ///
387    /// See the [`logic documentation`](HyperLogLog) for the limitations on the
388    /// choice of `W2`.
389    pub fn word_type<W2>(self) -> HyperLogLogBuilder<H, W2> {
390        HyperLogLogBuilder {
391            num_elements: self.num_elements,
392            build_hasher: self.build_hasher,
393            log_2_num_registers: self.log_2_num_registers,
394            _marker: std::marker::PhantomData,
395        }
396    }
397
398    /// Sets the upper bound on the number of elements.
399    ///
400    /// # Panics
401    ///
402    /// If `n` is zero.
403    pub const fn num_elements(mut self, num_elements: usize) -> Self {
404        assert!(
405            num_elements > 0,
406            "the upper bound on the number of distinct elements must be positive"
407        );
408        self.num_elements = num_elements;
409        self
410    }
411
412    /// Sets the [`BuildHasher`] to use.
413    ///
414    /// Using this method you can select a specific hasher based on one or more
415    /// seeds.
416    pub fn build_hasher<H2>(self, build_hasher: H2) -> HyperLogLogBuilder<H2, W> {
417        HyperLogLogBuilder {
418            num_elements: self.num_elements,
419            log_2_num_registers: self.log_2_num_registers,
420            build_hasher,
421            _marker: std::marker::PhantomData,
422        }
423    }
424
425    /// Builds the logic.
426    ///
427    /// The type of objects the estimators keep track of is defined here by `T`,
428    /// but it is usually inferred by the compiler.
429    ///
430    /// # Panics
431    ///
432    /// If the estimator size in bits is not divisible by the bit width of `W`.
433    pub fn build<T>(self) -> HyperLogLog<T, H, W> {
434        let bits = W::BITS as usize;
435        let log_2_num_registers = self.log_2_num_registers;
436        let num_elements = self.num_elements;
437        let number_of_registers = 1 << log_2_num_registers;
438        let register_size = HyperLogLog::register_size(num_elements);
439        let sentinel_mask = 1 << ((1 << register_size) - 2);
440        let alpha = match log_2_num_registers {
441            4 => 0.673,
442            5 => 0.697,
443            6 => 0.709,
444            _ => 0.7213 / (1.0 + 1.079 / number_of_registers as f64),
445        };
446        let num_registers_minus_1 = (number_of_registers - 1) as HashResult;
447
448        let est_size_in_bits = number_of_registers * register_size;
449
450        // This ensures estimators are always aligned to W
451        assert!(
452            est_size_in_bits % bits == 0,
453            "W should allow estimator backends to be aligned. Use {} or smaller unsigned integer types; or increase log_2_num_registers to be >= {}",
454            min_alignment(est_size_in_bits),
455            self.min_log_2_num_reg(),
456        );
457        let est_size_in_words = est_size_in_bits / bits;
458
459        let msb_mask = build_register_mask(
460            est_size_in_words,
461            register_size,
462            W::ONE << (register_size - 1),
463        );
464        let lsb_mask = build_register_mask(est_size_in_words, register_size, W::ONE);
465
466        HyperLogLog {
467            num_registers: number_of_registers,
468            num_registers_minus_1,
469            log_2_num_registers,
470            register_size,
471            alpha_m_m: alpha * (number_of_registers as f64).powi(2),
472            sentinel_mask,
473            build_hasher: self.build_hasher,
474            msb_mask,
475            lsb_mask,
476            words_per_estimator: est_size_in_words,
477            _marker: std::marker::PhantomData,
478        }
479    }
480}
481
482impl<T, H, W> std::fmt::Display for HyperLogLog<T, H, W> {
483    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
484        write!(
485            f,
486            "HyperLogLog with relative standard deviation: {}% ({} registers/estimator, {} bits/register, {} bytes/estimator)",
487            100.0 * HyperLogLog::rel_std(self.log_2_num_registers),
488            self.num_registers,
489            self.register_size,
490            (self.num_registers * self.register_size) / 8
491        )
492    }
493}
494
495/// Builds a mask of `num_words` words by repeating a `register_size`-bit
496/// pattern across all register positions.
497fn build_register_mask<W: Word>(num_words: usize, register_size: usize, pattern: W) -> Box<[W]> {
498    let bits = W::BITS as usize;
499    let total_bits = num_words * bits;
500    let mut result = vec![W::ZERO; num_words];
501    let mut bit_pos = 0;
502    while bit_pos < total_bits {
503        let word_index = bit_pos / bits;
504        let bit_index = bit_pos % bits;
505        result[word_index] |= pattern << bit_index;
506        if bit_index + register_size > bits && word_index + 1 < num_words {
507            result[word_index + 1] |= pattern >> (bits - bit_index);
508        }
509        bit_pos += register_size;
510    }
511    result.into_boxed_slice()
512}
513
514/// Performs a multiple precision subtraction, leaving the result in the first operand.
515/// The operands MUST have the same length.
516///
517/// # Arguments
518/// * `x`: the first operand. This will contain the final result.
519/// * `y`: the second operand that will be subtracted from `x`.
520#[inline(always)]
521pub(super) fn subtract<W: Word>(x: &mut [W], y: &[W]) {
522    debug_assert_eq!(x.len(), y.len());
523    let mut borrow = false;
524
525    for (x_word, &y) in x.iter_mut().zip(y.iter()) {
526        let mut x = *x_word;
527        if !borrow {
528            borrow = x < y;
529        } else if x != W::ZERO {
530            x = x.wrapping_sub(W::ONE);
531            borrow = x < y;
532        } else {
533            x = x.wrapping_sub(W::ONE);
534        }
535        *x_word = x.wrapping_sub(y);
536    }
537}
538
539fn merge_hyperloglog_bitwise<W: Word>(
540    mut x: impl AsMut<[W]>,
541    y: impl AsRef<[W]>,
542    msb_mask: impl AsRef<[W]>,
543    lsb_mask: impl AsRef<[W]>,
544    acc: &mut Vec<W>,
545    mask: &mut Vec<W>,
546    register_size: usize,
547) {
548    let x = x.as_mut();
549    let y = y.as_ref();
550    let msb_mask = msb_mask.as_ref();
551    let lsb_mask = lsb_mask.as_ref();
552
553    debug_assert_eq!(x.len(), y.len());
554    debug_assert_eq!(x.len(), msb_mask.len());
555    debug_assert_eq!(x.len(), lsb_mask.len());
556
557    let register_size_minus_1 = register_size - 1;
558    let num_words_minus_1 = x.len() - 1;
559    let shift_register_size_minus_1 = W::BITS as usize - register_size_minus_1;
560
561    acc.clear();
562    mask.clear();
563
564    /* We work in two phases. Let H_r (msb_mask) be the mask with the
565     * highest bit of each register (of size r) set, and L_r (lsb_mask)
566     * be the mask with the lowest bit of each register set.
567     * We describe the algorithm on a single word.
568     *
569     * In the first phase we perform an unsigned strict register-by-register
570     * comparison of x and y, using the formula
571     *
572     * z = ((((y | H_r) - (x & !H_r)) | (y ^ x)) ^ (y | !x)) & H_r
573     *
574     * Then, we generate a register-by-register mask of all ones or
575     * all zeroes, depending on the result of the comparison, using the
576     * formula
577     *
578     * (((z >> r-1 | H_r) - L_r) | H_r) ^ z
579     *
580     * At that point, it is trivial to select from x and y the right values.
581     */
582
583    // We load y | H_r into the accumulator.
584    acc.extend(
585        y.iter()
586            .zip(msb_mask)
587            .map(|(&y_word, &msb_word)| y_word | msb_word),
588    );
589
590    // We load x & !H_r into mask as temporary storage.
591    mask.extend(
592        x.iter()
593            .zip(msb_mask)
594            .map(|(&x_word, &msb_word)| x_word & !msb_word),
595    );
596
597    // We subtract x & !H_r, using mask as temporary storage
598    subtract(acc, mask);
599
600    // We OR with y ^ x, XOR with (y | !x), and finally AND with H_r.
601    acc.iter_mut()
602        .zip(x.iter())
603        .zip(y.iter())
604        .zip(msb_mask.iter())
605        .for_each(|(((acc_word, &x_word), &y_word), &msb_word)| {
606            *acc_word = ((*acc_word | (y_word ^ x_word)) ^ (y_word | !x_word)) & msb_word
607        });
608
609    // We shift by register_size - 1 places and put the result into mask.
610    {
611        let (mask_last, mask_slice) = mask.split_last_mut().unwrap();
612        let (&msb_last, msb_slice) = msb_mask.split_last().unwrap();
613        mask_slice
614            .iter_mut()
615            .zip(acc[0..num_words_minus_1].iter())
616            .zip(acc[1..].iter())
617            .zip(msb_slice.iter())
618            .rev()
619            .for_each(|(((mask_word, &acc_word), &next_acc_word), &msb_word)| {
620                // W is always unsigned so the shift is always with a 0
621                *mask_word = (acc_word >> register_size_minus_1)
622                    | (next_acc_word << shift_register_size_minus_1)
623                    | msb_word
624            });
625        *mask_last = (acc[num_words_minus_1] >> register_size_minus_1) | msb_last;
626    }
627
628    // We subtract L_r from mask.
629    subtract(mask, lsb_mask);
630
631    // We OR with H_r and XOR with the accumulator.
632    mask.iter_mut()
633        .zip(msb_mask.iter())
634        .zip(acc.iter())
635        .for_each(|((mask_word, &msb_word), &acc_word)| {
636            *mask_word = (*mask_word | msb_word) ^ acc_word
637        });
638
639    // Finally, we use mask to select the right bits from x and y and store the result.
640    x.iter_mut()
641        .zip(y.iter())
642        .zip(mask.iter())
643        .for_each(|((x_word, &y_word), &mask_word)| {
644            *x_word = *x_word ^ ((*x_word ^ y_word) & mask_word);
645        });
646}
647
648fn highest_power_of_2_dividing(n: NonZeroUsize) -> u32 {
649    1 << n.trailing_zeros()
650}
651
652#[test]
653fn test_highest_power_of_2_dividing() {
654    let powers_of_2: Vec<_> = (1..=20)
655        .map(|n| highest_power_of_2_dividing(n.try_into().unwrap()))
656        .collect();
657    assert_eq!(
658        powers_of_2,
659        vec![1, 2, 1, 4, 1, 2, 1, 8, 1, 2, 1, 4, 1, 2, 1, 16, 1, 2, 1, 4]
660    );
661}