hyperloglog_rs/
iter.rs

1//! This module defines a trait and an implementation for estimating the cardinality of an iterator
2//! using a HyperLogLog data structure.
3//!
4//! # Example
5//! You can estimate the cardinality of an iterator using the `estimate_cardinality` method.
6//! ```
7//! use hyperloglog_rs::prelude::*;
8//!
9//! let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
10//! let cardinality_estimate = v.iter().estimate_cardinality::<Precision12, 5>();
11//! assert!((cardinality_estimate - 10.0).abs() < 1.0);
12//! ```
13//!
14//! You can merge multiple HyperLogLog counters from iterators using the `union` method.
15//!
16//! ```
17//! use hyperloglog_rs::prelude::*;
18//!
19//! let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
20//! let hll: HyperLogLog<Precision12, 6> = v.into_iter().map(|index|{
21//!     HyperLogLog::from(index)
22//! }).union();
23//! let cardinality_estimate = hll.estimate_cardinality();
24//! assert!((cardinality_estimate - 10.0).abs() < 1.0);
25//! ```
26use core::hash::Hash;
27
28use crate::prelude::*;
29
30/// A trait for estimating the cardinality of an iterator.
31pub trait EstimateIterCardinality {
32    /// Estimate the cardinality of the iterator.
33    ///
34    /// # Arguments
35    ///
36    /// * `self` - An iterator over elements to estimate the cardinality of.
37    ///
38    /// # Type parameters
39    ///
40    /// * `PRECISION` - The precision to use for the HyperLogLog counter.
41    /// * `BITS` - The number of bits per register in the HyperLogLog counter.
42    ///
43    fn estimate_cardinality<P: Precision + WordType<BITS>, const BITS: usize>(self) -> f32;
44}
45
46impl<I, T: Hash> EstimateIterCardinality for I
47where
48    I: Iterator<Item = T>,
49{
50    fn estimate_cardinality<P: Precision + WordType<BITS>, const BITS: usize>(self) -> f32 {
51        let hll: HyperLogLog<P, BITS> = self.collect();
52        hll.estimate_cardinality()
53    }
54}
55
56pub trait HyperLogLogIterator<P: Precision + WordType<BITS>, const BITS: usize> {
57    /// Returns a HyperLogLog that is the union of all HyperLogLogs in the iterator.
58    ///
59    /// # Example
60    ///
61    /// ```rust
62    /// use hyperloglog_rs::prelude::*;
63    ///
64    /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
65    /// hll1.insert(&1);
66    /// hll1.insert(&2);
67    ///
68    /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
69    /// hll2.insert(&3);
70    /// hll2.insert(&4);
71    ///
72    /// let mut hll3 = HyperLogLog::<Precision12, 6>::default();
73    /// hll3.insert(&5);
74    /// hll3.insert(&6);
75    ///
76    /// let hll_union = vec![hll1, hll2, hll3].iter().union();
77    ///
78    /// assert!(hll_union.estimate_cardinality() - 6.0 < 1.0, "Expected 6.0, got {}", hll_union.estimate_cardinality());
79    /// ```
80    fn union(self) -> HyperLogLog<P, BITS>;
81}
82
83impl<P: Precision + WordType<BITS>, const BITS: usize, I, C> HyperLogLogIterator<P, BITS> for I
84where
85    I: Iterator<Item = C>,
86    HyperLogLog<P, BITS>: BitOr<C, Output = HyperLogLog<P, BITS>>,
87{
88    #[inline(always)]
89    fn union(self) -> HyperLogLog<P, BITS> {
90        self.fold(HyperLogLog::default(), |acc, hll| acc | hll)
91    }
92}