hyperloglog_rs/
hyperloglog_trait.rs

1use crate::array_default::ArrayIter;
2use crate::bias::BIAS_DATA;
3use crate::estimated_union_cardinalities::EstimatedUnionCardinalities;
4use crate::precisions::{Precision, WordType};
5use crate::prelude::*;
6use crate::prelude::{linear_counting_threshold, MaxMin};
7use crate::primitive::Primitive;
8use crate::raw_estimate_data::RAW_ESTIMATE_DATA;
9use crate::sip::Sip64Scalar;
10use crate::utils::{ceil, get_alpha};
11use core::hash::Hash;
12use core::hash::Hasher;
13
14pub trait HyperLogLogTrait<P: Precision + WordType<BITS>, const BITS: usize>: Sized {
15    /// The threshold value used in the small range correction of the HyperLogLog algorithm.
16    const INTERMEDIATE_RANGE_CORRECTION_THRESHOLD: f32 = 5.0_f32 * (P::NUMBER_OF_REGISTERS as f32);
17
18    const LINEAR_COUNT_THRESHOLD: f32 = linear_counting_threshold(P::EXPONENT);
19
20    /// The mask used to obtain the lower register bits in the HyperLogLog algorithm.
21    const LOWER_REGISTER_MASK: u32 = (1 << BITS) - 1;
22
23    /// The mask used to obtain the lower precision bits in the HyperLogLog algorithm.
24    const LOWER_PRECISION_MASK: usize = P::NUMBER_OF_REGISTERS - 1;
25    const NOT_LOWER_PRECISION_MASK: u64 = !Self::LOWER_PRECISION_MASK as u64;
26
27    const NUMBER_OF_PADDING_BITS: usize = 32 - (32 / BITS) * BITS;
28
29    /// The mask representing the bits that are never used in the u32 word in the cases
30    /// where the number of bits is not a divisor of 32, such as 5 or 6.
31    /// We set the LEADING bits as the padding bits, the unused one, so the leftmost bits.
32    const PADDING_BITS_MASK: u32 = !((1_u64 << (32 - Self::NUMBER_OF_PADDING_BITS)) - 1) as u32;
33
34    const NUMBER_OF_PADDING_REGISTERS: usize = ceil(P::NUMBER_OF_REGISTERS, 32 / BITS)
35        * Self::NUMBER_OF_REGISTERS_IN_WORD
36        - P::NUMBER_OF_REGISTERS;
37
38    /// The mask representing the bits that are never used in the last u32 word in the cases
39    /// where the number of registers is not a multiple of the number of registers in a word.
40    const LAST_WORD_PADDING_BITS_MASK: u32 = !((1_u64
41        << (32 - BITS * Self::NUMBER_OF_PADDING_REGISTERS - Self::NUMBER_OF_PADDING_BITS))
42        - 1_u64) as u32;
43
44    /// The mask used to obtain the upper precision bits in the HyperLogLog algorithm.
45    const UPPER_PRECISION_MASK: usize = Self::LOWER_PRECISION_MASK << (64 - P::EXPONENT);
46
47    /// The number of registers that can fit in a single 32-bit word in the HyperLogLog algorithm.
48    const NUMBER_OF_REGISTERS_IN_WORD: usize = 32 / BITS;
49
50    fn adjust_estimate(mut raw_estimate: f32) -> f32 {
51        // Apply the final scaling factor to obtain the estimate of the cardinality
52        raw_estimate = get_alpha(P::NUMBER_OF_REGISTERS)
53            * (P::NUMBER_OF_REGISTERS * P::NUMBER_OF_REGISTERS) as f32
54            / raw_estimate;
55
56        // Apply the small range correction factor if the raw estimate is below the threshold
57        // and there are zero registers in the counter.
58        if raw_estimate <= Self::INTERMEDIATE_RANGE_CORRECTION_THRESHOLD {
59            // Get a reference to raw estimates/biases for precision.
60            let biases = BIAS_DATA[P::EXPONENT - 4];
61            let estimates = RAW_ESTIMATE_DATA[P::EXPONENT - 4];
62
63            // Raw estimate is first/last in estimates. Return the first/last bias.
64            if raw_estimate <= estimates[0] {
65                return raw_estimate - biases[0];
66            }
67
68            if estimates[estimates.len() - 1] <= raw_estimate {
69                return raw_estimate - biases[biases.len() - 1];
70            }
71
72            // Raw estimate is somewhere in between estimates.
73            // Binary search for the calculated raw estimate.
74            //
75            // Here we unwrap because neither the values in `estimates`
76            // nor `raw` are going to be NaN.
77            let partition_index = estimates.partition_point(|est| *est <= raw_estimate);
78
79            // Return linear interpolation between raw's neighboring points.
80            let ratio = (raw_estimate - estimates[partition_index - 1])
81                / (estimates[partition_index] - estimates[partition_index - 1]);
82
83            // Calculate bias.
84            raw_estimate
85                - (biases[partition_index - 1]
86                    + ratio * (biases[partition_index] - biases[partition_index - 1]))
87        } else {
88            raw_estimate
89        }
90    }
91
92    fn adjust_estimate_with_zeros(raw_estimate: f32, number_of_zeros: usize) -> f32 {
93        if number_of_zeros > 0 {
94            let low_range_correction = P::SMALL_CORRECTIONS[number_of_zeros - 1];
95            if low_range_correction <= Self::LINEAR_COUNT_THRESHOLD {
96                return low_range_correction;
97            }
98        }
99        Self::adjust_estimate(raw_estimate)
100    }
101
102    /// Returns whether the cardinality of this HLL will be computed using the small-range correction.
103    ///
104    /// # Implementation details
105    /// The small-range correction is used when the cardinality of the set is small enough that the
106    /// linear counting algorithm can be used to estimate the cardinality. The threshold for using
107    /// the linear counting algorithm is determined by the number of registers in the HLL counter.
108    fn use_small_range_correction(&self) -> bool {
109        self.get_number_of_zero_registers() > 0
110            && P::SMALL_CORRECTIONS[self.get_number_of_zero_registers() - 1]
111                <= Self::LINEAR_COUNT_THRESHOLD
112    }
113
114    #[inline(always)]
115    /// Estimates the cardinality of the set based on the HLL counter data.
116    ///
117    /// # Example
118    ///
119    /// ```
120    /// # use hyperloglog_rs::prelude::*;
121    /// let mut hll = HyperLogLog::<Precision9, 5>::default();
122    /// let elements = vec![1, 2, 3, 4, 5];
123    /// for element in &elements {
124    ///     hll.insert(element);
125    /// }
126    /// let estimated_cardinality = hll.estimate_cardinality();
127    /// assert!(estimated_cardinality >= elements.len() as f32 * 0.9 &&
128    ///         estimated_cardinality <= elements.len() as f32 * 1.1);
129    /// ```
130    ///
131    /// # Returns
132    /// * `f32` - The estimated cardinality of the set.
133    fn estimate_cardinality(&self) -> f32 {
134        if self.get_number_of_zero_registers() > 0 {
135            let low_range_correction =
136                P::SMALL_CORRECTIONS[self.get_number_of_zero_registers() - 1];
137            if low_range_correction <= Self::LINEAR_COUNT_THRESHOLD {
138                return low_range_correction;
139            }
140        }
141
142        let mut raw_estimate = 0.0;
143
144        for &word in self.get_words().iter_elements() {
145            let mut partial: f32 = 0.0;
146            for i in 0..Self::NUMBER_OF_REGISTERS_IN_WORD {
147                let register = (word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
148                let two_to_minus_register = (127 - register) << 23;
149                partial += f32::from_le_bytes(two_to_minus_register.to_le_bytes());
150            }
151            raw_estimate += partial;
152        }
153
154        raw_estimate -= Self::get_number_of_padding_registers() as f32;
155
156        Self::adjust_estimate(raw_estimate)
157    }
158
159    #[inline(always)]
160    /// Returns an estimate of the cardinality of the union of two HyperLogLog counters.
161    ///
162    /// This method calculates an estimate of the cardinality of the union of two HyperLogLog counters
163    /// using the raw estimation values of each counter. It combines the estimation values by iterating
164    /// over the register words of both counters and performing necessary calculations.
165    ///
166    /// # Arguments
167    /// * `other`: A reference to the other HyperLogLog counter.
168    ///
169    /// # Returns
170    /// An estimation of the cardinality of the union of the two HyperLogLog counters.
171    ///
172    /// # Example
173    ///
174    /// ```
175    /// use hyperloglog_rs::prelude::*;
176    ///
177    /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
178    /// hll1.insert(&1);
179    /// hll1.insert(&2);
180    ///
181    /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
182    /// hll2.insert(&2);
183    /// hll2.insert(&3);
184    ///
185    /// let union_cardinality = hll1.estimate_union_cardinality(&hll2);
186    ///
187    /// assert!(union_cardinality >= 3.0 * 0.9 &&
188    ///         union_cardinality <= 3.0 * 1.1);
189    /// ```
190    fn estimate_union_cardinality(&self, other: &Self) -> f32 {
191        self.estimate_union_and_sets_cardinality(other)
192            .get_union_cardinality()
193    }
194
195    #[inline(always)]
196    /// Returns an estimate of the cardinality of the two HLL counters union.
197    fn estimate_union_and_sets_cardinality<F: Primitive<f32> + MaxMin>(
198        &self,
199        other: &Self,
200    ) -> EstimatedUnionCardinalities<F> {
201        let mut raw_union_estimate = 0.0;
202        let mut raw_left_estimate = 0.0;
203        let mut raw_right_estimate = 0.0;
204
205        let mut union_zeros = 0;
206        for (left_word, right_word) in self
207            .get_words()
208            .iter_elements()
209            .copied()
210            .zip(other.get_words().iter_elements().copied())
211        {
212            let mut union_partial: f32 = 0.0;
213            let mut left_partial: f32 = 0.0;
214            let mut right_partial: f32 = 0.0;
215            for i in 0..Self::NUMBER_OF_REGISTERS_IN_WORD {
216                let left_register = (left_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
217                let right_register = (right_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
218                let maximal_register = (left_register).max(right_register);
219                union_partial += f32::from_le_bytes(((127 - maximal_register) << 23).to_le_bytes());
220                left_partial += f32::from_le_bytes(((127 - left_register) << 23).to_le_bytes());
221                right_partial += f32::from_le_bytes(((127 - right_register) << 23).to_le_bytes());
222                union_zeros += (maximal_register == 0) as usize;
223            }
224            raw_union_estimate += union_partial;
225            raw_left_estimate += left_partial;
226            raw_right_estimate += right_partial;
227        }
228
229        union_zeros -= Self::get_number_of_padding_registers();
230
231        // We need to subtract the padding registers from the raw estimates
232        // as for each such register we are adding a one.
233        raw_union_estimate -= Self::get_number_of_padding_registers() as f32;
234        raw_left_estimate -= Self::get_number_of_padding_registers() as f32;
235        raw_right_estimate -= Self::get_number_of_padding_registers() as f32;
236
237        let mut union_estimate = F::reverse(Self::adjust_estimate_with_zeros(
238            raw_union_estimate,
239            union_zeros,
240        ));
241        let left_estimate = F::reverse(Self::adjust_estimate_with_zeros(
242            raw_left_estimate,
243            self.get_number_of_zero_registers(),
244        ));
245        let right_estimate = F::reverse(Self::adjust_estimate_with_zeros(
246            raw_right_estimate,
247            other.get_number_of_zero_registers(),
248        ));
249
250        // The union estimate cannot be higher than the sum of the left and right estimates.
251        union_estimate = union_estimate.get_min(left_estimate + right_estimate);
252
253        EstimatedUnionCardinalities::from((left_estimate, right_estimate, union_estimate))
254    }
255
256    #[inline(always)]
257    /// Returns an estimate of the cardinality of the intersection of two HyperLogLog counters.
258    ///
259    /// This method calculates an estimate of the cardinality of the intersection of two HyperLogLog
260    /// counters using the raw estimation values of each counter. It combines the estimation values by
261    /// iterating over the register words of both counters and performing necessary calculations.
262    ///
263    /// # Arguments
264    /// * `other`: A reference to the other HyperLogLog counter.
265    ///
266    /// # Returns
267    /// An estimation of the cardinality of the intersection of the two HyperLogLog counters.
268    ///
269    /// # Example
270    ///
271    /// ```
272    /// use hyperloglog_rs::prelude::*;
273    ///
274    /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
275    /// hll1.insert(&1);
276    /// hll1.insert(&2);
277    ///
278    /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
279    /// hll2.insert(&2);
280    /// hll2.insert(&3);
281    ///
282    /// let intersection_cardinality: f32 = hll1.estimate_intersection_cardinality(&hll2);
283    ///
284    /// assert!(intersection_cardinality >= 1.0 * 0.9 &&
285    ///         intersection_cardinality <= 1.0 * 1.1);
286    /// ```
287    fn estimate_intersection_cardinality<F: Primitive<f32>>(&self, other: &Self) -> F {
288        self.estimate_union_and_sets_cardinality(other)
289            .get_intersection_cardinality()
290    }
291
292    #[inline(always)]
293    /// Returns an estimate of the Jaccard index between two HyperLogLog counters.
294    ///
295    /// The Jaccard index is a measure of similarity between two sets. In the context of HyperLogLog
296    /// counters, it represents the ratio of the size of the intersection of the sets represented by
297    /// the counters to the size of their union. This method estimates the Jaccard index by utilizing
298    /// the cardinality estimation values of the intersection, left set, and right set.
299    ///
300    /// # Arguments
301    /// * `other`: A reference to the other HyperLogLog counter.
302    ///
303    /// # Returns
304    /// An estimation of the Jaccard index between the two HyperLogLog counters.
305    ///
306    /// # Example
307    ///
308    /// ```
309    /// use hyperloglog_rs::prelude::*;
310    ///
311    /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
312    /// hll1.insert(&1);
313    /// hll1.insert(&2);
314    /// hll1.insert(&3);
315    /// hll1.insert(&4);
316    ///
317    /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
318    /// hll2.insert(&2);
319    /// hll2.insert(&3);
320    /// hll2.insert(&5);
321    /// hll2.insert(&6);
322    ///
323    /// let jaccard_index = hll1.estimate_jaccard_index(&hll2);
324    ///
325    /// let expected = 2.0 / 6.0;
326    ///
327    /// assert!(jaccard_index >= expected * 0.9 &&
328    ///         jaccard_index <= expected * 1.1);
329    /// ```
330    fn estimate_jaccard_index(&self, other: &Self) -> f32 {
331        self.estimate_union_and_sets_cardinality(other)
332            .get_jaccard_index()
333    }
334
335    #[inline(always)]
336    /// Returns an estimate of the cardinality of the current HyperLogLog counter minus the provided one.
337    ///
338    /// # Arguments
339    /// * `other`: A reference to the other HyperLogLog counter.
340    ///
341    /// # Example
342    ///
343    /// ```
344    /// use hyperloglog_rs::prelude::*;
345    ///
346    /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
347    /// hll1.insert(&1);
348    /// hll1.insert(&2);
349    /// hll1.insert(&3);
350    /// hll1.insert(&4);
351    ///     
352    /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
353    /// hll2.insert(&2);
354    /// hll2.insert(&3);
355    /// hll2.insert(&5);
356    /// hll2.insert(&6);
357    ///
358    /// let difference_cardinality: f32 = hll1.estimate_difference_cardinality(&hll2);
359    ///
360    /// assert!(difference_cardinality >= 2.0 * 0.9 &&
361    ///        difference_cardinality <= 2.0 * 1.1);
362    /// ```
363    fn estimate_difference_cardinality<F: Primitive<f32> + One>(&self, other: &Self) -> F {
364        self.estimate_union_and_sets_cardinality(other)
365            .get_left_difference_cardinality()
366    }
367
368    #[inline(always)]
369    /// Returns the number of registers in the HLL counter.
370    ///
371    ///
372    /// # Example
373    ///
374    /// ```
375    /// # use hyperloglog_rs::prelude::*;
376    ///
377    /// // Create a new HLL counter with 128 registers
378    /// let mut hll = HyperLogLog::<Precision12, 4>::default();
379    /// assert_eq!(hll.len(), 4096);
380    ///
381    /// // Insert some elements into the HLL counter
382    /// hll.insert(&1);
383    /// hll.insert(&2);
384    /// hll.insert(&3);
385    /// assert_eq!(hll.len(), 1 << 12);
386    ///
387    /// // Merge another HLL counter with 128 registers
388    /// let mut hll2 = HyperLogLog::<Precision12, 4>::default();
389    /// hll2.insert(&4);
390    /// hll2.insert(&5);
391    /// hll |= hll2;
392    /// assert_eq!(hll.len(), 1 << 12);
393    /// ```
394    fn len(&self) -> usize {
395        P::NUMBER_OF_REGISTERS
396    }
397
398    #[inline(always)]
399    /// Returns whether no element was yet added to the HLL counter.
400    ///
401    ///
402    /// # Examples
403    ///
404    /// ```
405    /// use hyperloglog_rs::prelude::*;
406    ///
407    /// let mut hll: HyperLogLog<Precision8, 4> = HyperLogLog::default();
408    ///
409    /// assert!(hll.is_empty());
410    ///
411    /// hll.insert(&1);
412    ///
413    /// assert!(!hll.is_empty());
414    /// ```
415    fn is_empty(&self) -> bool {
416        self.len() == self.get_number_of_zero_registers()
417    }
418
419    #[inline(always)]
420    /// Returns the number of extra registers that are not actually used.
421    ///
422    /// # Examples
423    /// Since the number of registers is not a multiple of the number of registers in a word,
424    /// there are padding registers that are not actually used.
425    ///
426    /// ```
427    /// # use hyperloglog_rs::prelude::*;
428    ///
429    /// assert_eq!(HyperLogLog::<Precision10, 6>::get_number_of_padding_registers(), 1);
430    /// ```
431    ///
432    /// For instance, in the case using the bare minimum bits per registers (4)
433    /// and the minimal precision (4), for a total of 16 registers, we expect
434    /// to not have any padding registers.
435    ///
436    /// ```
437    /// # use hyperloglog_rs::prelude::*;
438    ///
439    /// assert_eq!(HyperLogLog::<Precision4, 1>::get_number_of_padding_registers(), 16, "Expected 16 padding registers, precision 4, bits 1, got {}.", HyperLogLog::<Precision4, 1>::get_number_of_padding_registers());
440    /// assert_eq!(HyperLogLog::<Precision4, 2>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 4, bits 2, got {}.", HyperLogLog::<Precision4, 2>::get_number_of_padding_registers());
441    /// assert_eq!(HyperLogLog::<Precision4, 3>::get_number_of_padding_registers(), 4, "Expected 4 padding registers, precision 4, bits 3, got {}.", HyperLogLog::<Precision4, 3>::get_number_of_padding_registers());
442    /// assert_eq!(HyperLogLog::<Precision4, 4>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 4, bits 4, got {}.", HyperLogLog::<Precision4, 4>::get_number_of_padding_registers());
443    /// assert_eq!(HyperLogLog::<Precision4, 5>::get_number_of_padding_registers(), 2, "Expected 2 padding registers, precision 4, bits 5, got {}.", HyperLogLog::<Precision4, 5>::get_number_of_padding_registers());
444    /// assert_eq!(HyperLogLog::<Precision4, 6>::get_number_of_padding_registers(), 4, "Expected 1 padding registers, precision 4, bits 6, got {}.", HyperLogLog::<Precision4, 6>::get_number_of_padding_registers());
445    ///
446    /// assert_eq!(HyperLogLog::<Precision5, 1>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 5, bits 1, got {}.", HyperLogLog::<Precision5, 1>::get_number_of_padding_registers());
447    /// assert_eq!(HyperLogLog::<Precision5, 2>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 5, bits 2, got {}.", HyperLogLog::<Precision5, 2>::get_number_of_padding_registers());
448    /// assert_eq!(HyperLogLog::<Precision5, 3>::get_number_of_padding_registers(), 8, "Expected 30 padding registers, precision 5, bits 3, got {}.", HyperLogLog::<Precision5, 3>::get_number_of_padding_registers());
449    /// assert_eq!(HyperLogLog::<Precision5, 4>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 5, bits 4, got {}.", HyperLogLog::<Precision5, 4>::get_number_of_padding_registers());
450    /// assert_eq!(HyperLogLog::<Precision5, 5>::get_number_of_padding_registers(), 4, "Expected 4 padding registers, precision 5, bits 5, got {}.", HyperLogLog::<Precision5, 5>::get_number_of_padding_registers());
451    /// assert_eq!(HyperLogLog::<Precision5, 6>::get_number_of_padding_registers(), 3, "Expected 3 padding registers, precision 5, bits 6, got {}.", HyperLogLog::<Precision5, 6>::get_number_of_padding_registers());
452    ///
453    /// assert_eq!(HyperLogLog::<Precision6, 1>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 6, bits 1, got {}.", HyperLogLog::<Precision6, 1>::get_number_of_padding_registers());
454    /// assert_eq!(HyperLogLog::<Precision6, 2>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 6, bits 2, got {}.", HyperLogLog::<Precision6, 2>::get_number_of_padding_registers());
455    /// assert_eq!(HyperLogLog::<Precision6, 3>::get_number_of_padding_registers(), 6, "Expected 6 padding registers, precision 6, bits 3, got {}.", HyperLogLog::<Precision6, 3>::get_number_of_padding_registers());
456    /// assert_eq!(HyperLogLog::<Precision6, 4>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 6, bits 4, got {}.", HyperLogLog::<Precision6, 4>::get_number_of_padding_registers());
457    /// assert_eq!(HyperLogLog::<Precision6, 5>::get_number_of_padding_registers(), 2, "Expected 2 padding registers, precision 6, bits 5, got {}.", HyperLogLog::<Precision6, 5>::get_number_of_padding_registers());
458    /// assert_eq!(HyperLogLog::<Precision6, 6>::get_number_of_padding_registers(), 1, "Expected 1 padding registers, precision 6, bits 6, got {}.", HyperLogLog::<Precision6, 6>::get_number_of_padding_registers());
459    ///
460    /// ```
461    ///
462    fn get_number_of_padding_registers() -> usize {
463        ceil(P::NUMBER_OF_REGISTERS, 32 / BITS) * Self::NUMBER_OF_REGISTERS_IN_WORD
464            - P::NUMBER_OF_REGISTERS
465    }
466
467    /// Returns the number of registers with zero values. This value is used for computing a small
468    /// correction when estimating the cardinality of a small set.
469    ///
470    /// # Examples
471    ///
472    /// ```
473    /// # use hyperloglog_rs::prelude::*;
474    ///
475    /// // Create a new HyperLogLog counter with precision 14 and 5 bits per register.
476    /// let mut hll = HyperLogLog::<Precision14, 5>::default();
477    ///
478    /// // Add some elements to the counter.
479    /// hll.insert(&1);
480    /// hll.insert(&2);
481    /// hll.insert(&3);
482    ///
483    /// // Get the number of zero registers.
484    /// let number_of_zero_registers = hll.get_number_of_zero_registers();
485    ///
486    /// assert_eq!(number_of_zero_registers, 16381);
487    /// ```
488    fn get_number_of_zero_registers(&self) -> usize;
489
490    #[inline(always)]
491    fn get_number_of_non_zero_registers(&self) -> usize {
492        // Calculates the number of registers that have a non-zero value by
493        // subtracting the number of registers with a zero value from the total number of registers
494        self.len() - self.get_number_of_zero_registers()
495    }
496
497    /// Returns an array of registers of the HyperLogLog counter.
498    ///
499    /// # Examples
500    ///
501    /// ```rust
502    /// # use hyperloglog_rs::prelude::*;
503    ///
504    /// let mut hll = HyperLogLog::<Precision10, 6>::default();
505    /// hll.insert(&4);
506    /// hll.insert(&5);
507    /// hll.insert(&6);
508    /// let registers = hll.get_registers();
509    ///
510    /// assert_eq!(registers.len(), 1024);
511    /// assert!(registers.iter().any(|&x| x > 0));
512    /// ```
513    ///
514    /// We can also create an HLL from registers, and then check
515    /// whether the registers are what we expect:
516    ///
517    /// ```rust
518    /// # use hyperloglog_rs::prelude::*;
519    ///
520    /// let expected = [3, 2, 1, 1, 7, 15, 39, 63, 28, 23, 0, 0, 11, 11, 11, 0];
521    /// let mut hll = HyperLogLog::<Precision4, 6>::from_registers(&expected);
522    /// assert_eq!(hll.get_registers(), expected, "Expected {:?}, got {:?}", expected, hll.get_registers());
523    /// ```
524    ///
525    ///
526    fn get_registers(&self) -> P::Registers {
527        let mut registers = P::Registers::default_array();
528        self.get_words()
529            .iter_elements()
530            .flat_map(|word| {
531                (0..Self::NUMBER_OF_REGISTERS_IN_WORD)
532                    .map(move |i: usize| (word >> (i * BITS)) & Self::LOWER_REGISTER_MASK)
533            })
534            .zip(registers.iter_elements_mut())
535            .for_each(|(value, cell): (u32, &mut u32)| {
536                *cell = value;
537            });
538        registers
539    }
540
541    /// Returns the array of words of the HyperLogLog counter.
542    fn get_words(&self) -> &P::Words;
543
544    #[inline(always)]
545    /// Returns `true` if the HyperLogLog counter may contain the given element.
546    ///
547    /// # Arguments
548    /// * `rhs` - The element to check.
549    ///
550    /// # Examples
551    ///
552    /// ```rust
553    /// # use hyperloglog_rs::prelude::*;
554    ///
555    /// let mut hll: HyperLogLog<Precision8, 6> = HyperLogLog::default();
556    /// assert_eq!(hll.may_contain(&42), false);
557    ///
558    /// hll.insert(&42);
559    /// assert_eq!(hll.may_contain(&42), true);
560    /// ```
561    fn may_contain<T: Hash>(&self, rhs: &T) -> bool {
562        let (_hash, index) = self.get_hash_and_index::<T>(rhs);
563
564        // Calculate the position of the register in the internal buffer array.
565        let word_position = index / Self::NUMBER_OF_REGISTERS_IN_WORD;
566
567        // Calculate the position of the register within the 32-bit word containing it.
568        let register_position_in_u32 = index % Self::NUMBER_OF_REGISTERS_IN_WORD;
569
570        // Extract the current value of the register at `index`.
571        let register_value: u32 = (self.get_words()[word_position]
572            >> (register_position_in_u32 * BITS))
573            & Self::LOWER_REGISTER_MASK;
574
575        register_value > 0
576    }
577
578    #[inline(always)]
579    /// Returns whether the provided HyperLogLog counter may be fully contained in the current HyperLogLog counter.
580    ///
581    /// # Arguments
582    /// * `rhs` - The HyperLogLog counter to check.
583    ///
584    /// # Implementative details
585    /// We define a counter that fully contains another counter when all of the registers
586    /// of the first counter are greater than or equal to the corresponding registers of the second counter.
587    ///
588    /// # Examples
589    ///
590    /// ```rust
591    /// # use hyperloglog_rs::prelude::*;
592    ///
593    /// let mut hll1: HyperLogLog<Precision8, 6> = HyperLogLog::default();
594    /// let mut hll2: HyperLogLog<Precision8, 6> = HyperLogLog::default();
595    ///
596    /// hll1.insert(&42);
597    /// hll1.insert(&43);
598    /// hll1.insert(&44);
599    ///
600    /// hll2.insert(&42);
601    /// hll2.insert(&43);
602    ///
603    /// assert_eq!(hll1.may_contain_all(&hll2), true);
604    /// assert_eq!(hll2.may_contain_all(&hll1), false);
605    ///
606    /// hll2.insert(&44);
607    ///
608    /// assert_eq!(hll1.may_contain_all(&hll2), true);
609    /// assert_eq!(hll2.may_contain_all(&hll1), true);
610    /// ```
611    fn may_contain_all(&self, rhs: &Self) -> bool {
612        for (left_word, right_word) in self
613            .get_words()
614            .iter_elements()
615            .copied()
616            .zip(rhs.get_words().iter_elements().copied())
617        {
618            for i in 0..Self::NUMBER_OF_REGISTERS_IN_WORD {
619                let left_register = (left_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
620                let right_register = (right_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
621                if left_register < right_register {
622                    return false;
623                }
624            }
625        }
626        true
627    }
628
629    #[inline(always)]
630    /// Returns the hash value and the corresponding register's index for a given value.
631    ///
632    /// # Arguments
633    /// * `value` - A reference to the value to be hashed.
634    ///
635    /// # Examples
636    ///
637    /// ```
638    /// use hyperloglog_rs::prelude::*;
639    ///
640    /// let mut hll: HyperLogLog<Precision8, 6> = HyperLogLog::default();
641    /// let value = 42;
642    /// let (hash, index) = hll.get_hash_and_index(&value);
643    ///
644    /// //assert_eq!(hash, 10123147082338939904, "Expected hash {}, got {}.", 10123147082338939904, hash);
645    /// ```
646    fn get_hash_and_index<T: Hash>(&self, value: &T) -> (u64, usize) {
647        let mut hasher = Sip64Scalar::<1, 3>::new();
648        value.hash(&mut hasher);
649        let hash = hasher.finish();
650
651        // Calculate the register's index using the highest bits of the hash.
652        // The index of the register has to vary from 0 to 2^p - 1, where p is the precision,
653        // so we use the highest p bits of the hash.
654        let index: usize = hash as usize >> (64 - P::EXPONENT);
655
656        // And we delete the used bits from the hash.
657        let hash: u64 = hash << P::EXPONENT;
658        debug_assert!(
659            index < P::NUMBER_OF_REGISTERS,
660            "The index {} must be less than the number of registers {}.",
661            index,
662            P::NUMBER_OF_REGISTERS
663        );
664
665        (hash, index)
666    }
667
668    /// Returns the number of registers in the counter.
669    ///
670    /// # Implementation details
671    /// This function is overriding the estimate_cardinality function of the HyperLogLogTrait trait
672    /// as we can compute the cardinality of the counter using the multiplicities instead of the
673    /// registers. This is much faster as we do not need to compute the harmonic mean of the registers.
674    fn estimate_cardinality_from_multiplicities(multiplicities: &P::RegisterMultiplicities) -> f32 {
675        if multiplicities[0] > P::NumberOfZeros::ZERO {
676            let number_of_zeros: usize = multiplicities[0].convert();
677            let low_range_correction = P::SMALL_CORRECTIONS[number_of_zeros - 1_usize];
678            if low_range_correction <= Self::LINEAR_COUNT_THRESHOLD {
679                return low_range_correction;
680            }
681        }
682
683        let mut raw_estimate: f32 = 0.0;
684
685        for (current_register, multeplicity) in multiplicities.iter_elements().enumerate() {
686            let two_to_minus_register: i32 = (127 - current_register as i32) << 23;
687            let register_count: f32 = multeplicity.convert();
688            raw_estimate +=
689                register_count * f32::from_le_bytes(two_to_minus_register.to_le_bytes());
690        }
691
692        Self::adjust_estimate(raw_estimate)
693    }
694}