hyperloglog_rs/hyperloglog_trait.rs
1use crate::array_default::ArrayIter;
2use crate::bias::BIAS_DATA;
3use crate::estimated_union_cardinalities::EstimatedUnionCardinalities;
4use crate::precisions::{Precision, WordType};
5use crate::prelude::*;
6use crate::prelude::{linear_counting_threshold, MaxMin};
7use crate::primitive::Primitive;
8use crate::raw_estimate_data::RAW_ESTIMATE_DATA;
9use crate::sip::Sip64Scalar;
10use crate::utils::{ceil, get_alpha};
11use core::hash::Hash;
12use core::hash::Hasher;
13
14pub trait HyperLogLogTrait<P: Precision + WordType<BITS>, const BITS: usize>: Sized {
15 /// The threshold value used in the small range correction of the HyperLogLog algorithm.
16 const INTERMEDIATE_RANGE_CORRECTION_THRESHOLD: f32 = 5.0_f32 * (P::NUMBER_OF_REGISTERS as f32);
17
18 const LINEAR_COUNT_THRESHOLD: f32 = linear_counting_threshold(P::EXPONENT);
19
20 /// The mask used to obtain the lower register bits in the HyperLogLog algorithm.
21 const LOWER_REGISTER_MASK: u32 = (1 << BITS) - 1;
22
23 /// The mask used to obtain the lower precision bits in the HyperLogLog algorithm.
24 const LOWER_PRECISION_MASK: usize = P::NUMBER_OF_REGISTERS - 1;
25 const NOT_LOWER_PRECISION_MASK: u64 = !Self::LOWER_PRECISION_MASK as u64;
26
27 const NUMBER_OF_PADDING_BITS: usize = 32 - (32 / BITS) * BITS;
28
29 /// The mask representing the bits that are never used in the u32 word in the cases
30 /// where the number of bits is not a divisor of 32, such as 5 or 6.
31 /// We set the LEADING bits as the padding bits, the unused one, so the leftmost bits.
32 const PADDING_BITS_MASK: u32 = !((1_u64 << (32 - Self::NUMBER_OF_PADDING_BITS)) - 1) as u32;
33
34 const NUMBER_OF_PADDING_REGISTERS: usize = ceil(P::NUMBER_OF_REGISTERS, 32 / BITS)
35 * Self::NUMBER_OF_REGISTERS_IN_WORD
36 - P::NUMBER_OF_REGISTERS;
37
38 /// The mask representing the bits that are never used in the last u32 word in the cases
39 /// where the number of registers is not a multiple of the number of registers in a word.
40 const LAST_WORD_PADDING_BITS_MASK: u32 = !((1_u64
41 << (32 - BITS * Self::NUMBER_OF_PADDING_REGISTERS - Self::NUMBER_OF_PADDING_BITS))
42 - 1_u64) as u32;
43
44 /// The mask used to obtain the upper precision bits in the HyperLogLog algorithm.
45 const UPPER_PRECISION_MASK: usize = Self::LOWER_PRECISION_MASK << (64 - P::EXPONENT);
46
47 /// The number of registers that can fit in a single 32-bit word in the HyperLogLog algorithm.
48 const NUMBER_OF_REGISTERS_IN_WORD: usize = 32 / BITS;
49
50 fn adjust_estimate(mut raw_estimate: f32) -> f32 {
51 // Apply the final scaling factor to obtain the estimate of the cardinality
52 raw_estimate = get_alpha(P::NUMBER_OF_REGISTERS)
53 * (P::NUMBER_OF_REGISTERS * P::NUMBER_OF_REGISTERS) as f32
54 / raw_estimate;
55
56 // Apply the small range correction factor if the raw estimate is below the threshold
57 // and there are zero registers in the counter.
58 if raw_estimate <= Self::INTERMEDIATE_RANGE_CORRECTION_THRESHOLD {
59 // Get a reference to raw estimates/biases for precision.
60 let biases = BIAS_DATA[P::EXPONENT - 4];
61 let estimates = RAW_ESTIMATE_DATA[P::EXPONENT - 4];
62
63 // Raw estimate is first/last in estimates. Return the first/last bias.
64 if raw_estimate <= estimates[0] {
65 return raw_estimate - biases[0];
66 }
67
68 if estimates[estimates.len() - 1] <= raw_estimate {
69 return raw_estimate - biases[biases.len() - 1];
70 }
71
72 // Raw estimate is somewhere in between estimates.
73 // Binary search for the calculated raw estimate.
74 //
75 // Here we unwrap because neither the values in `estimates`
76 // nor `raw` are going to be NaN.
77 let partition_index = estimates.partition_point(|est| *est <= raw_estimate);
78
79 // Return linear interpolation between raw's neighboring points.
80 let ratio = (raw_estimate - estimates[partition_index - 1])
81 / (estimates[partition_index] - estimates[partition_index - 1]);
82
83 // Calculate bias.
84 raw_estimate
85 - (biases[partition_index - 1]
86 + ratio * (biases[partition_index] - biases[partition_index - 1]))
87 } else {
88 raw_estimate
89 }
90 }
91
92 fn adjust_estimate_with_zeros(raw_estimate: f32, number_of_zeros: usize) -> f32 {
93 if number_of_zeros > 0 {
94 let low_range_correction = P::SMALL_CORRECTIONS[number_of_zeros - 1];
95 if low_range_correction <= Self::LINEAR_COUNT_THRESHOLD {
96 return low_range_correction;
97 }
98 }
99 Self::adjust_estimate(raw_estimate)
100 }
101
102 /// Returns whether the cardinality of this HLL will be computed using the small-range correction.
103 ///
104 /// # Implementation details
105 /// The small-range correction is used when the cardinality of the set is small enough that the
106 /// linear counting algorithm can be used to estimate the cardinality. The threshold for using
107 /// the linear counting algorithm is determined by the number of registers in the HLL counter.
108 fn use_small_range_correction(&self) -> bool {
109 self.get_number_of_zero_registers() > 0
110 && P::SMALL_CORRECTIONS[self.get_number_of_zero_registers() - 1]
111 <= Self::LINEAR_COUNT_THRESHOLD
112 }
113
114 #[inline(always)]
115 /// Estimates the cardinality of the set based on the HLL counter data.
116 ///
117 /// # Example
118 ///
119 /// ```
120 /// # use hyperloglog_rs::prelude::*;
121 /// let mut hll = HyperLogLog::<Precision9, 5>::default();
122 /// let elements = vec![1, 2, 3, 4, 5];
123 /// for element in &elements {
124 /// hll.insert(element);
125 /// }
126 /// let estimated_cardinality = hll.estimate_cardinality();
127 /// assert!(estimated_cardinality >= elements.len() as f32 * 0.9 &&
128 /// estimated_cardinality <= elements.len() as f32 * 1.1);
129 /// ```
130 ///
131 /// # Returns
132 /// * `f32` - The estimated cardinality of the set.
133 fn estimate_cardinality(&self) -> f32 {
134 if self.get_number_of_zero_registers() > 0 {
135 let low_range_correction =
136 P::SMALL_CORRECTIONS[self.get_number_of_zero_registers() - 1];
137 if low_range_correction <= Self::LINEAR_COUNT_THRESHOLD {
138 return low_range_correction;
139 }
140 }
141
142 let mut raw_estimate = 0.0;
143
144 for &word in self.get_words().iter_elements() {
145 let mut partial: f32 = 0.0;
146 for i in 0..Self::NUMBER_OF_REGISTERS_IN_WORD {
147 let register = (word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
148 let two_to_minus_register = (127 - register) << 23;
149 partial += f32::from_le_bytes(two_to_minus_register.to_le_bytes());
150 }
151 raw_estimate += partial;
152 }
153
154 raw_estimate -= Self::get_number_of_padding_registers() as f32;
155
156 Self::adjust_estimate(raw_estimate)
157 }
158
159 #[inline(always)]
160 /// Returns an estimate of the cardinality of the union of two HyperLogLog counters.
161 ///
162 /// This method calculates an estimate of the cardinality of the union of two HyperLogLog counters
163 /// using the raw estimation values of each counter. It combines the estimation values by iterating
164 /// over the register words of both counters and performing necessary calculations.
165 ///
166 /// # Arguments
167 /// * `other`: A reference to the other HyperLogLog counter.
168 ///
169 /// # Returns
170 /// An estimation of the cardinality of the union of the two HyperLogLog counters.
171 ///
172 /// # Example
173 ///
174 /// ```
175 /// use hyperloglog_rs::prelude::*;
176 ///
177 /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
178 /// hll1.insert(&1);
179 /// hll1.insert(&2);
180 ///
181 /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
182 /// hll2.insert(&2);
183 /// hll2.insert(&3);
184 ///
185 /// let union_cardinality = hll1.estimate_union_cardinality(&hll2);
186 ///
187 /// assert!(union_cardinality >= 3.0 * 0.9 &&
188 /// union_cardinality <= 3.0 * 1.1);
189 /// ```
190 fn estimate_union_cardinality(&self, other: &Self) -> f32 {
191 self.estimate_union_and_sets_cardinality(other)
192 .get_union_cardinality()
193 }
194
195 #[inline(always)]
196 /// Returns an estimate of the cardinality of the two HLL counters union.
197 fn estimate_union_and_sets_cardinality<F: Primitive<f32> + MaxMin>(
198 &self,
199 other: &Self,
200 ) -> EstimatedUnionCardinalities<F> {
201 let mut raw_union_estimate = 0.0;
202 let mut raw_left_estimate = 0.0;
203 let mut raw_right_estimate = 0.0;
204
205 let mut union_zeros = 0;
206 for (left_word, right_word) in self
207 .get_words()
208 .iter_elements()
209 .copied()
210 .zip(other.get_words().iter_elements().copied())
211 {
212 let mut union_partial: f32 = 0.0;
213 let mut left_partial: f32 = 0.0;
214 let mut right_partial: f32 = 0.0;
215 for i in 0..Self::NUMBER_OF_REGISTERS_IN_WORD {
216 let left_register = (left_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
217 let right_register = (right_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
218 let maximal_register = (left_register).max(right_register);
219 union_partial += f32::from_le_bytes(((127 - maximal_register) << 23).to_le_bytes());
220 left_partial += f32::from_le_bytes(((127 - left_register) << 23).to_le_bytes());
221 right_partial += f32::from_le_bytes(((127 - right_register) << 23).to_le_bytes());
222 union_zeros += (maximal_register == 0) as usize;
223 }
224 raw_union_estimate += union_partial;
225 raw_left_estimate += left_partial;
226 raw_right_estimate += right_partial;
227 }
228
229 union_zeros -= Self::get_number_of_padding_registers();
230
231 // We need to subtract the padding registers from the raw estimates
232 // as for each such register we are adding a one.
233 raw_union_estimate -= Self::get_number_of_padding_registers() as f32;
234 raw_left_estimate -= Self::get_number_of_padding_registers() as f32;
235 raw_right_estimate -= Self::get_number_of_padding_registers() as f32;
236
237 let mut union_estimate = F::reverse(Self::adjust_estimate_with_zeros(
238 raw_union_estimate,
239 union_zeros,
240 ));
241 let left_estimate = F::reverse(Self::adjust_estimate_with_zeros(
242 raw_left_estimate,
243 self.get_number_of_zero_registers(),
244 ));
245 let right_estimate = F::reverse(Self::adjust_estimate_with_zeros(
246 raw_right_estimate,
247 other.get_number_of_zero_registers(),
248 ));
249
250 // The union estimate cannot be higher than the sum of the left and right estimates.
251 union_estimate = union_estimate.get_min(left_estimate + right_estimate);
252
253 EstimatedUnionCardinalities::from((left_estimate, right_estimate, union_estimate))
254 }
255
256 #[inline(always)]
257 /// Returns an estimate of the cardinality of the intersection of two HyperLogLog counters.
258 ///
259 /// This method calculates an estimate of the cardinality of the intersection of two HyperLogLog
260 /// counters using the raw estimation values of each counter. It combines the estimation values by
261 /// iterating over the register words of both counters and performing necessary calculations.
262 ///
263 /// # Arguments
264 /// * `other`: A reference to the other HyperLogLog counter.
265 ///
266 /// # Returns
267 /// An estimation of the cardinality of the intersection of the two HyperLogLog counters.
268 ///
269 /// # Example
270 ///
271 /// ```
272 /// use hyperloglog_rs::prelude::*;
273 ///
274 /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
275 /// hll1.insert(&1);
276 /// hll1.insert(&2);
277 ///
278 /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
279 /// hll2.insert(&2);
280 /// hll2.insert(&3);
281 ///
282 /// let intersection_cardinality: f32 = hll1.estimate_intersection_cardinality(&hll2);
283 ///
284 /// assert!(intersection_cardinality >= 1.0 * 0.9 &&
285 /// intersection_cardinality <= 1.0 * 1.1);
286 /// ```
287 fn estimate_intersection_cardinality<F: Primitive<f32>>(&self, other: &Self) -> F {
288 self.estimate_union_and_sets_cardinality(other)
289 .get_intersection_cardinality()
290 }
291
292 #[inline(always)]
293 /// Returns an estimate of the Jaccard index between two HyperLogLog counters.
294 ///
295 /// The Jaccard index is a measure of similarity between two sets. In the context of HyperLogLog
296 /// counters, it represents the ratio of the size of the intersection of the sets represented by
297 /// the counters to the size of their union. This method estimates the Jaccard index by utilizing
298 /// the cardinality estimation values of the intersection, left set, and right set.
299 ///
300 /// # Arguments
301 /// * `other`: A reference to the other HyperLogLog counter.
302 ///
303 /// # Returns
304 /// An estimation of the Jaccard index between the two HyperLogLog counters.
305 ///
306 /// # Example
307 ///
308 /// ```
309 /// use hyperloglog_rs::prelude::*;
310 ///
311 /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
312 /// hll1.insert(&1);
313 /// hll1.insert(&2);
314 /// hll1.insert(&3);
315 /// hll1.insert(&4);
316 ///
317 /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
318 /// hll2.insert(&2);
319 /// hll2.insert(&3);
320 /// hll2.insert(&5);
321 /// hll2.insert(&6);
322 ///
323 /// let jaccard_index = hll1.estimate_jaccard_index(&hll2);
324 ///
325 /// let expected = 2.0 / 6.0;
326 ///
327 /// assert!(jaccard_index >= expected * 0.9 &&
328 /// jaccard_index <= expected * 1.1);
329 /// ```
330 fn estimate_jaccard_index(&self, other: &Self) -> f32 {
331 self.estimate_union_and_sets_cardinality(other)
332 .get_jaccard_index()
333 }
334
335 #[inline(always)]
336 /// Returns an estimate of the cardinality of the current HyperLogLog counter minus the provided one.
337 ///
338 /// # Arguments
339 /// * `other`: A reference to the other HyperLogLog counter.
340 ///
341 /// # Example
342 ///
343 /// ```
344 /// use hyperloglog_rs::prelude::*;
345 ///
346 /// let mut hll1 = HyperLogLog::<Precision12, 6>::default();
347 /// hll1.insert(&1);
348 /// hll1.insert(&2);
349 /// hll1.insert(&3);
350 /// hll1.insert(&4);
351 ///
352 /// let mut hll2 = HyperLogLog::<Precision12, 6>::default();
353 /// hll2.insert(&2);
354 /// hll2.insert(&3);
355 /// hll2.insert(&5);
356 /// hll2.insert(&6);
357 ///
358 /// let difference_cardinality: f32 = hll1.estimate_difference_cardinality(&hll2);
359 ///
360 /// assert!(difference_cardinality >= 2.0 * 0.9 &&
361 /// difference_cardinality <= 2.0 * 1.1);
362 /// ```
363 fn estimate_difference_cardinality<F: Primitive<f32> + One>(&self, other: &Self) -> F {
364 self.estimate_union_and_sets_cardinality(other)
365 .get_left_difference_cardinality()
366 }
367
368 #[inline(always)]
369 /// Returns the number of registers in the HLL counter.
370 ///
371 ///
372 /// # Example
373 ///
374 /// ```
375 /// # use hyperloglog_rs::prelude::*;
376 ///
377 /// // Create a new HLL counter with 128 registers
378 /// let mut hll = HyperLogLog::<Precision12, 4>::default();
379 /// assert_eq!(hll.len(), 4096);
380 ///
381 /// // Insert some elements into the HLL counter
382 /// hll.insert(&1);
383 /// hll.insert(&2);
384 /// hll.insert(&3);
385 /// assert_eq!(hll.len(), 1 << 12);
386 ///
387 /// // Merge another HLL counter with 128 registers
388 /// let mut hll2 = HyperLogLog::<Precision12, 4>::default();
389 /// hll2.insert(&4);
390 /// hll2.insert(&5);
391 /// hll |= hll2;
392 /// assert_eq!(hll.len(), 1 << 12);
393 /// ```
394 fn len(&self) -> usize {
395 P::NUMBER_OF_REGISTERS
396 }
397
398 #[inline(always)]
399 /// Returns whether no element was yet added to the HLL counter.
400 ///
401 ///
402 /// # Examples
403 ///
404 /// ```
405 /// use hyperloglog_rs::prelude::*;
406 ///
407 /// let mut hll: HyperLogLog<Precision8, 4> = HyperLogLog::default();
408 ///
409 /// assert!(hll.is_empty());
410 ///
411 /// hll.insert(&1);
412 ///
413 /// assert!(!hll.is_empty());
414 /// ```
415 fn is_empty(&self) -> bool {
416 self.len() == self.get_number_of_zero_registers()
417 }
418
419 #[inline(always)]
420 /// Returns the number of extra registers that are not actually used.
421 ///
422 /// # Examples
423 /// Since the number of registers is not a multiple of the number of registers in a word,
424 /// there are padding registers that are not actually used.
425 ///
426 /// ```
427 /// # use hyperloglog_rs::prelude::*;
428 ///
429 /// assert_eq!(HyperLogLog::<Precision10, 6>::get_number_of_padding_registers(), 1);
430 /// ```
431 ///
432 /// For instance, in the case using the bare minimum bits per registers (4)
433 /// and the minimal precision (4), for a total of 16 registers, we expect
434 /// to not have any padding registers.
435 ///
436 /// ```
437 /// # use hyperloglog_rs::prelude::*;
438 ///
439 /// assert_eq!(HyperLogLog::<Precision4, 1>::get_number_of_padding_registers(), 16, "Expected 16 padding registers, precision 4, bits 1, got {}.", HyperLogLog::<Precision4, 1>::get_number_of_padding_registers());
440 /// assert_eq!(HyperLogLog::<Precision4, 2>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 4, bits 2, got {}.", HyperLogLog::<Precision4, 2>::get_number_of_padding_registers());
441 /// assert_eq!(HyperLogLog::<Precision4, 3>::get_number_of_padding_registers(), 4, "Expected 4 padding registers, precision 4, bits 3, got {}.", HyperLogLog::<Precision4, 3>::get_number_of_padding_registers());
442 /// assert_eq!(HyperLogLog::<Precision4, 4>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 4, bits 4, got {}.", HyperLogLog::<Precision4, 4>::get_number_of_padding_registers());
443 /// assert_eq!(HyperLogLog::<Precision4, 5>::get_number_of_padding_registers(), 2, "Expected 2 padding registers, precision 4, bits 5, got {}.", HyperLogLog::<Precision4, 5>::get_number_of_padding_registers());
444 /// assert_eq!(HyperLogLog::<Precision4, 6>::get_number_of_padding_registers(), 4, "Expected 1 padding registers, precision 4, bits 6, got {}.", HyperLogLog::<Precision4, 6>::get_number_of_padding_registers());
445 ///
446 /// assert_eq!(HyperLogLog::<Precision5, 1>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 5, bits 1, got {}.", HyperLogLog::<Precision5, 1>::get_number_of_padding_registers());
447 /// assert_eq!(HyperLogLog::<Precision5, 2>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 5, bits 2, got {}.", HyperLogLog::<Precision5, 2>::get_number_of_padding_registers());
448 /// assert_eq!(HyperLogLog::<Precision5, 3>::get_number_of_padding_registers(), 8, "Expected 30 padding registers, precision 5, bits 3, got {}.", HyperLogLog::<Precision5, 3>::get_number_of_padding_registers());
449 /// assert_eq!(HyperLogLog::<Precision5, 4>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 5, bits 4, got {}.", HyperLogLog::<Precision5, 4>::get_number_of_padding_registers());
450 /// assert_eq!(HyperLogLog::<Precision5, 5>::get_number_of_padding_registers(), 4, "Expected 4 padding registers, precision 5, bits 5, got {}.", HyperLogLog::<Precision5, 5>::get_number_of_padding_registers());
451 /// assert_eq!(HyperLogLog::<Precision5, 6>::get_number_of_padding_registers(), 3, "Expected 3 padding registers, precision 5, bits 6, got {}.", HyperLogLog::<Precision5, 6>::get_number_of_padding_registers());
452 ///
453 /// assert_eq!(HyperLogLog::<Precision6, 1>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 6, bits 1, got {}.", HyperLogLog::<Precision6, 1>::get_number_of_padding_registers());
454 /// assert_eq!(HyperLogLog::<Precision6, 2>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 6, bits 2, got {}.", HyperLogLog::<Precision6, 2>::get_number_of_padding_registers());
455 /// assert_eq!(HyperLogLog::<Precision6, 3>::get_number_of_padding_registers(), 6, "Expected 6 padding registers, precision 6, bits 3, got {}.", HyperLogLog::<Precision6, 3>::get_number_of_padding_registers());
456 /// assert_eq!(HyperLogLog::<Precision6, 4>::get_number_of_padding_registers(), 0, "Expected 0 padding registers, precision 6, bits 4, got {}.", HyperLogLog::<Precision6, 4>::get_number_of_padding_registers());
457 /// assert_eq!(HyperLogLog::<Precision6, 5>::get_number_of_padding_registers(), 2, "Expected 2 padding registers, precision 6, bits 5, got {}.", HyperLogLog::<Precision6, 5>::get_number_of_padding_registers());
458 /// assert_eq!(HyperLogLog::<Precision6, 6>::get_number_of_padding_registers(), 1, "Expected 1 padding registers, precision 6, bits 6, got {}.", HyperLogLog::<Precision6, 6>::get_number_of_padding_registers());
459 ///
460 /// ```
461 ///
462 fn get_number_of_padding_registers() -> usize {
463 ceil(P::NUMBER_OF_REGISTERS, 32 / BITS) * Self::NUMBER_OF_REGISTERS_IN_WORD
464 - P::NUMBER_OF_REGISTERS
465 }
466
467 /// Returns the number of registers with zero values. This value is used for computing a small
468 /// correction when estimating the cardinality of a small set.
469 ///
470 /// # Examples
471 ///
472 /// ```
473 /// # use hyperloglog_rs::prelude::*;
474 ///
475 /// // Create a new HyperLogLog counter with precision 14 and 5 bits per register.
476 /// let mut hll = HyperLogLog::<Precision14, 5>::default();
477 ///
478 /// // Add some elements to the counter.
479 /// hll.insert(&1);
480 /// hll.insert(&2);
481 /// hll.insert(&3);
482 ///
483 /// // Get the number of zero registers.
484 /// let number_of_zero_registers = hll.get_number_of_zero_registers();
485 ///
486 /// assert_eq!(number_of_zero_registers, 16381);
487 /// ```
488 fn get_number_of_zero_registers(&self) -> usize;
489
490 #[inline(always)]
491 fn get_number_of_non_zero_registers(&self) -> usize {
492 // Calculates the number of registers that have a non-zero value by
493 // subtracting the number of registers with a zero value from the total number of registers
494 self.len() - self.get_number_of_zero_registers()
495 }
496
497 /// Returns an array of registers of the HyperLogLog counter.
498 ///
499 /// # Examples
500 ///
501 /// ```rust
502 /// # use hyperloglog_rs::prelude::*;
503 ///
504 /// let mut hll = HyperLogLog::<Precision10, 6>::default();
505 /// hll.insert(&4);
506 /// hll.insert(&5);
507 /// hll.insert(&6);
508 /// let registers = hll.get_registers();
509 ///
510 /// assert_eq!(registers.len(), 1024);
511 /// assert!(registers.iter().any(|&x| x > 0));
512 /// ```
513 ///
514 /// We can also create an HLL from registers, and then check
515 /// whether the registers are what we expect:
516 ///
517 /// ```rust
518 /// # use hyperloglog_rs::prelude::*;
519 ///
520 /// let expected = [3, 2, 1, 1, 7, 15, 39, 63, 28, 23, 0, 0, 11, 11, 11, 0];
521 /// let mut hll = HyperLogLog::<Precision4, 6>::from_registers(&expected);
522 /// assert_eq!(hll.get_registers(), expected, "Expected {:?}, got {:?}", expected, hll.get_registers());
523 /// ```
524 ///
525 ///
526 fn get_registers(&self) -> P::Registers {
527 let mut registers = P::Registers::default_array();
528 self.get_words()
529 .iter_elements()
530 .flat_map(|word| {
531 (0..Self::NUMBER_OF_REGISTERS_IN_WORD)
532 .map(move |i: usize| (word >> (i * BITS)) & Self::LOWER_REGISTER_MASK)
533 })
534 .zip(registers.iter_elements_mut())
535 .for_each(|(value, cell): (u32, &mut u32)| {
536 *cell = value;
537 });
538 registers
539 }
540
541 /// Returns the array of words of the HyperLogLog counter.
542 fn get_words(&self) -> &P::Words;
543
544 #[inline(always)]
545 /// Returns `true` if the HyperLogLog counter may contain the given element.
546 ///
547 /// # Arguments
548 /// * `rhs` - The element to check.
549 ///
550 /// # Examples
551 ///
552 /// ```rust
553 /// # use hyperloglog_rs::prelude::*;
554 ///
555 /// let mut hll: HyperLogLog<Precision8, 6> = HyperLogLog::default();
556 /// assert_eq!(hll.may_contain(&42), false);
557 ///
558 /// hll.insert(&42);
559 /// assert_eq!(hll.may_contain(&42), true);
560 /// ```
561 fn may_contain<T: Hash>(&self, rhs: &T) -> bool {
562 let (_hash, index) = self.get_hash_and_index::<T>(rhs);
563
564 // Calculate the position of the register in the internal buffer array.
565 let word_position = index / Self::NUMBER_OF_REGISTERS_IN_WORD;
566
567 // Calculate the position of the register within the 32-bit word containing it.
568 let register_position_in_u32 = index % Self::NUMBER_OF_REGISTERS_IN_WORD;
569
570 // Extract the current value of the register at `index`.
571 let register_value: u32 = (self.get_words()[word_position]
572 >> (register_position_in_u32 * BITS))
573 & Self::LOWER_REGISTER_MASK;
574
575 register_value > 0
576 }
577
578 #[inline(always)]
579 /// Returns whether the provided HyperLogLog counter may be fully contained in the current HyperLogLog counter.
580 ///
581 /// # Arguments
582 /// * `rhs` - The HyperLogLog counter to check.
583 ///
584 /// # Implementative details
585 /// We define a counter that fully contains another counter when all of the registers
586 /// of the first counter are greater than or equal to the corresponding registers of the second counter.
587 ///
588 /// # Examples
589 ///
590 /// ```rust
591 /// # use hyperloglog_rs::prelude::*;
592 ///
593 /// let mut hll1: HyperLogLog<Precision8, 6> = HyperLogLog::default();
594 /// let mut hll2: HyperLogLog<Precision8, 6> = HyperLogLog::default();
595 ///
596 /// hll1.insert(&42);
597 /// hll1.insert(&43);
598 /// hll1.insert(&44);
599 ///
600 /// hll2.insert(&42);
601 /// hll2.insert(&43);
602 ///
603 /// assert_eq!(hll1.may_contain_all(&hll2), true);
604 /// assert_eq!(hll2.may_contain_all(&hll1), false);
605 ///
606 /// hll2.insert(&44);
607 ///
608 /// assert_eq!(hll1.may_contain_all(&hll2), true);
609 /// assert_eq!(hll2.may_contain_all(&hll1), true);
610 /// ```
611 fn may_contain_all(&self, rhs: &Self) -> bool {
612 for (left_word, right_word) in self
613 .get_words()
614 .iter_elements()
615 .copied()
616 .zip(rhs.get_words().iter_elements().copied())
617 {
618 for i in 0..Self::NUMBER_OF_REGISTERS_IN_WORD {
619 let left_register = (left_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
620 let right_register = (right_word >> (i * BITS)) & Self::LOWER_REGISTER_MASK;
621 if left_register < right_register {
622 return false;
623 }
624 }
625 }
626 true
627 }
628
629 #[inline(always)]
630 /// Returns the hash value and the corresponding register's index for a given value.
631 ///
632 /// # Arguments
633 /// * `value` - A reference to the value to be hashed.
634 ///
635 /// # Examples
636 ///
637 /// ```
638 /// use hyperloglog_rs::prelude::*;
639 ///
640 /// let mut hll: HyperLogLog<Precision8, 6> = HyperLogLog::default();
641 /// let value = 42;
642 /// let (hash, index) = hll.get_hash_and_index(&value);
643 ///
644 /// //assert_eq!(hash, 10123147082338939904, "Expected hash {}, got {}.", 10123147082338939904, hash);
645 /// ```
646 fn get_hash_and_index<T: Hash>(&self, value: &T) -> (u64, usize) {
647 let mut hasher = Sip64Scalar::<1, 3>::new();
648 value.hash(&mut hasher);
649 let hash = hasher.finish();
650
651 // Calculate the register's index using the highest bits of the hash.
652 // The index of the register has to vary from 0 to 2^p - 1, where p is the precision,
653 // so we use the highest p bits of the hash.
654 let index: usize = hash as usize >> (64 - P::EXPONENT);
655
656 // And we delete the used bits from the hash.
657 let hash: u64 = hash << P::EXPONENT;
658 debug_assert!(
659 index < P::NUMBER_OF_REGISTERS,
660 "The index {} must be less than the number of registers {}.",
661 index,
662 P::NUMBER_OF_REGISTERS
663 );
664
665 (hash, index)
666 }
667
668 /// Returns the number of registers in the counter.
669 ///
670 /// # Implementation details
671 /// This function is overriding the estimate_cardinality function of the HyperLogLogTrait trait
672 /// as we can compute the cardinality of the counter using the multiplicities instead of the
673 /// registers. This is much faster as we do not need to compute the harmonic mean of the registers.
674 fn estimate_cardinality_from_multiplicities(multiplicities: &P::RegisterMultiplicities) -> f32 {
675 if multiplicities[0] > P::NumberOfZeros::ZERO {
676 let number_of_zeros: usize = multiplicities[0].convert();
677 let low_range_correction = P::SMALL_CORRECTIONS[number_of_zeros - 1_usize];
678 if low_range_correction <= Self::LINEAR_COUNT_THRESHOLD {
679 return low_range_correction;
680 }
681 }
682
683 let mut raw_estimate: f32 = 0.0;
684
685 for (current_register, multeplicity) in multiplicities.iter_elements().enumerate() {
686 let two_to_minus_register: i32 = (127 - current_register as i32) << 23;
687 let register_count: f32 = multeplicity.convert();
688 raw_estimate +=
689 register_count * f32::from_le_bytes(two_to_minus_register.to_le_bytes());
690 }
691
692 Self::adjust_estimate(raw_estimate)
693 }
694}