Trait hyperloglog_rs::prelude::HyperLogLogTrait
source · pub trait HyperLogLogTrait<PRECISION: Precision + WordType<BITS>, const BITS: usize>: Sized {
const INTERMEDIATE_RANGE_CORRECTION_THRESHOLD: f32 = _;
const LINEAR_COUNT_THRESHOLD: f32 = _;
const LOWER_REGISTER_MASK: u32 = _;
const LOWER_PRECISION_MASK: usize = _;
const NOT_LOWER_PRECISION_MASK: u64 = _;
const PADDING_BITS_MASK: u32 = _;
const UPPER_PRECISION_MASK: usize = _;
const NUMBER_OF_REGISTERS_IN_WORD: usize = _;
Show 23 methods
// Required methods
fn new() -> Self;
fn from_registers(registers: &[u32]) -> Self;
fn from_words(words: &PRECISION::Words) -> Self;
fn get_number_of_zero_registers(&self) -> usize;
fn get_words(&self) -> &PRECISION::Words;
fn insert<T: Hash>(&mut self, rhs: T);
// Provided methods
fn adjust_estimate(raw_estimate: f32) -> f32 { ... }
fn adjust_estimate_with_zeros(
raw_estimate: f32,
number_of_zeros: usize
) -> f32 { ... }
fn use_small_range_correction(&self) -> bool { ... }
fn estimate_cardinality(&self) -> f32 { ... }
fn estimate_union_cardinality(&self, other: &Self) -> f32 { ... }
fn estimate_union_and_sets_cardinality<F: Primitive<f32> + MaxMin>(
&self,
other: &Self
) -> EstimatedUnionCardinalities<F> { ... }
fn estimate_intersection_cardinality<F: Primitive<f32>>(
&self,
other: &Self
) -> F { ... }
fn estimate_jaccard_index(&self, other: &Self) -> f32 { ... }
fn estimate_difference_cardinality<F: Primitive<f32>>(
&self,
other: &Self
) -> F { ... }
fn len(&self) -> usize { ... }
fn is_empty(&self) -> bool { ... }
fn get_number_of_padding_registers() -> usize { ... }
fn get_number_of_non_zero_registers(&self) -> usize { ... }
fn get_registers(&self) -> PRECISION::Registers { ... }
fn may_contain<T: Hash>(&self, rhs: &T) -> bool { ... }
fn may_contain_all(&self, rhs: &Self) -> bool { ... }
fn get_hash_and_index<T: Hash>(&self, value: &T) -> (u64, usize) { ... }
}Provided Associated Constants§
sourceconst INTERMEDIATE_RANGE_CORRECTION_THRESHOLD: f32 = _
const INTERMEDIATE_RANGE_CORRECTION_THRESHOLD: f32 = _
The threshold value used in the small range correction of the HyperLogLog algorithm.
const LINEAR_COUNT_THRESHOLD: f32 = _
sourceconst LOWER_REGISTER_MASK: u32 = _
const LOWER_REGISTER_MASK: u32 = _
The mask used to obtain the lower register bits in the HyperLogLog algorithm.
sourceconst LOWER_PRECISION_MASK: usize = _
const LOWER_PRECISION_MASK: usize = _
The mask used to obtain the lower precision bits in the HyperLogLog algorithm.
const NOT_LOWER_PRECISION_MASK: u64 = _
sourceconst PADDING_BITS_MASK: u32 = _
const PADDING_BITS_MASK: u32 = _
The mask representing the bits that are never used in the u32 word in the cases where the number of bits is not a divisor of 32, such as 5 or 6. We set the LEADING bits as the padding bits, the unused one, so the leftmost bits.
sourceconst UPPER_PRECISION_MASK: usize = _
const UPPER_PRECISION_MASK: usize = _
The mask used to obtain the upper precision bits in the HyperLogLog algorithm.
sourceconst NUMBER_OF_REGISTERS_IN_WORD: usize = _
const NUMBER_OF_REGISTERS_IN_WORD: usize = _
The number of registers that can fit in a single 32-bit word in the HyperLogLog algorithm.
Required Methods§
sourcefn from_registers(registers: &[u32]) -> Self
fn from_registers(registers: &[u32]) -> Self
Create a new HyperLogLog counter from an array of registers.
Arguments
registers- An array of u32 registers to use for the HyperLogLog counter.
Returns
A new HyperLogLog counter initialized with the given registers.
Examples
use hyperloglog_rs::prelude::*;
let registers = [0_u32; 1 << 4];
let hll = HyperLogLog::<Precision4, 6>::from_registers(®isters);
assert_eq!(hll.len(), 1 << 4);sourcefn from_words(words: &PRECISION::Words) -> Self
fn from_words(words: &PRECISION::Words) -> Self
Create a new HyperLogLog counter from an array of words.
Arguments
words- An array of u64 words to use for the HyperLogLog counter.
Returns
A new HyperLogLog counter initialized with the given words.
Examples
use hyperloglog_rs::prelude::*;
let words = [0_u32; 4];
let hll = HyperLogLog::<Precision4, 6>::from_words(&words);
assert_eq!(hll.len(), 16);sourcefn get_number_of_zero_registers(&self) -> usize
fn get_number_of_zero_registers(&self) -> usize
Returns the number of registers with zero values. This value is used for computing a small correction when estimating the cardinality of a small set.
Examples
// Create a new HyperLogLog counter with precision 14 and 5 bits per register.
let mut hll = HyperLogLog::<Precision14, 5>::new();
// Add some elements to the counter.
hll.insert(&1);
hll.insert(&2);
hll.insert(&3);
// Get the number of zero registers.
let number_of_zero_registers = hll.get_number_of_zero_registers();
assert_eq!(number_of_zero_registers, 16381);sourcefn get_words(&self) -> &PRECISION::Words
fn get_words(&self) -> &PRECISION::Words
Returns the array of words of the HyperLogLog counter.
sourcefn insert<T: Hash>(&mut self, rhs: T)
fn insert<T: Hash>(&mut self, rhs: T)
Adds an element to the HyperLogLog counter.
Arguments
rhs- The element to add.
Examples
use hyperloglog_rs::prelude::*;
let mut hll = HyperLogLog::<Precision10, 6>::new();
hll.insert("Hello");
hll.insert("World");
assert!(hll.estimate_cardinality() >= 2.0);Performance
The performance of this function depends on the size of the HyperLogLog counter (N), the number
of distinct elements in the input, and the hash function used to hash elements. For a given value of N,
the function has an average time complexity of O(1) and a worst-case time complexity of O(log N).
However, the actual time complexity may vary depending on the distribution of the hashed elements.
Errors
This function does not return any errors.
Provided Methods§
fn adjust_estimate(raw_estimate: f32) -> f32
fn adjust_estimate_with_zeros(raw_estimate: f32, number_of_zeros: usize) -> f32
sourcefn use_small_range_correction(&self) -> bool
fn use_small_range_correction(&self) -> bool
Returns whether the cardinality of this HLL will be computed using the small-range correction.
Implementation details
The small-range correction is used when the cardinality of the set is small enough that the linear counting algorithm can be used to estimate the cardinality. The threshold for using the linear counting algorithm is determined by the number of registers in the HLL counter.
sourcefn estimate_cardinality(&self) -> f32
fn estimate_cardinality(&self) -> f32
Estimates the cardinality of the set based on the HLL counter data.
Example
let mut hll = HyperLogLog::<Precision9, 5>::new();
let elements = vec![1, 2, 3, 4, 5];
for element in &elements {
hll.insert(element);
}
let estimated_cardinality = hll.estimate_cardinality();
assert!(estimated_cardinality >= elements.len() as f32 * 0.9 &&
estimated_cardinality <= elements.len() as f32 * 1.1);Returns
f32- The estimated cardinality of the set.
sourcefn estimate_union_cardinality(&self, other: &Self) -> f32
fn estimate_union_cardinality(&self, other: &Self) -> f32
Returns an estimate of the cardinality of the union of two HyperLogLog counters.
This method calculates an estimate of the cardinality of the union of two HyperLogLog counters using the raw estimation values of each counter. It combines the estimation values by iterating over the register words of both counters and performing necessary calculations.
Arguments
other: A reference to the other HyperLogLog counter.
Returns
An estimation of the cardinality of the union of the two HyperLogLog counters.
Example
use hyperloglog_rs::prelude::*;
let mut hll1 = HyperLogLog::<Precision12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);
let mut hll2 = HyperLogLog::<Precision12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);
let union_cardinality = hll1.estimate_union_cardinality(&hll2);
assert!(union_cardinality >= 3.0 * 0.9 &&
union_cardinality <= 3.0 * 1.1);sourcefn estimate_union_and_sets_cardinality<F: Primitive<f32> + MaxMin>(
&self,
other: &Self
) -> EstimatedUnionCardinalities<F>
fn estimate_union_and_sets_cardinality<F: Primitive<f32> + MaxMin>( &self, other: &Self ) -> EstimatedUnionCardinalities<F>
Returns an estimate of the cardinality of the two HLL counters union.
sourcefn estimate_intersection_cardinality<F: Primitive<f32>>(
&self,
other: &Self
) -> F
fn estimate_intersection_cardinality<F: Primitive<f32>>( &self, other: &Self ) -> F
Returns an estimate of the cardinality of the intersection of two HyperLogLog counters.
This method calculates an estimate of the cardinality of the intersection of two HyperLogLog counters using the raw estimation values of each counter. It combines the estimation values by iterating over the register words of both counters and performing necessary calculations.
Arguments
other: A reference to the other HyperLogLog counter.
Returns
An estimation of the cardinality of the intersection of the two HyperLogLog counters.
Example
use hyperloglog_rs::prelude::*;
let mut hll1 = HyperLogLog::<Precision12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);
let mut hll2 = HyperLogLog::<Precision12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);
let intersection_cardinality: f32 = hll1.estimate_intersection_cardinality(&hll2);
assert!(intersection_cardinality >= 1.0 * 0.9 &&
intersection_cardinality <= 1.0 * 1.1);sourcefn estimate_jaccard_index(&self, other: &Self) -> f32
fn estimate_jaccard_index(&self, other: &Self) -> f32
Returns an estimate of the Jaccard index between two HyperLogLog counters.
The Jaccard index is a measure of similarity between two sets. In the context of HyperLogLog counters, it represents the ratio of the size of the intersection of the sets represented by the counters to the size of their union. This method estimates the Jaccard index by utilizing the cardinality estimation values of the intersection, left set, and right set.
Arguments
other: A reference to the other HyperLogLog counter.
Returns
An estimation of the Jaccard index between the two HyperLogLog counters.
Example
use hyperloglog_rs::prelude::*;
let mut hll1 = HyperLogLog::<Precision12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);
hll1.insert(&3);
hll1.insert(&4);
let mut hll2 = HyperLogLog::<Precision12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);
hll2.insert(&5);
hll2.insert(&6);
let jaccard_index = hll1.estimate_jaccard_index(&hll2);
let expected = 2.0 / 6.0;
assert!(jaccard_index >= expected * 0.9 &&
jaccard_index <= expected * 1.1);sourcefn estimate_difference_cardinality<F: Primitive<f32>>(&self, other: &Self) -> F
fn estimate_difference_cardinality<F: Primitive<f32>>(&self, other: &Self) -> F
Returns an estimate of the cardinality of the current HyperLogLog counter minus the provided one.
Arguments
other: A reference to the other HyperLogLog counter.
Example
use hyperloglog_rs::prelude::*;
let mut hll1 = HyperLogLog::<Precision12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);
hll1.insert(&3);
hll1.insert(&4);
let mut hll2 = HyperLogLog::<Precision12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);
hll2.insert(&5);
hll2.insert(&6);
let difference_cardinality: f32 = hll1.estimate_difference_cardinality(&hll2);
assert!(difference_cardinality >= 2.0 * 0.9 &&
difference_cardinality <= 2.0 * 1.1);sourcefn len(&self) -> usize
fn len(&self) -> usize
Returns the number of registers in the HLL counter.
Example
// Create a new HLL counter with 128 registers
let mut hll = HyperLogLog::<Precision12, 4>::new();
assert_eq!(hll.len(), 4096);
// Insert some elements into the HLL counter
hll.insert(&1);
hll.insert(&2);
hll.insert(&3);
assert_eq!(hll.len(), 1 << 12);
// Merge another HLL counter with 128 registers
let mut hll2 = HyperLogLog::<Precision12, 4>::new();
hll2.insert(&4);
hll2.insert(&5);
hll |= hll2;
assert_eq!(hll.len(), 1 << 12);sourcefn is_empty(&self) -> bool
fn is_empty(&self) -> bool
Returns whether no element was yet added to the HLL counter.
Examples
use hyperloglog_rs::prelude::*;
let mut hll: HyperLogLog<Precision8, 4> = HyperLogLog::new();
assert!(hll.is_empty());
hll.insert(&1);
assert!(!hll.is_empty());sourcefn get_number_of_padding_registers() -> usize
fn get_number_of_padding_registers() -> usize
Returns the number of extra registers that are not actually used.
Examples
Since the number of registers is not a multiple of the number of registers in a word, there are padding registers that are not actually used.
assert_eq!(HyperLogLog::<Precision10, 6>::get_number_of_padding_registers(), 1);For instance, in the case using the bare minimum bits per registers (4) and the minimal precision (4), for a total of 16 registers, we expect to not have any padding registers.
assert_eq!(HyperLogLog::<Precision4, 4>::get_number_of_padding_registers(), 0);
fn get_number_of_non_zero_registers(&self) -> usize
sourcefn get_registers(&self) -> PRECISION::Registers
fn get_registers(&self) -> PRECISION::Registers
Returns an array of registers of the HyperLogLog counter.
Examples
let mut hll = HyperLogLog::<Precision10, 6>::new();
hll.insert(&4);
hll.insert(&5);
hll.insert(&6);
let registers = hll.get_registers();
assert_eq!(registers.len(), 1024);
assert!(registers.iter().any(|&x| x > 0));We can also create an HLL from registers, and then check whether the registers are what we expect:
let expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 0];
let mut hll = HyperLogLog::<Precision4, 6>::from_registers(&expected);
assert_eq!(hll.get_registers(), expected, "Expected {:?}, got {:?}", expected, hll.get_registers());sourcefn may_contain<T: Hash>(&self, rhs: &T) -> bool
fn may_contain<T: Hash>(&self, rhs: &T) -> bool
sourcefn may_contain_all(&self, rhs: &Self) -> bool
fn may_contain_all(&self, rhs: &Self) -> bool
Returns whether the provided HyperLogLog counter may be fully contained in the current HyperLogLog counter.
Arguments
rhs- The HyperLogLog counter to check.
Implementative details
We define a counter that fully contains another counter when all of the registers of the first counter are greater than or equal to the corresponding registers of the second counter.
Examples
let mut hll1: HyperLogLog<Precision8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<Precision8, 6> = HyperLogLog::new();
hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);
hll2.insert(&42);
hll2.insert(&43);
assert_eq!(hll1.may_contain_all(&hll2), true);
assert_eq!(hll2.may_contain_all(&hll1), false);
hll2.insert(&44);
assert_eq!(hll1.may_contain_all(&hll2), true);
assert_eq!(hll2.may_contain_all(&hll1), true);sourcefn get_hash_and_index<T: Hash>(&self, value: &T) -> (u64, usize)
fn get_hash_and_index<T: Hash>(&self, value: &T) -> (u64, usize)
Returns the hash value and the corresponding register’s index for a given value.
Arguments
value- A reference to the value to be hashed.
Examples
use hyperloglog_rs::prelude::*;
let mut hll: HyperLogLog<Precision8, 6> = HyperLogLog::new();
let value = 42;
let (hash, index) = hll.get_hash_and_index(&value);
//assert_eq!(hash, 10123147082338939904, "Expected hash {}, got {}.", 10123147082338939904, hash);