pub struct HyperLogLog<const PRECISION: usize, const BITS: usize> { /* private fields */ }
Expand description

A probabilistic algorithm for estimating the number of distinct elements in a set.

HyperLogLog is a probabilistic algorithm designed to estimate the number of distinct elements in a set. It does so by taking advantage of the fact that the representation of an element can be transformed into a uniform distribution in a space with a fixed range.

HyperLogLog works by maintaining a fixed-sized register array, where each register holds a counter. The algorithm splits the input set into subsets, applies a hash function to each element in the subset, and then updates the corresponding counter in the register array.

HyperLogLog uses a trick called “probabilistic counting” to estimate the number of distinct elements in the set. Each register counter is converted to a binary string, and the algorithm counts the number of leading zeros in each binary string. The maximum number of leading zeros over all counters is used to estimate the number of distinct elements in the set.

HyperLogLog has a tunable parameter called precision that determines the accuracy of the algorithm. Higher precision leads to better accuracy, but requires more memory. The error rate of the algorithm is guaranteed to be within a certain bound, depending on the chosen precision.

Examples

use hyperloglog_rs::HyperLogLog;

let mut hll = HyperLogLog::<10, 6>::new();
hll.insert(&"apple");
hll.insert(&"banana");
hll.insert(&"cherry");

let estimated_cardinality = hll.estimate_cardinality();
assert!(estimated_cardinality >= 3.0_f32 * 0.9 &&
        estimated_cardinality <= 3.0_f32 * 1.1);

Citations

This implementation is based on the following papers:

  • Flajolet, Philippe, et al. “HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm.” DMTCS Proceedings 1 (2007): 127-146.
  • Heule, Stefan, Marc Nunkesser, and Alexander Hall. “HyperLogLog in practice: algorithmic engineering of a state of the art cardinality estimation algorithm.” Proceedings of the 16th International Conference on Extending Database Technology. 2013.

Implementations§

source§

impl<const PRECISION: usize, const BITS: usize> HyperLogLog<PRECISION, BITS>

source

pub const NUMBER_OF_REGISTERS: usize = _

The number of registers used by the HyperLogLog algorithm, which depends on its precision.

source

pub const SMALL_RANGE_CORRECTION_THRESHOLD: f32 = _

The threshold value used in the small range correction of the HyperLogLog algorithm.

source

pub const TWO_32: f32 = 4.2949673E+9f32

The float value of 2^32, used in the intermediate range correction of the HyperLogLog algorithm.

source

pub const INTERMEDIATE_RANGE_CORRECTION_THRESHOLD: f32 = 143165584f32

The threshold value used in the intermediate range correction of the HyperLogLog algorithm.

source

pub const LOWER_REGISTER_MASK: u32 = _

The mask used to obtain the lower register bits in the HyperLogLog algorithm.

source

pub const NUMBER_OF_REGISTERS_IN_WORD: usize = _

The number of registers that can fit in a single 32-bit word in the HyperLogLog algorithm.

source

pub const SMALL_CORRECTIONS: [f32; { _ }] = _

The precomputed small corrections used in the HyperLogLog algorithm for better performance.

source

pub fn new() -> Self

Create a new HyperLogLog counter.

source

pub fn from_registers(registers: &[u32]) -> Self

Create a new HyperLogLog counter from an array of registers.

Arguments
  • registers - An array of u32 registers to use for the HyperLogLog counter.
Returns

A new HyperLogLog counter initialized with the given registers.

Examples
use hyperloglog_rs::HyperLogLog;

let registers = [0_u32; 1 << 4];
let hll = HyperLogLog::<4, 6>::from_registers(&registers);
assert_eq!(hll.len(), 1 << 4);
source

pub fn estimate_cardinality(&self) -> f32

Estimates the cardinality of the set based on the HLL counter data.

Example
const PRECISION: usize = 8;
const BITS: usize = 5;
let mut hll = HyperLogLog::<PRECISION, BITS>::new();
let elements = vec![1, 2, 3, 4, 5];
for element in &elements {
    hll.insert(element);
}
let estimated_cardinality = hll.estimate_cardinality();
assert!(estimated_cardinality >= elements.len() as f32 * 0.9 &&
        estimated_cardinality <= elements.len() as f32 * 1.1);
Returns
  • f32 - The estimated cardinality of the set.
source

pub fn estimate_union_cardinality(&self, other: &Self) -> f32

Returns an estimate of the cardinality of the union of two HyperLogLog counters.

This method calculates an estimate of the cardinality of the union of two HyperLogLog counters using the raw estimation values of each counter. It combines the estimation values by iterating over the register words of both counters and performing necessary calculations.

Arguments
  • other: A reference to the other HyperLogLog counter.
Returns

An estimation of the cardinality of the union of the two HyperLogLog counters.

Example
use hyperloglog_rs::HyperLogLog;

let mut hll1 = HyperLogLog::<12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);

let mut hll2 = HyperLogLog::<12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);

let union_cardinality = hll1.estimate_union_cardinality(&hll2);

assert!(union_cardinality >= 3.0 * 0.9 &&
        union_cardinality <= 3.0 * 1.1);
source

pub fn estimate_union_and_sets_cardinality( &self, other: &Self ) -> EstimatedUnionCardinalities

Returns an estimate of the cardinality of the two HLL counters union.

source

pub fn estimate_intersection_cardinality(&self, other: &Self) -> f32

Returns an estimate of the cardinality of the intersection of two HyperLogLog counters.

This method calculates an estimate of the cardinality of the intersection of two HyperLogLog counters using the raw estimation values of each counter. It combines the estimation values by iterating over the register words of both counters and performing necessary calculations.

Arguments
  • other: A reference to the other HyperLogLog counter.
Returns

An estimation of the cardinality of the intersection of the two HyperLogLog counters.

Example
use hyperloglog_rs::HyperLogLog;

let mut hll1 = HyperLogLog::<12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);

let mut hll2 = HyperLogLog::<12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);

let intersection_cardinality = hll1.estimate_intersection_cardinality(&hll2);

assert!(intersection_cardinality >= 1.0 * 0.9 &&
        intersection_cardinality <= 1.0 * 1.1);
source

pub fn estimate_jaccard_cardinality(&self, other: &Self) -> f32

Returns an estimate of the Jaccard index between two HyperLogLog counters.

The Jaccard index is a measure of similarity between two sets. In the context of HyperLogLog counters, it represents the ratio of the size of the intersection of the sets represented by the counters to the size of their union. This method estimates the Jaccard index by utilizing the cardinality estimation values of the intersection, left set, and right set.

Arguments
  • other: A reference to the other HyperLogLog counter.
Returns

An estimation of the Jaccard index between the two HyperLogLog counters.

Example
use hyperloglog_rs::HyperLogLog;

let mut hll1 = HyperLogLog::<12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);
hll1.insert(&3);
hll1.insert(&4);

let mut hll2 = HyperLogLog::<12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);
hll2.insert(&5);
hll2.insert(&6);

let jaccard_index = hll1.estimate_jaccard_cardinality(&hll2);

let expected = 2.0 / 6.0;

assert!(jaccard_index >= expected * 0.9 &&
        jaccard_index <= expected * 1.1);
source

pub fn estimate_difference_cardinality(&self, other: &Self) -> f32

Returns an estimate of the cardinality of the current HyperLogLog counter minus the provided one.

Arguments
  • other: A reference to the other HyperLogLog counter.
Example
use hyperloglog_rs::HyperLogLog;

let mut hll1 = HyperLogLog::<12, 6>::new();
hll1.insert(&1);
hll1.insert(&2);
hll1.insert(&3);
hll1.insert(&4);
     
let mut hll2 = HyperLogLog::<12, 6>::new();
hll2.insert(&2);
hll2.insert(&3);
hll2.insert(&5);
hll2.insert(&6);

let difference_cardinality = hll1.estimate_difference_cardinality(&hll2);

assert!(difference_cardinality >= 2.0 * 0.9 &&
       difference_cardinality <= 2.0 * 1.1);
source

pub fn iter(&self) -> impl Iterator<Item = u32> + '_

Returns an iterator over the register values of the HyperLogLog instance.

The register values are extracted from the words array, where each word contains multiple register values. This method first checks that the size of the words array matches the expected number of registers per word, which is determined by the number of bits per register. It then iterates over each word in the array and extracts the register values using bit shifting and masking operations. Finally, it takes only the expected number of register values and returns an iterator over them.

Returns

An iterator over the register values of the HyperLogLog instance.

Examples
use hyperloglog_rs::HyperLogLog;
const PRECISION: usize = 8;
const BITS: usize = 5;
const HYPERLOGLOG_SIZE: usize = 1 << PRECISION;

let mut hll = HyperLogLog::<PRECISION, BITS>::new();
assert_eq!(hll.iter().count(), HYPERLOGLOG_SIZE);

hll.insert(&"foo");
hll.insert(&"bar");

let mut hll2 = HyperLogLog::<PRECISION, BITS>::new();
hll2|= hll;

assert_eq!(hll2.iter().count(), HYPERLOGLOG_SIZE);
source

pub fn len(&self) -> usize

Returns the number of registers in the HLL counter.

Example

// Create a new HLL counter with 128 registers
let mut hll = HyperLogLog::<12, 8>::new();
assert_eq!(hll.len(), 4096);

// Insert some elements into the HLL counter
hll.insert(&1);
hll.insert(&2);
hll.insert(&3);
assert_eq!(hll.len(), 1 << 12);

// Merge another HLL counter with 128 registers
let mut hll2 = HyperLogLog::<12, 8>::new();
hll2.insert(&4);
hll2.insert(&5);
hll |= hll2;
assert_eq!(hll.len(), 1 << 12);
source

pub fn is_empty(&self) -> bool

Returns whether no element was yet added to the HLL counter.

Examples
use hyperloglog_rs::HyperLogLog;

let mut hll: HyperLogLog<8, 8> = HyperLogLog::new();

assert!(hll.is_empty());

hll.insert(&1);

assert!(!hll.is_empty());
source

pub const fn get_number_of_bits(&self) -> usize

Returns the number of bits used to represent each register in the HyperLogLog counter.

Returns

An unsigned integer value representing the number of bits used to represent each register in the HyperLogLog counter.

Example
use hyperloglog_rs::HyperLogLog;

let hll = HyperLogLog::<13, 6>::new();
assert_eq!(hll.get_number_of_bits(), 6);
source

pub const fn get_number_of_padding_registers(&self) -> usize

Returns the number of extra registers that are not actually used.

Examples

// Create a HyperLogLog counter with precision 10 and 6-bit registers
let mut hll = HyperLogLog::<10, 6>::new();

// Since the number of registers is not a multiple of the number of registers in a word,
// there are padding registers that are not actually used.
assert_eq!(hll.get_number_of_padding_registers(), 1);

// Insert some elements into the counter
hll.insert(&1);
hll.insert(&2);

// The number of padding registers is still the same
assert_eq!(hll.get_number_of_padding_registers(), 1);
source

pub fn get_number_of_zero_registers(&self) -> usize

Returns the number of registers with zero values. This value is used for computing a small correction when estimating the cardinality of a small set.

Examples

// Create a new HyperLogLog counter with precision 14 and 5 bits per register.
let mut hll = HyperLogLog::<14, 5>::new();

// Add some elements to the counter.
hll.insert(&1);
hll.insert(&2);
hll.insert(&3);

// Get the number of zero registers.
let number_of_zero_registers = hll.get_number_of_zero_registers();

assert_eq!(number_of_zero_registers, 16381);
source

pub fn get_number_of_non_zero_registers(&self) -> usize

source

pub fn get_registers(&self) -> [u32; { _ }]

Returns an array of registers of the HyperLogLog counter.

Examples

let mut hll = HyperLogLog::<10, 6>::new();
hll.insert(&4);
hll.insert(&5);
hll.insert(&6);
let registers = hll.get_registers();

assert_eq!(registers.len(), 1024);
assert!(registers.iter().any(|&x| x > 0));

We can also create an HLL from registers, and then check whether the registers are what we expect:


let expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 11, 11, 0];
let mut hll = HyperLogLog::<4, 6>::from_registers(&expected);
assert_eq!(hll.get_registers(), expected, "Expected {:?}, got {:?}", expected, hll.get_registers());
source

pub fn get_words(&self) -> [u32; { _ }]

Returns the array of words of the HyperLogLog counter.

source

pub fn get_hash_and_index<T: Hash>(&self, value: &T) -> (u64, usize)

Returns the hash value and the corresponding register’s index for a given value.

Arguments
  • value - A reference to the value to be hashed.
Examples
use hyperloglog_rs::HyperLogLog;

let mut hll: HyperLogLog<8, 6> = HyperLogLog::new();
let value = 42;
let (hash, index) = hll.get_hash_and_index(&value);

assert_eq!(index, 213, "Expected index {}, got {}.", 213, index);
assert_eq!(hash, 15387811073369036852, "Expected hash {}, got {}.", 15387811073369036852, hash);
source

pub fn may_contain<T: Hash>(&self, rhs: &T) -> bool

Returns true if the HyperLogLog counter may contain the given element.

Arguments
  • rhs - The element to check.
Examples

let mut hll: HyperLogLog<8, 6> = HyperLogLog::new();
assert_eq!(hll.may_contain(&42), false);

hll.insert(&42);
assert_eq!(hll.may_contain(&42), true);
source

pub fn may_contain_all(&self, rhs: &Self) -> bool

Returns whether the provided HyperLogLog counter may be fully contained in the current HyperLogLog counter.

Arguments
  • rhs - The HyperLogLog counter to check.
Implementative details

We define a counter that fully contains another counter when all of the registers of the first counter are greater than or equal to the corresponding registers of the second counter.

Examples

let mut hll1: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<8, 6> = HyperLogLog::new();

hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);

hll2.insert(&42);
hll2.insert(&43);

assert_eq!(hll1.may_contain_all(&hll2), true);
assert_eq!(hll2.may_contain_all(&hll1), false);

hll2.insert(&44);

assert_eq!(hll1.may_contain_all(&hll2), true);
assert_eq!(hll2.may_contain_all(&hll1), true);
source

pub fn estimated_overlap_and_differences_cardinality_matrices<const L: usize, const R: usize>( left: &[Self; L], right: &[Self; R] ) -> ([[f32; R]; L], [f32; L], [f32; R])

Returns estimated overlapping and differences cardinality matrices of the provided HyperLogLog counters.

Arguments
  • left - Array of L HyperLogLog counters describing increasingly large surroundings of a first element.
  • right - Array of R HyperLogLog counters describing increasingly large surroundings of a second element.
Returns
  • overlap_cardinality_matrix - Matrix of estimated overlapping cardinalities between the elements of the left and right arrays.
  • left_difference_cardinality_vector - Vector of estimated difference cardinalities between the elements of the left array and the last element of the right array.
  • right_difference_cardinality_vector - Vector of estimated difference cardinalities between the elements of the right array and the last element of the left array.
Implementative details

We expect the elements of the left and right arrays to be increasingly contained in the next one.

Examples

In the following illustration, we show that for two vectors left and right of three elements, we expect to compute the exclusively overlap matrix $A_{ij}$ and the exclusively differences vectors $B_i$.

Illustration of overlaps

Very similarly, for the case of vectors of two elements:

Illustration of overlaps

In this crate, we make available the singular method to compute the estimated overlapping and differences cardinality matrices and vectors. In some instances, it is necessary to compute both of these matrices and vectors at once, and as they have a lot of common computations, it is more efficient to compute them at once. We expect that the two methods provide the same results, and we test this in the following example.

In this example, we populate randomly two arrays of two HLL each, and we compute the estimated overlapping and differences cardinality matrices and vectors using the method computing the values at once, and we compare them to the values computed using the methods which compute the values separately.


for k in 0..1000 {
    let mut left = [HyperLogLog::<12, 6>::new(), HyperLogLog::<12, 6>::new()];
    let mut right = [HyperLogLog::<12, 6>::new(), HyperLogLog::<12, 6>::new()];

    for i in 0..2 {
         for j in 0..100 {
            left[i].insert((k + 1) * (i + 1) * (j + 1) % 50);
            right[i].insert((k + 1) * (i + 1) * (j + 1) % 30);
        }
    }

    let left_tmp = left[0] | left[1];
    left[1] = left_tmp;
    let right_tmp = right[0] | right[1];
    right[1] = right_tmp;

    let (overlap_cardinality_matrix, left_difference_cardinality_vector, right_difference_cardinality_vector) =
        HyperLogLog::<12, 6>::estimated_overlap_and_differences_cardinality_matrices(&left, &right);
    let overlap_cardinality_matrix_singular = HyperLogLog::estimated_overlap_cardinality_matrix(&left, &right);
    let left_difference_cardinality_vector_singular = HyperLogLog::estimated_difference_cardinality_vector(&left, &right[1]);
    let right_difference_cardinality_vector_singular = HyperLogLog::estimated_difference_cardinality_vector(&right, &left[1]);

   for i in 0..2 {
       for j in 0..2 {
            assert!(
                overlap_cardinality_matrix[i][j] >= overlap_cardinality_matrix_singular[i][j] * 0.9 &&
                overlap_cardinality_matrix[i][j] <= overlap_cardinality_matrix_singular[i][j] * 1.1,
                "The value of the overlap cardinality matrix at position ({}, {}) is not the same when computed at once and separately.",
                i,
                j
            );
        }
    }

for i in 0..2 {
    assert!(
        left_difference_cardinality_vector[i] >= left_difference_cardinality_vector_singular[i] * 0.9 - 0.1 &&
        left_difference_cardinality_vector[i] <= left_difference_cardinality_vector_singular[i] * 1.1 + 0.1,
        "The value of the left difference cardinality vector at position {} is not the same when computed at once and separately.",
        i
    );

    assert!(
        right_difference_cardinality_vector[i] >= right_difference_cardinality_vector_singular[i] * 0.9 - 0.1 &&
        right_difference_cardinality_vector[i] <= right_difference_cardinality_vector_singular[i] * 1.1 + 0.1,
        concat!(
            "The value of the right difference cardinality vector at position {} ",
            "is not the same when computed at once and separately.",
            "We expected {} but got {}."
        ),
        i,
        right_difference_cardinality_vector_singular[i],
        right_difference_cardinality_vector[i]
    );
}

}
source

pub fn estimated_overlap_cardinality_matrix<const L: usize, const R: usize>( left: &[Self; L], right: &[Self; R] ) -> [[f32; R]; L]

Returns estimated overlapping cardinality matrices of the provided HyperLogLog counters.

Arguments
  • left - Array of L HyperLogLog counters describing increasingly large surroundings of a first element.
  • right - Array of R HyperLogLog counters describing increasingly large surroundings of a second element.
Implementation details

Both arrays are expected to contain HyperLogLog counters increasing in size, i.e. the first element of left should be contained in the second element of left, which should be contained in the third element of left, and so on. The same applies to right.

Examples

We start with a trivial example with solely two counters. In this case, the result will have to contain a single element which is the estimated intersection cardinality of the two counters.


let mut hll1: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<8, 6> = HyperLogLog::new();

hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);

hll2.insert(&42);
hll2.insert(&43);

let result = HyperLogLog::estimated_overlap_cardinality_matrix(&[hll1,], &[hll2,]);

assert!(
    result[0][0] < 2.0 * 1.1 &&
    result[0][0] > 2.0 * 0.9,
    "The estimated intersection cardinality should be around 2, but it is {}.",
    result[0][0]
);

Now we consider a more complex example with two arrays of counters. We start with two arrays of two elements each. This means that in the end we will have a 2x2 matrix. The value in position (0,0) of the matrix will be the estimated intersection cardinality of the first element of the first array and the first element of the second array. The value of the subsequent positions are less trivial, as we will have to take into account the difference of the elements present in the smaller sets which we do not want to count multiple times.

It follows that, for the value in position (0, 1), we will need to subtract the value in position (0,0). For the value in position (1, 0), we will need to subtract the value in position (0,0). And finally, for the value in position (1, 1), we will need to subtract the values in positions (0,0), (0,1) and (1,0).


let mut hll1: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<8, 6> = HyperLogLog::new();

hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);
    
hll2.insert(&42);
hll2.insert(&43);

let mut hll3: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll4: HyperLogLog<8, 6> = HyperLogLog::new();

hll3.insert(&42);
hll3.insert(&43);
hll3.insert(&44);
hll3.insert(&45);

hll4.insert(&42);
hll4.insert(&43);
hll4.insert(&44);

let result = HyperLogLog::estimated_overlap_cardinality_matrix(&[hll1, hll3], &[hll2, hll4]);

assert!(
    result[0][0] < 2.0 * 1.1 &&
    result[0][0] > 2.0 * 0.9,
    "Test 1a: The estimated intersection cardinality should be around 2, but it is {}.",
    result[0][0]
);

assert!(
    result[0][1] < 1.0 * 1.1 &&
    result[0][1] > 1.0 * 0.9,
    "Test 2a: The estimated intersection cardinality should be around 1, but it is {}.",
    result[0][1]
);

assert!(
    result[1][0] < 0.1 &&
    result[1][0] > -0.1,
    "Test 3a: The estimated intersection cardinality should be around 0, but it is {}.",
    result[1][0]
);

assert!(
    result[1][1] < 0.1 &&
    result[1][1] > -0.1,
    "Test 4a: The estimated intersection cardinality should be around 0, but it is {}.",
    result[1][1]
);

We now consider a more complex example, with two arrays of three elements each.


let mut hll1: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll3: HyperLogLog<8, 6> = HyperLogLog::new();

let mut hll4: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll5: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll6: HyperLogLog<8, 6> = HyperLogLog::new();

hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);

hll2.insert(&42);
hll2.insert(&43);
hll2.insert(&44);

hll3.insert(&42);
hll3.insert(&43);
hll3.insert(&44);
hll3.insert(&45);

hll4.insert(&42);
hll4.insert(&43);
hll4.insert(&44);

hll5.insert(&42);
hll5.insert(&43);
hll5.insert(&44);
hll5.insert(&45);

hll6.insert(&42);
hll6.insert(&43);
hll6.insert(&44);
hll6.insert(&45);
hll6.insert(&46);

let result = HyperLogLog::estimated_overlap_cardinality_matrix(&[hll1, hll3, hll6], &[hll2, hll4, hll5]);

assert!(
    result[0][0] < 3.0 * 1.1 &&
    result[0][0] > 3.0 * 0.9,
    concat!(
        "Test 1b: The estimated intersection cardinality should be around 3, but it is {}. ",
        "This is because the value in cell {:?} is dependent on no previous intersection.",
    ),
    result[0][0],
    (0, 0),
);

assert!(
    result[0][1] < 0.1 &&
    result[0][1] > -0.1,
    concat!(
        "Test 2b: The estimated intersection cardinality should be around 0, but it is {}. ",
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[0][1],
    (0, 1),
    vec![result[0][0]],
    0,
    1,
    hll1.estimate_intersection_cardinality(&hll4),
);

assert!(
    result[1][0] < 0.1 &&
    result[1][0] > -0.1,
    concat!(
        "Test 3b: The estimated intersection cardinality should be around 1, but it is {}. ",
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[1][0],
    (1, 0),
    vec![result[0][0]],
    1,
    0,
    hll3.estimate_intersection_cardinality(&hll2),
);

assert!(
    result[1][1] < 0.1 &&
    result[1][1] > -0.1,
    concat!(
        "Test 4b: The estimated intersection cardinality should be around 2, but it is {}. ",
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[1][1],
    (1, 1),
    vec![result[0][0], result[1][0], result[0][1]],
    1,
    1,
    hll3.estimate_intersection_cardinality(&hll4),
);

assert!(
    result[1][2] < 1.0 * 1.1 &&
    result[1][2] > 1.0 * 0.9,
    concat!(
        "Test 5b: The estimated intersection cardinality should be around 1, but it is {}. ",
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[1][2],
    (1, 2),
    vec![result[0][0], result[1][0], result[0][1], result[1][1]],
    1,
    2,
    hll3.estimate_intersection_cardinality(&hll5),
);

assert!(
    result[2][0] < 0.1 &&
    result[2][0] > -0.1,
    concat!(
        "Test 6b: The estimated intersection cardinality should be around 0, but it is {}. ",
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[2][0],
    (2, 0),
    vec![result[0][0], result[1][0],],
    2,
    0,
    hll6.estimate_intersection_cardinality(&hll2),
);

assert!(
    result[2][1] < 0.1 &&
    result[2][1] > -0.1,
    concat!(
        "Test 7b: The estimated intersection cardinality should be around 0, but it is {}." ,
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[2][1],
    (2, 1),
    vec![result[0][0], result[1][0], result[2][0]],
    2,
    1,
    hll6.estimate_intersection_cardinality(&hll4),
);

assert!(
    result[2][2] < 0.1 &&
    result[2][2] > -0.1,
    concat!(
        "Test 8b: The estimated intersection cardinality should be around 0, but it is {}. ",
        "This is because the value in cell {:?} is dependent on the previous intersection ",
        "values {:?}, and is not equal to the simple intersection of the HLL counters in ",
        "positions {} and {}, which would have been an estimated cardinality of {}."
    ),
    result[2][2],
    (2, 2),
    vec![result[0][0], result[1][0], result[0][1], result[2][0], result[0][2], result[2][1], result[1][2]],
    2,
    2,
    hll6.estimate_intersection_cardinality(&hll5),
);
source

pub fn estimated_difference_cardinality_vector<const N: usize>( array: &[Self; N], other: &Self ) -> [f32; N]

Returns estimated overlapping cardinality vectors of the provided HyperLogLog counters.

Arguments
  • left - Array of N HyperLogLog counters describing increasingly large surroundings of a first element.
  • right - A single HyperLogLog counter describing, usually, the largest surroundings of a second element.
Implementation details

The array of HyperLogLog counters is expected to contain HyperLogLog counters increasing in size, i.e. the first element of left should be contained in the second element of left, which should be contained in the third element of left, and so on.

Examples

We start with a trivial example with solely two counters. In this case, the result will have to contain a single element which is the estimated left-difference cardinality of the two counters.


let mut hll1: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<8, 6> = HyperLogLog::new();

hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);

hll2.insert(&42);
hll2.insert(&43);

let result = HyperLogLog::estimated_difference_cardinality_vector(&[hll1,], &hll2);

assert!(
   result[0] < 1.0 * 1.1 &&
  result[0] > 1.0 * 0.9,
 "The estimated left-difference cardinality should be around 1, but it is {}.",
result[0]
);

Now we consider a more complex example with two arrays of counters. We start with two arrays of two elements each. This means that in the end we will have a 2x1 vector. The value in position (0) of the vector will be the estimated left-difference cardinality of the first element of the first array and the first element of the second array. The value of the subsequent positions are less trivial, as we will have to take into account the difference of the elements present in the smaller sets which we do not want to count multiple times.

It follows that, for the value in position (1), we will need to subtract the value in position (0).


let mut hll1: HyperLogLog<8, 6> = HyperLogLog::new();
let mut hll2: HyperLogLog<8, 6> = HyperLogLog::new();

hll1.insert(&42);
hll1.insert(&43);
hll1.insert(&44);

hll2.insert(&42);
hll2.insert(&43);

let mut hll3: HyperLogLog<8, 6> = HyperLogLog::new();

hll3.insert(&42);
hll3.insert(&43);
hll3.insert(&44);
hll3.insert(&45);

let result = HyperLogLog::estimated_difference_cardinality_vector(&[hll1, hll3], &hll2);

assert!(
    result[0] < 1.0 * 1.1 &&
    result[0] > 1.0 * 0.9,
    "Test 1a: The estimated left-difference cardinality should be around 1, but it is {}.",
    result[0]
);

assert!(
    result[1] < 1.1 &&
    result[1] > 0.9,
    "Test 2a: The estimated left-difference cardinality should be around 1, but it is {}.",
    result[1]
);
source

pub fn insert<T: Hash>(&mut self, rhs: T)

Adds an element to the HyperLogLog counter.

Arguments
  • rhs - The element to add.
Examples
use hyperloglog_rs::HyperLogLog;

const PRECISION: usize = 10;

let mut hll = HyperLogLog::<PRECISION, 6>::new();

hll.insert("Hello");
hll.insert("World");

assert!(hll.estimate_cardinality() >= 2.0);
Performance

The performance of this function depends on the size of the HyperLogLog counter (N), the number of distinct elements in the input, and the hash function used to hash elements. For a given value of N, the function has an average time complexity of O(1) and a worst-case time complexity of O(log N). However, the actual time complexity may vary depending on the distribution of the hashed elements.

Errors

This function does not return any errors.

Trait Implementations§

source§

impl<const PRECISION: usize, const BITS: usize> BitOr<&HyperLogLog<PRECISION, BITS>> for HyperLogLog<PRECISION, BITS>

source§

fn bitor(self, rhs: &Self) -> Self

Computes the union between two HyperLogLog counters of the same precision and number of bits per register.

Example
let mut hll1 = HyperLogLog::<14, 5>::new();
hll1.insert(&1);
hll1.insert(&2);

let mut hll2 = HyperLogLog::<14, 5>::new();
hll2.insert(&2);
hll2.insert(&3);

let hll_union = hll1 | hll2;

assert!(hll_union.estimate_cardinality() >= 3.0_f32 * 0.9 &&
        hll_union.estimate_cardinality() <= 3.0_f32 * 1.1);

Merging a set with an empty set should not change the cardinality.

let mut hll1 = HyperLogLog::<14, 5>::new();
hll1.insert(&1);
hll1.insert(&2);

let hll_union = hll1.clone() | HyperLogLog::<14, 5>::new();
assert_eq!(
    hll_union,
    hll1,
    concat!(
        "The cardinality of the union should ",
        "be the same as the cardinality of the first set."
   )
);

We can create the HLL counters from array from registers, so to be able to check that everything works as expected.


let first_registers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let second_registers = [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 19];
let expected = [9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 15, 19];

let mut hll1 = HyperLogLog::<4, 5>::from_registers(&first_registers);
let mut hll2 = HyperLogLog::<4, 5>::from_registers(&second_registers);
let union = hll1 | &hll2;

assert_eq!(union.get_registers(), expected, "The registers are not the expected ones, got {:?} instead of {:?}.", union.get_registers(), expected);
§

type Output = HyperLogLog<PRECISION, BITS>

The resulting type after applying the | operator.
source§

impl<const PRECISION: usize, const BITS: usize> BitOr<HyperLogLog<PRECISION, BITS>> for HyperLogLog<PRECISION, BITS>

source§

fn bitor(self, rhs: Self) -> Self

Computes the union between two HyperLogLog counters of the same precision and number of bits per register.

Example
let mut hll1 = HyperLogLog::<14, 5>::new();
hll1.insert(&1);
hll1.insert(&2);

let mut hll2 = HyperLogLog::<14, 5>::new();
hll2.insert(&2);
hll2.insert(&3);

let hll_union = hll1 | hll2;

assert!(hll_union.estimate_cardinality() >= 3.0_f32 * 0.9 &&
        hll_union.estimate_cardinality() <= 3.0_f32 * 1.1);

Merging a set with an empty set should not change the cardinality.

let mut hll1 = HyperLogLog::<14, 5>::new();
hll1.insert(&1);
hll1.insert(&2);

let hll_union = hll1.clone() | HyperLogLog::<14, 5>::new();
assert_eq!(
    hll_union,
    hll1,
    concat!(
        "The cardinality of the union should ",
        "be the same as the cardinality of the first set."
   )
);

We can create the HLL counters from array from registers, so to be able to check that everything works as expected.


let first_registers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
let second_registers = [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 19];
let expected = [9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 11, 12, 13, 14, 15, 19];

let mut hll1 = HyperLogLog::<4, 5>::from_registers(&first_registers);
let mut hll2 = HyperLogLog::<4, 5>::from_registers(&second_registers);
let union = hll1 | hll2;

assert_eq!(union.get_registers(), expected, "The registers are not the expected ones, got {:?} instead of {:?}.", union.get_registers(), expected);
§

type Output = HyperLogLog<PRECISION, BITS>

The resulting type after applying the | operator.
source§

impl<const PRECISION: usize, const BITS: usize> BitOrAssign<&HyperLogLog<PRECISION, BITS>> for HyperLogLog<PRECISION, BITS>

source§

fn bitor_assign(&mut self, rhs: &Self)

Computes union between HLL counters.


let mut hll = HyperLogLog::<8, 6>::new();
hll.insert(1u8);

let mut hll2 = HyperLogLog::<8, 6>::new();
hll2.insert(2u8);

hll.bitor_assign(&hll2);

assert!(hll.estimate_cardinality() > 2.0 - 0.1, "The cardinality is {}, we were expecting 2.", hll.estimate_cardinality());
assert!(hll.estimate_cardinality() < 2.0 + 0.1, "The cardinality is {}, we were expecting 2.", hll.estimate_cardinality());

let mut hll = HyperLogLog::<8, 6>::new();
hll.insert(1u8);

let mut hll2 = HyperLogLog::<8, 6>::new();
hll2.insert(1u8);

hll.bitor_assign(&hll2);

assert!(hll.estimate_cardinality() > 1.0 - 0.1, "The cardinality is {}, we were expecting 1.", hll.estimate_cardinality());
assert!(hll.estimate_cardinality() < 1.0 + 0.1, "The cardinality is {}, we were expecting 1.", hll.estimate_cardinality());

let mut hll3 = HyperLogLog::<16, 6>::new();
hll3.insert(3u8);
hll3.insert(4u8);

let mut hll4 = HyperLogLog::<16, 6>::new();
hll4.insert(5u8);
hll4.insert(6u8);

hll3.bitor_assign(&hll4);

assert!(hll3.estimate_cardinality() > 4.0 - 0.1, "Expected a value equal to around 4, got {}", hll3.estimate_cardinality());
assert!(hll3.estimate_cardinality() < 4.0 + 0.1, "Expected a value equal to around 4, got {}", hll3.estimate_cardinality());
source§

impl<const PRECISION: usize, const BITS: usize> BitOrAssign<HyperLogLog<PRECISION, BITS>> for HyperLogLog<PRECISION, BITS>

source§

fn bitor_assign(&mut self, rhs: Self)

Computes union between HLL counters.


let mut hll = HyperLogLog::<8, 6>::new();
hll.insert(1u8);

let mut hll2 = HyperLogLog::<8, 6>::new();
hll2.insert(2u8);

hll.bitor_assign(hll2);

assert!(hll.estimate_cardinality() > 2.0 - 0.1, "The cardinality is {}, we were expecting 2.", hll.estimate_cardinality());
assert!(hll.estimate_cardinality() < 2.0 + 0.1, "The cardinality is {}, we were expecting 2.", hll.estimate_cardinality());

let mut hll = HyperLogLog::<8, 6>::new();
hll.insert(1u8);

let mut hll2 = HyperLogLog::<8, 6>::new();
hll2.insert(1u8);

hll.bitor_assign(hll2);

assert!(hll.estimate_cardinality() > 1.0 - 0.1, "The cardinality is {}, we were expecting 1.", hll.estimate_cardinality());
assert!(hll.estimate_cardinality() < 1.0 + 0.1, "The cardinality is {}, we were expecting 1.", hll.estimate_cardinality());

let mut hll3 = HyperLogLog::<16, 6>::new();
hll3.insert(3u8);
hll3.insert(4u8);

let mut hll4 = HyperLogLog::<16, 6>::new();
hll4.insert(5u8);
hll4.insert(6u8);

hll3.bitor_assign(hll4);

assert!(hll3.estimate_cardinality() > 4.0 - 0.1, "Expected a value equal to around 4, got {}", hll3.estimate_cardinality());
assert!(hll3.estimate_cardinality() < 4.0 + 0.1, "Expected a value equal to around 4, got {}", hll3.estimate_cardinality());
source§

impl<const PRECISION: usize, const BITS: usize> Clone for HyperLogLog<PRECISION, BITS>

source§

fn clone(&self) -> HyperLogLog<PRECISION, BITS>

Returns a copy of the value. Read more
1.0.0 · source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
source§

impl<const PRECISION: usize, const BITS: usize> Debug for HyperLogLog<PRECISION, BITS>

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
source§

impl<const PRECISION: usize, const BITS: usize> Default for HyperLogLog<PRECISION, BITS>

Implements the Default trait for HyperLogLog.

HyperLogLog is a probabilistic cardinality estimator that uses a fixed amount of memory to estimate the number of distinct elements in a set.

Examples


let hll: HyperLogLog<10, 6> = Default::default();
assert_eq!(hll.len(), 1024);
assert_eq!(hll.get_number_of_bits(), 6);
source§

fn default() -> Self

Returns a new HyperLogLog instance with default configuration settings.

source§

impl<'de, const PRECISION: usize, const BITS: usize> Deserialize<'de> for HyperLogLog<PRECISION, BITS>

source§

fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error>

Deserializes the HyperLogLog counter using the given deserializer.

This method is part of the Deserialize trait implementation for the HyperLogLog struct, allowing the counter to be deserialized from a format supported by the deserializer.

Arguments
  • deserializer: The deserializer used to deserialize the HyperLogLog counter.
Returns

The deserialization result, indicating success or failure.

Example
#![feature(generic_const_exprs)]
use serde::de::Deserialize;
use serde_json::Deserializer;
use hyperloglog_rs::HyperLogLog;

let words = [0, 0, 0, 0, 5, 0, 4, 0, 0, 3, 0, 0, 0];
let words_str = "[0, 0, 0, 0, 5, 0, 4, 0, 0, 3, 0, 0, 0]";
let mut deserializer = Deserializer::from_str(words_str);
let result = HyperLogLog::<6, 6>::deserialize(&mut deserializer);
assert!(result.is_ok(), "Deserialization failed, error: {:?}", result.err());
let hll = result.unwrap();
hll.get_words().iter().zip(words.iter()).for_each(|(a, b)| assert_eq!(a, b, "Deserialized words do not match, expected: {}, actual: {}", b, a));
source§

impl<const PRECISION: usize, const BITS: usize, T: Hash> From<T> for HyperLogLog<PRECISION, BITS>

source§

fn from(value: T) -> Self

Create a new HyperLogLog counter from a value.

This method creates a new empty HyperLogLog counter and inserts the hash of the given value into it. The value can be any type that implements the Hash trait.

Examples

let hll = HyperLogLog::<14, 5>::from("test");

assert!(hll.estimate_cardinality() >=  1.0_f32);
assert!(!hll.is_empty());
assert!(hll.may_contain(&"test"));
source§

impl<const PRECISION: usize, const BITS: usize, A: Hash> FromIterator<A> for HyperLogLog<PRECISION, BITS>

source§

fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self

Creates a new HyperLogLog counter and adds all elements from an iterator to it.

Examples
use hyperloglog_rs::HyperLogLog;

let data = vec![1, 2, 3, 4, 5, 6, 7, 8, 9];
let hll: HyperLogLog<12, 5> = data.iter().collect();
assert!(
    hll.estimate_cardinality() > 0.9 * data.len() as f32,
    concat!(
        "The estimate is too low, expected ",
        "at least {}, got {}",
    ),
    0.9 * data.len() as f32,
    hll.estimate_cardinality()
);
assert!(
    hll.estimate_cardinality() < 1.1 * data.len() as f32,
    concat!(
    "The estimate is too high, expected ",
    "at most {}, got {}",
   ),
    1.1 * data.len() as f32,
    hll.estimate_cardinality()
);
source§

impl<const PRECISION: usize, const BITS: usize> PartialEq<HyperLogLog<PRECISION, BITS>> for HyperLogLog<PRECISION, BITS>

source§

fn eq(&self, other: &HyperLogLog<PRECISION, BITS>) -> bool

This method tests for self and other values to be equal, and is used by ==.
1.0.0 · source§

fn ne(&self, other: &Rhs) -> bool

This method tests for !=. The default implementation is almost always sufficient, and should not be overridden without very good reason.
source§

impl<const PRECISION: usize, const BITS: usize> Serialize for HyperLogLog<PRECISION, BITS>

source§

fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error>

Serializes the HyperLogLog counter using the given serializer.

This method is part of the Serialize trait implementation for the HyperLogLog struct, allowing the counter to be serialized into a format supported by the serializer.

Arguments
  • serializer: The serializer used to serialize the HyperLogLog counter.
Returns

The serialization result, indicating success or failure.

Example
use serde::Serialize;
use serde_json::Serializer;
use hyperloglog_rs::HyperLogLog;

let hll = HyperLogLog::<12, 6>::new();
let mut serializer = Serializer::new(Vec::new());
let result = hll.serialize(&mut serializer);
assert!(result.is_ok(), "Serialization failed, error: {:?}", result.err());
source§

impl<const PRECISION: usize, const BITS: usize> Copy for HyperLogLog<PRECISION, BITS>

source§

impl<const PRECISION: usize, const BITS: usize> Eq for HyperLogLog<PRECISION, BITS>

source§

impl<const PRECISION: usize, const BITS: usize> StructuralEq for HyperLogLog<PRECISION, BITS>

source§

impl<const PRECISION: usize, const BITS: usize> StructuralPartialEq for HyperLogLog<PRECISION, BITS>

Auto Trait Implementations§

§

impl<const PRECISION: usize, const BITS: usize> RefUnwindSafe for HyperLogLog<PRECISION, BITS>

§

impl<const PRECISION: usize, const BITS: usize> Send for HyperLogLog<PRECISION, BITS>

§

impl<const PRECISION: usize, const BITS: usize> Sync for HyperLogLog<PRECISION, BITS>

§

impl<const PRECISION: usize, const BITS: usize> Unpin for HyperLogLog<PRECISION, BITS>

§

impl<const PRECISION: usize, const BITS: usize> UnwindSafe for HyperLogLog<PRECISION, BITS>

Blanket Implementations§

source§

impl<T> Any for Twhere T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for Twhere T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for Twhere T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<!> for T

source§

fn from(t: !) -> T

Converts to this type from the input type.
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for Twhere U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> ToOwned for Twhere T: Clone,

§

type Owned = T

The resulting type after obtaining ownership.
source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
source§

impl<T, U> TryFrom<U> for Twhere U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for Twhere U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> DeserializeOwned for Twhere T: for<'de> Deserialize<'de>,