pub struct HyperLogLog<T, B = SipHasherBuilder> { /* private fields */ }Expand description
A space-efficient probabilitic data structure to count the number of distinct items in a multiset.
A HyperLogLog<T> uses the observation that the cardinality of a multiset of uniformly
distributed items can be estimated by calculating the maximum number of leading zeros in the
hash of each item in the multiset. It also buckets each item in a register and takes the
harmonic mean of the count in order to reduce the variance. Finally, it uses linear counting
for small cardinalities and small correction for large cardinalities.
§Examples
use probabilistic_collections::hyperloglog::HyperLogLog;
use probabilistic_collections::SipHasherBuilder;
let mut hhl = HyperLogLog::<u32>::with_hasher(0.1, SipHasherBuilder::from_seed(0, 0));
assert!(hhl.is_empty());
for key in &[0, 1, 2, 0, 1, 2] {
hhl.insert(&key);
}
assert!((hhl.len().round() - 3.0).abs() < EPSILON);Implementations§
Source§impl<T> HyperLogLog<T>
impl<T> HyperLogLog<T>
Source§impl<T, B> HyperLogLog<T, B>where
B: BuildHasher,
impl<T, B> HyperLogLog<T, B>where
B: BuildHasher,
Sourcepub fn with_hasher(error_probability: f64, hash_builder: B) -> Self
pub fn with_hasher(error_probability: f64, hash_builder: B) -> Self
Constructs a new, empty HyperLogLog<T> with a given error probability and hasher builder.
§Panics
Panics if error_probability is not in (0, 1).
§Examples
use probabilistic_collections::hyperloglog::HyperLogLog;
use probabilistic_collections::SipHasherBuilder;
let hhl = HyperLogLog::<u32, _>::with_hasher(0.1, SipHasherBuilder::from_entropy());Sourcepub fn insert<U>(&mut self, item: &U)
pub fn insert<U>(&mut self, item: &U)
Inserts an item into the HyperLogLog<T>.
§Examples
use probabilistic_collections::hyperloglog::HyperLogLog;
let mut hhl = HyperLogLog::<u32>::new(0.1);
hhl.insert(&0);Sourcepub fn merge(&mut self, other: &HyperLogLog<T, B>)
pub fn merge(&mut self, other: &HyperLogLog<T, B>)
Merges self with other.
§Panics
Panics if the error probability of self is not equal to the error probability of other
or if the hash builder of self is not equal to the hash builder of other.
§Examples
use probabilistic_collections::hyperloglog::HyperLogLog;
use probabilistic_collections::SipHasherBuilder;
let mut hhl1 = HyperLogLog::<u32>::with_hasher(0.1, SipHasherBuilder::from_seed(0, 0));
hhl1.insert(&0);
hhl1.insert(&1);
let mut hhl2 = HyperLogLog::<u32>::with_hasher(0.1, *hhl1.hasher());
hhl2.insert(&0);
hhl2.insert(&2);
hhl1.merge(&hhl2);
assert!((hhl1.len().round() - 3.0).abs() < EPSILON);Sourcepub fn len(&self) -> f64
pub fn len(&self) -> f64
Returns the estimated number of distinct items in the HyperLogLog<T>.
§Examples
use probabilistic_collections::hyperloglog::HyperLogLog;
let mut hhl = HyperLogLog::<u32>::new(0.1);
assert!((hhl.len().round() - 0.0).abs() < EPSILON);
hhl.insert(&1);
assert!((hhl.len().round() - 1.0).abs() < EPSILON);Sourcepub fn is_empty(&self) -> bool
pub fn is_empty(&self) -> bool
Returns true is the HyperLogLog<T> is empty.
§Examples
use probabilistic_collections::hyperloglog::HyperLogLog;
let mut hhl = HyperLogLog::<u32>::new(0.1);
assert!(hhl.is_empty());
hhl.insert(&1);
assert!(!hhl.is_empty());