[−][src]Struct streaming_algorithms::Top
This probabilistic data structure tracks the `n` top keys given a stream of `(key, value)` tuples, ordered by the sum of the values for each key (the "aggregated value"). It uses only `O(n)` space.
Its implementation has two parts:
- a doubly linked hashmap, mapping the top `n` keys to their aggregated values, and ordered by their aggregated values. This is used to keep more precise track of the aggregated values of the top `n` keys, and to reduce collisions in the count-min sketch.
- a count-min sketch to track all of the keys outside the top `n`. This data structure is also known as a counting Bloom filter. It uses conservative updating for increased accuracy.
The algorithm is as follows:
while a key and value from the input stream arrive:
    if H[key] exists
        increment aggregated value associated with H[key]
    elsif number of items in H < n
        put H[key] into map with its associated value
    else
        add C[key] into the count-min sketch with its associated value
        if aggregated value associated with C[key] is > the lowest aggregated value in H
            move the lowest key and value from H into C
            move C[key] and value from C into H
endwhile
See "An Improved Data Stream Summary: The Count-Min Sketch and its Applications" and "New Directions in Traffic Measurement and Accounting" for background on the count-min sketch with conservative updating.
Implementations
impl<A: Hash + Eq + Clone, C: Ord + New + for<'a> UnionAssign<&'a C> + Intersect> Top<A, C>
[src]
pub fn new(
n: usize,
probability: f64,
tolerance: f64,
config: <C as New>::Config
) -> Self
[src]
n: usize,
probability: f64,
tolerance: f64,
config: <C as New>::Config
) -> Self
Create an empty `Top` data structure with the specified `n` capacity.
pub fn capacity(&self) -> usize
[src]
The `n` most frequent elements we have capacity to track.
pub fn push<V: ?Sized>(&mut self, item: A, value: &V) where
C: for<'a> AddAssign<&'a V> + IntersectPlusUnionIsPlus,
[src]
C: for<'a> AddAssign<&'a V> + IntersectPlusUnionIsPlus,
"Visit" an element.
pub fn clear(&mut self)
[src]
Clears the `Top` data structure, as if it were new.
pub fn iter(&self) -> TopIter<A, C>
[src]
An iterator visiting all elements and their aggregated values in descending order of aggregated value. The iterator element type is `(&'a A, &'a C)`.
Trait Implementations
impl<A: Hash + Eq + Clone, C: Ord + New + Clone + for<'a> AddAssign<&'a C> + for<'a> UnionAssign<&'a C> + Intersect + IntersectPlusUnionIsPlus> Add<Top<A, C>> for Top<A, C>
[src]
type Output = Self
The resulting type after applying the `+` operator.
fn add(self, other: Self) -> Self
[src]
impl<A: Hash + Eq + Clone, C: Ord + New + Clone + for<'a> AddAssign<&'a C> + for<'a> UnionAssign<&'a C> + Intersect + IntersectPlusUnionIsPlus> AddAssign<Top<A, C>> for Top<A, C>
[src]
fn add_assign(&mut self, other: Self)
[src]
impl<A: Clone, C: Clone + New> Clone for Top<A, C>
[src]
impl<A: Hash + Eq + Clone + Debug, C: Ord + New + Clone + for<'a> UnionAssign<&'a C> + Intersect + Debug> Debug for Top<A, C>
[src]
impl<'de, A, C: New> Deserialize<'de> for Top<A, C> where
A: Hash + Eq + Deserialize<'de>,
C: Deserialize<'de>,
<C as New>::Config: Deserialize<'de>,
[src]
A: Hash + Eq + Deserialize<'de>,
C: Deserialize<'de>,
<C as New>::Config: Deserialize<'de>,
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error> where
__D: Deserializer<'de>,
[src]
__D: Deserializer<'de>,
impl<A, C: New> Serialize for Top<A, C> where
A: Hash + Eq + Serialize,
C: Serialize,
<C as New>::Config: Serialize,
[src]
A: Hash + Eq + Serialize,
C: Serialize,
<C as New>::Config: Serialize,
fn serialize<__S>(&self, __serializer: __S) -> Result<__S::Ok, __S::Error> where
__S: Serializer,
[src]
__S: Serializer,
impl<A: Hash + Eq + Clone, C: Ord + New + Clone + for<'a> AddAssign<&'a C> + for<'a> UnionAssign<&'a C> + Intersect + IntersectPlusUnionIsPlus> Sum<Top<A, C>> for Option<Top<A, C>>
[src]
Auto Trait Implementations
impl<A, C> RefUnwindSafe for Top<A, C> where
A: RefUnwindSafe,
C: RefUnwindSafe,
<C as New>::Config: RefUnwindSafe,
A: RefUnwindSafe,
C: RefUnwindSafe,
<C as New>::Config: RefUnwindSafe,
impl<A, C> Send for Top<A, C> where
A: Send,
C: Send,
<C as New>::Config: Send,
A: Send,
C: Send,
<C as New>::Config: Send,
impl<A, C> Sync for Top<A, C> where
A: Sync,
C: Sync,
<C as New>::Config: Sync,
A: Sync,
C: Sync,
<C as New>::Config: Sync,
impl<A, C> Unpin for Top<A, C> where
A: Unpin,
C: Unpin,
<C as New>::Config: Unpin,
A: Unpin,
C: Unpin,
<C as New>::Config: Unpin,
impl<A, C> UnwindSafe for Top<A, C> where
A: UnwindSafe,
C: UnwindSafe,
<C as New>::Config: UnwindSafe,
A: UnwindSafe,
C: UnwindSafe,
<C as New>::Config: UnwindSafe,
Blanket Implementations
impl<T> Any for T where
T: 'static + ?Sized,
[src]
T: 'static + ?Sized,
impl<T> Borrow<T> for T where
T: ?Sized,
[src]
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
[src]
T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
[src]
impl<T> DeserializeOwned for T where
T: for<'de> Deserialize<'de>,
[src]
T: for<'de> Deserialize<'de>,
impl<T> From<T> for T
[src]
impl<T, U> Into<U> for T where
U: From<T>,
[src]
U: From<T>,
impl<T> ToOwned for T where
T: Clone,
[src]
T: Clone,
type Owned = T
The resulting type after obtaining ownership.
fn to_owned(&self) -> T
[src]
fn clone_into(&self, target: &mut T)
[src]
impl<T, U> TryFrom<U> for T where
U: Into<T>,
[src]
U: Into<T>,
type Error = Infallible
The type returned in the event of a conversion error.
fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>
[src]
impl<T, U> TryInto<U> for T where
U: TryFrom<T>,
[src]
U: TryFrom<T>,
type Error = <U as TryFrom<T>>::Error
The type returned in the event of a conversion error.
fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>
[src]
impl<V, T> VZip<V> for T where
V: MultiLane<T>,
V: MultiLane<T>,