1#![warn(missing_docs)]
29#![cfg_attr(test, feature(test))]
30
31#[cfg(test)]
32extern crate test;
33
34use std::ops::Index;
35use std::default::Default;
36use std::hash::{Hasher, Hash, BuildHasher};
37use std::iter::{FromIterator, IntoIterator};
38use std::borrow::Borrow;
39use std::collections::HashMap;
40use std::collections::hash_map::{Keys, IntoIter, Iter, RandomState};
41
42
43static ZERO: usize = 0;
44
45
46#[allow(missing_docs)]
47pub struct FrequencyDistribution<K, S = RandomState> {
48 hashmap: HashMap<K, usize, S>,
49 sum_counts: usize,
50}
51
52impl<K, H, S> FrequencyDistribution<K, S>
53 where K: Eq + Hash,
54 H: Hasher,
55 S: BuildHasher<Hasher = H>
56{
57 #[inline(always)]
60 pub fn with_capacity_and_hasher(size: usize, state: S) -> FrequencyDistribution<K, S> {
61 FrequencyDistribution {
62 hashmap: HashMap::with_capacity_and_hasher(size, state),
63 sum_counts: 0,
64 }
65 }
66
67 #[inline(always)]
69 pub fn with_hasher(state: S) -> FrequencyDistribution<K, S> {
70 FrequencyDistribution {
71 hashmap: HashMap::with_hasher(state),
72 sum_counts: 0,
73 }
74 }
75
76 #[inline(always)]
78 pub fn keys(&self) -> Keys<K, usize> {
79 self.hashmap.keys()
80 }
81
82 #[inline(always)]
84 pub fn iter(&self) -> Iter<K, usize> {
85 self.hashmap.iter()
86 }
87
88 #[inline(always)]
113 pub fn iter_non_zero(&self) -> NonZeroKeysIter<K> {
114 NonZeroKeysIter { iter: self.iter() }
115 }
116
117 #[inline(always)]
119 pub fn sum_counts(&self) -> usize {
120 self.sum_counts
121 }
122
123 #[inline(always)]
125 pub fn len(&self) -> usize {
126 self.hashmap.len()
127 }
128
129 #[inline(always)]
131 pub fn get<Q: ?Sized>(&self, k: &Q) -> usize
132 where K: Borrow<Q>,
133 Q: Hash + Eq
134 {
135 self[k]
136 }
137
138 #[inline(always)]
141 pub fn clear(&mut self) {
142 self.hashmap.clear()
143 }
144
145 #[inline(always)]
149 pub fn insert(&mut self, k: K) {
150 self.insert_or_incr_by(k, 1);
151 }
152
153 #[inline(always)]
155 pub fn remove<Q: ?Sized>(&mut self, k: &Q)
156 where K: Borrow<Q>,
157 Q: Hash + Eq
158 {
159 match self.hashmap.remove(k) {
160 Some(count) => self.sum_counts -= count,
161 None => (),
162 }
163 }
164
165 #[inline]
169 fn insert_or_incr_by(&mut self, k: K, incr: usize) {
170 if !self.hashmap.contains_key(&k) {
171 self.hashmap.insert(k, incr);
172 } else {
173 *self.hashmap.get_mut(&k).unwrap() += incr;
174 }
175
176 self.sum_counts += incr;
177 }
178}
179
180impl<K, H, S> FrequencyDistribution<K, S>
181 where K: Eq + Hash,
182 H: Hasher + Default,
183 S: BuildHasher<Hasher = H> + Default
184{
185 #[inline(always)]
188 pub fn new() -> FrequencyDistribution<K, S> {
189 FrequencyDistribution::with_hasher(Default::default())
190 }
191
192 #[inline(always)]
195 pub fn with_capacity(size: usize) -> FrequencyDistribution<K, S> {
196 FrequencyDistribution::with_capacity_and_hasher(size, Default::default())
197 }
198}
199
200impl<K, H, S> Default for FrequencyDistribution<K, S>
201 where K: Eq + Hash,
202 H: Hasher + Default,
203 S: BuildHasher<Hasher = H> + Default
204{
205 #[inline(always)]
207 fn default() -> FrequencyDistribution<K, S> {
208 FrequencyDistribution::new()
209 }
210}
211
212impl<K, H, S> FromIterator<(K, usize)> for FrequencyDistribution<K, S>
213 where K: Eq + Hash,
214 H: Hasher,
215 S: BuildHasher<Hasher = H> + Default
216{
217 fn from_iter<T>(iter: T) -> FrequencyDistribution<K, S>
242 where T: IntoIterator<Item = (K, usize)>
243 {
244 let iterator = iter.into_iter();
245 let mut fdist = if iterator.size_hint().1.is_some() {
246 FrequencyDistribution::with_capacity_and_hasher(iterator.size_hint().1.unwrap(),
247 Default::default())
248 } else {
249 FrequencyDistribution::with_capacity_and_hasher(iterator.size_hint().0, Default::default())
250 };
251
252 for (k, freq) in iterator {
253 fdist.insert_or_incr_by(k, freq);
254 }
255
256 fdist
257 }
258}
259
260impl<K, H, S> Extend<(K, usize)> for FrequencyDistribution<K, S>
261 where K: Eq + Hash,
262 H: Hasher,
263 S: BuildHasher<Hasher = H>
264{
265 fn extend<T>(&mut self, iter: T)
267 where T: IntoIterator<Item = (K, usize)>
268 {
269 for (k, freq) in iter.into_iter() {
270 self.insert_or_incr_by(k, freq);
271 }
272 }
273}
274
275impl<K, H, S> IntoIterator for FrequencyDistribution<K, S>
276 where K: Eq + Hash,
277 H: Hasher,
278 S: BuildHasher<Hasher = H>
279{
280 type Item = (K, usize);
281 type IntoIter = IntoIter<K, usize>;
282
283 #[inline]
286 fn into_iter(self) -> IntoIter<K, usize> {
287 self.hashmap.into_iter()
288 }
289}
290
291impl<'a, K, H, S, Q: ?Sized> Index<&'a Q> for FrequencyDistribution<K, S>
292 where K: Eq + Hash + Borrow<Q>,
293 H: Hasher,
294 S: BuildHasher<Hasher = H>,
295 Q: Eq + Hash
296{
297 type Output = usize;
298
299 #[inline]
300 fn index<'b>(&'b self, index: &Q) -> &'b usize {
301 self.hashmap.get(index).unwrap_or(&ZERO)
302 }
303}
304
305pub struct NonZeroKeysIter<'a, K: 'a> {
307 iter: Iter<'a, K, usize>,
308}
309
310impl<'a, K: 'a> Iterator for NonZeroKeysIter<'a, K> {
311 type Item = &'a K;
312
313 #[inline(always)]
314 fn next(&mut self) -> Option<&'a K> {
315 loop {
316 match self.iter.next() {
317 Some((k, c)) if *c > 0 => return Some(k),
318 None => return None,
319 _ => (),
320 }
321 }
322 }
323}
324
325#[test]
326fn smoke_test_frequency_distribution_insert() {
327 let words = vec!["alpha", "beta"];
328 let mut dist: FrequencyDistribution<&str> = FrequencyDistribution::new();
329
330 dist.insert(words[0]);
331
332 assert_eq!(dist.get(&words[0]), 1);
333
334 dist.insert(words[1]);
335
336 assert_eq!(dist.get(&words[1]), 1);
337
338 for _ in 0..7u32 {
339 dist.insert(words[0]);
340 }
341
342 assert_eq!(dist.get(&words[0]), 8);
343}
344
345#[test]
346fn smoke_test_frequency_distribution_iter() {
347 let words = vec![("a", 50usize), ("b", 100usize), ("c", 75usize), ("d", 0usize)];
348 let dist: FrequencyDistribution<&str> = FromIterator::from_iter(words.into_iter());
349
350 assert_eq!(dist.get(&"a"), 50);
351 assert_eq!(dist.get(&"b"), 100);
352 assert_eq!(dist.get(&"c"), 75);
353
354 let mut iter = dist.iter_non_zero();
355
356 assert!(iter.next().is_some());
357 assert!(iter.next().is_some());
358 assert!(iter.next().is_some());
359 assert!(iter.next().is_none());
360
361 assert_eq!(dist.sum_counts(), 225);
362}
363
364#[test]
365fn smoke_test_frequency_distribution_remove() {
366 let words = vec![("a", 50usize), ("b", 100usize), ("c", 25usize)];
367 let mut dist: FrequencyDistribution<&str> = FromIterator::from_iter(words.into_iter());
368
369 assert_eq!(dist.get(&"a"), 50);
370
371 dist.remove(&"a");
372
373 assert_eq!(dist.get(&"a"), 0);
374 assert_eq!(dist.sum_counts(), 125);
375}
376
377#[test]
378fn smoke_test_frequency_sum_counts() {
379 let words = vec![("a", 7usize), ("b", 5usize), ("c", 8usize), ("d", 3usize)];
380 let mut dist: FrequencyDistribution<&str> = FromIterator::from_iter(words.into_iter());
381
382 assert_eq!(dist.sum_counts(), 23);
383
384 dist.insert("e");
385
386 assert_eq!(dist.sum_counts(), 24);
387}