1use std::collections::HashMap;
9use std::hash::Hash;
10
11use itertools::Itertools;
12
13#[derive(Clone, Debug, Default)]
15pub struct FrequencyDistribution<T>
16where
17 T: Eq + Hash,
18{
19 values: HashMap<T, usize>,
20 total: usize,
21}
22
23impl<T: Eq + Hash> FrequencyDistribution<T> {
24 pub fn record(&mut self, value: T) {
25 *self.values.entry(value).or_default() += 1;
26 self.total += 1;
27 }
28
29 pub fn count(&self) -> usize {
31 return self.total;
32 }
33
34 pub fn unique(&self) -> usize {
36 return self.values.len();
37 }
38
39 pub fn frequencies(&self) -> FrequencyDistribution<usize> {
40 let mut fs = FrequencyDistribution::default();
41 for count in self.values.values() {
42 fs.record(*count);
43 }
44 fs
45 }
46}
47
48impl<T: Eq + Hash + Ord> FrequencyDistribution<T> {
49 pub fn quantiles(&self, q: usize) -> Vec<&T> {
50 if q == 0 || self.total == 0 {
51 return vec![];
52 }
53
54 let mut it = self.values.iter().sorted_by_key(|e| e.0);
55 let mut total_count = 0;
56 let mut last_value;
57 let mut result = Vec::new();
58
59 if let Some((value, count)) = it.next() {
60 total_count += count;
61 last_value = value;
62 } else {
63 return vec![];
64 }
65 result.push(last_value);
66
67 for k in 1..=q {
68 let limit = ((self.total as f64 * k as f64) / q as f64).round() as usize;
69 while total_count < limit {
70 if let Some((value, count)) = it.next() {
71 total_count += count;
72 last_value = value;
73 } else {
74 break;
75 }
76 }
77 result.push(last_value);
78 }
79
80 result
81 }
82}
83
84impl<T> std::ops::AddAssign<Self> for FrequencyDistribution<T>
85where
86 T: Eq + Hash,
87{
88 fn add_assign(&mut self, rhs: Self) {
89 for (value, count) in rhs.values {
90 *self.values.entry(value).or_default() += count;
91 }
92 self.total += rhs.total;
93 }
94}
95
96impl<T> std::ops::AddAssign<&Self> for FrequencyDistribution<T>
97where
98 T: Eq + Hash + Clone,
99{
100 fn add_assign(&mut self, rhs: &Self) {
101 for (value, count) in &rhs.values {
102 *self.values.entry(value.clone()).or_default() += count;
103 }
104 self.total += rhs.total;
105 }
106}