stack_graphs/
stats.rs

1// -*- coding: utf-8 -*-
2// ------------------------------------------------------------------------------------------------
3// Copyright © 2023, stack-graphs authors.
4// Licensed under either of Apache License, Version 2.0, or MIT license, at your option.
5// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details.
6// ------------------------------------------------------------------------------------------------
7
8use std::collections::HashMap;
9use std::hash::Hash;
10
11use itertools::Itertools;
12
13/// Frequency distribution maintains the frequency of T values.
14#[derive(Clone, Debug, Default)]
15pub struct FrequencyDistribution<T>
16where
17    T: Eq + Hash,
18{
19    values: HashMap<T, usize>,
20    total: usize,
21}
22
23impl<T: Eq + Hash> FrequencyDistribution<T> {
24    pub fn record(&mut self, value: T) {
25        *self.values.entry(value).or_default() += 1;
26        self.total += 1;
27    }
28
29    // The number of recorded values.
30    pub fn count(&self) -> usize {
31        return self.total;
32    }
33
34    // The number of unique recorded values.
35    pub fn unique(&self) -> usize {
36        return self.values.len();
37    }
38
39    pub fn frequencies(&self) -> FrequencyDistribution<usize> {
40        let mut fs = FrequencyDistribution::default();
41        for count in self.values.values() {
42            fs.record(*count);
43        }
44        fs
45    }
46}
47
48impl<T: Eq + Hash + Ord> FrequencyDistribution<T> {
49    pub fn quantiles(&self, q: usize) -> Vec<&T> {
50        if q == 0 || self.total == 0 {
51            return vec![];
52        }
53
54        let mut it = self.values.iter().sorted_by_key(|e| e.0);
55        let mut total_count = 0;
56        let mut last_value;
57        let mut result = Vec::new();
58
59        if let Some((value, count)) = it.next() {
60            total_count += count;
61            last_value = value;
62        } else {
63            return vec![];
64        }
65        result.push(last_value);
66
67        for k in 1..=q {
68            let limit = ((self.total as f64 * k as f64) / q as f64).round() as usize;
69            while total_count < limit {
70                if let Some((value, count)) = it.next() {
71                    total_count += count;
72                    last_value = value;
73                } else {
74                    break;
75                }
76            }
77            result.push(last_value);
78        }
79
80        result
81    }
82}
83
84impl<T> std::ops::AddAssign<Self> for FrequencyDistribution<T>
85where
86    T: Eq + Hash,
87{
88    fn add_assign(&mut self, rhs: Self) {
89        for (value, count) in rhs.values {
90            *self.values.entry(value).or_default() += count;
91        }
92        self.total += rhs.total;
93    }
94}
95
96impl<T> std::ops::AddAssign<&Self> for FrequencyDistribution<T>
97where
98    T: Eq + Hash + Clone,
99{
100    fn add_assign(&mut self, rhs: &Self) {
101        for (value, count) in &rhs.values {
102            *self.values.entry(value.clone()).or_default() += count;
103        }
104        self.total += rhs.total;
105    }
106}