use std::{collections::HashMap, hash::Hash};
use linreg::linear_regression;
use nohash_hasher::{BuildNoHashHasher, IsEnabled};
/// Occurrence counter over values of type `T`, from which a Zipf-style
/// exponent can be derived via `into_alpha`.
///
/// The counts live in a `HashMap` whose hasher is `BuildNoHashHasher<T>`
/// from the `nohash_hasher` crate; inserting therefore requires
/// `T: IsEnabled` (see `insert`).
#[derive(Clone)]
pub struct Zipf<T> {
// Number of times each distinct value has been inserted so far.
frequencies: HashMap<T, u64, BuildNoHashHasher<T>>,
}
impl<T> Zipf<T> {
    /// Records one occurrence of `value`.
    ///
    /// A value seen for the first time starts at a count of 1; every later
    /// insertion of an equal value bumps its count by one.
    pub fn insert(&mut self, value: T)
    where
        T: Eq + Hash + IsEnabled,
    {
        // Start unseen values at zero so the increment below covers both the
        // "new" and the "already present" case.
        *self.frequencies.entry(value).or_insert(0) += 1;
    }

    /// Consumes the counter and returns the negated first component (the
    /// slope `m`) of `linear_regression` run on `log10(rank)` against
    /// `log10(count)`, where rank 1 is the most frequent value.
    ///
    /// Returns `None` when nothing has been inserted or when
    /// `linear_regression` reports an error.
    pub fn into_alpha(self) -> Option<f64> {
        if self.frequencies.is_empty() {
            return None;
        }

        // Only the counts matter for the fit; the keys are dropped here.
        let mut counts: Vec<u64> = self.frequencies.into_values().collect();
        // Highest count first, so position 0 corresponds to rank 1.
        counts.sort_unstable_by(|lhs, rhs| rhs.cmp(lhs));

        let (log_rank, log_count): (Vec<f64>, Vec<f64>) = counts
            .iter()
            .enumerate()
            .map(|(position, &count)| ((position as f64 + 1.0).log10(), (count as f64).log10()))
            .unzip();

        let fit = linear_regression::<f64, f64, f64>(&log_rank, &log_count);
        fit.ok().map(|(slope, _)| -slope)
    }
}
impl<T> Default for Zipf<T> {
    /// Builds a `Zipf` with no recorded values, backed by a map that uses the
    /// default `BuildNoHashHasher`.
    fn default() -> Self {
        Self {
            frequencies: HashMap::with_hasher(BuildNoHashHasher::default()),
        }
    }
}