use std::collections::HashMap;
/// Occurrence counts of byte values (`u8` symbols), together with a running total.
#[derive(Debug, Clone)]
pub struct FrequencyTable {
    /// Number of occurrences recorded for each symbol. The methods below only
    /// create an entry when a symbol is actually observed.
    counts: HashMap<u8, usize>,
    /// Number of symbols recorded overall (the sum of all values in `counts`).
    total: usize,
}

impl FrequencyTable {
    /// Builds a table by counting every byte of `data`.
    ///
    /// An empty slice yields an empty table.
    pub fn from_data(data: &[u8]) -> Self {
        let mut counts: HashMap<u8, usize> = HashMap::new();
        for &byte in data {
            *counts.entry(byte).or_insert(0) += 1;
        }
        // Every byte contributes exactly one observation, so the total is the
        // slice length; no need to count it separately inside the loop.
        FrequencyTable {
            counts,
            total: data.len(),
        }
    }

    /// Creates an empty table: no symbols recorded, total of zero.
    pub fn new() -> Self {
        FrequencyTable {
            counts: HashMap::new(),
            total: 0,
        }
    }

    /// Records one more occurrence of `symbol`.
    pub fn increment(&mut self, symbol: u8) {
        *self.counts.entry(symbol).or_insert(0) += 1;
        self.total += 1;
    }

    /// Returns how many times `symbol` has been recorded, or `None` if the
    /// table holds no entry for it.
    pub fn count(&self, symbol: &u8) -> Option<usize> {
        self.counts.get(symbol).copied()
    }

    /// Returns the number of symbols recorded overall.
    pub fn total(&self) -> usize {
        self.total
    }

    /// Returns the number of different symbols that have an entry.
    pub fn distinct_count(&self) -> usize {
        self.counts.len()
    }

    /// Iterates over `(symbol, count)` pairs.
    ///
    /// The pairs come straight from the underlying `HashMap`, so their order
    /// is unspecified and may differ between runs.
    pub fn iter(&self) -> impl Iterator<Item = (&u8, &usize)> {
        self.counts.iter()
    }

    /// Computes the Shannon entropy of the recorded distribution, in bits per
    /// symbol: the sum of `-p * log2(p)` over all symbols, with
    /// `p = count / total`.
    ///
    /// Returns `0.0` for an empty table.
    pub fn entropy(&self) -> f64 {
        if self.total == 0 {
            return 0.0;
        }
        let total = self.total as f64;
        // Accumulate in ascending symbol order. `HashMap` iteration order
        // varies between runs and floating-point addition is not associative,
        // so a fixed order keeps the result reproducible.
        (0..=u8::MAX)
            .filter_map(|symbol| self.counts.get(&symbol))
            // A zero count would produce `0 * log2(0)`, which is NaN; skip it.
            .filter(|&&count| count != 0)
            .fold(0.0, |acc, &count| {
                let p = count as f64 / total;
                acc - p * p.log2()
            })
    }

    /// Returns the symbol with the highest count together with that count,
    /// or `None` if the table is empty.
    ///
    /// When several symbols share the highest count, the numerically smallest
    /// symbol is returned, so the answer does not depend on `HashMap`
    /// iteration order.
    pub fn most_frequent(&self) -> Option<(u8, usize)> {
        self.counts
            .iter()
            .map(|(&symbol, &count)| (symbol, count))
            // Highest count first; `Reverse` makes the smaller symbol rank
            // higher among equal counts.
            .max_by_key(|&(symbol, count)| (count, std::cmp::Reverse(symbol)))
    }
}
impl Default for FrequencyTable {
fn default() -> Self {
Self::new()
}
}