pub trait Categorical {
fn as_usize(&self) -> usize;
fn from_usize(id: usize) -> Self;
fn n_categories(&self) -> Option<usize>;
}
pub trait CatCount<C>
where C: Categorical
{
fn add(&mut self, c: C);
fn add_n(&mut self, c: C, n: usize);
fn remove(&mut self, _c: C) { unimplemented!() }
fn probability(&self, c: C) -> f64;
fn most_frequent(&self) -> C;
}
impl Categorical for u8 {
fn as_usize(&self) -> usize {
*self as usize
}
fn from_usize(id: usize) -> Self {
id as u8
}
fn n_categories(&self) -> Option<usize> {
None
}
}
impl Categorical for usize {
fn as_usize(&self) -> usize {
*self
}
fn from_usize(id: usize) -> Self {
id
}
fn n_categories(&self) -> Option<usize> {
None
}
}
#[derive(Debug, Clone)]
pub struct GenericCatCounter {
counts: Vec<usize>,
total: usize,
}
impl GenericCatCounter {
pub fn new() -> Self {
GenericCatCounter {
counts: Vec::new(),
total: 0,
}
}
pub fn probs<F: FnMut(f64)>(&self, mut f: F) {
let n = self.total as f64;
for c in self.counts.iter() {
f(*c as f64 / n);
}
}
}
impl<C: Categorical> CatCount<C> for GenericCatCounter {
fn add(&mut self, c: C) {
self.add_n(c, 1)
}
fn add_n(&mut self, c: C, n: usize) {
let i = c.as_usize();
if i >= self.counts.len() {
self.counts.resize(i + 1, 0);
}
self.counts[i] += n;
self.total += n;
}
fn remove(&mut self, c: C) {
let i = c.as_usize();
debug_assert!(i < self.counts.len());
debug_assert!(self.counts[i] > 0);
debug_assert!(self.total > 0);
self.counts[i] -= 1;
self.total -= 1;
}
fn probability(&self, c: C) -> f64 {
let i = c.as_usize();
if i < self.counts.len() {
self.counts[i] as f64 / self.total as f64
} else {
0.0
}
}
fn most_frequent(&self) -> C {
let mut n = 0;
let mut c = 0;
for i in 0..self.counts.len() {
if self.counts[i] > n {
n = self.counts[i];
c = i;
}
}
C::from_usize(c)
}
}
#[test]
fn cat_u8() {
let x = u8::from_usize(42);
assert_eq!(x, 42u8);
assert_eq!(x.as_usize(), 42usize);
assert_eq!(x.n_categories(), None);
}