1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#[cfg(feature = "serde1")]
use serde::{Deserialize, Serialize};

use crate::data::CategoricalDatum;
use crate::data::DataOrSuffStat;
use crate::dist::Categorical;
use crate::traits::SuffStat;

/// Sufficient statistic for the categorical distribution.
///
/// Tracks the total number of observations together with a per-category
/// tally of how many times each category index has been observed.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct CategoricalSuffStat {
    /// Total number of observations recorded.
    n: usize,
    /// Per-category tallies; `counts[i]` is the tally for category index `i`.
    counts: Vec<f64>,
}

impl CategoricalSuffStat {
    /// Create an empty statistic with `k` categories.
    ///
    /// The observation total starts at zero and every one of the `k` counts
    /// starts at `0.0`.
    ///
    /// # Example
    ///
    /// ```
    /// # use rv::data::CategoricalSuffStat;
    /// let stat = CategoricalSuffStat::new(3);
    ///
    /// assert_eq!(stat.n(), 0);
    /// assert_eq!(*stat.counts(), vec![0.0, 0.0, 0.0]);
    /// ```
    #[inline]
    pub fn new(k: usize) -> Self {
        let counts = vec![0.0; k];
        Self { n: 0, counts }
    }

    /// Assemble a statistic directly from its parts.
    ///
    /// No validation is performed: `n` and `counts` are stored exactly as
    /// given, so keeping them consistent with each other is the caller's
    /// responsibility.
    #[inline]
    pub fn from_parts_unchecked(n: usize, counts: Vec<f64>) -> Self {
        Self { n, counts }
    }

    /// Total number of observations recorded so far.
    ///
    /// # Example
    ///
    /// ```
    /// # use rv::data::CategoricalSuffStat;
    /// # use rv::traits::SuffStat;
    /// let mut stat = CategoricalSuffStat::new(3);
    ///
    /// stat.observe(&0_u8);
    /// stat.observe(&1_u8);
    /// stat.observe(&1_u8);
    ///
    /// assert_eq!(stat.n(), 3);
    /// ```
    #[inline]
    pub fn n(&self) -> usize {
        self.n
    }

    /// Borrow the per-category counts, indexed by category.
    ///
    /// # Example
    ///
    /// ```
    /// # use rv::data::CategoricalSuffStat;
    /// # use rv::traits::SuffStat;
    /// let mut stat = CategoricalSuffStat::new(3);
    ///
    /// stat.observe(&0_u8);
    /// stat.observe(&1_u8);
    /// stat.observe(&1_u8);
    ///
    /// assert_eq!(*stat.counts(), vec![1.0, 2.0, 0.0]);
    /// ```
    #[inline]
    pub fn counts(&self) -> &Vec<f64> {
        &self.counts
    }
}

/// Wrap a borrowed [`CategoricalSuffStat`] in the `SuffStat` variant of
/// [`DataOrSuffStat`].
impl<'a, X> From<&'a CategoricalSuffStat> for DataOrSuffStat<'a, X, Categorical>
where
    X: CategoricalDatum,
{
    fn from(stat: &'a CategoricalSuffStat) -> Self {
        // The statistic is only borrowed; nothing is copied.
        Self::SuffStat(stat)
    }
}

/// Wrap a borrowed vector of categorical data in the `Data` variant of
/// [`DataOrSuffStat`].
impl<'a, X> From<&'a Vec<X>> for DataOrSuffStat<'a, X, Categorical>
where
    X: CategoricalDatum,
{
    fn from(xs: &'a Vec<X>) -> Self {
        // The data is only borrowed; nothing is copied.
        Self::Data(xs)
    }
}

impl<X> SuffStat<X> for CategoricalSuffStat
where
    X: CategoricalDatum,
{
    /// Total number of observations currently recorded.
    fn n(&self) -> usize {
        self.n
    }

    /// Record one observation of `x`.
    ///
    /// `x` is converted to a category index with `into_usize`; the
    /// observation total and that category's count are each incremented by
    /// one.
    ///
    /// # Panics
    ///
    /// Panics if the category index is not a valid index into the counts.
    fn observe(&mut self, x: &X) {
        let category = x.into_usize();
        self.n += 1;
        self.counts[category] += 1.0;
    }

    /// Remove one previously recorded observation of `x`.
    ///
    /// `x` is converted to a category index with `into_usize`; the
    /// observation total and that category's count are each decremented by
    /// one. No check is made that `x` was actually observed before.
    ///
    /// # Panics
    ///
    /// Panics if the category index is not a valid index into the counts.
    /// Calling this with no recorded observations underflows the total,
    /// which panics when overflow checks are enabled.
    fn forget(&mut self, x: &X) {
        let category = x.into_usize();
        self.n -= 1;
        self.counts[category] -= 1.0;
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh statistic has `k` counts, all (numerically) zero, and no
    /// observations.
    #[test]
    fn new() {
        let stat = CategoricalSuffStat::new(4);

        assert_eq!(stat.counts.len(), 4);
        assert_eq!(stat.n, 0);
        for ct in &stat.counts {
            assert!(ct.abs() < 1E-12);
        }
    }

    /// Parts passed to `from_parts_unchecked` are stored verbatim.
    #[test]
    fn from_parts_unchecked() {
        let expected = [1.0, 2.0, 3.0, 4.0];
        let stat =
            CategoricalSuffStat::from_parts_unchecked(10, expected.to_vec());

        assert_eq!(stat.n(), 10);
        for (ix, want) in expected.iter().enumerate() {
            assert_eq!(stat.counts()[ix], *want);
        }
    }
}