use linalg::{Matrix, Axes, BaseMatrix, BaseMatrixMut};
use learning::{LearningResult, SupModel};
use learning::error::{Error, ErrorKind};
use rulinalg::utils;
use std::f64::consts::PI;
/// A Naive Bayes model parameterized over a class-conditional distribution `T`.
///
/// All `Option` fields are `None` until the model has been trained.
#[derive(Debug)]
pub struct NaiveBayes<T: Distribution> {
// Fitted class-conditional distribution; set by `train`.
distr: Option<T>,
// Number of classes (taken from the target matrix's column count).
cluster_count: Option<usize>,
// Empirical class priors: per-class row count divided by total rows.
class_prior: Option<Vec<f64>>,
// Number of training rows observed for each class.
class_counts: Vec<usize>,
}
impl<T: Distribution> NaiveBayes<T> {
pub fn new() -> NaiveBayes<T> {
NaiveBayes {
distr: None,
cluster_count: None,
class_prior: None,
class_counts: Vec::new(),
}
}
pub fn cluster_count(&self) -> Option<&usize> {
self.cluster_count.as_ref()
}
pub fn class_prior(&self) -> Option<&Vec<f64>> {
self.class_prior.as_ref()
}
pub fn distr(&self) -> Option<&T> {
self.distr.as_ref()
}
}
impl<T: Distribution> SupModel<Matrix<f64>, Matrix<f64>> for NaiveBayes<T> {
    /// Train the model: size the distribution from the target/feature
    /// dimensions, then fit its parameters and the class priors.
    fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
        self.distr = Some(T::from_model_params(targets.cols(), inputs.cols()));
        self.update_params(inputs, targets)
    }

    /// Predict a one-hot class-membership matrix for each input row.
    ///
    /// Returns an error if the model has not been trained.
    fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
        // NOTE: log-probabilities are computed first so an untrained model
        // reports the error from `get_log_probs` before the count check.
        let log_probs = try!(self.get_log_probs(inputs));
        let predicted = NaiveBayes::<T>::get_classes(log_probs);
        if let Some(cluster_count) = self.cluster_count {
            // Flatten the one-hot rows into a single data vector.
            let mut one_hot = Vec::with_capacity(inputs.rows() * cluster_count);
            one_hot.extend(predicted.into_iter().flat_map(|class| {
                (0..cluster_count).map(move |col| if col == class { 1f64 } else { 0f64 })
            }));
            Ok(Matrix::new(inputs.rows(), cluster_count, one_hot))
        } else {
            Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained."))
        }
    }
}
impl<T: Distribution> NaiveBayes<T> {
pub fn get_log_probs(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
if let (&Some(ref distr), &Some(ref prior)) = (&self.distr, &self.class_prior) {
distr.joint_log_lik(inputs, prior)
} else {
Err(Error::new_untrained())
}
}
fn update_params(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
let class_count = targets.cols();
let total_data = inputs.rows();
self.class_counts = vec![0; class_count];
let mut class_data = vec![Vec::new(); class_count];
for (idx, row) in targets.iter_rows().enumerate() {
let class = try!(NaiveBayes::<T>::find_class(row));
class_data[class].push(idx);
self.class_counts[class] += 1;
}
if let Some(ref mut distr) = self.distr {
for (idx, c) in class_data.into_iter().enumerate() {
if c.is_empty() {
continue;
}
try!(distr.update_params(&inputs.select_rows(&c), idx));
}
}
let mut class_prior = Vec::with_capacity(class_count);
class_prior.extend(self.class_counts.iter().map(|c| *c as f64 / total_data as f64));
self.class_prior = Some(class_prior);
self.cluster_count = Some(class_count);
Ok(())
}
fn find_class(row: &[f64]) -> LearningResult<usize> {
for (idx, r) in row.into_iter().enumerate() {
if *r == 1f64 {
return Ok(idx);
}
}
Err(Error::new(ErrorKind::InvalidState,
"No class found for entry in targets"))
}
fn get_classes(log_probs: Matrix<f64>) -> Vec<usize> {
let mut data_classes = Vec::with_capacity(log_probs.rows());
data_classes.extend(log_probs.iter_rows().map(|row| {
let (class, _) = utils::argmax(row);
class
}));
data_classes
}
}
/// A class-conditional distribution usable by `NaiveBayes`.
pub trait Distribution {
/// Initialize zeroed parameters for `class_count` classes and `features` features.
fn from_model_params(class_count: usize, features: usize) -> Self;
/// Update the parameters for `class` from the training rows belonging to that class.
fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()>;
/// Compute the joint log likelihood of `data` under each class, weighted by
/// the class priors; returns a matrix with one row per input and one column per class.
fn joint_log_lik(&self,
data: &Matrix<f64>,
class_prior: &[f64])
-> LearningResult<Matrix<f64>>;
}
/// Gaussian class-conditional distribution for Naive Bayes.
#[derive(Debug)]
pub struct Gaussian {
// Per-class feature means (one row per class, one column per feature).
theta: Matrix<f64>,
// Per-class feature variances (same layout as `theta`).
sigma: Matrix<f64>,
}
impl Gaussian {
/// Get the per-class feature means.
pub fn theta(&self) -> &Matrix<f64> {
&self.theta
}
/// Get the per-class feature variances.
pub fn sigma(&self) -> &Matrix<f64> {
&self.sigma
}
}
impl Distribution for Gaussian {
    /// Allocate zeroed mean and variance matrices for the model dimensions.
    fn from_model_params(class_count: usize, features: usize) -> Gaussian {
        Gaussian {
            theta: Matrix::zeros(class_count, features),
            sigma: Matrix::zeros(class_count, features),
        }
    }

    /// Fit the per-feature mean and variance from the rows of `data`
    /// that belong to `class`.
    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
        let mean = data.mean(Axes::Row).into_vec();
        let var = try!(data.variance(Axes::Row).map_err(|_| {
                Error::new(ErrorKind::InvalidData,
                           "Cannot compute variance for Gaussian distribution.")
            }))
            .into_vec();
        let features = data.cols();
        // Write each (mean, variance) pair into row `class` of theta/sigma.
        for (feat, (m, v)) in mean.into_iter().zip(var).enumerate() {
            self.theta.mut_data()[class * features + feat] = m;
            self.sigma.mut_data()[class * features + feat] = v;
        }
        Ok(())
    }

    /// Gaussian joint log likelihood: for each class, the log prior plus
    /// the log density of each input row under that class's parameters.
    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>> {
        let class_count = class_prior.len();
        let mut log_lik = Vec::with_capacity(class_count);
        for (class, prior) in class_prior.iter().enumerate() {
            let log_prior = prior.ln();
            // Log normalization constant: -0.5 * Σ ln(2π σ) over features.
            let norm = -0.5 *
                       (self.sigma.select_rows(&[class]) * 2.0 * PI).apply(&|x| x.ln()).sum();
            // Per-row squared deviation scaled by the variance: Σ (x − μ)² / σ.
            let sq_dev = (data - self.theta.select_rows(&vec![class; data.rows()]))
                .apply(&|x| x * x)
                .elediv(&self.sigma.select_rows(&vec![class; data.rows()]))
                .sum_cols();
            let class_lik = (-sq_dev * 0.5) + norm;
            log_lik.append(&mut (class_lik + log_prior).into_vec());
        }
        // Built class-major, so transpose to rows × classes.
        Ok(Matrix::new(class_count, data.rows(), log_lik).transpose())
    }
}
/// Bernoulli class-conditional distribution for Naive Bayes.
#[derive(Debug)]
pub struct Bernoulli {
// Per-class log feature probabilities (one row per class).
log_probs: Matrix<f64>,
// Additive (Laplace) smoothing count used when fitting.
pseudo_count: f64,
}
impl Bernoulli {
/// Get the per-class log feature probabilities.
pub fn log_probs(&self) -> &Matrix<f64> {
&self.log_probs
}
}
impl Distribution for Bernoulli {
    /// Allocate zeroed log probabilities with a Laplace pseudo count of 1.
    fn from_model_params(class_count: usize, features: usize) -> Bernoulli {
        Bernoulli {
            log_probs: Matrix::zeros(class_count, features),
            pseudo_count: 1f64,
        }
    }

    /// Fit smoothed per-feature log probabilities for `class`:
    /// ln((count + k) / (rows + 2k)) with pseudo count k.
    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
        let features = data.cols();
        // Smoothed denominator: total rows plus pseudo counts for both outcomes.
        let smoothed_total = data.rows() as f64 + (2f64 * self.pseudo_count);
        // Smoothed per-feature "on" counts.
        let smoothed_counts = data.sum_rows() + self.pseudo_count;
        let fitted = (smoothed_counts.apply(&|x| x.ln()) - smoothed_total.ln()).into_vec();
        for (feat, &lp) in fitted.iter().enumerate().take(features) {
            self.log_probs[[class, feat]] = lp;
        }
        Ok(())
    }

    /// Bernoulli joint log likelihood: data · (ln p − ln(1−p))ᵀ plus a
    /// per-class constant of ln(prior) + Σ ln(1−p).
    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>> {
        let class_count = class_prior.len();
        // ln(1 − p) for every class/feature entry.
        let neg_prob = self.log_probs.clone().apply(&|x| (1f64 - x.exp()).ln());
        let res = data * (&self.log_probs - &neg_prob).transpose();
        let neg_prob_sum = neg_prob.sum_cols();
        let per_class_row: Vec<f64> = class_prior.iter()
            .enumerate()
            .map(|(class, p)| p.ln() + neg_prob_sum[class])
            .collect();
        let class_row_mat = Matrix::new(1, class_count, per_class_row);
        // Broadcast the per-class constants across all input rows.
        Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
    }
}
/// Multinomial class-conditional distribution for Naive Bayes.
#[derive(Debug)]
pub struct Multinomial {
// Per-class log feature probabilities (one row per class).
log_probs: Matrix<f64>,
// Additive (Laplace) smoothing count used when fitting.
pseudo_count: f64,
}
impl Multinomial {
/// Get the per-class log feature probabilities.
pub fn log_probs(&self) -> &Matrix<f64> {
&self.log_probs
}
}
impl Distribution for Multinomial {
    /// Allocate zeroed log probabilities with a Laplace pseudo count of 1.
    fn from_model_params(class_count: usize, features: usize) -> Multinomial {
        Multinomial {
            log_probs: Matrix::zeros(class_count, features),
            pseudo_count: 1f64,
        }
    }

    /// Fit smoothed per-feature log probabilities for `class`:
    /// ln((count + k) / Σ(count + k)) with pseudo count k.
    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
        let features = data.cols();
        // Smoothed per-feature counts and their grand total.
        let smoothed_counts = data.sum_rows() + self.pseudo_count;
        let smoothed_total = smoothed_counts.sum();
        let fitted = (smoothed_counts.apply(&|x| x.ln()) - smoothed_total.ln()).into_vec();
        for (feat, &lp) in fitted.iter().enumerate().take(features) {
            self.log_probs[[class, feat]] = lp;
        }
        Ok(())
    }

    /// Multinomial joint log likelihood: data · (ln p)ᵀ plus the per-class
    /// log prior broadcast across every input row.
    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>> {
        let class_count = class_prior.len();
        let res = data * self.log_probs.transpose();
        let per_class_row: Vec<f64> = class_prior.iter().map(|p| p.ln()).collect();
        let class_row_mat = Matrix::new(1, class_count, per_class_row);
        Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
    }
}
#[cfg(test)]
mod tests {
use super::NaiveBayes;
use super::Gaussian;
use super::Bernoulli;
use super::Multinomial;
use learning::SupModel;
use linalg::Matrix;
// Each test fits a model on well-separated data and checks that the
// one-hot predictions reproduce the one-hot training targets exactly.
#[test]
fn test_gaussian() {
// 6 samples × 2 features, grouped into three clearly separated clusters.
let inputs = Matrix::new(6,
2,
vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3]);
// One-hot labels: rows 0-1 class 0, rows 2-3 class 1, rows 4-5 class 2.
let targets = Matrix::new(6,
3,
vec![1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0,
0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Gaussian>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
#[test]
fn test_bernoulli() {
// 4 samples × 3 binary features.
let inputs = Matrix::new(4,
3,
vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]);
// One-hot labels: rows 0-1 class 0, rows 2-3 class 1.
let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Bernoulli>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
#[test]
fn test_multinomial() {
// 4 samples × 3 count features.
let inputs = Matrix::new(4,
3,
vec![1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0,
0.0]);
// One-hot labels: rows 0-1 class 0, rows 2-3 class 1.
let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
let mut model = NaiveBayes::<Multinomial>::new();
model.train(&inputs, &targets).unwrap();
let outputs = model.predict(&inputs).unwrap();
assert_eq!(outputs.into_vec(), targets.into_vec());
}
}