rusty_machine/learning/naive_bayes.rs

//! Naive Bayes Classifiers
//!
//! The classifier supports Gaussian, Bernoulli and Multinomial distributions.
//!
//! A naive Bayes classifier works by treating the features of each input as
//! independent, conditional on the class. Under this assumption we use Bayes'
//! rule to compute the probability that each input belongs to a given class.
//!
//! # Examples
//!
//! ```
//! use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
//! use rusty_machine::linalg::Matrix;
//! use rusty_machine::learning::SupModel;
//!
//! let inputs = Matrix::new(6, 2, vec![1.0, 1.1,
//!                                     1.1, 0.9,
//!                                     2.2, 2.3,
//!                                     2.5, 2.7,
//!                                     5.2, 4.3,
//!                                     6.2, 7.3]);
//!
//! let targets = Matrix::new(6, 3, vec![1.0, 0.0, 0.0,
//!                                      1.0, 0.0, 0.0,
//!                                      0.0, 1.0, 0.0,
//!                                      0.0, 1.0, 0.0,
//!                                      0.0, 0.0, 1.0,
//!                                      0.0, 0.0, 1.0]);
//!
//! // Create a Gaussian Naive Bayes classifier.
//! let mut model = NaiveBayes::<Gaussian>::new();
//!
//! // Train the model.
//! model.train(&inputs, &targets).unwrap();
//!
//! // Predict the classes on the input data.
//! let outputs = model.predict(&inputs).unwrap();
//!
//! // Will output the target classes - otherwise our classifier is bad!
//! println!("Final outputs --\n{}", outputs);
//! ```

use linalg::{Matrix, Axes, BaseMatrix, BaseMatrixMut};
use learning::{LearningResult, SupModel};
use learning::error::{Error, ErrorKind};
use rulinalg::utils;

use std::f64::consts::PI;

/// The Naive Bayes model.
#[derive(Debug)]
pub struct NaiveBayes<T: Distribution> {
    distr: Option<T>,
    cluster_count: Option<usize>,
    class_prior: Option<Vec<f64>>,
    class_counts: Vec<usize>,
}

impl<T: Distribution> NaiveBayes<T> {
    /// Create a new NaiveBayes model from a given
    /// distribution.
    ///
    /// # Examples
    ///
    /// ```
    /// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
    ///
    /// // Create a new Gaussian Naive Bayes model.
    /// let _ = NaiveBayes::<Gaussian>::new();
    /// ```
    pub fn new() -> NaiveBayes<T> {
        NaiveBayes {
            distr: None,
            cluster_count: None,
            class_prior: None,
            class_counts: Vec::new(),
        }
    }

    /// Get the cluster count for this model.
    ///
    /// Returns an option which is `None` until the model has been trained.
    pub fn cluster_count(&self) -> Option<&usize> {
        self.cluster_count.as_ref()
    }

    /// Get the class prior distribution for this model.
    ///
    /// Returns an option which is `None` until the model has been trained.
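    ///
    /// # Examples
    ///
    /// A minimal sketch (the data here is illustrative):
    ///
    /// ```
    /// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
    /// use rusty_machine::linalg::Matrix;
    /// use rusty_machine::learning::SupModel;
    ///
    /// let inputs = Matrix::new(4, 2, vec![1.0, 1.1,
    ///                                     1.1, 0.9,
    ///                                     5.2, 4.3,
    ///                                     6.2, 7.3]);
    /// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
    ///                                      1.0, 0.0,
    ///                                      0.0, 1.0,
    ///                                      0.0, 1.0]);
    ///
    /// let mut model = NaiveBayes::<Gaussian>::new();
    /// assert!(model.class_prior().is_none());
    ///
    /// model.train(&inputs, &targets).unwrap();
    ///
    /// // Half of the inputs fall in each class.
    /// assert_eq!(model.class_prior().unwrap(), &vec![0.5, 0.5]);
    /// ```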
    pub fn class_prior(&self) -> Option<&Vec<f64>> {
        self.class_prior.as_ref()
    }

    /// Get the distribution for this model.
    ///
    /// Returns an option which is `None` until the model has been trained.
    pub fn distr(&self) -> Option<&T> {
        self.distr.as_ref()
    }
}

/// Train and predict from the Naive Bayes model.
///
/// Each row of the input matrix is the feature vector of one observation.
/// Each row of the target matrix is an indicator vector specifying that
/// input's class, e.g. [[1,0,0],[0,0,1]] marks the first input as class 1
/// and the second as class 3.
impl<T: Distribution> SupModel<Matrix<f64>, Matrix<f64>> for NaiveBayes<T> {
    /// Train the model using inputs and targets.
    fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
        self.distr = Some(T::from_model_params(targets.cols(), inputs.cols()));
        self.update_params(inputs, targets)
    }

    /// Predict output from inputs.
    fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
        let log_probs = try!(self.get_log_probs(inputs));
        let input_classes = NaiveBayes::<T>::get_classes(log_probs);

        if let Some(cluster_count) = self.cluster_count {
            let mut class_data = Vec::with_capacity(inputs.rows() * cluster_count);

            for c in input_classes {
                let mut row = vec![0f64; cluster_count];
                row[c] = 1f64;

                class_data.append(&mut row);
            }

            Ok(Matrix::new(inputs.rows(), cluster_count, class_data))
        } else {
            Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained."))
        }
    }
}

impl<T: Distribution> NaiveBayes<T> {
    /// Get the log-probabilities per class for each input.
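    ///
    /// # Examples
    ///
    /// A minimal sketch using a trained Gaussian model (illustrative data):
    ///
    /// ```
    /// use rusty_machine::learning::naive_bayes::{NaiveBayes, Gaussian};
    /// use rusty_machine::linalg::{Matrix, BaseMatrix};
    /// use rusty_machine::learning::SupModel;
    ///
    /// let inputs = Matrix::new(4, 2, vec![1.0, 1.1,
    ///                                     1.1, 0.9,
    ///                                     5.2, 4.3,
    ///                                     6.2, 7.3]);
    /// let targets = Matrix::new(4, 2, vec![1.0, 0.0,
    ///                                      1.0, 0.0,
    ///                                      0.0, 1.0,
    ///                                      0.0, 1.0]);
    ///
    /// let mut model = NaiveBayes::<Gaussian>::new();
    /// model.train(&inputs, &targets).unwrap();
    ///
    /// // One row per input; one column of log-probabilities per class.
    /// let log_probs = model.get_log_probs(&inputs).unwrap();
    /// assert_eq!((log_probs.rows(), log_probs.cols()), (4, 2));
    /// ```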
    pub fn get_log_probs(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
        if let (&Some(ref distr), &Some(ref prior)) = (&self.distr, &self.class_prior) {
            // Get the joint log likelihood from the distribution
            distr.joint_log_lik(inputs, prior)
        } else {
            Err(Error::new_untrained())
        }
    }

    fn update_params(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
        let class_count = targets.cols();
        let total_data = inputs.rows();

        self.class_counts = vec![0; class_count];
        let mut class_data = vec![Vec::new(); class_count];

        for (idx, row) in targets.iter_rows().enumerate() {
            // Find the class of this input
            let class = try!(NaiveBayes::<T>::find_class(row));

            // Note the class of the input
            class_data[class].push(idx);
            self.class_counts[class] += 1;
        }

        if let Some(ref mut distr) = self.distr {
            for (idx, c) in class_data.into_iter().enumerate() {
                // If this class' vector has not been populated, we can safely
                // skip this iteration, since the user is clearly not interested
                // in associating features with this class
                if c.is_empty() {
                    continue;
                }
                // Update the parameters within this class
                try!(distr.update_params(&inputs.select_rows(&c), idx));
            }
        }

        let mut class_prior = Vec::with_capacity(class_count);

        // Compute the prior as the proportion in each class
        class_prior.extend(self.class_counts.iter().map(|c| *c as f64 / total_data as f64));

        self.class_prior = Some(class_prior);
        self.cluster_count = Some(class_count);
        Ok(())
    }

    fn find_class(row: &[f64]) -> LearningResult<usize> {
        // Find the `1` entry in the row
        for (idx, r) in row.iter().enumerate() {
            if *r == 1f64 {
                return Ok(idx);
            }
        }

        Err(Error::new(ErrorKind::InvalidState,
                       "No class found for entry in targets"))
    }

    fn get_classes(log_probs: Matrix<f64>) -> Vec<usize> {
        let mut data_classes = Vec::with_capacity(log_probs.rows());

        data_classes.extend(log_probs.iter_rows().map(|row| {
            // Argmax each class log-probability per input
            let (class, _) = utils::argmax(row);
            class
        }));

        data_classes
    }
}

/// Naive Bayes Distribution.
pub trait Distribution {
    /// Initialize the distribution parameters.
    fn from_model_params(class_count: usize, features: usize) -> Self;

    /// Updates the distribution parameters.
    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()>;

    /// Compute the joint log likelihood of the data.
    ///
    /// Returns a matrix with one row per input, whose entries are the joint
    /// log-likelihoods of that input under each class.
    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>>;
}

/// The Gaussian Naive Bayes model distribution.
///
/// Defines:
///
/// p(x|C<sub>k</sub>) = ∏<sub>i</sub> N(x<sub>i</sub> ;
/// μ<sub>ki</sub>, σ<sup>2</sup><sub>ki</sub>)
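///
/// In log-space, `joint_log_lik` computes ln π<sub>k</sub> + ln p(x|C<sub>k</sub>),
/// where π<sub>k</sub> is the class prior and
///
/// ln p(x|C<sub>k</sub>) = -½ ∑<sub>i</sub> [ln(2π σ<sup>2</sup><sub>ki</sub>) +
/// (x<sub>i</sub> - μ<sub>ki</sub>)<sup>2</sup> / σ<sup>2</sup><sub>ki</sub>]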
#[derive(Debug)]
pub struct Gaussian {
    theta: Matrix<f64>,
    sigma: Matrix<f64>,
}

impl Gaussian {
    /// Returns the distribution means.
    ///
    /// This is a matrix of class by feature means.
    pub fn theta(&self) -> &Matrix<f64> {
        &self.theta
    }

    /// Returns the distribution variances.
    ///
    /// This is a matrix of class by feature variances.
    pub fn sigma(&self) -> &Matrix<f64> {
        &self.sigma
    }
}

impl Distribution for Gaussian {
    fn from_model_params(class_count: usize, features: usize) -> Gaussian {
        Gaussian {
            theta: Matrix::zeros(class_count, features),
            sigma: Matrix::zeros(class_count, features),
        }
    }

    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
        // Compute mean and sample variance
        let mean = data.mean(Axes::Row).into_vec();
        let var = try!(data.variance(Axes::Row).map_err(|_| {
                Error::new(ErrorKind::InvalidData,
                           "Cannot compute variance for Gaussian distribution.")
            }))
            .into_vec();

        let features = data.cols();

        for (idx, (m, v)) in mean.into_iter().zip(var.into_iter()).enumerate() {
            self.theta.mut_data()[class * features + idx] = m;
            self.sigma.mut_data()[class * features + idx] = v;
        }

        Ok(())
    }

    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>> {
        let class_count = class_prior.len();
        let mut log_lik = Vec::with_capacity(class_count);

        for (i, item) in class_prior.iter().enumerate() {
            let joint_i = item.ln();
            // Normalization term: -0.5 * Σ_i ln(2π σ²_ki)
            let n_ij = -0.5 * (self.sigma.select_rows(&[i]) * 2.0 * PI).apply(&|x| x.ln()).sum();

            // Squared deviation term per input: Σ_i (x_i - μ_ki)² / σ²_ki
            // NOTE: Here we are copying the row data which is inefficient
            let r_ij = (data - self.theta.select_rows(&vec![i; data.rows()]))
                .apply(&|x| x * x)
                .elediv(&self.sigma.select_rows(&vec![i; data.rows()]))
                .sum_cols();

            let res = (-r_ij * 0.5) + n_ij;

            log_lik.append(&mut (res + joint_i).into_vec());
        }

        Ok(Matrix::new(class_count, data.rows(), log_lik).transpose())
    }
}

/// The Bernoulli Naive Bayes model distribution.
///
/// Defines:
///
///    p(x|C<sub>k</sub>) = ∏<sub>i</sub> p<sub>ki</sub><sup>x<sub>i</sub></sup>
/// (1 - p<sub>ki</sub>)<sup>1 - x<sub>i</sub></sup>
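///
/// Feature probabilities are estimated with Laplace smoothing, as in
/// `update_params`: p<sub>ki</sub> = (N<sub>ki</sub> + α) / (N<sub>k</sub> + 2α),
/// where N<sub>ki</sub> counts the class-k inputs with feature i set,
/// N<sub>k</sub> counts the class-k inputs, and α is the pseudo count (fixed at 1).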
#[derive(Debug)]
pub struct Bernoulli {
    log_probs: Matrix<f64>,
    pseudo_count: f64,
}

impl Bernoulli {
    /// The log probability matrix.
    ///
    /// A matrix of class by feature model log-probabilities.
    pub fn log_probs(&self) -> &Matrix<f64> {
        &self.log_probs
    }
}

impl Distribution for Bernoulli {
    fn from_model_params(class_count: usize, features: usize) -> Bernoulli {
        Bernoulli {
            log_probs: Matrix::zeros(class_count, features),
            pseudo_count: 1f64,
        }
    }

    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
        let features = data.cols();

        // We add the pseudo count to the class count and feature count
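        // (i.e. Laplace smoothing with alpha = `pseudo_count`, giving
        // p = (feature count + alpha) / (class count + 2 * alpha);
        // stored below as log-probabilities.)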
        let pseudo_cc = data.rows() as f64 + (2f64 * self.pseudo_count);
        let pseudo_fc = data.sum_rows() + self.pseudo_count;

        let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();

        for (i, item) in log_probs.iter().enumerate().take(features) {
            self.log_probs[[class, i]] = *item;
        }

        Ok(())
    }

    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>> {
        let class_count = class_prior.len();

        let neg_prob = self.log_probs.clone().apply(&|x| (1f64 - x.exp()).ln());

        let res = data * (&self.log_probs - &neg_prob).transpose();

        // NOTE: Some messy stuff now to get the class row contribution.
        // Really we want to add to each row the class log-priors and the
        // neg_prob_sum contribution - the last term in
        // x log(p) + (1-x)log(1-p) = x (log(p) - log(1-p)) + log(1-p)

        let mut per_class_row = Vec::with_capacity(class_count);
        let neg_prob_sum = neg_prob.sum_cols();

        for (idx, p) in class_prior.iter().enumerate() {
            per_class_row.push(p.ln() + neg_prob_sum[idx]);
        }

        let class_row_mat = Matrix::new(1, class_count, per_class_row);

        Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
    }
}

/// The Multinomial Naive Bayes model distribution.
///
/// Defines:
///
///    p(x|C<sub>k</sub>) ∝ ∏<sub>i</sub> p<sub>ki</sub><sup>x<sub>i</sub></sup>
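///
/// Feature probabilities are estimated with Laplace smoothing, as in
/// `update_params`: p<sub>ki</sub> = (N<sub>ki</sub> + α) / ∑<sub>j</sub>(N<sub>kj</sub> + α),
/// where N<sub>ki</sub> is the total count of feature i over the class-k inputs
/// and α is the pseudo count (fixed at 1).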
#[derive(Debug)]
pub struct Multinomial {
    log_probs: Matrix<f64>,
    pseudo_count: f64,
}

impl Multinomial {
    /// The log probability matrix.
    ///
    /// A matrix of class by feature model log-probabilities.
    pub fn log_probs(&self) -> &Matrix<f64> {
        &self.log_probs
    }
}

impl Distribution for Multinomial {
    fn from_model_params(class_count: usize, features: usize) -> Multinomial {
        Multinomial {
            log_probs: Matrix::zeros(class_count, features),
            pseudo_count: 1f64,
        }
    }

    fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
        let features = data.cols();

        // Laplace-smoothed feature counts and their total
        let pseudo_fc = data.sum_rows() + self.pseudo_count;
        let pseudo_cc = pseudo_fc.sum();

        let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();

        for (i, item) in log_probs.iter().enumerate().take(features) {
            self.log_probs[[class, i]] = *item;
        }

        Ok(())
    }

    fn joint_log_lik(&self,
                     data: &Matrix<f64>,
                     class_prior: &[f64])
                     -> LearningResult<Matrix<f64>> {
        let class_count = class_prior.len();

        let res = data * self.log_probs.transpose();

        let mut per_class_row = Vec::with_capacity(class_count);
        for p in class_prior {
            per_class_row.push(p.ln());
        }

        let class_row_mat = Matrix::new(1, class_count, per_class_row);

        Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
    }
}

#[cfg(test)]
mod tests {
    use super::NaiveBayes;
    use super::Gaussian;
    use super::Bernoulli;
    use super::Multinomial;

    use learning::SupModel;

    use linalg::Matrix;

    #[test]
    fn test_gaussian() {
        let inputs = Matrix::new(6,
                                 2,
                                 vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3]);

        let targets = Matrix::new(6,
                                  3,
                                  vec![1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0,
                                       0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]);

        let mut model = NaiveBayes::<Gaussian>::new();
        model.train(&inputs, &targets).unwrap();

        let outputs = model.predict(&inputs).unwrap();
        assert_eq!(outputs.into_vec(), targets.into_vec());
    }

    #[test]
    fn test_bernoulli() {
        let inputs = Matrix::new(4,
                                 3,
                                 vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]);

        let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);

        let mut model = NaiveBayes::<Bernoulli>::new();
        model.train(&inputs, &targets).unwrap();

        let outputs = model.predict(&inputs).unwrap();
        assert_eq!(outputs.into_vec(), targets.into_vec());
    }

    #[test]
    fn test_multinomial() {
        let inputs = Matrix::new(4,
                                 3,
                                 vec![1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0,
                                      0.0]);

        let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);

        let mut model = NaiveBayes::<Multinomial>::new();
        model.train(&inputs, &targets).unwrap();

        let outputs = model.predict(&inputs).unwrap();
        assert_eq!(outputs.into_vec(), targets.into_vec());
    }
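
    // An additional sanity check (a minimal sketch): predicting with an
    // untrained model should return an error rather than panic.
    #[test]
    fn test_untrained_predict_errors() {
        let inputs = Matrix::new(1, 2, vec![1.0, 1.1]);

        let model = NaiveBayes::<Gaussian>::new();
        assert!(model.predict(&inputs).is_err());
    }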
}