1use linalg::{Matrix, Axes, BaseMatrix, BaseMatrixMut};
44use learning::{LearningResult, SupModel};
45use learning::error::{Error, ErrorKind};
46use rulinalg::utils;
47
48use std::f64::consts::PI;
49
50#[derive(Debug)]
52pub struct NaiveBayes<T: Distribution> {
53 distr: Option<T>,
54 cluster_count: Option<usize>,
55 class_prior: Option<Vec<f64>>,
56 class_counts: Vec<usize>,
57}
58
59impl<T: Distribution> NaiveBayes<T> {
60 pub fn new() -> NaiveBayes<T> {
72 NaiveBayes {
73 distr: None,
74 cluster_count: None,
75 class_prior: None,
76 class_counts: Vec::new(),
77 }
78 }
79
80 pub fn cluster_count(&self) -> Option<&usize> {
84 self.cluster_count.as_ref()
85 }
86
87 pub fn class_prior(&self) -> Option<&Vec<f64>> {
91 self.class_prior.as_ref()
92 }
93
94 pub fn distr(&self) -> Option<&T> {
98 self.distr.as_ref()
99 }
100}
101
102impl<T: Distribution> SupModel<Matrix<f64>, Matrix<f64>> for NaiveBayes<T> {
108 fn train(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
110 self.distr = Some(T::from_model_params(targets.cols(), inputs.cols()));
111 self.update_params(inputs, targets)
112 }
113
114 fn predict(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
116 let log_probs = try!(self.get_log_probs(inputs));
117 let input_classes = NaiveBayes::<T>::get_classes(log_probs);
118
119 if let Some(cluster_count) = self.cluster_count {
120 let mut class_data = Vec::with_capacity(inputs.rows() * cluster_count);
121
122 for c in input_classes {
123 let mut row = vec![0f64; cluster_count];
124 row[c] = 1f64;
125
126 class_data.append(&mut row);
127 }
128
129 Ok(Matrix::new(inputs.rows(), cluster_count, class_data))
130 } else {
131 Err(Error::new(ErrorKind::UntrainedModel, "The model has not been trained."))
132 }
133 }
134}
135
136impl<T: Distribution> NaiveBayes<T> {
137 pub fn get_log_probs(&self, inputs: &Matrix<f64>) -> LearningResult<Matrix<f64>> {
139
140 if let (&Some(ref distr), &Some(ref prior)) = (&self.distr, &self.class_prior) {
141 distr.joint_log_lik(inputs, prior)
143 } else {
144 Err(Error::new_untrained())
145 }
146 }
147
148 fn update_params(&mut self, inputs: &Matrix<f64>, targets: &Matrix<f64>) -> LearningResult<()> {
149 let class_count = targets.cols();
150 let total_data = inputs.rows();
151
152 self.class_counts = vec![0; class_count];
153 let mut class_data = vec![Vec::new(); class_count];
154
155 for (idx, row) in targets.iter_rows().enumerate() {
156 let class = try!(NaiveBayes::<T>::find_class(row));
158
159 class_data[class].push(idx);
161 self.class_counts[class] += 1;
162 }
163
164 if let Some(ref mut distr) = self.distr {
165 for (idx, c) in class_data.into_iter().enumerate() {
166 if c.is_empty() {
170 continue;
171 }
172 try!(distr.update_params(&inputs.select_rows(&c), idx));
174 }
175 }
176
177 let mut class_prior = Vec::with_capacity(class_count);
178
179 class_prior.extend(self.class_counts.iter().map(|c| *c as f64 / total_data as f64));
181
182 self.class_prior = Some(class_prior);
183 self.cluster_count = Some(class_count);
184 Ok(())
185 }
186
187 fn find_class(row: &[f64]) -> LearningResult<usize> {
188 for (idx, r) in row.into_iter().enumerate() {
190 if *r == 1f64 {
191 return Ok(idx);
192 }
193 }
194
195 Err(Error::new(ErrorKind::InvalidState,
196 "No class found for entry in targets"))
197 }
198
199 fn get_classes(log_probs: Matrix<f64>) -> Vec<usize> {
200 let mut data_classes = Vec::with_capacity(log_probs.rows());
201
202 data_classes.extend(log_probs.iter_rows().map(|row| {
203 let (class, _) = utils::argmax(row);
205 class
206 }));
207
208 data_classes
209 }
210}
211
212pub trait Distribution {
214 fn from_model_params(class_count: usize, features: usize) -> Self;
216
217 fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()>;
219
220 fn joint_log_lik(&self,
224 data: &Matrix<f64>,
225 class_prior: &[f64])
226 -> LearningResult<Matrix<f64>>;
227}
228
229#[derive(Debug)]
236pub struct Gaussian {
237 theta: Matrix<f64>,
238 sigma: Matrix<f64>,
239}
240
241impl Gaussian {
242 pub fn theta(&self) -> &Matrix<f64> {
246 &self.theta
247 }
248
249 pub fn sigma(&self) -> &Matrix<f64> {
253 &self.sigma
254 }
255}
256
257impl Distribution for Gaussian {
258 fn from_model_params(class_count: usize, features: usize) -> Gaussian {
259 Gaussian {
260 theta: Matrix::zeros(class_count, features),
261 sigma: Matrix::zeros(class_count, features),
262 }
263 }
264
265 fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
266 let mean = data.mean(Axes::Row).into_vec();
268 let var = try!(data.variance(Axes::Row).map_err(|_| {
269 Error::new(ErrorKind::InvalidData,
270 "Cannot compute variance for Gaussian distribution.")
271 }))
272 .into_vec();
273
274 let features = data.cols();
275
276 for (idx, (m, v)) in mean.into_iter().zip(var.into_iter()).enumerate() {
277 self.theta.mut_data()[class * features + idx] = m;
278 self.sigma.mut_data()[class * features + idx] = v;
279 }
280
281 Ok(())
282 }
283
284 fn joint_log_lik(&self,
285 data: &Matrix<f64>,
286 class_prior: &[f64])
287 -> LearningResult<Matrix<f64>> {
288 let class_count = class_prior.len();
289 let mut log_lik = Vec::with_capacity(class_count);
290
291 for (i, item) in class_prior.into_iter().enumerate() {
292 let joint_i = item.ln();
293 let n_ij = -0.5 * (self.sigma.select_rows(&[i]) * 2.0 * PI).apply(&|x| x.ln()).sum();
294
295 let r_ij = (data - self.theta.select_rows(&vec![i; data.rows()]))
297 .apply(&|x| x * x)
298 .elediv(&self.sigma.select_rows(&vec![i; data.rows()]))
299 .sum_cols();
300
301 let res = (-r_ij * 0.5) + n_ij;
302
303 log_lik.append(&mut (res + joint_i).into_vec());
304 }
305
306 Ok(Matrix::new(class_count, data.rows(), log_lik).transpose())
307 }
308}
309
310#[derive(Debug)]
317pub struct Bernoulli {
318 log_probs: Matrix<f64>,
319 pseudo_count: f64,
320}
321
322impl Bernoulli {
323 pub fn log_probs(&self) -> &Matrix<f64> {
327 &self.log_probs
328 }
329}
330
331impl Distribution for Bernoulli {
332 fn from_model_params(class_count: usize, features: usize) -> Bernoulli {
333 Bernoulli {
334 log_probs: Matrix::zeros(class_count, features),
335 pseudo_count: 1f64,
336 }
337 }
338
339 fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
340 let features = data.cols();
341
342 let pseudo_cc = data.rows() as f64 + (2f64 * self.pseudo_count);
344 let pseudo_fc = data.sum_rows() + self.pseudo_count;
345
346 let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();
347
348 for (i, item) in log_probs.iter().enumerate().take(features) {
349 self.log_probs[[class, i]] = *item;
350 }
351
352 Ok(())
353
354 }
355
356 fn joint_log_lik(&self,
357 data: &Matrix<f64>,
358 class_prior: &[f64])
359 -> LearningResult<Matrix<f64>> {
360 let class_count = class_prior.len();
361
362 let neg_prob = self.log_probs.clone().apply(&|x| (1f64 - x.exp()).ln());
363
364 let res = data * (&self.log_probs - &neg_prob).transpose();
365
366 let mut per_class_row = Vec::with_capacity(class_count);
372 let neg_prob_sum = neg_prob.sum_cols();
373
374 for (idx, p) in class_prior.into_iter().enumerate() {
375 per_class_row.push(p.ln() + neg_prob_sum[idx]);
376 }
377
378 let class_row_mat = Matrix::new(1, class_count, per_class_row);
379
380 Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
381 }
382}
383
384#[derive(Debug)]
390pub struct Multinomial {
391 log_probs: Matrix<f64>,
392 pseudo_count: f64,
393}
394
395impl Multinomial {
396 pub fn log_probs(&self) -> &Matrix<f64> {
400 &self.log_probs
401 }
402}
403
404impl Distribution for Multinomial {
405 fn from_model_params(class_count: usize, features: usize) -> Multinomial {
406 Multinomial {
407 log_probs: Matrix::zeros(class_count, features),
408 pseudo_count: 1f64,
409 }
410 }
411
412 fn update_params(&mut self, data: &Matrix<f64>, class: usize) -> LearningResult<()> {
413 let features = data.cols();
414
415 let pseudo_fc = data.sum_rows() + self.pseudo_count;
416 let pseudo_cc = pseudo_fc.sum();
417
418 let log_probs = (pseudo_fc.apply(&|x| x.ln()) - pseudo_cc.ln()).into_vec();
419
420 for (i, item) in log_probs.iter().enumerate().take(features) {
421 self.log_probs[[class, i]] = *item;
422 }
423
424 Ok(())
425 }
426
427 fn joint_log_lik(&self,
428 data: &Matrix<f64>,
429 class_prior: &[f64])
430 -> LearningResult<Matrix<f64>> {
431 let class_count = class_prior.len();
432
433 let res = data * self.log_probs.transpose();
434
435 let mut per_class_row = Vec::with_capacity(class_count);
436 for p in class_prior {
437 per_class_row.push(p.ln());
438 }
439
440 let class_row_mat = Matrix::new(1, class_count, per_class_row);
441
442 Ok(res + class_row_mat.select_rows(&vec![0; data.rows()]))
443 }
444}
445
446#[cfg(test)]
447mod tests {
448 use super::NaiveBayes;
449 use super::Gaussian;
450 use super::Bernoulli;
451 use super::Multinomial;
452
453 use learning::SupModel;
454
455 use linalg::Matrix;
456
457 #[test]
458 fn test_gaussian() {
459 let inputs = Matrix::new(6,
460 2,
461 vec![1.0, 1.1, 1.1, 0.9, 2.2, 2.3, 2.5, 2.7, 5.2, 4.3, 6.2, 7.3]);
462
463 let targets = Matrix::new(6,
464 3,
465 vec![1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0,
466 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]);
467
468 let mut model = NaiveBayes::<Gaussian>::new();
469 model.train(&inputs, &targets).unwrap();
470
471 let outputs = model.predict(&inputs).unwrap();
472 assert_eq!(outputs.into_vec(), targets.into_vec());
473 }
474
475 #[test]
476 fn test_bernoulli() {
477 let inputs = Matrix::new(4,
478 3,
479 vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0]);
480
481 let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
482
483 let mut model = NaiveBayes::<Bernoulli>::new();
484 model.train(&inputs, &targets).unwrap();
485
486 let outputs = model.predict(&inputs).unwrap();
487 assert_eq!(outputs.into_vec(), targets.into_vec());
488 }
489
490 #[test]
491 fn test_multinomial() {
492 let inputs = Matrix::new(4,
493 3,
494 vec![1.0, 0.0, 5.0, 0.0, 0.0, 11.0, 13.0, 1.0, 0.0, 12.0, 3.0,
495 0.0]);
496
497 let targets = Matrix::new(4, 2, vec![1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0]);
498
499 let mut model = NaiveBayes::<Multinomial>::new();
500 model.train(&inputs, &targets).unwrap();
501
502 let outputs = model.predict(&inputs).unwrap();
503 assert_eq!(outputs.into_vec(), targets.into_vec());
504 }
505}