ghostflow_ml/
naive_bayes.rs

//! Naive Bayes classifiers

use ghostflow_core::Tensor;
use std::f32::consts::PI;

/// Gaussian Naive Bayes classifier.
///
/// Assumes features are conditionally independent given the class, with each
/// feature following a per-class Gaussian distribution.
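///
/// # Example
///
/// A minimal sketch, assuming `Tensor::from_slice` and the row-major
/// `[n_samples, n_features]` layout used in the tests below:
///
/// ```ignore
/// let x = Tensor::from_slice(&[1.0f32, 2.0, 5.0, 8.0], &[2, 2]).unwrap();
/// let y = Tensor::from_slice(&[0.0f32, 1.0], &[2]).unwrap();
/// let mut gnb = GaussianNB::new().var_smoothing(1e-8);
/// gnb.fit(&x, &y);
/// let predictions = gnb.predict(&x);
/// ```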
pub struct GaussianNB {
    pub var_smoothing: f32,
    class_prior_: Option<Vec<f32>>,
    class_count_: Option<Vec<usize>>,
    theta_: Option<Vec<Vec<f32>>>,  // Mean of each feature per class
    var_: Option<Vec<Vec<f32>>>,    // Variance of each feature per class
    n_classes_: usize,
    n_features_: usize,
}

impl GaussianNB {
    pub fn new() -> Self {
        GaussianNB {
            var_smoothing: 1e-9,
            class_prior_: None,
            class_count_: None,
            theta_: None,
            var_: None,
            n_classes_: 0,
            n_features_: 0,
        }
    }

    pub fn var_smoothing(mut self, smoothing: f32) -> Self {
        self.var_smoothing = smoothing;
        self
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        // Count samples per class
        let mut class_count = vec![0usize; self.n_classes_];
        for &label in &y_data {
            class_count[label as usize] += 1;
        }

        // Compute class priors: P(c) = n_c / n_samples
        let class_prior: Vec<f32> = class_count.iter()
            .map(|&c| c as f32 / n_samples as f32)
            .collect();

        // Compute mean and variance per class per feature
        let mut theta = vec![vec![0.0f32; n_features]; self.n_classes_];
        let mut var = vec![vec![0.0f32; n_features]; self.n_classes_];

        // Compute means
        for i in 0..n_samples {
            let class = y_data[i] as usize;
            for j in 0..n_features {
                theta[class][j] += x_data[i * n_features + j];
            }
        }
        for c in 0..self.n_classes_ {
            if class_count[c] > 0 {
                for j in 0..n_features {
                    theta[c][j] /= class_count[c] as f32;
                }
            }
        }

        // Compute variances, adding var_smoothing for numerical stability
        for i in 0..n_samples {
            let class = y_data[i] as usize;
            for j in 0..n_features {
                let diff = x_data[i * n_features + j] - theta[class][j];
                var[class][j] += diff * diff;
            }
        }
        for c in 0..self.n_classes_ {
            if class_count[c] > 0 {
                for j in 0..n_features {
                    var[c][j] = var[c][j] / class_count[c] as f32 + self.var_smoothing;
                }
            }
        }

        self.class_prior_ = Some(class_prior);
        self.class_count_ = Some(class_count);
        self.theta_ = Some(theta);
        self.var_ = Some(var);
    }

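    /// Unnormalized log-posterior of one sample for class `class`:
    /// log P(c) + Σ_j log N(x_j; θ_cj, σ²_cj). Includes the class prior,
    /// which is what `predict` and `predict_proba` need for their argmax
    /// and normalization.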
    fn log_likelihood(&self, x: &[f32], class: usize) -> f32 {
        let theta = self.theta_.as_ref().unwrap();
        let var = self.var_.as_ref().unwrap();
        let prior = self.class_prior_.as_ref().unwrap();

        let mut log_prob = prior[class].ln();

        for j in 0..self.n_features_ {
            let mean = theta[class][j];
            let variance = var[class][j];
            let diff = x[j] - mean;

            // Log of the Gaussian PDF: -0.5 * ln(2πσ²) - (x - μ)² / (2σ²)
            log_prob += -0.5 * (2.0 * PI * variance).ln() - (diff * diff) / (2.0 * variance);
        }

        log_prob
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let sample = &x_data[i * n_features..(i + 1) * n_features];

                let mut best_class = 0;
                let mut best_log_prob = f32::NEG_INFINITY;

                for c in 0..self.n_classes_ {
                    let log_prob = self.log_likelihood(sample, c);
                    if log_prob > best_log_prob {
                        best_log_prob = log_prob;
                        best_class = c;
                    }
                }

                best_class as f32
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut probs = Vec::with_capacity(n_samples * self.n_classes_);

        for i in 0..n_samples {
            let sample = &x_data[i * n_features..(i + 1) * n_features];

            let log_probs: Vec<f32> = (0..self.n_classes_)
                .map(|c| self.log_likelihood(sample, c))
                .collect();

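            // Log-sum-exp trick: subtract the max log-probability before
            // exponentiating to avoid overflow, then normalize so each
            // row of the output sums to 1.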
            // Softmax normalization
            let max_log = log_probs.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
            let exp_sum: f32 = log_probs.iter().map(|&lp| (lp - max_log).exp()).sum();

            for &lp in &log_probs {
                probs.push((lp - max_log).exp() / exp_sum);
            }
        }

        Tensor::from_slice(&probs, &[n_samples, self.n_classes_]).unwrap()
    }

    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}

impl Default for GaussianNB {
    fn default() -> Self {
        Self::new()
    }
}

/// Multinomial Naive Bayes for discrete/count data (e.g. word counts).
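///
/// # Example
///
/// A minimal sketch, assuming the same `Tensor` API as the tests below:
///
/// ```ignore
/// let x = Tensor::from_slice(&[2.0f32, 1.0, 0.0, 3.0], &[2, 2]).unwrap();
/// let y = Tensor::from_slice(&[0.0f32, 1.0], &[2]).unwrap();
/// let mut mnb = MultinomialNB::new().alpha(0.5);
/// mnb.fit(&x, &y);
/// let predictions = mnb.predict(&x);
/// ```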
pub struct MultinomialNB {
    pub alpha: f32,  // Laplace smoothing
    class_log_prior_: Option<Vec<f32>>,
    feature_log_prob_: Option<Vec<Vec<f32>>>,
    class_count_: Option<Vec<f32>>,
    feature_count_: Option<Vec<Vec<f32>>>,
    n_classes_: usize,
    n_features_: usize,
}

impl MultinomialNB {
    pub fn new() -> Self {
        MultinomialNB {
            alpha: 1.0,
            class_log_prior_: None,
            feature_log_prob_: None,
            class_count_: None,
            feature_count_: None,
            n_classes_: 0,
            n_features_: 0,
        }
    }

    pub fn alpha(mut self, alpha: f32) -> Self {
        self.alpha = alpha;
        self
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        // Count features per class
        let mut class_count = vec![0.0f32; self.n_classes_];
        let mut feature_count = vec![vec![0.0f32; n_features]; self.n_classes_];

        for i in 0..n_samples {
            let class = y_data[i] as usize;
            class_count[class] += 1.0;
            for j in 0..n_features {
                feature_count[class][j] += x_data[i * n_features + j];
            }
        }

        // Compute log probabilities with Laplace smoothing:
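        //   P(c)     = n_c / n_samples
        //   P(j | c) = (count(c, j) + alpha) / (Σ_j count(c, j) + alpha * n_features)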
        let mut class_log_prior = vec![0.0f32; self.n_classes_];
        let mut feature_log_prob = vec![vec![0.0f32; n_features]; self.n_classes_];

        let total_samples = n_samples as f32;
        for c in 0..self.n_classes_ {
            class_log_prior[c] = (class_count[c] / total_samples).ln();

            let smoothed_total: f32 = feature_count[c].iter().sum::<f32>() + self.alpha * n_features as f32;
            for j in 0..n_features {
                feature_log_prob[c][j] = ((feature_count[c][j] + self.alpha) / smoothed_total).ln();
            }
        }

        self.class_log_prior_ = Some(class_log_prior);
        self.feature_log_prob_ = Some(feature_log_prob);
        self.class_count_ = Some(class_count);
        self.feature_count_ = Some(feature_count);
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let class_log_prior = self.class_log_prior_.as_ref().unwrap();
        let feature_log_prob = self.feature_log_prob_.as_ref().unwrap();

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let sample = &x_data[i * n_features..(i + 1) * n_features];

                let mut best_class = 0;
                let mut best_log_prob = f32::NEG_INFINITY;

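                // Argmax over classes of log P(c) + Σ_j x_j · log P(j | c)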
                for c in 0..self.n_classes_ {
                    let mut log_prob = class_log_prior[c];
                    for j in 0..n_features {
                        log_prob += sample[j] * feature_log_prob[c][j];
                    }

                    if log_prob > best_log_prob {
                        best_log_prob = log_prob;
                        best_class = c;
                    }
                }

                best_class as f32
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}

impl Default for MultinomialNB {
    fn default() -> Self {
        Self::new()
    }
}

/// Bernoulli Naive Bayes for binary features.
///
/// Inputs are binarized at the `binarize` threshold (if set), and both the
/// presence and the absence of each feature contribute to the likelihood.
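///
/// # Example
///
/// A minimal sketch, assuming the same `Tensor` API as the tests below:
///
/// ```ignore
/// let x = Tensor::from_slice(&[0.9f32, 0.1, 0.2, 0.8], &[2, 2]).unwrap();
/// let y = Tensor::from_slice(&[0.0f32, 1.0], &[2]).unwrap();
/// let mut bnb = BernoulliNB::new().binarize(0.5);
/// bnb.fit(&x, &y);
/// let predictions = bnb.predict(&x);
/// ```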
pub struct BernoulliNB {
    pub alpha: f32,
    pub binarize: Option<f32>,
    class_log_prior_: Option<Vec<f32>>,
    feature_log_prob_: Option<Vec<Vec<f32>>>,
    n_classes_: usize,
    n_features_: usize,
}

impl BernoulliNB {
    pub fn new() -> Self {
        BernoulliNB {
            alpha: 1.0,
            binarize: Some(0.0),
            class_log_prior_: None,
            feature_log_prob_: None,
            n_classes_: 0,
            n_features_: 0,
        }
    }

    pub fn binarize(mut self, threshold: f32) -> Self {
        self.binarize = Some(threshold);
        self
    }

    fn binarize_data(&self, x: &[f32]) -> Vec<f32> {
        if let Some(threshold) = self.binarize {
            x.iter().map(|&v| if v > threshold { 1.0 } else { 0.0 }).collect()
        } else {
            x.to_vec()
        }
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = self.binarize_data(&x.data_f32());
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        let mut class_count = vec![0.0f32; self.n_classes_];
        let mut feature_count = vec![vec![0.0f32; n_features]; self.n_classes_];

        for i in 0..n_samples {
            let class = y_data[i] as usize;
            class_count[class] += 1.0;
            for j in 0..n_features {
                feature_count[class][j] += x_data[i * n_features + j];
            }
        }

        let mut class_log_prior = vec![0.0f32; self.n_classes_];
        let mut feature_log_prob = vec![vec![0.0f32; n_features * 2]; self.n_classes_];

        let total_samples = n_samples as f32;
        for c in 0..self.n_classes_ {
            class_log_prior[c] = (class_count[c] / total_samples).ln();

            for j in 0..n_features {
                let p = (feature_count[c][j] + self.alpha) / (class_count[c] + 2.0 * self.alpha);
                feature_log_prob[c][j * 2] = p.ln();             // log P(x_j=1|c)
                feature_log_prob[c][j * 2 + 1] = (1.0 - p).ln(); // log P(x_j=0|c)
            }
        }

        self.class_log_prior_ = Some(class_log_prior);
        self.feature_log_prob_ = Some(feature_log_prob);
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = self.binarize_data(&x.data_f32());
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let class_log_prior = self.class_log_prior_.as_ref().unwrap();
        let feature_log_prob = self.feature_log_prob_.as_ref().unwrap();

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let sample = &x_data[i * n_features..(i + 1) * n_features];

                let mut best_class = 0;
                let mut best_log_prob = f32::NEG_INFINITY;

                for c in 0..self.n_classes_ {
                    let mut log_prob = class_log_prior[c];
                    for j in 0..n_features {
                        if sample[j] > 0.5 {
                            log_prob += feature_log_prob[c][j * 2];
                        } else {
                            log_prob += feature_log_prob[c][j * 2 + 1];
                        }
                    }

                    if log_prob > best_log_prob {
                        best_log_prob = log_prob;
                        best_class = c;
                    }
                }

                best_class as f32
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}

impl Default for BernoulliNB {
    fn default() -> Self {
        Self::new()
    }
}

/// Complement Naive Bayes, which estimates feature statistics from the
/// complement of each class; this tends to work well on imbalanced datasets.
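///
/// # Example
///
/// A minimal sketch, assuming the same `Tensor` API as the tests below:
///
/// ```ignore
/// let x = Tensor::from_slice(&[2.0f32, 0.0, 1.0, 3.0], &[2, 2]).unwrap();
/// let y = Tensor::from_slice(&[0.0f32, 1.0], &[2]).unwrap();
/// let mut cnb = ComplementNB::new();
/// cnb.norm = true; // `norm` is a public field; there is no builder method
/// cnb.fit(&x, &y);
/// let predictions = cnb.predict(&x);
/// ```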
pub struct ComplementNB {
    pub alpha: f32,
    pub norm: bool,
    class_log_prior_: Option<Vec<f32>>,
    feature_log_prob_: Option<Vec<Vec<f32>>>,
    n_classes_: usize,
    n_features_: usize,
}

impl ComplementNB {
    pub fn new() -> Self {
        ComplementNB {
            alpha: 1.0,
            norm: false,
            class_log_prior_: None,
            feature_log_prob_: None,
            n_classes_: 0,
            n_features_: 0,
        }
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        // Count features NOT in each class (complement)
        let mut total_feature_count = vec![0.0f32; n_features];
        let mut class_feature_count = vec![vec![0.0f32; n_features]; self.n_classes_];
        let mut class_count = vec![0.0f32; self.n_classes_];

        for i in 0..n_samples {
            let class = y_data[i] as usize;
            class_count[class] += 1.0;
            for j in 0..n_features {
                let val = x_data[i * n_features + j];
                total_feature_count[j] += val;
                class_feature_count[class][j] += val;
            }
        }

        // Compute complement feature counts
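        // Complement weight for class c and feature j:
        //   w_cj = ln( (Σ_{i: y_i ≠ c} x_ij + alpha) / Σ_j (Σ_{i: y_i ≠ c} x_ij + alpha) )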
        let mut feature_log_prob = vec![vec![0.0f32; n_features]; self.n_classes_];

        for c in 0..self.n_classes_ {
            let mut complement_sum = 0.0f32;
            for j in 0..n_features {
                let complement_count = total_feature_count[j] - class_feature_count[c][j] + self.alpha;
                complement_sum += complement_count;
            }

            for j in 0..n_features {
                let complement_count = total_feature_count[j] - class_feature_count[c][j] + self.alpha;
                feature_log_prob[c][j] = (complement_count / complement_sum).ln();
            }

            // Normalize if requested
            if self.norm {
                let norm: f32 = feature_log_prob[c].iter().map(|&x| x.abs()).sum();
                if norm > 0.0 {
                    for j in 0..n_features {
                        feature_log_prob[c][j] /= norm;
                    }
                }
            }
        }

        let class_log_prior: Vec<f32> = class_count.iter()
            .map(|&c| (c / n_samples as f32).ln())
            .collect();

        self.class_log_prior_ = Some(class_log_prior);
        self.feature_log_prob_ = Some(feature_log_prob);
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let feature_log_prob = self.feature_log_prob_.as_ref().unwrap();

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let sample = &x_data[i * n_features..(i + 1) * n_features];

                let mut best_class = 0;
                let mut best_score = f32::NEG_INFINITY;

                for c in 0..self.n_classes_ {
                    // Complement NB scores with the negated complement log-probabilities:
                    // a class whose complement explains the sample poorly scores high
                    let mut score = 0.0f32;
                    for j in 0..n_features {
                        score -= sample[j] * feature_log_prob[c][j];
                    }

                    if score > best_score {
                        best_score = score;
                        best_class = c;
                    }
                }

                best_class as f32
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }
}

impl Default for ComplementNB {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_gaussian_nb() {
        let x = Tensor::from_slice(&[
            1.0f32, 2.0,
            1.5, 1.8,
            5.0, 8.0,
            6.0, 9.0,
        ], &[4, 2]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut gnb = GaussianNB::new();
        gnb.fit(&x, &y);

        let score = gnb.score(&x, &y);
        assert!(score >= 0.5);
    }

    #[test]
    fn test_multinomial_nb() {
        let x = Tensor::from_slice(&[
            2.0f32, 1.0, 0.0,
            1.0, 2.0, 0.0,
            0.0, 1.0, 2.0,
            0.0, 0.0, 3.0,
        ], &[4, 3]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut mnb = MultinomialNB::new();
        mnb.fit(&x, &y);

        let predictions = mnb.predict(&x);
        assert_eq!(predictions.dims(), &[4]);
    }
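
    // Minimal smoke tests for the remaining classifiers, mirroring the
    // existing tests above (an addition; thresholds are deliberately loose).
    #[test]
    fn test_bernoulli_nb() {
        let x = Tensor::from_slice(&[
            1.0f32, 0.0,
            1.0, 0.0,
            0.0, 1.0,
            0.0, 1.0,
        ], &[4, 2]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut bnb = BernoulliNB::new();
        bnb.fit(&x, &y);

        let score = bnb.score(&x, &y);
        assert!(score >= 0.5);
    }

    #[test]
    fn test_complement_nb() {
        let x = Tensor::from_slice(&[
            2.0f32, 1.0, 0.0,
            1.0, 2.0, 0.0,
            0.0, 1.0, 2.0,
            0.0, 0.0, 3.0,
        ], &[4, 3]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut cnb = ComplementNB::new();
        cnb.fit(&x, &y);

        let predictions = cnb.predict(&x);
        assert_eq!(predictions.dims(), &[4]);
    }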
}