// ghostflow_ml/discriminant_analysis.rs

1//! Linear and Quadratic Discriminant Analysis
2
3use ghostflow_core::Tensor;
4
5/// Linear Discriminant Analysis
/// Linear Discriminant Analysis.
///
/// Classifier with a linear decision boundary, obtained by modelling every
/// class with its own mean but a single pooled covariance matrix.
/// Trailing-underscore fields are populated by `fit` and are `None`
/// (or zero) before fitting.
pub struct LinearDiscriminantAnalysis {
    /// Number of components kept by `transform`; defaults to `n_classes - 1`.
    pub n_components: Option<usize>,
    /// Requested solver. NOTE(review): stored but not consulted by `fit`,
    /// which always uses the pooled-covariance / Cholesky path.
    pub solver: LDASolver,
    /// Optional shrinkage intensity in `[0, 1]` applied to the pooled
    /// covariance (blend toward a spherical target).
    pub shrinkage: Option<f32>,
    /// User-supplied class priors; when `None`, empirical frequencies are used.
    pub priors: Option<Vec<f32>>,
    // Per-class feature means, indexed [class][feature]; set by `fit`.
    means_: Option<Vec<Vec<f32>>>,
    // Pooled within-class covariance, row-major n_features x n_features.
    covariance_: Option<Vec<f32>>,
    // Projection scalings; reserved for a future eigen/SVD solver.
    #[allow(dead_code)]
    scalings_: Option<Vec<Vec<f32>>>,
    // Sorted, deduplicated class labels seen during `fit`.
    classes_: Option<Vec<usize>>,
    // Effective priors actually used (user-supplied or empirical).
    priors_: Option<Vec<f32>>,
    // Number of features the model was fitted on.
    n_features_: usize,
}
19
/// Solver choice for [`LinearDiscriminantAnalysis`].
#[derive(Clone, Copy, Debug)]
pub enum LDASolver {
    /// Singular value decomposition (default).
    SVD,
    /// Eigendecomposition of the scatter matrices.
    Eigen,
}
25
26impl LinearDiscriminantAnalysis {
27    pub fn new() -> Self {
28        LinearDiscriminantAnalysis {
29            n_components: None,
30            solver: LDASolver::SVD,
31            shrinkage: None,
32            priors: None,
33            means_: None,
34            covariance_: None,
35            scalings_: None,
36            classes_: None,
37            priors_: None,
38            n_features_: 0,
39        }
40    }
41
42    pub fn n_components(mut self, n: usize) -> Self {
43        self.n_components = Some(n);
44        self
45    }
46
47    pub fn shrinkage(mut self, s: f32) -> Self {
48        self.shrinkage = Some(s.clamp(0.0, 1.0));
49        self
50    }
51
52    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
53        let x_data = x.data_f32();
54        let y_data = y.data_f32();
55        let n_samples = x.dims()[0];
56        let n_features = x.dims()[1];
57
58        self.n_features_ = n_features;
59
60        // Get unique classes
61        let mut classes: Vec<usize> = y_data.iter().map(|&v| v as usize).collect();
62        classes.sort();
63        classes.dedup();
64        let n_classes = classes.len();
65
66        // Compute class priors and means
67        let mut class_counts = vec![0usize; n_classes];
68        let mut means = vec![vec![0.0f32; n_features]; n_classes];
69
70        for i in 0..n_samples {
71            let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
72            class_counts[class_idx] += 1;
73            for j in 0..n_features {
74                means[class_idx][j] += x_data[i * n_features + j];
75            }
76        }
77
78        for c in 0..n_classes {
79            if class_counts[c] > 0 {
80                for j in 0..n_features {
81                    means[c][j] /= class_counts[c] as f32;
82                }
83            }
84        }
85
86        // Compute priors
87        let priors: Vec<f32> = if let Some(ref p) = self.priors {
88            p.clone()
89        } else {
90            class_counts.iter().map(|&c| c as f32 / n_samples as f32).collect()
91        };
92
93        // Compute pooled within-class covariance
94        let mut cov = vec![0.0f32; n_features * n_features];
95
96        for i in 0..n_samples {
97            let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
98            for j in 0..n_features {
99                for k in 0..n_features {
100                    let diff_j = x_data[i * n_features + j] - means[class_idx][j];
101                    let diff_k = x_data[i * n_features + k] - means[class_idx][k];
102                    cov[j * n_features + k] += diff_j * diff_k;
103                }
104            }
105        }
106
107        // Normalize covariance
108        let denom = (n_samples - n_classes).max(1) as f32;
109        for c in &mut cov {
110            *c /= denom;
111        }
112
113        // Apply shrinkage if specified
114        if let Some(shrinkage) = self.shrinkage {
115            let trace: f32 = (0..n_features).map(|i| cov[i * n_features + i]).sum();
116            let mu = trace / n_features as f32;
117            
118            for i in 0..n_features {
119                for j in 0..n_features {
120                    if i == j {
121                        cov[i * n_features + j] = (1.0 - shrinkage) * cov[i * n_features + j] + shrinkage * mu;
122                    } else {
123                        cov[i * n_features + j] *= 1.0 - shrinkage;
124                    }
125                }
126            }
127        }
128
129        // Compute LDA scalings using eigendecomposition of Sw^-1 * Sb
130        // For simplicity, we'll use the means directly for classification
131        
132        self.means_ = Some(means);
133        self.covariance_ = Some(cov);
134        self.classes_ = Some(classes);
135        self.priors_ = Some(priors);
136    }
137
    /// Solves the symmetric positive-definite system `cov * x = b` via
    /// Cholesky factorization (`cov = L L^T`), with a small diagonal jitter
    /// added for numerical stability.
    ///
    /// `cov` is a row-major `n x n` matrix and `b` a right-hand side of
    /// length `n`; returns the solution vector `x`.
    fn solve_linear_system(&self, cov: &[f32], b: &[f32], n: usize) -> Vec<f32> {
        let mut a = cov.to_vec();

        // Tikhonov-style jitter keeps the factorization well-defined even
        // when `cov` is only positive semi-definite.
        for i in 0..n {
            a[i * n + i] += 1e-6;
        }

        // Cholesky decomposition: build lower-triangular L with A = L L^T.
        // The max(1e-10) guards clamp non-positive pivots, avoiding NaN from
        // sqrt of a negative value or division by zero.
        let mut l = vec![0.0f32; n * n];
        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;
                if i == j {
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
                } else {
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }

        // Forward substitution: solve L y = b.
        let mut y = vec![0.0f32; n];
        for i in 0..n {
            let mut sum = 0.0f32;
            for j in 0..i {
                sum += l[i * n + j] * y[j];
            }
            y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
        }

        // Backward substitution: solve L^T x = y.
        let mut x = vec![0.0f32; n];
        for i in (0..n).rev() {
            let mut sum = 0.0f32;
            for j in (i + 1)..n {
                sum += l[j * n + i] * x[j];
            }
            x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
        }

        x
    }
188
189    pub fn predict(&self, x: &Tensor) -> Tensor {
190        let x_data = x.data_f32();
191        let n_samples = x.dims()[0];
192        let n_features = x.dims()[1];
193
194        let means = self.means_.as_ref().unwrap();
195        let cov = self.covariance_.as_ref().unwrap();
196        let classes = self.classes_.as_ref().unwrap();
197        let priors = self.priors_.as_ref().unwrap();
198        let n_classes = classes.len();
199
200        // Precompute Sigma^-1 * mu for each class
201        let cov_inv_means: Vec<Vec<f32>> = means.iter()
202            .map(|mean| self.solve_linear_system(cov, mean, n_features))
203            .collect();
204
205        let predictions: Vec<f32> = (0..n_samples)
206            .map(|i| {
207                let sample = &x_data[i * n_features..(i + 1) * n_features];
208                
209                let mut best_class = 0;
210                let mut best_score = f32::NEG_INFINITY;
211
212                for c in 0..n_classes {
213                    // Linear discriminant function
214                    let mut score = priors[c].ln();
215                    
216                    // x^T * Sigma^-1 * mu_c
217                    for j in 0..n_features {
218                        score += sample[j] * cov_inv_means[c][j];
219                    }
220                    
221                    // -0.5 * mu_c^T * Sigma^-1 * mu_c
222                    let mut quad = 0.0f32;
223                    for j in 0..n_features {
224                        quad += means[c][j] * cov_inv_means[c][j];
225                    }
226                    score -= 0.5 * quad;
227
228                    if score > best_score {
229                        best_score = score;
230                        best_class = c;
231                    }
232                }
233
234                classes[best_class] as f32
235            })
236            .collect();
237
238        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
239    }
240
241    pub fn transform(&self, x: &Tensor) -> Tensor {
242        // Project data onto LDA components
243        let x_data = x.data_f32();
244        let n_samples = x.dims()[0];
245        let n_features = x.dims()[1];
246
247        let means = self.means_.as_ref().unwrap();
248        let classes = self.classes_.as_ref().unwrap();
249        let n_classes = classes.len();
250        let n_components = self.n_components.unwrap_or(n_classes - 1).min(n_classes - 1);
251
252        // Compute global mean
253        let priors = self.priors_.as_ref().unwrap();
254        let mut global_mean = vec![0.0f32; n_features];
255        for c in 0..n_classes {
256            for j in 0..n_features {
257                global_mean[j] += priors[c] * means[c][j];
258            }
259        }
260
261        // Use class means as projection directions (simplified)
262        let mut result = vec![0.0f32; n_samples * n_components];
263        
264        for i in 0..n_samples {
265            for c in 0..n_components {
266                let mut proj = 0.0f32;
267                for j in 0..n_features {
268                    proj += (x_data[i * n_features + j] - global_mean[j]) * 
269                            (means[c][j] - global_mean[j]);
270                }
271                result[i * n_components + c] = proj;
272            }
273        }
274
275        Tensor::from_slice(&result, &[n_samples, n_components]).unwrap()
276    }
277
278    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
279        let predictions = self.predict(x);
280        let pred_data = predictions.data_f32();
281        let y_data = y.data_f32();
282
283        let correct: usize = pred_data.iter()
284            .zip(y_data.iter())
285            .filter(|(&p, &y)| (p - y).abs() < 0.5)
286            .count();
287
288        correct as f32 / y_data.len() as f32
289    }
290}
291
292impl Default for LinearDiscriminantAnalysis {
293    fn default() -> Self {
294        Self::new()
295    }
296}
297
298
299/// Quadratic Discriminant Analysis
/// Quadratic Discriminant Analysis.
///
/// Classifier with a quadratic decision boundary: each class keeps its own
/// covariance matrix. Trailing-underscore fields are populated by `fit`.
pub struct QuadraticDiscriminantAnalysis {
    /// Ridge term added to each covariance diagonal (0.0 disables it).
    pub reg_param: f32,
    /// User-supplied class priors; when `None`, empirical frequencies are used.
    pub priors: Option<Vec<f32>>,
    // Per-class feature means, indexed [class][feature]; set by `fit`.
    means_: Option<Vec<Vec<f32>>>,
    // One row-major n_features x n_features covariance per class.
    covariances_: Option<Vec<Vec<f32>>>,
    // Sorted, deduplicated class labels seen during `fit`.
    classes_: Option<Vec<usize>>,
    // Effective priors actually used (user-supplied or empirical).
    priors_: Option<Vec<f32>>,
    // Number of features the model was fitted on.
    n_features_: usize,
}
309
310impl QuadraticDiscriminantAnalysis {
311    pub fn new() -> Self {
312        QuadraticDiscriminantAnalysis {
313            reg_param: 0.0,
314            priors: None,
315            means_: None,
316            covariances_: None,
317            classes_: None,
318            priors_: None,
319            n_features_: 0,
320        }
321    }
322
323    pub fn reg_param(mut self, r: f32) -> Self {
324        self.reg_param = r;
325        self
326    }
327
328    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
329        let x_data = x.data_f32();
330        let y_data = y.data_f32();
331        let n_samples = x.dims()[0];
332        let n_features = x.dims()[1];
333
334        self.n_features_ = n_features;
335
336        // Get unique classes
337        let mut classes: Vec<usize> = y_data.iter().map(|&v| v as usize).collect();
338        classes.sort();
339        classes.dedup();
340        let n_classes = classes.len();
341
342        // Compute class counts and means
343        let mut class_counts = vec![0usize; n_classes];
344        let mut means = vec![vec![0.0f32; n_features]; n_classes];
345
346        for i in 0..n_samples {
347            let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
348            class_counts[class_idx] += 1;
349            for j in 0..n_features {
350                means[class_idx][j] += x_data[i * n_features + j];
351            }
352        }
353
354        for c in 0..n_classes {
355            if class_counts[c] > 0 {
356                for j in 0..n_features {
357                    means[c][j] /= class_counts[c] as f32;
358                }
359            }
360        }
361
362        // Compute priors
363        let priors: Vec<f32> = if let Some(ref p) = self.priors {
364            p.clone()
365        } else {
366            class_counts.iter().map(|&c| c as f32 / n_samples as f32).collect()
367        };
368
369        // Compute per-class covariances
370        let mut covariances = vec![vec![0.0f32; n_features * n_features]; n_classes];
371
372        for i in 0..n_samples {
373            let class_idx = classes.iter().position(|&c| c == y_data[i] as usize).unwrap();
374            for j in 0..n_features {
375                for k in 0..n_features {
376                    let diff_j = x_data[i * n_features + j] - means[class_idx][j];
377                    let diff_k = x_data[i * n_features + k] - means[class_idx][k];
378                    covariances[class_idx][j * n_features + k] += diff_j * diff_k;
379                }
380            }
381        }
382
383        // Normalize and regularize
384        for c in 0..n_classes {
385            let denom = (class_counts[c] - 1).max(1) as f32;
386            for idx in 0..n_features * n_features {
387                covariances[c][idx] /= denom;
388            }
389
390            // Add regularization
391            if self.reg_param > 0.0 {
392                for i in 0..n_features {
393                    covariances[c][i * n_features + i] += self.reg_param;
394                }
395            }
396        }
397
398        self.means_ = Some(means);
399        self.covariances_ = Some(covariances);
400        self.classes_ = Some(classes);
401        self.priors_ = Some(priors);
402    }
403
    /// Computes the log-determinant and inverse of the symmetric
    /// positive-definite matrix `cov` (row-major `n x n`) via Cholesky
    /// factorization, with a small diagonal jitter for stability.
    ///
    /// Returns `(log_det, inverse)`; the inverse is built by solving
    /// `A x = e_col` for each standard basis vector.
    fn log_det_and_inv(&self, cov: &[f32], n: usize) -> (f32, Vec<f32>) {
        let mut a = cov.to_vec();
        // Jitter keeps the factorization well-defined for near-singular input.
        for i in 0..n {
            a[i * n + i] += 1e-6;
        }

        // Cholesky decomposition A = L L^T; log|A| = 2 * sum(ln L[i][i]).
        // The max(1e-10) guards clamp non-positive pivots to avoid NaNs.
        let mut l = vec![0.0f32; n * n];
        let mut log_det = 0.0f32;

        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;
                if i == j {
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    let val = (a[j * n + j] - sum).max(1e-10).sqrt();
                    l[j * n + j] = val;
                    log_det += 2.0 * val.ln();
                } else {
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }

        // Invert column by column: solve L y = e_col, then L^T x = y.
        let mut inv = vec![0.0f32; n * n];
        for col in 0..n {
            let mut e = vec![0.0f32; n];
            e[col] = 1.0;

            // Forward substitution: L y = e.
            let mut y = vec![0.0f32; n];
            for i in 0..n {
                let mut sum = 0.0f32;
                for j in 0..i {
                    sum += l[i * n + j] * y[j];
                }
                y[i] = (e[i] - sum) / l[i * n + i].max(1e-10);
            }

            // Backward substitution: L^T x = y.
            let mut x = vec![0.0f32; n];
            for i in (0..n).rev() {
                let mut sum = 0.0f32;
                for j in (i + 1)..n {
                    sum += l[j * n + i] * x[j];
                }
                x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
            }

            for i in 0..n {
                inv[i * n + col] = x[i];
            }
        }

        (log_det, inv)
    }
467
    /// Predicts a class label for each row of `x` using the quadratic
    /// discriminant function
    /// `delta_c(x) = ln(pi_c) - 0.5 ln|S_c| - 0.5 (x - mu_c)^T S_c^-1 (x - mu_c)`.
    ///
    /// Returns an `[n_samples]` tensor of labels. Panics if called before
    /// `fit`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let means = self.means_.as_ref().unwrap();
        let covariances = self.covariances_.as_ref().unwrap();
        let classes = self.classes_.as_ref().unwrap();
        let priors = self.priors_.as_ref().unwrap();
        let n_classes = classes.len();

        // Precompute (log|S_c|, S_c^-1) once per class; reused for every sample.
        let cov_info: Vec<(f32, Vec<f32>)> = covariances.iter()
            .map(|cov| self.log_det_and_inv(cov, n_features))
            .collect();

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let sample = &x_data[i * n_features..(i + 1) * n_features];
                
                // Argmax over the per-class discriminant scores.
                let mut best_class = 0;
                let mut best_score = f32::NEG_INFINITY;

                for c in 0..n_classes {
                    let (log_det, ref cov_inv) = cov_info[c];
                    
                    // ln(pi_c) - 0.5 * log|S_c|
                    let mut score = priors[c].ln() - 0.5 * log_det;
                    
                    // Mahalanobis term: (x - mu_c)^T S_c^-1 (x - mu_c).
                    let mut quad = 0.0f32;
                    for j in 0..n_features {
                        for k in 0..n_features {
                            let diff_j = sample[j] - means[c][j];
                            let diff_k = sample[k] - means[c][k];
                            quad += diff_j * cov_inv[j * n_features + k] * diff_k;
                        }
                    }
                    score -= 0.5 * quad;

                    if score > best_score {
                        best_score = score;
                        best_class = c;
                    }
                }

                classes[best_class] as f32
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }
520
521    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
522        let predictions = self.predict(x);
523        let pred_data = predictions.data_f32();
524        let y_data = y.data_f32();
525
526        let correct: usize = pred_data.iter()
527            .zip(y_data.iter())
528            .filter(|(&p, &y)| (p - y).abs() < 0.5)
529            .count();
530
531        correct as f32 / y_data.len() as f32
532    }
533}
534
535impl Default for QuadraticDiscriminantAnalysis {
536    fn default() -> Self {
537        Self::new()
538    }
539}
540
#[cfg(test)]
mod tests {
    use super::*;

    // LDA on two well-separated 2-D clusters: training accuracy should beat
    // chance on linearly separable data.
    #[test]
    fn test_lda() {
        // Two samples near (1, 2) labelled 0, two near (5.5, 8.5) labelled 1.
        let x = Tensor::from_slice(&[1.0f32, 2.0,
            1.5, 1.8,
            5.0, 8.0,
            6.0, 9.0,
        ], &[4, 2]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut lda = LinearDiscriminantAnalysis::new();
        lda.fit(&x, &y);

        let score = lda.score(&x, &y);
        assert!(score >= 0.5);
    }

    // QDA on the same clusters; the ridge term stabilizes the tiny
    // per-class covariance estimates (2 samples per class).
    #[test]
    fn test_qda() {
        let x = Tensor::from_slice(&[1.0f32, 2.0,
            1.5, 1.8,
            5.0, 8.0,
            6.0, 9.0,
        ], &[4, 2]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut qda = QuadraticDiscriminantAnalysis::new().reg_param(0.1);
        qda.fit(&x, &y);

        let score = qda.score(&x, &y);
        assert!(score >= 0.5);
    }
}
579
580