// ghostflow_ml/mixture.rs

//! Mixture Models - Gaussian Mixture Model, Bayesian GMM

use ghostflow_core::Tensor;
use rand::prelude::*;
/// Gaussian Mixture Model using EM algorithm
pub struct GaussianMixture {
    /// Number of mixture components (clusters).
    pub n_components: usize,
    /// Requested covariance structure.
    /// NOTE(review): stored but never branched on — the EM routines below
    /// always use full covariance matrices; confirm before relying on
    /// `Tied`/`Diag`/`Spherical`.
    pub covariance_type: CovarianceType,
    /// Maximum EM iterations per initialization.
    pub max_iter: usize,
    /// Convergence threshold on the change of the per-sample log-likelihood.
    pub tol: f32,
    /// Number of random restarts; the run with the best lower bound wins.
    pub n_init: usize,
    /// Value added to covariance diagonals for numerical stability.
    pub reg_covar: f32,
    // Fitted mixing weights, one per component (None until `fit`).
    weights_: Option<Vec<f32>>,
    // Fitted component means, each of length n_features.
    means_: Option<Vec<Vec<f32>>>,
    // Fitted covariances, each a row-major n_features * n_features matrix.
    covariances_: Option<Vec<Vec<f32>>>,
    // Inverses of the fitted covariances, same layout.
    precisions_: Option<Vec<Vec<f32>>>,
    // Whether an EM run met the tolerance before exhausting max_iter.
    converged_: bool,
    // Number of EM iterations actually performed.
    n_iter_: usize,
    // Best per-sample log-likelihood lower bound reached by `fit`.
    lower_bound_: f32,
}

/// Covariance structure options (names mirror scikit-learn's
/// `covariance_type`).
///
/// NOTE(review): the EM implementation in this file does not currently
/// branch on this enum — all components are estimated with full
/// covariance matrices regardless of the selected variant.
#[derive(Clone, Copy)]
pub enum CovarianceType {
    /// Each component has its own full covariance matrix.
    Full,
    /// All components share a single full covariance matrix.
    Tied,
    /// Each component has a diagonal covariance matrix.
    Diag,
    /// Each component has a single shared variance (isotropic).
    Spherical,
}

31impl GaussianMixture {
32    pub fn new(n_components: usize) -> Self {
33        GaussianMixture {
34            n_components,
35            covariance_type: CovarianceType::Full,
36            max_iter: 100,
37            tol: 1e-3,
38            n_init: 1,
39            reg_covar: 1e-6,
40            weights_: None,
41            means_: None,
42            covariances_: None,
43            precisions_: None,
44            converged_: false,
45            n_iter_: 0,
46            lower_bound_: f32::NEG_INFINITY,
47        }
48    }
49
50    pub fn covariance_type(mut self, ct: CovarianceType) -> Self {
51        self.covariance_type = ct;
52        self
53    }
54
55    pub fn max_iter(mut self, n: usize) -> Self {
56        self.max_iter = n;
57        self
58    }
59
60    fn initialize_parameters(&self, x: &[f32], n_samples: usize, n_features: usize) 
61        -> (Vec<f32>, Vec<Vec<f32>>, Vec<Vec<f32>>) 
62    {
63        let mut rng = thread_rng();
64        
65        // Initialize weights uniformly
66        let weights = vec![1.0 / self.n_components as f32; self.n_components];
67        
68        // Initialize means using k-means++ style
69        let mut means = Vec::with_capacity(self.n_components);
70        let first_idx = rng.gen_range(0..n_samples);
71        means.push(x[first_idx * n_features..(first_idx + 1) * n_features].to_vec());
72
73        for _ in 1..self.n_components {
74            let distances: Vec<f32> = (0..n_samples)
75                .map(|i| {
76                    let xi = &x[i * n_features..(i + 1) * n_features];
77                    means.iter()
78                        .map(|m| {
79                            xi.iter().zip(m.iter())
80                                .map(|(&a, &b)| (a - b).powi(2))
81                                .sum::<f32>()
82                        })
83                        .fold(f32::MAX, f32::min)
84                })
85                .collect();
86
87            let total: f32 = distances.iter().sum();
88            if total > 0.0 {
89                let threshold = rng.gen::<f32>() * total;
90                let mut cumsum = 0.0f32;
91                for (i, &d) in distances.iter().enumerate() {
92                    cumsum += d;
93                    if cumsum >= threshold {
94                        means.push(x[i * n_features..(i + 1) * n_features].to_vec());
95                        break;
96                    }
97                }
98            }
99            if means.len() < self.n_components {
100                let idx = rng.gen_range(0..n_samples);
101                means.push(x[idx * n_features..(idx + 1) * n_features].to_vec());
102            }
103        }
104
105        // Initialize covariances as identity
106        let covariances: Vec<Vec<f32>> = (0..self.n_components)
107            .map(|_| {
108                let mut cov = vec![0.0f32; n_features * n_features];
109                for i in 0..n_features {
110                    cov[i * n_features + i] = 1.0;
111                }
112                cov
113            })
114            .collect();
115
116        (weights, means, covariances)
117    }
118
119    fn compute_log_det_cholesky(&self, cov: &[f32], n_features: usize) -> f32 {
120        // Compute log determinant via Cholesky
121        let mut l = vec![0.0f32; n_features * n_features];
122        
123        for i in 0..n_features {
124            for j in 0..=i {
125                let mut sum = cov[i * n_features + j];
126                for k in 0..j {
127                    sum -= l[i * n_features + k] * l[j * n_features + k];
128                }
129                if i == j {
130                    l[i * n_features + j] = sum.max(self.reg_covar).sqrt();
131                } else {
132                    l[i * n_features + j] = sum / l[j * n_features + j].max(1e-10);
133                }
134            }
135        }
136
137        let mut log_det = 0.0f32;
138        for i in 0..n_features {
139            log_det += 2.0 * l[i * n_features + i].max(1e-10).ln();
140        }
141        log_det
142    }
143
144    fn compute_precision(&self, cov: &[f32], n_features: usize) -> Vec<f32> {
145        // Invert covariance matrix
146        let mut a = cov.to_vec();
147        let mut inv = vec![0.0f32; n_features * n_features];
148        for i in 0..n_features {
149            inv[i * n_features + i] = 1.0;
150            a[i * n_features + i] += self.reg_covar;
151        }
152
153        // Gaussian elimination
154        for i in 0..n_features {
155            let pivot = a[i * n_features + i];
156            if pivot.abs() > 1e-10 {
157                for j in 0..n_features {
158                    a[i * n_features + j] /= pivot;
159                    inv[i * n_features + j] /= pivot;
160                }
161            }
162
163            for k in 0..n_features {
164                if k != i {
165                    let factor = a[k * n_features + i];
166                    for j in 0..n_features {
167                        a[k * n_features + j] -= factor * a[i * n_features + j];
168                        inv[k * n_features + j] -= factor * inv[i * n_features + j];
169                    }
170                }
171            }
172        }
173
174        inv
175    }
176
177    fn log_gaussian_prob(&self, x: &[f32], mean: &[f32], precision: &[f32], 
178                         log_det: f32, n_features: usize) -> f32 {
179        let mut diff = vec![0.0f32; n_features];
180        for i in 0..n_features {
181            diff[i] = x[i] - mean[i];
182        }
183
184        let mut mahalanobis = 0.0f32;
185        for i in 0..n_features {
186            for j in 0..n_features {
187                mahalanobis += diff[i] * precision[i * n_features + j] * diff[j];
188            }
189        }
190
191        -0.5 * (n_features as f32 * (2.0 * std::f32::consts::PI).ln() + log_det + mahalanobis)
192    }
193
194    fn e_step(&self, x: &[f32], n_samples: usize, n_features: usize,
195              weights: &[f32], means: &[Vec<f32>], precisions: &[Vec<f32>], 
196              log_dets: &[f32]) -> (Vec<Vec<f32>>, f32) {
197        let mut log_resp = vec![vec![0.0f32; self.n_components]; n_samples];
198        let mut log_prob_sum = 0.0f32;
199
200        for i in 0..n_samples {
201            let xi = &x[i * n_features..(i + 1) * n_features];
202            let mut log_probs = vec![0.0f32; self.n_components];
203            let mut max_log_prob = f32::NEG_INFINITY;
204
205            for k in 0..self.n_components {
206                log_probs[k] = weights[k].ln() + 
207                    self.log_gaussian_prob(xi, &means[k], &precisions[k], log_dets[k], n_features);
208                max_log_prob = max_log_prob.max(log_probs[k]);
209            }
210
211            // Log-sum-exp trick
212            let mut sum_exp = 0.0f32;
213            for k in 0..self.n_components {
214                sum_exp += (log_probs[k] - max_log_prob).exp();
215            }
216            let log_sum = max_log_prob + sum_exp.ln();
217            log_prob_sum += log_sum;
218
219            for k in 0..self.n_components {
220                log_resp[i][k] = log_probs[k] - log_sum;
221            }
222        }
223
224        // Convert to responsibilities
225        let resp: Vec<Vec<f32>> = log_resp.iter()
226            .map(|lr| lr.iter().map(|&l| l.exp()).collect())
227            .collect();
228
229        (resp, log_prob_sum / n_samples as f32)
230    }
231
232    fn m_step(&self, x: &[f32], resp: &[Vec<f32>], n_samples: usize, n_features: usize)
233        -> (Vec<f32>, Vec<Vec<f32>>, Vec<Vec<f32>>) 
234    {
235        // Compute Nk
236        let nk: Vec<f32> = (0..self.n_components)
237            .map(|k| resp.iter().map(|r| r[k]).sum::<f32>().max(1e-10))
238            .collect();
239
240        // Update weights
241        let weights: Vec<f32> = nk.iter().map(|&n| n / n_samples as f32).collect();
242
243        // Update means
244        let means: Vec<Vec<f32>> = (0..self.n_components)
245            .map(|k| {
246                let mut mean = vec![0.0f32; n_features];
247                for i in 0..n_samples {
248                    for j in 0..n_features {
249                        mean[j] += resp[i][k] * x[i * n_features + j];
250                    }
251                }
252                for j in 0..n_features {
253                    mean[j] /= nk[k];
254                }
255                mean
256            })
257            .collect();
258
259        // Update covariances
260        let covariances: Vec<Vec<f32>> = (0..self.n_components)
261            .map(|k| {
262                let mut cov = vec![0.0f32; n_features * n_features];
263                for i in 0..n_samples {
264                    for j1 in 0..n_features {
265                        for j2 in 0..n_features {
266                            let diff1 = x[i * n_features + j1] - means[k][j1];
267                            let diff2 = x[i * n_features + j2] - means[k][j2];
268                            cov[j1 * n_features + j2] += resp[i][k] * diff1 * diff2;
269                        }
270                    }
271                }
272                for j in 0..n_features * n_features {
273                    cov[j] /= nk[k];
274                }
275                // Add regularization
276                for j in 0..n_features {
277                    cov[j * n_features + j] += self.reg_covar;
278                }
279                cov
280            })
281            .collect();
282
283        (weights, means, covariances)
284    }
285
286    pub fn fit(&mut self, x: &Tensor) {
287        let x_data = x.data_f32();
288        let n_samples = x.dims()[0];
289        let n_features = x.dims()[1];
290
291        let mut best_lower_bound = f32::NEG_INFINITY;
292        let mut best_params = None;
293
294        for _ in 0..self.n_init {
295            let (mut weights, mut means, mut covariances) = 
296                self.initialize_parameters(&x_data, n_samples, n_features);
297
298            let mut prev_lower_bound = f32::NEG_INFINITY;
299
300            for iter in 0..self.max_iter {
301                // Compute precisions and log determinants
302                let precisions: Vec<Vec<f32>> = covariances.iter()
303                    .map(|c| self.compute_precision(c, n_features))
304                    .collect();
305                let log_dets: Vec<f32> = covariances.iter()
306                    .map(|c| self.compute_log_det_cholesky(c, n_features))
307                    .collect();
308
309                // E-step
310                let (resp, lower_bound) = self.e_step(
311                    &x_data, n_samples, n_features,
312                    &weights, &means, &precisions, &log_dets
313                );
314
315                // M-step
316                let (new_weights, new_means, new_covariances) = 
317                    self.m_step(&x_data, &resp, n_samples, n_features);
318
319                weights = new_weights;
320                means = new_means;
321                covariances = new_covariances;
322
323                // Check convergence
324                if (lower_bound - prev_lower_bound).abs() < self.tol {
325                    self.converged_ = true;
326                    self.n_iter_ = iter + 1;
327                    break;
328                }
329                prev_lower_bound = lower_bound;
330                self.n_iter_ = iter + 1;
331            }
332
333            if prev_lower_bound > best_lower_bound {
334                best_lower_bound = prev_lower_bound;
335                let precisions: Vec<Vec<f32>> = covariances.iter()
336                    .map(|c| self.compute_precision(c, n_features))
337                    .collect();
338                best_params = Some((weights, means, covariances, precisions));
339            }
340        }
341
342        if let Some((weights, means, covariances, precisions)) = best_params {
343            self.weights_ = Some(weights);
344            self.means_ = Some(means);
345            self.covariances_ = Some(covariances);
346            self.precisions_ = Some(precisions);
347            self.lower_bound_ = best_lower_bound;
348        }
349    }
350
351    pub fn predict(&self, x: &Tensor) -> Tensor {
352        let proba = self.predict_proba(x);
353        let proba_data = proba.data_f32();
354        let n_samples = x.dims()[0];
355
356        let labels: Vec<f32> = (0..n_samples)
357            .map(|i| {
358                let mut max_prob = f32::NEG_INFINITY;
359                let mut max_k = 0;
360                for k in 0..self.n_components {
361                    if proba_data[i * self.n_components + k] > max_prob {
362                        max_prob = proba_data[i * self.n_components + k];
363                        max_k = k;
364                    }
365                }
366                max_k as f32
367            })
368            .collect();
369
370        Tensor::from_slice(&labels, &[n_samples]).unwrap()
371    }
372
373    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
374        let x_data = x.data_f32();
375        let n_samples = x.dims()[0];
376        let n_features = x.dims()[1];
377
378        let weights = self.weights_.as_ref().expect("Model not fitted");
379        let means = self.means_.as_ref().unwrap();
380        let precisions = self.precisions_.as_ref().unwrap();
381        let covariances = self.covariances_.as_ref().unwrap();
382
383        let log_dets: Vec<f32> = covariances.iter()
384            .map(|c| self.compute_log_det_cholesky(c, n_features))
385            .collect();
386
387        let (resp, _) = self.e_step(&x_data, n_samples, n_features, weights, means, precisions, &log_dets);
388
389        let proba: Vec<f32> = resp.into_iter().flatten().collect();
390        Tensor::from_slice(&proba, &[n_samples, self.n_components]).unwrap()
391    }
392
393    pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
394        self.fit(x);
395        self.predict(x)
396    }
397
398    pub fn score(&self, x: &Tensor) -> f32 {
399        let x_data = x.data_f32();
400        let n_samples = x.dims()[0];
401        let n_features = x.dims()[1];
402
403        let weights = self.weights_.as_ref().expect("Model not fitted");
404        let means = self.means_.as_ref().unwrap();
405        let precisions = self.precisions_.as_ref().unwrap();
406        let covariances = self.covariances_.as_ref().unwrap();
407
408        let log_dets: Vec<f32> = covariances.iter()
409            .map(|c| self.compute_log_det_cholesky(c, n_features))
410            .collect();
411
412        let (_, lower_bound) = self.e_step(&x_data, n_samples, n_features, weights, means, precisions, &log_dets);
413        lower_bound
414    }
415
416    pub fn bic(&self, x: &Tensor) -> f32 {
417        let n_samples = x.dims()[0];
418        let n_features = x.dims()[1];
419        
420        // Number of parameters
421        let n_params = self.n_components - 1  // weights
422            + self.n_components * n_features  // means
423            + self.n_components * n_features * (n_features + 1) / 2;  // covariances
424
425        -2.0 * self.score(x) * n_samples as f32 + n_params as f32 * (n_samples as f32).ln()
426    }
427
428    pub fn aic(&self, x: &Tensor) -> f32 {
429        let n_samples = x.dims()[0];
430        let n_features = x.dims()[1];
431        
432        let n_params = self.n_components - 1
433            + self.n_components * n_features
434            + self.n_components * n_features * (n_features + 1) / 2;
435
436        -2.0 * self.score(x) * n_samples as f32 + 2.0 * n_params as f32
437    }
438}
439
/// Bayesian Gaussian Mixture Model
///
/// NOTE(review): the current `fit` delegates to the standard
/// `GaussianMixture` EM — the Dirichlet prior fields below are stored but
/// not yet used in estimation.
pub struct BayesianGaussianMixture {
    /// Maximum number of mixture components.
    pub n_components: usize,
    /// Requested covariance structure (see `CovarianceType`).
    pub covariance_type: CovarianceType,
    /// Maximum EM iterations.
    pub max_iter: usize,
    /// Convergence threshold, forwarded to the underlying GMM.
    pub tol: f32,
    /// Type of the weight concentration prior (currently unused by `fit`).
    pub weight_concentration_prior_type: WeightConcentrationType,
    /// Concentration prior value; None means "use a default"
    /// (currently unused by `fit`).
    pub weight_concentration_prior: Option<f32>,
    /// Diagonal regularization, forwarded to the underlying GMM.
    pub reg_covar: f32,
    // Fitted mixing weights (copied from the underlying GMM).
    weights_: Option<Vec<f32>>,
    // Fitted component means.
    means_: Option<Vec<Vec<f32>>>,
    // Fitted covariance matrices, row-major n_features * n_features.
    covariances_: Option<Vec<Vec<f32>>>,
    // Whether the underlying EM run converged.
    converged_: bool,
    // Iterations used by the underlying EM run.
    n_iter_: usize,
}

/// Prior type for the mixture weight concentration
/// (mirrors scikit-learn's `weight_concentration_prior_type`).
///
/// NOTE(review): stored on `BayesianGaussianMixture` but not consumed by
/// the current simplified `fit`.
#[derive(Clone, Copy)]
pub enum WeightConcentrationType {
    /// Stick-breaking / Dirichlet process prior.
    DirichletProcess,
    /// Finite symmetric Dirichlet distribution prior.
    DirichletDistribution,
}

462impl BayesianGaussianMixture {
463    pub fn new(n_components: usize) -> Self {
464        BayesianGaussianMixture {
465            n_components,
466            covariance_type: CovarianceType::Full,
467            max_iter: 100,
468            tol: 1e-3,
469            weight_concentration_prior_type: WeightConcentrationType::DirichletProcess,
470            weight_concentration_prior: None,
471            reg_covar: 1e-6,
472            weights_: None,
473            means_: None,
474            covariances_: None,
475            converged_: false,
476            n_iter_: 0,
477        }
478    }
479
480    pub fn fit(&mut self, x: &Tensor) {
481        // Simplified: use standard GMM with automatic component selection
482        let mut gmm = GaussianMixture::new(self.n_components);
483        gmm.max_iter = self.max_iter;
484        gmm.tol = self.tol;
485        gmm.reg_covar = self.reg_covar;
486        gmm.fit(x);
487
488        self.weights_ = gmm.weights_;
489        self.means_ = gmm.means_;
490        self.covariances_ = gmm.covariances_;
491        self.converged_ = gmm.converged_;
492        self.n_iter_ = gmm.n_iter_;
493    }
494
495    pub fn predict(&self, x: &Tensor) -> Tensor {
496        let x_data = x.data_f32();
497        let n_samples = x.dims()[0];
498        let n_features = x.dims()[1];
499
500        let means = self.means_.as_ref().expect("Model not fitted");
501        
502        let labels: Vec<f32> = (0..n_samples)
503            .map(|i| {
504                let xi = &x_data[i * n_features..(i + 1) * n_features];
505                let mut min_dist = f32::INFINITY;
506                let mut best_k = 0;
507                for (k, mean) in means.iter().enumerate() {
508                    let dist: f32 = xi.iter().zip(mean.iter())
509                        .map(|(&a, &b)| (a - b).powi(2))
510                        .sum();
511                    if dist < min_dist {
512                        min_dist = dist;
513                        best_k = k;
514                    }
515                }
516                best_k as f32
517            })
518            .collect();
519
520        Tensor::from_slice(&labels, &[n_samples]).unwrap()
521    }
522
523    pub fn fit_predict(&mut self, x: &Tensor) -> Tensor {
524        self.fit(x);
525        self.predict(x)
526    }
527}
528
#[cfg(test)]
mod tests {
    use super::*;

    /// Two well-separated 2-D clusters (three points each) fit without
    /// panicking and produce one label per sample.
    #[test]
    fn test_gaussian_mixture() {
        // BUG FIX: this data holds 6 two-dimensional points (12 values),
        // but was previously declared with shape [4, 2] (8 values).
        let x = Tensor::from_slice(&[
            0.0, 0.0, 0.1, 0.1, 0.2, 0.0,
            5.0, 5.0, 5.1, 5.1, 5.2, 5.0,
        ], &[6, 2]).unwrap();

        let mut gmm = GaussianMixture::new(2);
        let labels = gmm.fit_predict(&x);
        assert_eq!(labels.dims(), &[6]);
    }

    /// The simplified Bayesian GMM fits and labels four 2-D points.
    #[test]
    fn test_bayesian_gmm() {
        let x = Tensor::from_slice(&[
            0.0, 0.0, 0.1, 0.1,
            5.0, 5.0, 5.1, 5.1,
        ], &[4, 2]).unwrap();

        let mut bgmm = BayesianGaussianMixture::new(2);
        let labels = bgmm.fit_predict(&x);
        assert_eq!(labels.dims(), &[4]);
    }
}