ghostflow_ml/linear.rs

//! Linear models: ordinary least squares, Ridge, Lasso, Elastic Net, and logistic regression, with closed-form and gradient-based solvers

use ghostflow_core::Tensor;

/// Linear Regression using Ordinary Least Squares (closed-form) or Gradient Descent
pub struct LinearRegression {
    /// Coefficients (weights)
    pub coef_: Option<Vec<f32>>,
    /// Intercept (bias)
    pub intercept_: Option<f32>,
    /// Whether to fit intercept
    pub fit_intercept: bool,
    /// Solver to use
    pub solver: LinearSolver,
    /// Learning rate for gradient descent
    pub learning_rate: f32,
    /// Maximum iterations for gradient descent
    pub max_iter: usize,
    /// Tolerance for convergence
    pub tol: f32,
}

#[derive(Clone, Copy, Debug)]
pub enum LinearSolver {
    /// Normal equation: (X^T X)^-1 X^T y
    ClosedForm,
    /// Gradient descent
    GradientDescent,
    /// Stochastic gradient descent
    SGD,
}
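
// A minimal usage sketch of the builder-style configuration (mirroring the
// tests at the bottom of this file; `Tensor::from_slice` comes from
// `ghostflow_core`):
//
//     let mut model = LinearRegression::new()
//         .solver(LinearSolver::GradientDescent)
//         .learning_rate(0.05)
//         .max_iter(5_000);
//     model.fit(&x, &y);
//     let r2 = model.score(&x, &y);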

impl LinearRegression {
    pub fn new() -> Self {
        LinearRegression {
            coef_: None,
            intercept_: None,
            fit_intercept: true,
            solver: LinearSolver::ClosedForm,
            learning_rate: 0.01,
            max_iter: 1000,
            tol: 1e-4,
        }
    }

    pub fn fit_intercept(mut self, fit: bool) -> Self {
        self.fit_intercept = fit;
        self
    }

    pub fn solver(mut self, solver: LinearSolver) -> Self {
        self.solver = solver;
        self
    }

    pub fn learning_rate(mut self, lr: f32) -> Self {
        self.learning_rate = lr;
        self
    }

    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    /// Fit the linear regression model
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        match self.solver {
            LinearSolver::ClosedForm => self.fit_closed_form(x, y),
            LinearSolver::GradientDescent => self.fit_gradient_descent(x, y),
            LinearSolver::SGD => self.fit_sgd(x, y),
        }
    }

    /// Closed-form solution using normal equation
    fn fit_closed_form(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Add bias column if fitting intercept
        let n_cols = if self.fit_intercept { n_features + 1 } else { n_features };

        let mut x_aug = Vec::with_capacity(n_samples * n_cols);
        for i in 0..n_samples {
            if self.fit_intercept {
                x_aug.push(1.0); // Bias term
            }
            for j in 0..n_features {
                x_aug.push(x_data[i * n_features + j]);
            }
        }

        // Compute X^T X
        let mut xtx = vec![0.0f32; n_cols * n_cols];
        for i in 0..n_cols {
            for j in 0..n_cols {
                let mut sum = 0.0f32;
                for k in 0..n_samples {
                    sum += x_aug[k * n_cols + i] * x_aug[k * n_cols + j];
                }
                xtx[i * n_cols + j] = sum;
            }
        }

        // Compute X^T y
        let mut xty = vec![0.0f32; n_cols];
        for i in 0..n_cols {
            let mut sum = 0.0f32;
            for k in 0..n_samples {
                sum += x_aug[k * n_cols + i] * y_data[k];
            }
            xty[i] = sum;
        }

        // Solve (X^T X) w = X^T y using Cholesky decomposition
        let weights = self.solve_linear_system(&xtx, &xty, n_cols);

        if self.fit_intercept {
            self.intercept_ = Some(weights[0]);
            self.coef_ = Some(weights[1..].to_vec());
        } else {
            self.intercept_ = Some(0.0);
            self.coef_ = Some(weights);
        }
    }

    /// Solve linear system Ax = b using Cholesky decomposition
    fn solve_linear_system(&self, a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
        // Add small regularization for numerical stability
        let mut a_reg = a.to_vec();
        for i in 0..n {
            a_reg[i * n + i] += 1e-8;
        }

        // Cholesky decomposition: A = L L^T
        let mut l = vec![0.0f32; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;

                if i == j {
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    let val = a_reg[j * n + j] - sum;
                    l[j * n + j] = if val > 0.0 { val.sqrt() } else { 1e-10 };
                } else {
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a_reg[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }

        // Forward substitution: L y = b
        let mut y = vec![0.0f32; n];
        for i in 0..n {
            let mut sum = 0.0f32;
            for j in 0..i {
                sum += l[i * n + j] * y[j];
            }
            y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
        }

        // Backward substitution: L^T x = y
        let mut x = vec![0.0f32; n];
        for i in (0..n).rev() {
            let mut sum = 0.0f32;
            for j in (i + 1)..n {
                sum += l[j * n + i] * x[j];
            }
            x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
        }

        x
    }

    /// Fit using batch gradient descent
    fn fit_gradient_descent(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Initialize weights
        let mut weights = vec![0.0f32; n_features];
        let mut bias = 0.0f32;

        for _iter in 0..self.max_iter {
            // Compute predictions
            let mut predictions = vec![0.0f32; n_samples];
            for i in 0..n_samples {
                let mut pred = bias;
                for j in 0..n_features {
                    pred += weights[j] * x_data[i * n_features + j];
                }
                predictions[i] = pred;
            }

            // Compute gradients
            let mut grad_w = vec![0.0f32; n_features];
            let mut grad_b = 0.0f32;

            for i in 0..n_samples {
                let error = predictions[i] - y_data[i];
                grad_b += error;
                for j in 0..n_features {
                    grad_w[j] += error * x_data[i * n_features + j];
                }
            }

            // Average gradients
            let scale = 1.0 / n_samples as f32;
            grad_b *= scale;
            for g in &mut grad_w {
                *g *= scale;
            }

            // Update weights
            if self.fit_intercept {
                bias -= self.learning_rate * grad_b;
            }
            for j in 0..n_features {
                weights[j] -= self.learning_rate * grad_w[j];
            }

            // Check convergence
            let grad_norm: f32 = grad_w.iter().map(|&g| g * g).sum::<f32>() + grad_b * grad_b;
            if grad_norm.sqrt() < self.tol {
                break;
            }
        }

        self.coef_ = Some(weights);
        self.intercept_ = Some(bias);
    }

    /// Fit using stochastic gradient descent
    fn fit_sgd(&mut self, x: &Tensor, y: &Tensor) {
        use rand::seq::SliceRandom;

        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![0.0f32; n_features];
        let mut bias = 0.0f32;
        let mut indices: Vec<usize> = (0..n_samples).collect();

        for _epoch in 0..self.max_iter {
            indices.shuffle(&mut rand::thread_rng());

            for &i in &indices {
                // Compute prediction for single sample
                let mut pred = bias;
                for j in 0..n_features {
                    pred += weights[j] * x_data[i * n_features + j];
                }

                let error = pred - y_data[i];

                // Update weights
                if self.fit_intercept {
                    bias -= self.learning_rate * error;
                }
                for j in 0..n_features {
                    weights[j] -= self.learning_rate * error * x_data[i * n_features + j];
                }
            }
        }

        self.coef_ = Some(weights);
        self.intercept_ = Some(bias);
    }

    /// Predict target values
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.unwrap_or(0.0);

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// Compute R² score
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;

        let ss_res: f32 = pred_data.iter()
            .zip(y_data.iter())
            .map(|(&p, &y)| (y - p).powi(2))
            .sum();

        let ss_tot: f32 = y_data.iter()
            .map(|&y| (y - y_mean).powi(2))
            .sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}

impl Default for LinearRegression {
    fn default() -> Self {
        Self::new()
    }
}


/// Ridge Regression (L2 regularization)
pub struct Ridge {
    pub coef_: Option<Vec<f32>>,
    pub intercept_: Option<f32>,
    pub alpha: f32,
    pub fit_intercept: bool,
    pub max_iter: usize,
    pub tol: f32,
}

impl Ridge {
    pub fn new(alpha: f32) -> Self {
        Ridge {
            coef_: None,
            intercept_: None,
            alpha,
            fit_intercept: true,
            max_iter: 1000,
            tol: 1e-4,
        }
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Center data if fitting intercept
        let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
            let x_mean: Vec<f32> = (0..n_features)
                .map(|j| {
                    (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32
                })
                .collect();

            let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;

            let x_centered: Vec<f32> = (0..n_samples)
                .flat_map(|i| {
                    (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>()
                })
                .collect();

            let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();

            (x_centered, y_centered, x_mean, y_mean)
        } else {
            (x_data.to_vec(), y_data.to_vec(), vec![0.0; n_features], 0.0)
        };

        // Compute X^T X + alpha * I
        let mut xtx = vec![0.0f32; n_features * n_features];
        for i in 0..n_features {
            for j in 0..n_features {
                let mut sum = 0.0f32;
                for k in 0..n_samples {
                    sum += x_centered[k * n_features + i] * x_centered[k * n_features + j];
                }
                xtx[i * n_features + j] = sum;
                if i == j {
                    xtx[i * n_features + j] += self.alpha;
                }
            }
        }

        // Compute X^T y
        let mut xty = vec![0.0f32; n_features];
        for i in 0..n_features {
            let mut sum = 0.0f32;
            for k in 0..n_samples {
                sum += x_centered[k * n_features + i] * y_centered[k];
            }
            xty[i] = sum;
        }

        // Solve using Cholesky
        let weights = self.solve_cholesky(&xtx, &xty, n_features);

        self.coef_ = Some(weights.clone());

        // Compute intercept
        if self.fit_intercept {
            let mut intercept = y_mean;
            for j in 0..n_features {
                intercept -= weights[j] * x_mean[j];
            }
            self.intercept_ = Some(intercept);
        } else {
            self.intercept_ = Some(0.0);
        }
    }

    fn solve_cholesky(&self, a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
        let mut l = vec![0.0f32; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;
                if i == j {
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
                } else {
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }

        let mut y = vec![0.0f32; n];
        for i in 0..n {
            let mut sum = 0.0f32;
            for j in 0..i {
                sum += l[i * n + j] * y[j];
            }
            y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
        }

        let mut x = vec![0.0f32; n];
        for i in (0..n).rev() {
            let mut sum = 0.0f32;
            for j in (i + 1)..n {
                sum += l[j * n + i] * x[j];
            }
            x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
        }

        x
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.unwrap_or(0.0);

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}
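
// Note on the centering above: solving (X^T X + αI) w = X^T y on centered data
// keeps the intercept out of the L2 penalty; it is recovered afterwards as
// intercept = ȳ − w · x̄.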


/// Lasso Regression (L1 regularization) using Coordinate Descent
pub struct Lasso {
    pub coef_: Option<Vec<f32>>,
    pub intercept_: Option<f32>,
    pub alpha: f32,
    pub fit_intercept: bool,
    pub max_iter: usize,
    pub tol: f32,
}

impl Lasso {
    pub fn new(alpha: f32) -> Self {
        Lasso {
            coef_: None,
            intercept_: None,
            alpha,
            fit_intercept: true,
            max_iter: 1000,
            tol: 1e-4,
        }
    }

    /// Soft-thresholding operator: S(x, λ) = sign(x) · max(|x| − λ, 0)
    fn soft_threshold(x: f32, lambda: f32) -> f32 {
        if x > lambda {
            x - lambda
        } else if x < -lambda {
            x + lambda
        } else {
            0.0
        }
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Center data (only when fitting an intercept; a zero mean leaves X unchanged)
        let x_mean: Vec<f32> = if self.fit_intercept {
            (0..n_features)
                .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
                .collect()
        } else {
            vec![0.0; n_features]
        };

        let y_mean = if self.fit_intercept {
            y_data.iter().sum::<f32>() / n_samples as f32
        } else {
            0.0
        };

        let x_centered: Vec<f32> = (0..n_samples)
            .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
            .collect();

        let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();

        // Precompute X^T X diagonal and X^T y
        let mut x_sq_sum = vec![0.0f32; n_features];
        for j in 0..n_features {
            for i in 0..n_samples {
                x_sq_sum[j] += x_centered[i * n_features + j].powi(2);
            }
        }

        // Initialize weights
        let mut weights = vec![0.0f32; n_features];
        let mut residuals = y_centered.clone();

        // Coordinate descent
        for _iter in 0..self.max_iter {
            let weights_old = weights.clone();

            for j in 0..n_features {
                // Add back contribution of current weight
                for i in 0..n_samples {
                    residuals[i] += weights[j] * x_centered[i * n_features + j];
                }

                // Compute correlation
                let mut rho = 0.0f32;
                for i in 0..n_samples {
                    rho += x_centered[i * n_features + j] * residuals[i];
                }

                // Update weight with soft thresholding
                let lambda = self.alpha * n_samples as f32;
                weights[j] = Self::soft_threshold(rho, lambda) / x_sq_sum[j].max(1e-10);

                // Update residuals
                for i in 0..n_samples {
                    residuals[i] -= weights[j] * x_centered[i * n_features + j];
                }
            }

            // Check convergence
            let max_change: f32 = weights.iter()
                .zip(weights_old.iter())
                .map(|(&w, &w_old)| (w - w_old).abs())
                .fold(0.0f32, f32::max);

            if max_change < self.tol {
                break;
            }
        }

        self.coef_ = Some(weights.clone());

        // Compute intercept
        if self.fit_intercept {
            let mut intercept = y_mean;
            for j in 0..n_features {
                intercept -= weights[j] * x_mean[j];
            }
            self.intercept_ = Some(intercept);
        } else {
            self.intercept_ = Some(0.0);
        }
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.unwrap_or(0.0);

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}
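
// Sketch: with a sufficiently large `alpha`, the soft-thresholding step drives
// some coefficients exactly to zero, which is what makes Lasso useful for
// feature selection, e.g.
//
//     let mut lasso = Lasso::new(0.5);
//     lasso.fit(&x, &y);
//     let n_zero = lasso.coef_.as_ref().unwrap().iter().filter(|w| **w == 0.0).count();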


/// Elastic Net (L1 + L2 regularization) using Coordinate Descent
pub struct ElasticNet {
    pub coef_: Option<Vec<f32>>,
    pub intercept_: Option<f32>,
    pub alpha: f32,
    pub l1_ratio: f32,
    pub fit_intercept: bool,
    pub max_iter: usize,
    pub tol: f32,
}

impl ElasticNet {
    pub fn new(alpha: f32, l1_ratio: f32) -> Self {
        ElasticNet {
            coef_: None,
            intercept_: None,
            alpha,
            l1_ratio: l1_ratio.clamp(0.0, 1.0),
            fit_intercept: true,
            max_iter: 1000,
            tol: 1e-4,
        }
    }

    /// Soft-thresholding operator: S(x, λ) = sign(x) · max(|x| − λ, 0)
    fn soft_threshold(x: f32, lambda: f32) -> f32 {
        if x > lambda {
            x - lambda
        } else if x < -lambda {
            x + lambda
        } else {
            0.0
        }
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let l1_reg = self.alpha * self.l1_ratio * n_samples as f32;
        let l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples as f32;

        // Center data (only when fitting an intercept; a zero mean leaves X unchanged)
        let x_mean: Vec<f32> = if self.fit_intercept {
            (0..n_features)
                .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
                .collect()
        } else {
            vec![0.0; n_features]
        };

        let y_mean = if self.fit_intercept {
            y_data.iter().sum::<f32>() / n_samples as f32
        } else {
            0.0
        };

        let x_centered: Vec<f32> = (0..n_samples)
            .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
            .collect();

        let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();

        // Precompute X^T X diagonal
        let mut x_sq_sum = vec![0.0f32; n_features];
        for j in 0..n_features {
            for i in 0..n_samples {
                x_sq_sum[j] += x_centered[i * n_features + j].powi(2);
            }
        }

        let mut weights = vec![0.0f32; n_features];
        let mut residuals = y_centered.clone();

        for _iter in 0..self.max_iter {
            let weights_old = weights.clone();

            for j in 0..n_features {
                for i in 0..n_samples {
                    residuals[i] += weights[j] * x_centered[i * n_features + j];
                }

                let mut rho = 0.0f32;
                for i in 0..n_samples {
                    rho += x_centered[i * n_features + j] * residuals[i];
                }

                // Elastic net update
                let denom = x_sq_sum[j] + l2_reg;
                weights[j] = Self::soft_threshold(rho, l1_reg) / denom.max(1e-10);

                for i in 0..n_samples {
                    residuals[i] -= weights[j] * x_centered[i * n_features + j];
                }
            }

            let max_change: f32 = weights.iter()
                .zip(weights_old.iter())
                .map(|(&w, &w_old)| (w - w_old).abs())
                .fold(0.0f32, f32::max);

            if max_change < self.tol {
                break;
            }
        }

        self.coef_ = Some(weights.clone());

        if self.fit_intercept {
            let mut intercept = y_mean;
            for j in 0..n_features {
                intercept -= weights[j] * x_mean[j];
            }
            self.intercept_ = Some(intercept);
        } else {
            self.intercept_ = Some(0.0);
        }
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.unwrap_or(0.0);

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}


/// Logistic Regression for binary and multiclass classification
pub struct LogisticRegression {
    pub coef_: Option<Vec<Vec<f32>>>,
    pub intercept_: Option<Vec<f32>>,
    pub penalty: Penalty,
    pub c: f32,
    pub fit_intercept: bool,
    pub max_iter: usize,
    pub tol: f32,
    pub multi_class: MultiClass,
    n_classes: usize,
}

#[derive(Clone, Copy, Debug)]
pub enum Penalty {
    None,
    L1,
    L2,
    ElasticNet(f32),
}

#[derive(Clone, Copy, Debug)]
pub enum MultiClass {
    /// One-vs-Rest for multiclass
    OvR,
    /// Multinomial (softmax)
    Multinomial,
}
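
// Sketch: the multiclass strategy is chosen via the builder; OvR is the
// default and multinomial (softmax) fitting is opt-in, e.g.
//
//     let mut clf = LogisticRegression::new()
//         .multi_class(MultiClass::Multinomial)
//         .max_iter(500);
//     clf.fit(&x, &y);            // y holds class indices 0..k as f32
//     let proba = clf.predict_proba(&x);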

impl LogisticRegression {
    pub fn new() -> Self {
        LogisticRegression {
            coef_: None,
            intercept_: None,
            penalty: Penalty::L2,
            c: 1.0,
            fit_intercept: true,
            max_iter: 100,
            tol: 1e-4,
            multi_class: MultiClass::OvR,
            n_classes: 0,
        }
    }

    pub fn penalty(mut self, penalty: Penalty) -> Self {
        self.penalty = penalty;
        self
    }

    pub fn c(mut self, c: f32) -> Self {
        self.c = c;
        self
    }

    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    pub fn multi_class(mut self, mc: MultiClass) -> Self {
        self.multi_class = mc;
        self
    }

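    /// Numerically stable logistic sigmoid: the two branches avoid evaluating
    /// exp(x) for large positive x, which would overflow in f32.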
    fn sigmoid(x: f32) -> f32 {
        if x >= 0.0 {
            1.0 / (1.0 + (-x).exp())
        } else {
            let exp_x = x.exp();
            exp_x / (1.0 + exp_x)
        }
    }

    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let y_data = y.data_f32();
        self.n_classes = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        if self.n_classes == 2 {
            self.fit_binary(x, y);
        } else {
            match self.multi_class {
                MultiClass::OvR => self.fit_ovr(x, y),
                MultiClass::Multinomial => self.fit_multinomial(x, y),
            }
        }
    }

    fn fit_binary(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![0.0f32; n_features];
        let mut bias = 0.0f32;

        let reg = 1.0 / self.c;

        // Batch gradient descent with a fixed learning rate
        for _iter in 0..self.max_iter {
            let mut grad_w = vec![0.0f32; n_features];
            let mut grad_b = 0.0f32;

            for i in 0..n_samples {
                let mut z = bias;
                for j in 0..n_features {
                    z += weights[j] * x_data[i * n_features + j];
                }
                let p = Self::sigmoid(z);
                let error = p - y_data[i];

                grad_b += error;
                for j in 0..n_features {
                    grad_w[j] += error * x_data[i * n_features + j];
                }
            }

            // Add regularization gradient
            match self.penalty {
                Penalty::L2 => {
                    for j in 0..n_features {
                        grad_w[j] += reg * weights[j];
                    }
                }
                Penalty::L1 => {
                    for j in 0..n_features {
                        grad_w[j] += reg * weights[j].signum();
                    }
                }
                _ => {}
            }

            // Scale gradients
            let scale = 1.0 / n_samples as f32;
            grad_b *= scale;
            for g in &mut grad_w {
                *g *= scale;
            }

            // Update with learning rate
            let lr = 0.1;
            if self.fit_intercept {
                bias -= lr * grad_b;
            }
            for j in 0..n_features {
                weights[j] -= lr * grad_w[j];
            }

            // Check convergence
            let grad_norm: f32 = grad_w.iter().map(|&g| g * g).sum::<f32>().sqrt();
            if grad_norm < self.tol {
                break;
            }
        }

        self.coef_ = Some(vec![weights]);
        self.intercept_ = Some(vec![bias]);
    }

    fn fit_ovr(&mut self, x: &Tensor, y: &Tensor) {
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let _n_features = x.dims()[1];

        let mut all_coef = Vec::with_capacity(self.n_classes);
        let mut all_intercept = Vec::with_capacity(self.n_classes);

        for c in 0..self.n_classes {
            // Create binary labels
            let y_binary: Vec<f32> = y_data.iter()
                .map(|&yi| if yi as usize == c { 1.0 } else { 0.0 })
                .collect();
            let y_tensor = Tensor::from_slice(&y_binary, &[n_samples]).unwrap();

            let mut binary_lr = LogisticRegression::new()
                .penalty(self.penalty)
                .c(self.c)
                .max_iter(self.max_iter);
            binary_lr.n_classes = 2;
            binary_lr.fit_binary(x, &y_tensor);

            all_coef.push(binary_lr.coef_.unwrap().into_iter().next().unwrap());
            all_intercept.push(binary_lr.intercept_.unwrap().into_iter().next().unwrap());
        }

        self.coef_ = Some(all_coef);
        self.intercept_ = Some(all_intercept);
    }

    fn fit_multinomial(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![vec![0.0f32; n_features]; self.n_classes];
        let mut biases = vec![0.0f32; self.n_classes];

        let reg = 1.0 / self.c;
        let lr = 0.1;

        for _iter in 0..self.max_iter {
            // Compute softmax probabilities
            let mut probs = vec![vec![0.0f32; self.n_classes]; n_samples];

            for i in 0..n_samples {
                let mut max_z = f32::NEG_INFINITY;
                let mut z = vec![0.0f32; self.n_classes];

                for c in 0..self.n_classes {
                    z[c] = biases[c];
                    for j in 0..n_features {
                        z[c] += weights[c][j] * x_data[i * n_features + j];
                    }
                    max_z = max_z.max(z[c]);
                }

                let mut sum_exp = 0.0f32;
                for c in 0..self.n_classes {
                    probs[i][c] = (z[c] - max_z).exp();
                    sum_exp += probs[i][c];
                }
                for c in 0..self.n_classes {
                    probs[i][c] /= sum_exp;
                }
            }

            // Compute gradients
            let mut grad_w = vec![vec![0.0f32; n_features]; self.n_classes];
            let mut grad_b = vec![0.0f32; self.n_classes];

            for i in 0..n_samples {
                let true_class = y_data[i] as usize;
                for c in 0..self.n_classes {
                    let target = if c == true_class { 1.0 } else { 0.0 };
                    let error = probs[i][c] - target;

                    grad_b[c] += error;
                    for j in 0..n_features {
                        grad_w[c][j] += error * x_data[i * n_features + j];
                    }
                }
            }

            // Add regularization and scale
            let scale = 1.0 / n_samples as f32;
            for c in 0..self.n_classes {
                grad_b[c] *= scale;
                for j in 0..n_features {
                    grad_w[c][j] = grad_w[c][j] * scale + reg * weights[c][j];
                }
            }

            // Update
            for c in 0..self.n_classes {
                if self.fit_intercept {
                    biases[c] -= lr * grad_b[c];
                }
                for j in 0..n_features {
                    weights[c][j] -= lr * grad_w[c][j];
                }
            }
        }

        self.coef_ = Some(weights);
        self.intercept_ = Some(biases);
    }

    pub fn predict(&self, x: &Tensor) -> Tensor {
        let proba = self.predict_proba(x);
        let proba_data = proba.data_f32();
        let n_samples = x.dims()[0];

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let start = i * self.n_classes;
                let sample_probs = &proba_data[start..start + self.n_classes];
                sample_probs.iter()
                    .enumerate()
                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                    .map(|(idx, _)| idx as f32)
                    .unwrap_or(0.0)
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.as_ref().expect("Model not fitted");

        let mut probs = Vec::with_capacity(n_samples * self.n_classes);

        for i in 0..n_samples {
            if self.n_classes == 2 {
                let mut z = intercept[0];
                for j in 0..n_features {
                    z += coef[0][j] * x_data[i * n_features + j];
                }
                let p = Self::sigmoid(z);
                probs.push(1.0 - p);
                probs.push(p);
            } else {
                // Softmax over the per-class scores. For multinomial weights
                // this is exact; for OvR-trained weights it is a normalization
                // heuristic rather than calibrated probabilities.
                let mut z = vec![0.0f32; self.n_classes];
                let mut max_z = f32::NEG_INFINITY;

                for c in 0..self.n_classes {
                    z[c] = intercept[c];
                    for j in 0..n_features {
                        z[c] += coef[c][j] * x_data[i * n_features + j];
                    }
                    max_z = max_z.max(z[c]);
                }

                let mut sum_exp = 0.0f32;
                for c in 0..self.n_classes {
                    z[c] = (z[c] - max_z).exp();
                    sum_exp += z[c];
                }

                for c in 0..self.n_classes {
                    probs.push(z[c] / sum_exp);
                }
            }
        }

        Tensor::from_slice(&probs, &[n_samples, self.n_classes]).unwrap()
    }

    /// Mean classification accuracy on the given data and labels
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}

impl Default for LogisticRegression {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_linear_regression() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let y = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        let mut lr = LinearRegression::new();
        lr.fit(&x, &y);

        let _predictions = lr.predict(&x);
        let score = lr.score(&x, &y);

        assert!(score > 0.99, "R² should be close to 1.0");
    }

    #[test]
    fn test_ridge() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let y = Tensor::from_slice(&[2.1f32, 3.9, 6.2, 7.8, 10.1], &[5]).unwrap();

        let mut ridge = Ridge::new(1.0);
        ridge.fit(&x, &y);

        let score = ridge.score(&x, &y);
        assert!(score > 0.95);
    }

    #[test]
    fn test_logistic_regression() {
        let x = Tensor::from_slice(&[
            0.0f32, 0.0,
            0.0, 1.0,
            1.0, 0.0,
            1.0, 1.0,
        ], &[4, 2]).unwrap();
        let y = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut lr = LogisticRegression::new().max_iter(1000);
        lr.fit(&x, &y);

        let predictions = lr.predict(&x);
        assert_eq!(predictions.dims(), &[4]);
    }
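
    // Additional smoke tests for the coordinate-descent models. The expected
    // behaviour (a near-perfect fit on nearly noiseless data under mild
    // shrinkage) follows from the algorithms above, but the thresholds are
    // illustrative rather than guarantees.
    #[test]
    fn test_lasso() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let y = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        // A small alpha keeps shrinkage mild, so the fit should stay close to OLS.
        let mut lasso = Lasso::new(0.01);
        lasso.fit(&x, &y);

        assert!(lasso.score(&x, &y) > 0.95);
    }

    #[test]
    fn test_elastic_net() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let y = Tensor::from_slice(&[2.1f32, 3.9, 6.2, 7.8, 10.1], &[5]).unwrap();

        // l1_ratio = 0.5 mixes the L1 and L2 penalties equally.
        let mut en = ElasticNet::new(0.01, 0.5);
        en.fit(&x, &y);

        assert!(en.score(&x, &y) > 0.9);
    }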
}