1use ghostflow_core::Tensor;
4
/// Ordinary least-squares linear regression.
///
/// Fitted state lives in `coef_` / `intercept_` (populated by `fit`);
/// the remaining fields are hyperparameters chosen before fitting.
pub struct LinearRegression {
    /// Learned feature weights; `None` until `fit` is called.
    pub coef_: Option<Vec<f32>>,
    /// Learned bias term; `None` until `fit` is called.
    pub intercept_: Option<f32>,
    /// Whether to learn an intercept in addition to the feature weights.
    pub fit_intercept: bool,
    /// Strategy used by `fit` (closed form, batch GD, or SGD).
    pub solver: LinearSolver,
    /// Step size for the iterative solvers (unused by `ClosedForm`).
    pub learning_rate: f32,
    /// Iteration (or epoch) cap for the iterative solvers.
    pub max_iter: usize,
    /// Convergence tolerance on the gradient norm (used by `GradientDescent`).
    pub tol: f32,
}
22
/// Algorithm used by `LinearRegression::fit`.
#[derive(Clone, Copy, Debug)]
pub enum LinearSolver {
    /// Normal equations solved via Cholesky factorization.
    ClosedForm,
    /// Full-batch gradient descent.
    GradientDescent,
    /// Stochastic gradient descent: one sample at a time, reshuffled each epoch.
    SGD,
}
32
impl LinearRegression {
    /// Creates a regressor with defaults: intercept on, closed-form solver,
    /// learning_rate = 0.01, max_iter = 1000, tol = 1e-4.
    pub fn new() -> Self {
        LinearRegression {
            coef_: None,
            intercept_: None,
            fit_intercept: true,
            solver: LinearSolver::ClosedForm,
            learning_rate: 0.01,
            max_iter: 1000,
            tol: 1e-4,
        }
    }

    /// Builder-style setter: enable/disable the intercept term.
    pub fn fit_intercept(mut self, fit: bool) -> Self {
        self.fit_intercept = fit;
        self
    }

    /// Builder-style setter: choose the fitting algorithm.
    pub fn solver(mut self, solver: LinearSolver) -> Self {
        self.solver = solver;
        self
    }

    /// Builder-style setter: step size for the iterative solvers.
    pub fn learning_rate(mut self, lr: f32) -> Self {
        self.learning_rate = lr;
        self
    }

    /// Builder-style setter: iteration/epoch cap for the iterative solvers.
    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    /// Fits the model on `x` (n_samples × n_features, row-major) and targets
    /// `y` (n_samples), dispatching to the configured solver.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        match self.solver {
            LinearSolver::ClosedForm => self.fit_closed_form(x, y),
            LinearSolver::GradientDescent => self.fit_gradient_descent(x, y),
            LinearSolver::SGD => self.fit_sgd(x, y),
        }
    }

    /// Solves the normal equations (XᵀX)w = Xᵀy directly.
    /// The intercept is handled by prepending a constant 1.0 column to X,
    /// so `weights[0]` is the bias when `fit_intercept` is set.
    fn fit_closed_form(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let n_cols = if self.fit_intercept { n_features + 1 } else { n_features };

        // Augmented design matrix: optional leading 1-column + raw features.
        let mut x_aug = Vec::with_capacity(n_samples * n_cols);
        for i in 0..n_samples {
            if self.fit_intercept {
                x_aug.push(1.0);
            }
            for j in 0..n_features {
                x_aug.push(x_data[i * n_features + j]);
            }
        }

        // Gram matrix XᵀX (n_cols × n_cols).
        let mut xtx = vec![0.0f32; n_cols * n_cols];
        for i in 0..n_cols {
            for j in 0..n_cols {
                let mut sum = 0.0f32;
                for k in 0..n_samples {
                    sum += x_aug[k * n_cols + i] * x_aug[k * n_cols + j];
                }
                xtx[i * n_cols + j] = sum;
            }
        }

        // Right-hand side Xᵀy.
        let mut xty = vec![0.0f32; n_cols];
        for i in 0..n_cols {
            let mut sum = 0.0f32;
            for k in 0..n_samples {
                sum += x_aug[k * n_cols + i] * y_data[k];
            }
            xty[i] = sum;
        }

        let weights = self.solve_linear_system(&xtx, &xty, n_cols);

        if self.fit_intercept {
            // First solved coefficient corresponds to the 1-column, i.e. the bias.
            self.intercept_ = Some(weights[0]);
            self.coef_ = Some(weights[1..].to_vec());
        } else {
            self.intercept_ = Some(0.0);
            self.coef_ = Some(weights);
        }
    }

    /// Solves the symmetric positive-definite system `a * x = b` (n×n,
    /// row-major) via Cholesky: A = LLᵀ, forward solve Ly = b, back solve
    /// Lᵀx = y. Guards: a 1e-8 diagonal jitter for conditioning, and
    /// `.max(1e-10)` clamps to avoid dividing by a ~zero pivot.
    fn solve_linear_system(&self, a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
        // Regularize the diagonal slightly so near-singular systems still factor.
        let mut a_reg = a.to_vec();
        for i in 0..n {
            a_reg[i * n + i] += 1e-8;
        }

        let mut l = vec![0.0f32; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;

                if i == j {
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    // Non-positive pivot (not SPD / severe rounding): fall back
                    // to a tiny positive value instead of NaN from sqrt.
                    let val = a_reg[j * n + j] - sum;
                    l[j * n + j] = if val > 0.0 { val.sqrt() } else { 1e-10 };
                } else {
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a_reg[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }

        // Forward substitution: L y = b.
        let mut y = vec![0.0f32; n];
        for i in 0..n {
            let mut sum = 0.0f32;
            for j in 0..i {
                sum += l[i * n + j] * y[j];
            }
            y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
        }

        // Back substitution: Lᵀ x = y (l[j*n+i] reads the transpose in place).
        let mut x = vec![0.0f32; n];
        for i in (0..n).rev() {
            let mut sum = 0.0f32;
            for j in (i + 1)..n {
                sum += l[j * n + i] * x[j];
            }
            x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
        }

        x
    }

    /// Full-batch gradient descent on the mean-squared-error objective.
    /// Stops early when the full gradient's L2 norm drops below `tol`.
    fn fit_gradient_descent(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![0.0f32; n_features];
        let mut bias = 0.0f32;

        for _iter in 0..self.max_iter {
            // Forward pass: current predictions for every sample.
            let mut predictions = vec![0.0f32; n_samples];
            for i in 0..n_samples {
                let mut pred = bias;
                for j in 0..n_features {
                    pred += weights[j] * x_data[i * n_features + j];
                }
                predictions[i] = pred;
            }

            // Accumulate gradients of the squared error.
            let mut grad_w = vec![0.0f32; n_features];
            let mut grad_b = 0.0f32;

            for i in 0..n_samples {
                let error = predictions[i] - y_data[i];
                grad_b += error;
                for j in 0..n_features {
                    grad_w[j] += error * x_data[i * n_features + j];
                }
            }

            // Mean gradient (1/n scaling).
            let scale = 1.0 / n_samples as f32;
            grad_b *= scale;
            for g in &mut grad_w {
                *g *= scale;
            }

            if self.fit_intercept {
                bias -= self.learning_rate * grad_b;
            }
            for j in 0..n_features {
                weights[j] -= self.learning_rate * grad_w[j];
            }

            // NOTE(review): grad_b is included in the convergence norm even
            // when fit_intercept == false (where bias is never updated), which
            // can delay/prevent early stopping in that configuration — confirm
            // whether that is intended.
            let grad_norm: f32 = grad_w.iter().map(|&g| g * g).sum::<f32>() + grad_b * grad_b;
            if grad_norm.sqrt() < self.tol {
                break;
            }
        }

        self.coef_ = Some(weights);
        self.intercept_ = Some(bias);
    }

    /// Stochastic gradient descent: per-sample updates, indices reshuffled at
    /// the start of each epoch.
    ///
    /// NOTE(review): `self.tol` is not consulted here — this always runs the
    /// full `max_iter` epochs. Verify that is the intended behavior.
    fn fit_sgd(&mut self, x: &Tensor, y: &Tensor) {
        use rand::seq::SliceRandom;

        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![0.0f32; n_features];
        let mut bias = 0.0f32;
        let mut indices: Vec<usize> = (0..n_samples).collect();

        for _epoch in 0..self.max_iter {
            indices.shuffle(&mut rand::thread_rng());

            for &i in &indices {
                // Prediction for this single sample.
                let mut pred = bias;
                for j in 0..n_features {
                    pred += weights[j] * x_data[i * n_features + j];
                }

                let error = pred - y_data[i];

                // Immediate (per-sample) gradient step.
                if self.fit_intercept {
                    bias -= self.learning_rate * error;
                }
                for j in 0..n_features {
                    weights[j] -= self.learning_rate * error * x_data[i * n_features + j];
                }
            }
        }

        self.coef_ = Some(weights);
        self.intercept_ = Some(bias);
    }

    /// Predicts targets for `x` (n_samples × n_features), returning a
    /// 1-D tensor of length n_samples.
    ///
    /// # Panics
    /// Panics if the model has not been fitted (`coef_` is `None`).
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.unwrap_or(0.0);

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// R² coefficient of determination: 1 − SS_res / SS_tot.
    /// The `.max(1e-10)` guards against division by zero when `y` is constant.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;

        let ss_res: f32 = pred_data.iter()
            .zip(y_data.iter())
            .map(|(&p, &y)| (y - p).powi(2))
            .sum();

        let ss_tot: f32 = y_data.iter()
            .map(|&y| (y - y_mean).powi(2))
            .sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}
323
impl Default for LinearRegression {
    /// Equivalent to [`LinearRegression::new`].
    fn default() -> Self {
        Self::new()
    }
}
329
330
/// Linear regression with an L2 (ridge) penalty, fitted in closed form.
pub struct Ridge {
    /// Learned feature weights; `None` until `fit` is called.
    pub coef_: Option<Vec<f32>>,
    /// Learned bias term; `None` until `fit` is called.
    pub intercept_: Option<f32>,
    /// L2 penalty strength added to the normal-equation diagonal.
    pub alpha: f32,
    /// Whether to learn an intercept (via mean-centering of the data).
    pub fit_intercept: bool,
    /// Not consulted by the closed-form solver; kept for API symmetry.
    pub max_iter: usize,
    /// Not consulted by the closed-form solver; kept for API symmetry.
    pub tol: f32,
}
340
impl Ridge {
    /// Creates a ridge model with L2 penalty `alpha`;
    /// defaults: intercept on, max_iter = 1000, tol = 1e-4.
    pub fn new(alpha: f32) -> Self {
        Ridge {
            coef_: None,
            intercept_: None,
            alpha,
            fit_intercept: true,
            max_iter: 1000,
            tol: 1e-4,
        }
    }

    /// Fits via the regularized normal equations (XᵀX + αI)w = Xᵀy, solved
    /// with a Cholesky factorization.
    ///
    /// When `fit_intercept` is set, features and targets are mean-centered
    /// first so the penalty does not shrink the intercept; the intercept is
    /// recovered from the means afterwards.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        // Center only when an intercept is requested; otherwise use raw data.
        let (x_centered, y_centered, x_mean, y_mean) = if self.fit_intercept {
            // Per-feature (column) means of the row-major data.
            let x_mean: Vec<f32> = (0..n_features)
                .map(|j| {
                    (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32
                })
                .collect();

            let y_mean = y_data.iter().sum::<f32>() / n_samples as f32;

            let x_centered: Vec<f32> = (0..n_samples)
                .flat_map(|i| {
                    (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>()
                })
                .collect();

            let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();

            (x_centered, y_centered, x_mean, y_mean)
        } else {
            (x_data.to_vec(), y_data.to_vec(), vec![0.0; n_features], 0.0)
        };

        // Gram matrix XᵀX with alpha added on the diagonal (the ridge penalty).
        let mut xtx = vec![0.0f32; n_features * n_features];
        for i in 0..n_features {
            for j in 0..n_features {
                let mut sum = 0.0f32;
                for k in 0..n_samples {
                    sum += x_centered[k * n_features + i] * x_centered[k * n_features + j];
                }
                xtx[i * n_features + j] = sum;
                if i == j {
                    xtx[i * n_features + j] += self.alpha;
                }
            }
        }

        // Right-hand side Xᵀy.
        let mut xty = vec![0.0f32; n_features];
        for i in 0..n_features {
            let mut sum = 0.0f32;
            for k in 0..n_samples {
                sum += x_centered[k * n_features + i] * y_centered[k];
            }
            xty[i] = sum;
        }

        let weights = self.solve_cholesky(&xtx, &xty, n_features);

        self.coef_ = Some(weights.clone());

        if self.fit_intercept {
            // intercept = ȳ − Σ_j w_j · x̄_j (undo the centering).
            let mut intercept = y_mean;
            for j in 0..n_features {
                intercept -= weights[j] * x_mean[j];
            }
            self.intercept_ = Some(intercept);
        } else {
            self.intercept_ = Some(0.0);
        }
    }

    /// Solves the SPD system `a * x = b` (n×n, row-major) via Cholesky:
    /// A = LLᵀ, forward solve Ly = b, back solve Lᵀx = y. The `.max(1e-10)`
    /// clamps guard against non-positive pivots and division by ~zero.
    fn solve_cholesky(&self, a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
        let mut l = vec![0.0f32; n * n];

        for i in 0..n {
            for j in 0..=i {
                let mut sum = 0.0f32;
                if i == j {
                    for k in 0..j {
                        sum += l[j * n + k] * l[j * n + k];
                    }
                    l[j * n + j] = (a[j * n + j] - sum).max(1e-10).sqrt();
                } else {
                    for k in 0..j {
                        sum += l[i * n + k] * l[j * n + k];
                    }
                    l[i * n + j] = (a[i * n + j] - sum) / l[j * n + j].max(1e-10);
                }
            }
        }

        // Forward substitution: L y = b.
        let mut y = vec![0.0f32; n];
        for i in 0..n {
            let mut sum = 0.0f32;
            for j in 0..i {
                sum += l[i * n + j] * y[j];
            }
            y[i] = (b[i] - sum) / l[i * n + i].max(1e-10);
        }

        // Back substitution: Lᵀ x = y (l[j*n+i] reads the transpose in place).
        let mut x = vec![0.0f32; n];
        for i in (0..n).rev() {
            let mut sum = 0.0f32;
            for j in (i + 1)..n {
                sum += l[j * n + i] * x[j];
            }
            x[i] = (y[i] - sum) / l[i * n + i].max(1e-10);
        }

        x
    }

    /// Predicts targets for `x`, returning a 1-D tensor of length n_samples.
    ///
    /// # Panics
    /// Panics if the model has not been fitted (`coef_` is `None`).
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.unwrap_or(0.0);

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let mut pred = intercept;
                for j in 0..n_features {
                    pred += coef[j] * x_data[i * n_features + j];
                }
                pred
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// R² coefficient of determination: 1 − SS_res / SS_tot, with a guard
    /// against a constant-`y` zero denominator.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}
498
499
/// Linear regression with an L1 penalty, fitted by cyclic coordinate descent.
pub struct Lasso {
    /// Learned feature weights; `None` until `fit` is called.
    pub coef_: Option<Vec<f32>>,
    /// Learned bias term; `None` until `fit` is called.
    pub intercept_: Option<f32>,
    /// L1 penalty strength (soft-threshold level is alpha · n_samples).
    pub alpha: f32,
    /// Whether to learn an intercept (via mean-centering of the data).
    pub fit_intercept: bool,
    /// Maximum number of coordinate-descent sweeps.
    pub max_iter: usize,
    /// Convergence tolerance on the largest per-coordinate weight change.
    pub tol: f32,
}
509
510impl Lasso {
511 pub fn new(alpha: f32) -> Self {
512 Lasso {
513 coef_: None,
514 intercept_: None,
515 alpha,
516 fit_intercept: true,
517 max_iter: 1000,
518 tol: 1e-4,
519 }
520 }
521
522 fn soft_threshold(x: f32, lambda: f32) -> f32 {
524 if x > lambda {
525 x - lambda
526 } else if x < -lambda {
527 x + lambda
528 } else {
529 0.0
530 }
531 }
532
533 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
534 let x_data = x.data_f32();
535 let y_data = y.data_f32();
536 let n_samples = x.dims()[0];
537 let n_features = x.dims()[1];
538
539 let x_mean: Vec<f32> = (0..n_features)
541 .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
542 .collect();
543
544 let y_mean = if self.fit_intercept {
545 y_data.iter().sum::<f32>() / n_samples as f32
546 } else {
547 0.0
548 };
549
550 let x_centered: Vec<f32> = (0..n_samples)
551 .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
552 .collect();
553
554 let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
555
556 let mut x_sq_sum = vec![0.0f32; n_features];
558 for j in 0..n_features {
559 for i in 0..n_samples {
560 x_sq_sum[j] += x_centered[i * n_features + j].powi(2);
561 }
562 }
563
564 let mut weights = vec![0.0f32; n_features];
566 let mut residuals = y_centered.clone();
567
568 for _iter in 0..self.max_iter {
570 let weights_old = weights.clone();
571
572 for j in 0..n_features {
573 for i in 0..n_samples {
575 residuals[i] += weights[j] * x_centered[i * n_features + j];
576 }
577
578 let mut rho = 0.0f32;
580 for i in 0..n_samples {
581 rho += x_centered[i * n_features + j] * residuals[i];
582 }
583
584 let lambda = self.alpha * n_samples as f32;
586 weights[j] = Self::soft_threshold(rho, lambda) / x_sq_sum[j].max(1e-10);
587
588 for i in 0..n_samples {
590 residuals[i] -= weights[j] * x_centered[i * n_features + j];
591 }
592 }
593
594 let max_change: f32 = weights.iter()
596 .zip(weights_old.iter())
597 .map(|(&w, &w_old)| (w - w_old).abs())
598 .fold(0.0f32, f32::max);
599
600 if max_change < self.tol {
601 break;
602 }
603 }
604
605 self.coef_ = Some(weights.clone());
606
607 if self.fit_intercept {
609 let mut intercept = y_mean;
610 for j in 0..n_features {
611 intercept -= weights[j] * x_mean[j];
612 }
613 self.intercept_ = Some(intercept);
614 } else {
615 self.intercept_ = Some(0.0);
616 }
617 }
618
619 pub fn predict(&self, x: &Tensor) -> Tensor {
620 let x_data = x.data_f32();
621 let n_samples = x.dims()[0];
622 let n_features = x.dims()[1];
623
624 let coef = self.coef_.as_ref().expect("Model not fitted");
625 let intercept = self.intercept_.unwrap_or(0.0);
626
627 let predictions: Vec<f32> = (0..n_samples)
628 .map(|i| {
629 let mut pred = intercept;
630 for j in 0..n_features {
631 pred += coef[j] * x_data[i * n_features + j];
632 }
633 pred
634 })
635 .collect();
636
637 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
638 }
639
640 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
641 let predictions = self.predict(x);
642 let pred_data = predictions.data_f32();
643 let y_data = y.data_f32();
644
645 let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
646 let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
647 let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
648
649 1.0 - ss_res / ss_tot.max(1e-10)
650 }
651}
652
653
/// Linear regression with a mixed L1/L2 penalty, fitted by coordinate descent.
pub struct ElasticNet {
    /// Learned feature weights; `None` until `fit` is called.
    pub coef_: Option<Vec<f32>>,
    /// Learned bias term; `None` until `fit` is called.
    pub intercept_: Option<f32>,
    /// Overall penalty strength.
    pub alpha: f32,
    /// Mix between L1 and L2: 1.0 = pure Lasso, 0.0 = pure Ridge
    /// (clamped to [0, 1] by `new`).
    pub l1_ratio: f32,
    /// Whether to learn an intercept (via mean-centering of the data).
    pub fit_intercept: bool,
    /// Maximum number of coordinate-descent sweeps.
    pub max_iter: usize,
    /// Convergence tolerance on the largest per-coordinate weight change.
    pub tol: f32,
}
664
665impl ElasticNet {
666 pub fn new(alpha: f32, l1_ratio: f32) -> Self {
667 ElasticNet {
668 coef_: None,
669 intercept_: None,
670 alpha,
671 l1_ratio: l1_ratio.clamp(0.0, 1.0),
672 fit_intercept: true,
673 max_iter: 1000,
674 tol: 1e-4,
675 }
676 }
677
678 fn soft_threshold(x: f32, lambda: f32) -> f32 {
679 if x > lambda {
680 x - lambda
681 } else if x < -lambda {
682 x + lambda
683 } else {
684 0.0
685 }
686 }
687
688 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
689 let x_data = x.data_f32();
690 let y_data = y.data_f32();
691 let n_samples = x.dims()[0];
692 let n_features = x.dims()[1];
693
694 let l1_reg = self.alpha * self.l1_ratio * n_samples as f32;
695 let l2_reg = self.alpha * (1.0 - self.l1_ratio) * n_samples as f32;
696
697 let x_mean: Vec<f32> = (0..n_features)
699 .map(|j| (0..n_samples).map(|i| x_data[i * n_features + j]).sum::<f32>() / n_samples as f32)
700 .collect();
701
702 let y_mean = if self.fit_intercept {
703 y_data.iter().sum::<f32>() / n_samples as f32
704 } else {
705 0.0
706 };
707
708 let x_centered: Vec<f32> = (0..n_samples)
709 .flat_map(|i| (0..n_features).map(|j| x_data[i * n_features + j] - x_mean[j]).collect::<Vec<_>>())
710 .collect();
711
712 let y_centered: Vec<f32> = y_data.iter().map(|&y| y - y_mean).collect();
713
714 let mut x_sq_sum = vec![0.0f32; n_features];
716 for j in 0..n_features {
717 for i in 0..n_samples {
718 x_sq_sum[j] += x_centered[i * n_features + j].powi(2);
719 }
720 }
721
722 let mut weights = vec![0.0f32; n_features];
723 let mut residuals = y_centered.clone();
724
725 for _iter in 0..self.max_iter {
726 let weights_old = weights.clone();
727
728 for j in 0..n_features {
729 for i in 0..n_samples {
730 residuals[i] += weights[j] * x_centered[i * n_features + j];
731 }
732
733 let mut rho = 0.0f32;
734 for i in 0..n_samples {
735 rho += x_centered[i * n_features + j] * residuals[i];
736 }
737
738 let denom = x_sq_sum[j] + l2_reg;
740 weights[j] = Self::soft_threshold(rho, l1_reg) / denom.max(1e-10);
741
742 for i in 0..n_samples {
743 residuals[i] -= weights[j] * x_centered[i * n_features + j];
744 }
745 }
746
747 let max_change: f32 = weights.iter()
748 .zip(weights_old.iter())
749 .map(|(&w, &w_old)| (w - w_old).abs())
750 .fold(0.0f32, f32::max);
751
752 if max_change < self.tol {
753 break;
754 }
755 }
756
757 self.coef_ = Some(weights.clone());
758
759 if self.fit_intercept {
760 let mut intercept = y_mean;
761 for j in 0..n_features {
762 intercept -= weights[j] * x_mean[j];
763 }
764 self.intercept_ = Some(intercept);
765 } else {
766 self.intercept_ = Some(0.0);
767 }
768 }
769
770 pub fn predict(&self, x: &Tensor) -> Tensor {
771 let x_data = x.data_f32();
772 let n_samples = x.dims()[0];
773 let n_features = x.dims()[1];
774
775 let coef = self.coef_.as_ref().expect("Model not fitted");
776 let intercept = self.intercept_.unwrap_or(0.0);
777
778 let predictions: Vec<f32> = (0..n_samples)
779 .map(|i| {
780 let mut pred = intercept;
781 for j in 0..n_features {
782 pred += coef[j] * x_data[i * n_features + j];
783 }
784 pred
785 })
786 .collect();
787
788 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
789 }
790
791 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
792 let predictions = self.predict(x);
793 let pred_data = predictions.data_f32();
794 let y_data = y.data_f32();
795
796 let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
797 let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
798 let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
799
800 1.0 - ss_res / ss_tot.max(1e-10)
801 }
802}
803
804
/// Logistic-regression classifier (binary and multiclass).
///
/// Labels are expected as f32-encoded class indices 0..k-1; the class count
/// is inferred during `fit` as max(label) + 1.
pub struct LogisticRegression {
    /// Per-class weight vectors; `None` until `fit` is called.
    /// Binary models store a single row.
    pub coef_: Option<Vec<Vec<f32>>>,
    /// Per-class bias terms; `None` until `fit` is called.
    pub intercept_: Option<Vec<f32>>,
    /// Regularization type applied during fitting.
    pub penalty: Penalty,
    /// Inverse regularization strength (the gradient uses 1/c).
    pub c: f32,
    /// Whether to learn bias terms.
    pub fit_intercept: bool,
    /// Gradient-descent iteration cap.
    pub max_iter: usize,
    /// Convergence tolerance on the weight-gradient norm.
    pub tol: f32,
    /// Multiclass strategy (one-vs-rest or multinomial softmax).
    pub multi_class: MultiClass,
    // Number of classes inferred by `fit`; 0 before fitting.
    n_classes: usize,
}
817
/// Regularization penalty for `LogisticRegression`.
#[derive(Clone, Copy, Debug)]
pub enum Penalty {
    /// No regularization.
    None,
    /// L1 (lasso-style) penalty.
    L1,
    /// L2 (ridge-style) penalty.
    L2,
    /// Mixed penalty with the given L1 ratio.
    /// NOTE(review): the ratio is not consulted by the visible fit paths —
    /// confirm intended handling before relying on it.
    ElasticNet(f32),
}
825
/// Strategy used by `LogisticRegression` when more than two classes exist.
#[derive(Clone, Copy, Debug)]
pub enum MultiClass {
    /// One-vs-rest: one binary classifier per class.
    OvR,
    /// Single multinomial (softmax) model over all classes.
    Multinomial,
}
833
impl LogisticRegression {
    /// Defaults: L2 penalty, C = 1.0, intercept on, max_iter = 100,
    /// tol = 1e-4, one-vs-rest multiclass strategy.
    pub fn new() -> Self {
        LogisticRegression {
            coef_: None,
            intercept_: None,
            penalty: Penalty::L2,
            c: 1.0,
            fit_intercept: true,
            max_iter: 100,
            tol: 1e-4,
            multi_class: MultiClass::OvR,
            n_classes: 0,
        }
    }

    /// Builder-style setter for the penalty type.
    pub fn penalty(mut self, penalty: Penalty) -> Self {
        self.penalty = penalty;
        self
    }

    /// Builder-style setter for the inverse regularization strength.
    pub fn c(mut self, c: f32) -> Self {
        self.c = c;
        self
    }

    /// Builder-style setter for the iteration cap.
    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    /// Builder-style setter for the multiclass strategy.
    pub fn multi_class(mut self, mc: MultiClass) -> Self {
        self.multi_class = mc;
        self
    }

    /// Numerically stable logistic function: each branch only exponentiates a
    /// non-positive argument, avoiding overflow for large |x|.
    fn sigmoid(x: f32) -> f32 {
        if x >= 0.0 {
            1.0 / (1.0 + (-x).exp())
        } else {
            let exp_x = x.exp();
            exp_x / (1.0 + exp_x)
        }
    }

    /// Fits the classifier. The class count is inferred as max(label) + 1;
    /// two classes go to the binary path, otherwise the configured
    /// multiclass strategy is used.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let y_data = y.data_f32();
        self.n_classes = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        if self.n_classes == 2 {
            self.fit_binary(x, y);
        } else {
            match self.multi_class {
                MultiClass::OvR => self.fit_ovr(x, y),
                MultiClass::Multinomial => self.fit_multinomial(x, y),
            }
        }
    }

    /// Binary logistic regression via full-batch gradient descent with a
    /// fixed step size of 0.1 (`learning_rate` is not a field here).
    ///
    /// NOTE(review): the penalty gradient is added before the 1/n scaling, so
    /// the effective regularization is (1/C)/n_samples — confirm this is the
    /// intended convention. The convergence check uses only the weight
    /// gradient, not the bias gradient.
    fn fit_binary(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![0.0f32; n_features];
        let mut bias = 0.0f32;

        // Regularization strength (inverse of C).
        let reg = 1.0 / self.c;

        for _iter in 0..self.max_iter {
            let mut grad_w = vec![0.0f32; n_features];
            let mut grad_b = 0.0f32;

            // Accumulate cross-entropy gradients: error = sigmoid(z) − y.
            for i in 0..n_samples {
                let mut z = bias;
                for j in 0..n_features {
                    z += weights[j] * x_data[i * n_features + j];
                }
                let p = Self::sigmoid(z);
                let error = p - y_data[i];

                grad_b += error;
                for j in 0..n_features {
                    grad_w[j] += error * x_data[i * n_features + j];
                }
            }

            // Penalty gradient (bias is not regularized).
            match self.penalty {
                Penalty::L2 => {
                    for j in 0..n_features {
                        grad_w[j] += reg * weights[j];
                    }
                }
                Penalty::L1 => {
                    for j in 0..n_features {
                        grad_w[j] += reg * weights[j].signum();
                    }
                }
                _ => {}
            }

            // Mean gradient (1/n scaling).
            let scale = 1.0 / n_samples as f32;
            grad_b *= scale;
            for g in &mut grad_w {
                *g *= scale;
            }

            // Fixed step size.
            let lr = 0.1;
            if self.fit_intercept {
                bias -= lr * grad_b;
            }
            for j in 0..n_features {
                weights[j] -= lr * grad_w[j];
            }

            let grad_norm: f32 = grad_w.iter().map(|&g| g * g).sum::<f32>().sqrt();
            if grad_norm < self.tol {
                break;
            }
        }

        self.coef_ = Some(vec![weights]);
        self.intercept_ = Some(vec![bias]);
    }

    /// One-vs-rest: trains one binary classifier per class on a relabeled
    /// (this-class = 1, rest = 0) target, collecting each row of weights.
    fn fit_ovr(&mut self, x: &Tensor, y: &Tensor) {
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let _n_features = x.dims()[1];

        let mut all_coef = Vec::with_capacity(self.n_classes);
        let mut all_intercept = Vec::with_capacity(self.n_classes);

        for c in 0..self.n_classes {
            // Binary relabeling for class c.
            let y_binary: Vec<f32> = y_data.iter()
                .map(|&yi| if yi as usize == c { 1.0 } else { 0.0 })
                .collect();
            let y_tensor = Tensor::from_slice(&y_binary, &[n_samples]).unwrap();

            // Sub-model inherits this model's hyperparameters.
            let mut binary_lr = LogisticRegression::new()
                .penalty(self.penalty)
                .c(self.c)
                .max_iter(self.max_iter);
            binary_lr.n_classes = 2;
            binary_lr.fit_binary(x, &y_tensor);

            all_coef.push(binary_lr.coef_.unwrap().into_iter().next().unwrap());
            all_intercept.push(binary_lr.intercept_.unwrap().into_iter().next().unwrap());
        }

        self.coef_ = Some(all_coef);
        self.intercept_ = Some(all_intercept);
    }

    /// Multinomial (softmax) regression via full-batch gradient descent with
    /// a fixed step size of 0.1.
    ///
    /// NOTE(review): this path always applies an L2-style term
    /// (`reg * weights`) regardless of `self.penalty` — L1 / None settings
    /// are ignored here, unlike in `fit_binary`. Confirm whether that is
    /// intentional.
    fn fit_multinomial(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut weights = vec![vec![0.0f32; n_features]; self.n_classes];
        let mut biases = vec![0.0f32; self.n_classes];

        let reg = 1.0 / self.c;
        let lr = 0.1;

        for _iter in 0..self.max_iter {
            // Forward pass: softmax probabilities per sample.
            let mut probs = vec![vec![0.0f32; self.n_classes]; n_samples];

            for i in 0..n_samples {
                let mut max_z = f32::NEG_INFINITY;
                let mut z = vec![0.0f32; self.n_classes];

                for c in 0..self.n_classes {
                    z[c] = biases[c];
                    for j in 0..n_features {
                        z[c] += weights[c][j] * x_data[i * n_features + j];
                    }
                    max_z = max_z.max(z[c]);
                }

                // Subtract max_z before exponentiating for numerical stability.
                let mut sum_exp = 0.0f32;
                for c in 0..self.n_classes {
                    probs[i][c] = (z[c] - max_z).exp();
                    sum_exp += probs[i][c];
                }
                for c in 0..self.n_classes {
                    probs[i][c] /= sum_exp;
                }
            }

            // Cross-entropy gradients: error = p − one_hot(true_class).
            let mut grad_w = vec![vec![0.0f32; n_features]; self.n_classes];
            let mut grad_b = vec![0.0f32; self.n_classes];

            for i in 0..n_samples {
                let true_class = y_data[i] as usize;
                for c in 0..self.n_classes {
                    let target = if c == true_class { 1.0 } else { 0.0 };
                    let error = probs[i][c] - target;

                    grad_b[c] += error;
                    for j in 0..n_features {
                        grad_w[c][j] += error * x_data[i * n_features + j];
                    }
                }
            }

            // Mean gradient plus the L2 term (bias not regularized).
            let scale = 1.0 / n_samples as f32;
            for c in 0..self.n_classes {
                grad_b[c] *= scale;
                for j in 0..n_features {
                    grad_w[c][j] = grad_w[c][j] * scale + reg * weights[c][j];
                }
            }

            // Gradient step; no early-stopping check in this path.
            for c in 0..self.n_classes {
                if self.fit_intercept {
                    biases[c] -= lr * grad_b[c];
                }
                for j in 0..n_features {
                    weights[c][j] -= lr * grad_w[c][j];
                }
            }
        }

        self.coef_ = Some(weights);
        self.intercept_ = Some(biases);
    }

    /// Predicts class indices (as f32) for `x`: argmax over `predict_proba`.
    ///
    /// # Panics
    /// Panics if the model has not been fitted.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let proba = self.predict_proba(x);
        let proba_data = proba.data_f32();
        let n_samples = x.dims()[0];

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let start = i * self.n_classes;
                let sample_probs = &proba_data[start..start + self.n_classes];
                sample_probs.iter()
                    .enumerate()
                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                    .map(|(idx, _)| idx as f32)
                    .unwrap_or(0.0)
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// Class probabilities for `x` as an (n_samples, n_classes) tensor.
    /// Binary models emit [1−p, p]; multiclass models apply a max-shifted
    /// softmax over per-class scores.
    ///
    /// # Panics
    /// Panics if the model has not been fitted.
    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let coef = self.coef_.as_ref().expect("Model not fitted");
        let intercept = self.intercept_.as_ref().expect("Model not fitted");

        let mut probs = Vec::with_capacity(n_samples * self.n_classes);

        for i in 0..n_samples {
            if self.n_classes == 2 {
                // Single sigmoid score; p = P(class 1).
                let mut z = intercept[0];
                for j in 0..n_features {
                    z += coef[0][j] * x_data[i * n_features + j];
                }
                let p = Self::sigmoid(z);
                probs.push(1.0 - p);
                probs.push(p);
            } else {
                let mut z = vec![0.0f32; self.n_classes];
                let mut max_z = f32::NEG_INFINITY;

                for c in 0..self.n_classes {
                    z[c] = intercept[c];
                    for j in 0..n_features {
                        z[c] += coef[c][j] * x_data[i * n_features + j];
                    }
                    max_z = max_z.max(z[c]);
                }

                // Stable softmax: shift by max_z before exponentiating.
                let mut sum_exp = 0.0f32;
                for c in 0..self.n_classes {
                    z[c] = (z[c] - max_z).exp();
                    sum_exp += z[c];
                }

                for c in 0..self.n_classes {
                    probs.push(z[c] / sum_exp);
                }
            }
        }

        Tensor::from_slice(&probs, &[n_samples, self.n_classes]).unwrap()
    }

    /// Classification accuracy on (`x`, `y`): fraction of predictions within
    /// 0.5 of the true (f32-encoded) label.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}
1152
impl Default for LogisticRegression {
    /// Equivalent to [`LogisticRegression::new`].
    fn default() -> Self {
        Self::new()
    }
}
1158
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_linear_regression() {
        // Exactly linear data (y = 2x): the closed-form solver should fit it
        // almost perfectly.
        let features = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let targets = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        let mut model = LinearRegression::new();
        model.fit(&features, &targets);

        let _predictions = model.predict(&features);
        let r2 = model.score(&features, &targets);

        assert!(r2 > 0.99, "R² should be close to 1.0");
    }

    #[test]
    fn test_ridge() {
        // Noisy linear data; ridge should still explain most of the variance.
        let features = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let targets = Tensor::from_slice(&[2.1f32, 3.9, 6.2, 7.8, 10.1], &[5]).unwrap();

        let mut model = Ridge::new(1.0);
        model.fit(&features, &targets);

        assert!(model.score(&features, &targets) > 0.95);
    }

    #[test]
    fn test_logistic_regression() {
        // AND-gate labels over two binary features.
        let features = Tensor::from_slice(
            &[0.0f32, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0],
            &[4, 2],
        )
        .unwrap();
        let labels = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut model = LogisticRegression::new().max_iter(1000);
        model.fit(&features, &labels);

        let predictions = model.predict(&features);
        assert_eq!(predictions.dims(), &[4]);
    }
}
1205
1206