1use ghostflow_core::Tensor;
4use rand::prelude::*;
5
/// Linear classifier trained with stochastic gradient descent,
/// mirroring scikit-learn's `SGDClassifier`.
pub struct SGDClassifier {
    /// Loss function minimized at each update.
    pub loss: SGDLoss,
    /// Regularization penalty applied after each gradient step.
    pub penalty: Penalty,
    /// Regularization strength multiplier.
    pub alpha: f32,
    /// L1 share of the elastic-net penalty (used only with `Penalty::ElasticNet`).
    pub l1_ratio: f32,
    /// Maximum number of passes (epochs) over the training data.
    pub max_iter: usize,
    /// Stop early once an epoch's mean absolute loss-gradient drops below this.
    pub tol: f32,
    /// Learning-rate schedule.
    pub learning_rate: LearningRate,
    /// Base learning rate for the constant / inv-scaling / adaptive schedules.
    pub eta0: f32,
    /// Exponent of the inverse-scaling schedule.
    pub power_t: f32,
    /// Shuffle sample order at the start of every epoch.
    pub shuffle: bool,
    /// Reuse the previously fitted solution as the starting point of `fit`.
    pub warm_start: bool,
    /// Optional per-class weights.
    /// NOTE(review): not referenced anywhere in the fit path — confirm intent.
    pub class_weight: Option<Vec<f32>>,
    // Learned weights: one row per class (a single row for binary problems).
    coef_: Option<Vec<Vec<f32>>>,
    // Learned bias terms, parallel to `coef_`.
    intercept_: Option<Vec<f32>>,
    // Sorted distinct class labels seen during the last `fit`.
    classes_: Vec<i32>,
    // Number of epochs actually run by the last `fit`.
    n_iter_: usize,
}
25
/// Loss functions supported by the SGD classifier.
#[derive(Clone, Copy)]
pub enum SGDLoss {
    /// Linear SVM hinge loss.
    Hinge,
    /// Logistic-regression log loss.
    Log,
    /// Smoothed hinge loss, more tolerant of outliers.
    ModifiedHuber,
    /// Quadratically penalized hinge loss.
    SquaredHinge,
    /// Perceptron criterion: only misclassified samples contribute.
    Perceptron,
}
34
/// Regularization penalty applied to the coefficient vector.
#[derive(Clone, Copy)]
pub enum Penalty {
    /// Squared-magnitude (ridge) shrinkage.
    L2,
    /// Absolute-value (lasso) shrinkage; induces sparsity.
    L1,
    /// Convex mix of L1 and L2, controlled by `l1_ratio`.
    ElasticNet,
    /// No regularization.
    None,
}
42
/// Learning-rate schedule for SGD updates.
#[derive(Clone, Copy)]
pub enum LearningRate {
    /// Fixed step size `eta0`.
    Constant,
    /// 1 / (alpha * (t + 1)), shrinking with the update count.
    Optimal,
    /// eta0 / (t + 1)^power_t.
    InvScaling,
    /// Uses `eta0`.
    /// NOTE(review): no adaptive decay is implemented — behaves like Constant.
    Adaptive,
}
50
51impl SGDClassifier {
52 pub fn new() -> Self {
53 SGDClassifier {
54 loss: SGDLoss::Hinge,
55 penalty: Penalty::L2,
56 alpha: 0.0001,
57 l1_ratio: 0.15,
58 max_iter: 1000,
59 tol: 1e-3,
60 learning_rate: LearningRate::Optimal,
61 eta0: 0.0,
62 power_t: 0.5,
63 shuffle: true,
64 warm_start: false,
65 class_weight: None,
66 coef_: None,
67 intercept_: None,
68 classes_: Vec::new(),
69 n_iter_: 0,
70 }
71 }
72
73 pub fn loss(mut self, l: SGDLoss) -> Self { self.loss = l; self }
74 pub fn penalty(mut self, p: Penalty) -> Self { self.penalty = p; self }
75 pub fn alpha(mut self, a: f32) -> Self { self.alpha = a; self }
76 pub fn max_iter(mut self, n: usize) -> Self { self.max_iter = n; self }
77
78 fn compute_loss_gradient(&self, margin: f32, y: f32) -> f32 {
79 match self.loss {
80 SGDLoss::Hinge => {
81 if y * margin < 1.0 { -y } else { 0.0 }
82 }
83 SGDLoss::Log => {
84 let p = 1.0 / (1.0 + (-margin).exp());
85 p - (y + 1.0) / 2.0 }
87 SGDLoss::ModifiedHuber => {
88 let z = y * margin;
89 if z >= 1.0 { 0.0 }
90 else if z >= -1.0 { -2.0 * y * (1.0 - z) }
91 else { -4.0 * y }
92 }
93 SGDLoss::SquaredHinge => {
94 let z = y * margin;
95 if z < 1.0 { -2.0 * y * (1.0 - z) } else { 0.0 }
96 }
97 SGDLoss::Perceptron => {
98 if y * margin <= 0.0 { -y } else { 0.0 }
99 }
100 }
101 }
102
103 fn get_learning_rate(&self, t: usize) -> f32 {
104 match self.learning_rate {
105 LearningRate::Constant => self.eta0,
106 LearningRate::Optimal => 1.0 / (self.alpha * (t as f32 + 1.0)),
107 LearningRate::InvScaling => self.eta0 / (t as f32 + 1.0).powf(self.power_t),
108 LearningRate::Adaptive => self.eta0,
109 }
110 }
111
112 fn apply_penalty(&self, coef: &mut [f32], lr: f32) {
113 match self.penalty {
114 Penalty::L2 => {
115 for c in coef.iter_mut() {
116 *c *= 1.0 - lr * self.alpha;
117 }
118 }
119 Penalty::L1 => {
120 for c in coef.iter_mut() {
121 let sign = c.signum();
122 *c = (*c - lr * self.alpha * sign).max(0.0) * sign;
123 }
124 }
125 Penalty::ElasticNet => {
126 let l1_penalty = self.alpha * self.l1_ratio;
127 let l2_penalty = self.alpha * (1.0 - self.l1_ratio);
128 for c in coef.iter_mut() {
129 *c *= 1.0 - lr * l2_penalty;
130 let sign = c.signum();
131 *c = (*c - lr * l1_penalty * sign).max(0.0) * sign;
132 }
133 }
134 Penalty::None => {}
135 }
136 }
137
138 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
139 let x_data = x.data_f32();
140 let y_data = y.data_f32();
141 let n_samples = x.dims()[0];
142 let n_features = x.dims()[1];
143
144 let mut classes: Vec<i32> = y_data.iter().map(|&v| v as i32).collect();
146 classes.sort();
147 classes.dedup();
148 self.classes_ = classes.clone();
149
150 let n_classes = self.classes_.len();
151 let mut rng = thread_rng();
152
153 let mut coef = if self.warm_start && self.coef_.is_some() {
155 self.coef_.clone().unwrap()
156 } else if n_classes == 2 {
157 vec![vec![0.0f32; n_features]]
158 } else {
159 vec![vec![0.0f32; n_features]; n_classes]
160 };
161
162 let mut intercept = if self.warm_start && self.intercept_.is_some() {
163 self.intercept_.clone().unwrap()
164 } else if n_classes == 2 {
165 vec![0.0f32]
166 } else {
167 vec![0.0f32; n_classes]
168 };
169
170 let mut indices: Vec<usize> = (0..n_samples).collect();
172 let mut t = 0usize;
173
174 for epoch in 0..self.max_iter {
175 if self.shuffle {
176 indices.shuffle(&mut rng);
177 }
178
179 let mut total_loss = 0.0f32;
180
181 for &i in &indices {
182 let xi = &x_data[i * n_features..(i + 1) * n_features];
183 let yi = y_data[i] as i32;
184
185 let lr = self.get_learning_rate(t);
186 t += 1;
187
188 if n_classes == 2 {
189 let y_signed = if yi == self.classes_[1] { 1.0f32 } else { -1.0f32 };
191
192 let mut margin = intercept[0];
193 for j in 0..n_features {
194 margin += coef[0][j] * xi[j];
195 }
196
197 let grad = self.compute_loss_gradient(margin, y_signed);
198 total_loss += grad.abs();
199
200 for j in 0..n_features {
202 coef[0][j] -= lr * grad * xi[j];
203 }
204 intercept[0] -= lr * grad;
205
206 self.apply_penalty(&mut coef[0], lr);
207 } else {
208 for k in 0..n_classes {
210 let y_signed = if yi == self.classes_[k] { 1.0f32 } else { -1.0f32 };
211
212 let mut margin = intercept[k];
213 for j in 0..n_features {
214 margin += coef[k][j] * xi[j];
215 }
216
217 let grad = self.compute_loss_gradient(margin, y_signed);
218 total_loss += grad.abs();
219
220 for j in 0..n_features {
221 coef[k][j] -= lr * grad * xi[j];
222 }
223 intercept[k] -= lr * grad;
224
225 self.apply_penalty(&mut coef[k], lr);
226 }
227 }
228 }
229
230 self.n_iter_ = epoch + 1;
231
232 if total_loss / (n_samples as f32) < self.tol {
233 break;
234 }
235 }
236
237 self.coef_ = Some(coef);
238 self.intercept_ = Some(intercept);
239 }
240
241 pub fn predict(&self, x: &Tensor) -> Tensor {
242 let x_data = x.data_f32();
243 let n_samples = x.dims()[0];
244 let n_features = x.dims()[1];
245
246 let coef = self.coef_.as_ref().expect("Model not fitted");
247 let intercept = self.intercept_.as_ref().unwrap();
248
249 let predictions: Vec<f32> = (0..n_samples)
250 .map(|i| {
251 let xi = &x_data[i * n_features..(i + 1) * n_features];
252
253 if self.classes_.len() == 2 {
254 let mut score = intercept[0];
255 for j in 0..n_features {
256 score += coef[0][j] * xi[j];
257 }
258 if score >= 0.0 { self.classes_[1] as f32 } else { self.classes_[0] as f32 }
259 } else {
260 let mut best_score = f32::NEG_INFINITY;
261 let mut best_class = self.classes_[0];
262 for k in 0..self.classes_.len() {
263 let mut score = intercept[k];
264 for j in 0..n_features {
265 score += coef[k][j] * xi[j];
266 }
267 if score > best_score {
268 best_score = score;
269 best_class = self.classes_[k];
270 }
271 }
272 best_class as f32
273 }
274 })
275 .collect();
276
277 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
278 }
279
280 pub fn decision_function(&self, x: &Tensor) -> Tensor {
281 let x_data = x.data_f32();
282 let n_samples = x.dims()[0];
283 let n_features = x.dims()[1];
284
285 let coef = self.coef_.as_ref().expect("Model not fitted");
286 let intercept = self.intercept_.as_ref().unwrap();
287
288 if self.classes_.len() == 2 {
289 let scores: Vec<f32> = (0..n_samples)
290 .map(|i| {
291 let xi = &x_data[i * n_features..(i + 1) * n_features];
292 let mut score = intercept[0];
293 for j in 0..n_features {
294 score += coef[0][j] * xi[j];
295 }
296 score
297 })
298 .collect();
299 Tensor::from_slice(&scores, &[n_samples]).unwrap()
300 } else {
301 let n_classes = self.classes_.len();
302 let scores: Vec<f32> = (0..n_samples)
303 .flat_map(|i| {
304 let xi = &x_data[i * n_features..(i + 1) * n_features];
305 (0..n_classes).map(move |k| {
306 let mut score = intercept[k];
307 for j in 0..n_features {
308 score += coef[k][j] * xi[j];
309 }
310 score
311 })
312 })
313 .collect();
314 Tensor::from_slice(&scores, &[n_samples, n_classes]).unwrap()
315 }
316 }
317}
318
319impl Default for SGDClassifier {
320 fn default() -> Self { Self::new() }
321}
322
/// Linear regressor trained with stochastic gradient descent,
/// mirroring scikit-learn's `SGDRegressor`.
pub struct SGDRegressor {
    /// Loss function minimized at each update.
    pub loss: SGDRegressorLoss,
    /// Regularization penalty applied after each gradient step.
    pub penalty: Penalty,
    /// Regularization strength multiplier.
    pub alpha: f32,
    /// L1 share of the elastic-net penalty (used only with `Penalty::ElasticNet`).
    pub l1_ratio: f32,
    /// Maximum number of passes (epochs) over the training data.
    pub max_iter: usize,
    /// Stop early once an epoch's mean absolute loss-gradient drops below this.
    pub tol: f32,
    /// Learning-rate schedule.
    pub learning_rate: LearningRate,
    /// Base learning rate for the constant / inv-scaling / adaptive schedules.
    pub eta0: f32,
    /// Exponent of the inverse-scaling schedule.
    pub power_t: f32,
    /// Width of the insensitive tube / Huber transition point.
    pub epsilon: f32,
    /// Shuffle sample order at the start of every epoch.
    pub shuffle: bool,
    // Learned weight vector, one entry per feature.
    coef_: Option<Vec<f32>>,
    // Learned bias term.
    intercept_: f32,
    // Number of epochs actually run by the last `fit`.
    n_iter_: usize,
}
340
/// Loss functions supported by the SGD regressor.
#[derive(Clone, Copy)]
pub enum SGDRegressorLoss {
    /// Ordinary least squares.
    SquaredError,
    /// Quadratic for small residuals, linear beyond `epsilon` (outlier-robust).
    Huber,
    /// Zero inside the `epsilon` tube, linear outside (SVR-style).
    EpsilonInsensitive,
    /// Zero inside the `epsilon` tube, quadratic outside.
    SquaredEpsilonInsensitive,
}
348
349impl SGDRegressor {
350 pub fn new() -> Self {
351 SGDRegressor {
352 loss: SGDRegressorLoss::SquaredError,
353 penalty: Penalty::L2,
354 alpha: 0.0001,
355 l1_ratio: 0.15,
356 max_iter: 1000,
357 tol: 1e-3,
358 learning_rate: LearningRate::InvScaling,
359 eta0: 0.01,
360 power_t: 0.25,
361 epsilon: 0.1,
362 shuffle: true,
363 coef_: None,
364 intercept_: 0.0,
365 n_iter_: 0,
366 }
367 }
368
369 pub fn loss(mut self, l: SGDRegressorLoss) -> Self { self.loss = l; self }
370 pub fn penalty(mut self, p: Penalty) -> Self { self.penalty = p; self }
371 pub fn alpha(mut self, a: f32) -> Self { self.alpha = a; self }
372
373 fn compute_loss_gradient(&self, pred: f32, y: f32) -> f32 {
374 let residual = pred - y;
375 match self.loss {
376 SGDRegressorLoss::SquaredError => residual,
377 SGDRegressorLoss::Huber => {
378 if residual.abs() <= self.epsilon {
379 residual
380 } else {
381 self.epsilon * residual.signum()
382 }
383 }
384 SGDRegressorLoss::EpsilonInsensitive => {
385 if residual.abs() <= self.epsilon {
386 0.0
387 } else {
388 residual.signum()
389 }
390 }
391 SGDRegressorLoss::SquaredEpsilonInsensitive => {
392 if residual.abs() <= self.epsilon {
393 0.0
394 } else {
395 residual - self.epsilon * residual.signum()
396 }
397 }
398 }
399 }
400
401 fn get_learning_rate(&self, t: usize) -> f32 {
402 match self.learning_rate {
403 LearningRate::Constant => self.eta0,
404 LearningRate::Optimal => 1.0 / (self.alpha * (t as f32 + 1.0)),
405 LearningRate::InvScaling => self.eta0 / (t as f32 + 1.0).powf(self.power_t),
406 LearningRate::Adaptive => self.eta0,
407 }
408 }
409
410 fn apply_penalty(&self, coef: &mut [f32], lr: f32) {
411 match self.penalty {
412 Penalty::L2 => {
413 for c in coef.iter_mut() {
414 *c *= 1.0 - lr * self.alpha;
415 }
416 }
417 Penalty::L1 => {
418 for c in coef.iter_mut() {
419 let sign = c.signum();
420 *c = (*c - lr * self.alpha * sign).max(0.0) * sign;
421 }
422 }
423 Penalty::ElasticNet => {
424 let l1 = self.alpha * self.l1_ratio;
425 let l2 = self.alpha * (1.0 - self.l1_ratio);
426 for c in coef.iter_mut() {
427 *c *= 1.0 - lr * l2;
428 let sign = c.signum();
429 *c = (*c - lr * l1 * sign).max(0.0) * sign;
430 }
431 }
432 Penalty::None => {}
433 }
434 }
435
436 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
437 let x_data = x.data_f32();
438 let y_data = y.data_f32();
439 let n_samples = x.dims()[0];
440 let n_features = x.dims()[1];
441
442 let mut rng = thread_rng();
443 let mut coef = vec![0.0f32; n_features];
444 let mut intercept = 0.0f32;
445
446 let mut indices: Vec<usize> = (0..n_samples).collect();
447 let mut t = 0usize;
448
449 for epoch in 0..self.max_iter {
450 if self.shuffle {
451 indices.shuffle(&mut rng);
452 }
453
454 let mut total_loss = 0.0f32;
455
456 for &i in &indices {
457 let xi = &x_data[i * n_features..(i + 1) * n_features];
458 let yi = y_data[i];
459
460 let lr = self.get_learning_rate(t);
461 t += 1;
462
463 let mut pred = intercept;
465 for j in 0..n_features {
466 pred += coef[j] * xi[j];
467 }
468
469 let grad = self.compute_loss_gradient(pred, yi);
470 total_loss += grad.abs();
471
472 for j in 0..n_features {
474 coef[j] -= lr * grad * xi[j];
475 }
476 intercept -= lr * grad;
477
478 self.apply_penalty(&mut coef, lr);
479 }
480
481 self.n_iter_ = epoch + 1;
482
483 if total_loss / (n_samples as f32) < self.tol {
484 break;
485 }
486 }
487
488 self.coef_ = Some(coef);
489 self.intercept_ = intercept;
490 }
491
492 pub fn predict(&self, x: &Tensor) -> Tensor {
493 let x_data = x.data_f32();
494 let n_samples = x.dims()[0];
495 let n_features = x.dims()[1];
496
497 let coef = self.coef_.as_ref().expect("Model not fitted");
498
499 let predictions: Vec<f32> = (0..n_samples)
500 .map(|i| {
501 let xi = &x_data[i * n_features..(i + 1) * n_features];
502 let mut pred = self.intercept_;
503 for j in 0..n_features {
504 pred += coef[j] * xi[j];
505 }
506 pred
507 })
508 .collect();
509
510 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
511 }
512}
513
514impl Default for SGDRegressor {
515 fn default() -> Self { Self::new() }
516}
517
/// Classifier using ridge regression on {-1, +1} encoded targets,
/// mirroring scikit-learn's `RidgeClassifier`.
pub struct RidgeClassifier {
    /// Regularization strength added to the diagonal of XᵀX.
    pub alpha: f32,
    /// Whether to fit an intercept term.
    pub fit_intercept: bool,
    /// Whether to normalize features before fitting.
    /// NOTE(review): not referenced anywhere in the fit path — confirm intent.
    pub normalize: bool,
    /// Optional per-class weights.
    /// NOTE(review): not referenced anywhere in the fit path — confirm intent.
    pub class_weight: Option<Vec<f32>>,
    // Learned weights: one row per class (a single row for binary problems).
    coef_: Option<Vec<Vec<f32>>>,
    // Learned bias terms, parallel to `coef_`.
    intercept_: Option<Vec<f32>>,
    // Sorted distinct class labels seen during the last `fit`.
    classes_: Vec<i32>,
}
528
529impl RidgeClassifier {
530 pub fn new() -> Self {
531 RidgeClassifier {
532 alpha: 1.0,
533 fit_intercept: true,
534 normalize: false,
535 class_weight: None,
536 coef_: None,
537 intercept_: None,
538 classes_: Vec::new(),
539 }
540 }
541
542 pub fn alpha(mut self, a: f32) -> Self { self.alpha = a; self }
543
544 fn solve_ridge(&self, x: &[f32], y: &[f32], n_samples: usize, n_features: usize) -> (Vec<f32>, f32) {
545 let x_mean: Vec<f32> = (0..n_features)
547 .map(|j| (0..n_samples).map(|i| x[i * n_features + j]).sum::<f32>() / n_samples as f32)
548 .collect();
549 let y_mean = y.iter().sum::<f32>() / n_samples as f32;
550
551 let mut xtx = vec![0.0f32; n_features * n_features];
553 let mut xty = vec![0.0f32; n_features];
554
555 for i in 0..n_features {
556 for k in 0..n_samples {
557 let xki = x[k * n_features + i] - x_mean[i];
558 xty[i] += xki * (y[k] - y_mean);
559 }
560 for j in 0..n_features {
561 for k in 0..n_samples {
562 xtx[i * n_features + j] +=
563 (x[k * n_features + i] - x_mean[i]) * (x[k * n_features + j] - x_mean[j]);
564 }
565 }
566 xtx[i * n_features + i] += self.alpha;
567 }
568
569 let coef = solve_cholesky(&xtx, &xty, n_features);
571 let intercept = y_mean - coef.iter().zip(x_mean.iter()).map(|(&c, &m)| c * m).sum::<f32>();
572
573 (coef, intercept)
574 }
575
576 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
577 let x_data = x.data_f32();
578 let y_data = y.data_f32();
579 let n_samples = x.dims()[0];
580 let n_features = x.dims()[1];
581
582 let mut classes: Vec<i32> = y_data.iter().map(|&v| v as i32).collect();
584 classes.sort();
585 classes.dedup();
586 self.classes_ = classes.clone();
587
588 if self.classes_.len() == 2 {
589 let y_encoded: Vec<f32> = y_data.iter()
591 .map(|&v| if v as i32 == self.classes_[1] { 1.0 } else { -1.0 })
592 .collect();
593
594 let (coef, intercept) = self.solve_ridge(&x_data, &y_encoded, n_samples, n_features);
595 self.coef_ = Some(vec![coef]);
596 self.intercept_ = Some(vec![intercept]);
597 } else {
598 let mut all_coef = Vec::new();
600 let mut all_intercept = Vec::new();
601
602 for &class in &self.classes_ {
603 let y_encoded: Vec<f32> = y_data.iter()
604 .map(|&v| if v as i32 == class { 1.0 } else { -1.0 })
605 .collect();
606
607 let (coef, intercept) = self.solve_ridge(&x_data, &y_encoded, n_samples, n_features);
608 all_coef.push(coef);
609 all_intercept.push(intercept);
610 }
611
612 self.coef_ = Some(all_coef);
613 self.intercept_ = Some(all_intercept);
614 }
615 }
616
617 pub fn predict(&self, x: &Tensor) -> Tensor {
618 let x_data = x.data_f32();
619 let n_samples = x.dims()[0];
620 let n_features = x.dims()[1];
621
622 let coef = self.coef_.as_ref().expect("Model not fitted");
623 let intercept = self.intercept_.as_ref().unwrap();
624
625 let predictions: Vec<f32> = (0..n_samples)
626 .map(|i| {
627 let xi = &x_data[i * n_features..(i + 1) * n_features];
628
629 if self.classes_.len() == 2 {
630 let mut score = intercept[0];
631 for j in 0..n_features {
632 score += coef[0][j] * xi[j];
633 }
634 if score >= 0.0 { self.classes_[1] as f32 } else { self.classes_[0] as f32 }
635 } else {
636 let mut best_score = f32::NEG_INFINITY;
637 let mut best_class = self.classes_[0];
638 for k in 0..self.classes_.len() {
639 let mut score = intercept[k];
640 for j in 0..n_features {
641 score += coef[k][j] * xi[j];
642 }
643 if score > best_score {
644 best_score = score;
645 best_class = self.classes_[k];
646 }
647 }
648 best_class as f32
649 }
650 })
651 .collect();
652
653 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
654 }
655
656 pub fn decision_function(&self, x: &Tensor) -> Tensor {
657 let x_data = x.data_f32();
658 let n_samples = x.dims()[0];
659 let n_features = x.dims()[1];
660
661 let coef = self.coef_.as_ref().expect("Model not fitted");
662 let intercept = self.intercept_.as_ref().unwrap();
663
664 if self.classes_.len() == 2 {
665 let scores: Vec<f32> = (0..n_samples)
666 .map(|i| {
667 let xi = &x_data[i * n_features..(i + 1) * n_features];
668 let mut score = intercept[0];
669 for j in 0..n_features {
670 score += coef[0][j] * xi[j];
671 }
672 score
673 })
674 .collect();
675 Tensor::from_slice(&scores, &[n_samples]).unwrap()
676 } else {
677 let n_classes = self.classes_.len();
678 let scores: Vec<f32> = (0..n_samples)
679 .flat_map(|i| {
680 let xi = &x_data[i * n_features..(i + 1) * n_features];
681 (0..n_classes).map(move |k| {
682 let mut score = intercept[k];
683 for j in 0..n_features {
684 score += coef[k][j] * xi[j];
685 }
686 score
687 })
688 })
689 .collect();
690 Tensor::from_slice(&scores, &[n_samples, n_classes]).unwrap()
691 }
692 }
693}
694
695impl Default for RidgeClassifier {
696 fn default() -> Self { Self::new() }
697}
698
/// Solves the symmetric positive-definite system `a * x = b`, where `a` is a
/// row-major `n x n` matrix, via Cholesky factorization `a = L·Lᵀ` followed by
/// forward and backward substitution. Diagonal pivots are clamped to `1e-10`
/// to keep the solve finite for near-singular inputs.
fn solve_cholesky(a: &[f32], b: &[f32], n: usize) -> Vec<f32> {
    // Factorization: build the lower-triangular L row by row.
    let mut lower = vec![0.0f32; n * n];
    for row in 0..n {
        for col in 0..=row {
            let dot: f32 = (0..col).map(|k| lower[row * n + k] * lower[col * n + k]).sum();
            let s = a[row * n + col] - dot;
            lower[row * n + col] = if row == col {
                // Clamp so sqrt never sees a non-positive pivot.
                s.max(1e-10).sqrt()
            } else {
                s / lower[col * n + col].max(1e-10)
            };
        }
    }

    // Forward substitution: L * z = b.
    let mut z = vec![0.0f32; n];
    for row in 0..n {
        let partial: f32 = (0..row).map(|j| lower[row * n + j] * z[j]).sum();
        z[row] = (b[row] - partial) / lower[row * n + row].max(1e-10);
    }

    // Backward substitution: Lᵀ * x = z.
    let mut x = vec![0.0f32; n];
    for row in (0..n).rev() {
        let partial: f32 = ((row + 1)..n).map(|j| lower[j * n + row] * x[j]).sum();
        x[row] = (z[row] - partial) / lower[row * n + row].max(1e-10);
    }

    x
}
738
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: binary SGD classification yields one label per sample.
    #[test]
    fn test_sgd_classifier() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], &[4, 2]).unwrap();
        let y = Tensor::from_slice(&[0.0, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut clf = SGDClassifier::new().max_iter(100);
        clf.fit(&x, &y);
        let pred = clf.predict(&x);
        assert_eq!(pred.dims(), &[4]);
    }

    // Smoke test: SGD regression yields one prediction per sample.
    #[test]
    fn test_sgd_regressor() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0], &[3, 2]).unwrap();
        let y = Tensor::from_slice(&[1.0, 2.0, 3.0], &[3]).unwrap();

        let mut reg = SGDRegressor::new();
        reg.max_iter = 100;
        reg.fit(&x, &y);
        let pred = reg.predict(&x);
        assert_eq!(pred.dims(), &[3]);
    }

    // Smoke test: ridge classification yields one label per sample.
    #[test]
    fn test_ridge_classifier() {
        let x = Tensor::from_slice(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], &[4, 2]).unwrap();
        let y = Tensor::from_slice(&[0.0, 0.0, 1.0, 1.0], &[4]).unwrap();

        let mut clf = RidgeClassifier::new();
        clf.fit(&x, &y);
        let pred = clf.predict(&x);
        assert_eq!(pred.dims(), &[4]);
    }
}