1use ghostflow_core::Tensor;
4use rand::prelude::*;
5
/// Linear binary classifier trained with the classic perceptron update rule.
///
/// Mirrors the scikit-learn `Perceptron` API shape: public hyperparameters
/// plus trailing-underscore fields that are populated by `fit`.
pub struct Perceptron {
    /// Maximum number of passes (epochs) over the training data.
    pub max_iter: usize,
    /// Learning rate used for each mistake-driven update.
    pub eta0: f32,
    /// Convergence tolerance.
    /// NOTE(review): not referenced by the visible `fit` — confirm intent.
    pub tol: f32,
    /// Whether to shuffle sample order before each epoch.
    pub shuffle: bool,
    /// Optional regularization penalty applied after each update.
    pub penalty: Option<PerceptronPenalty>,
    /// Regularization strength used by the penalty terms.
    pub alpha: f32,
    // Learned weight vector (one entry per feature); `None` until fitted.
    coef_: Option<Vec<f32>>,
    // Learned bias term; `None` until fitted.
    intercept_: Option<f32>,
    // Number of epochs actually run by the last `fit` call.
    n_iter_: usize,
}
18
/// Regularization penalty variants for [`Perceptron`].
#[derive(Clone, Copy, Debug)]
pub enum PerceptronPenalty {
    /// L1 (lasso) penalty: soft-thresholds weights toward zero.
    L1,
    /// L2 (ridge) penalty: multiplicative weight decay.
    L2,
    /// Combination of L1 and L2, each applied at half strength.
    ElasticNet,
}
25
26impl Perceptron {
27 pub fn new() -> Self {
28 Perceptron {
29 max_iter: 1000,
30 eta0: 1.0,
31 tol: 1e-3,
32 shuffle: true,
33 penalty: None,
34 alpha: 0.0001,
35 coef_: None,
36 intercept_: None,
37 n_iter_: 0,
38 }
39 }
40
41 pub fn max_iter(mut self, n: usize) -> Self {
42 self.max_iter = n;
43 self
44 }
45
46 pub fn eta0(mut self, eta: f32) -> Self {
47 self.eta0 = eta;
48 self
49 }
50
51 pub fn penalty(mut self, p: PerceptronPenalty) -> Self {
52 self.penalty = Some(p);
53 self
54 }
55
56 pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
57 let x_data = x.data_f32();
58 let y_data = y.data_f32();
59 let n_samples = x.dims()[0];
60 let n_features = x.dims()[1];
61
62 let y_binary: Vec<f32> = y_data.iter()
64 .map(|&y| if y > 0.5 { 1.0 } else { -1.0 })
65 .collect();
66
67 let mut weights = vec![0.0f32; n_features];
68 let mut bias = 0.0f32;
69 let mut indices: Vec<usize> = (0..n_samples).collect();
70
71 for iter in 0..self.max_iter {
72 if self.shuffle {
73 indices.shuffle(&mut thread_rng());
74 }
75
76 let mut n_errors = 0;
77
78 for &i in &indices {
79 let xi = &x_data[i * n_features..(i + 1) * n_features];
80
81 let mut pred = bias;
83 for j in 0..n_features {
84 pred += weights[j] * xi[j];
85 }
86 let pred_sign = if pred >= 0.0 { 1.0 } else { -1.0 };
87
88 if pred_sign != y_binary[i] {
90 n_errors += 1;
91
92 for j in 0..n_features {
93 weights[j] += self.eta0 * y_binary[i] * xi[j];
94 }
95 bias += self.eta0 * y_binary[i];
96
97 if let Some(penalty) = self.penalty {
99 match penalty {
100 PerceptronPenalty::L2 => {
101 for w in &mut weights {
102 *w *= 1.0 - self.alpha * self.eta0;
103 }
104 }
105 PerceptronPenalty::L1 => {
106 for w in &mut weights {
107 let sign = w.signum();
108 *w = (*w - self.alpha * self.eta0 * sign).max(0.0) * sign;
109 }
110 }
111 PerceptronPenalty::ElasticNet => {
112 for w in &mut weights {
113 *w *= 1.0 - 0.5 * self.alpha * self.eta0;
114 let sign = w.signum();
115 *w = (*w - 0.5 * self.alpha * self.eta0 * sign).max(0.0) * sign;
116 }
117 }
118 }
119 }
120 }
121 }
122
123 self.n_iter_ = iter + 1;
124
125 if n_errors == 0 {
126 break;
127 }
128 }
129
130 self.coef_ = Some(weights);
131 self.intercept_ = Some(bias);
132 }
133
134 pub fn predict(&self, x: &Tensor) -> Tensor {
135 let x_data = x.data_f32();
136 let n_samples = x.dims()[0];
137 let n_features = x.dims()[1];
138
139 let weights = self.coef_.as_ref().expect("Model not fitted");
140 let bias = self.intercept_.unwrap_or(0.0);
141
142 let predictions: Vec<f32> = (0..n_samples)
143 .map(|i| {
144 let xi = &x_data[i * n_features..(i + 1) * n_features];
145 let mut pred = bias;
146 for j in 0..n_features {
147 pred += weights[j] * xi[j];
148 }
149 if pred >= 0.0 { 1.0 } else { 0.0 }
150 })
151 .collect();
152
153 Tensor::from_slice(&predictions, &[n_samples]).unwrap()
154 }
155
156 pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
157 let predictions = self.predict(x);
158 let pred_data = predictions.data_f32();
159 let y_data = y.data_f32();
160
161 let correct: usize = pred_data.iter()
162 .zip(y_data.iter())
163 .filter(|(&p, &y)| (p - y).abs() < 0.5)
164 .count();
165
166 correct as f32 / y_data.len() as f32
167 }
168}
169
170impl Default for Perceptron {
171 fn default() -> Self {
172 Self::new()
173 }
174}
175
176
/// Multi-layer perceptron classifier trained with mini-batch backpropagation.
///
/// Trailing-underscore fields are populated by `fit`.
pub struct MLPClassifier {
    /// Number of units in each hidden layer, in order.
    pub hidden_layer_sizes: Vec<usize>,
    /// Activation function used by the hidden layers (output uses softmax).
    pub activation: Activation,
    /// Requested optimizer.
    /// NOTE(review): the visible `fit` always runs Adam regardless of this
    /// setting — confirm intent.
    pub solver: MLPSolver,
    /// L2 regularization strength added to the weight gradients.
    pub alpha: f32,
    /// Initial (and, as written, constant) learning rate.
    pub learning_rate_init: f32,
    /// Maximum number of training epochs.
    pub max_iter: usize,
    /// Loss-change threshold for early stopping.
    pub tol: f32,
    /// Mini-batch size used during training.
    pub batch_size: usize,
    /// Momentum coefficient.
    /// NOTE(review): not referenced by the visible `fit` — confirm intent.
    pub momentum: f32,
    // Per-layer weight matrices, stored row-major as flat `n_in * n_out` vecs.
    weights_: Vec<Vec<f32>>,
    // Per-layer bias vectors.
    biases_: Vec<Vec<f32>>,
    // Number of classes inferred from the training labels.
    n_classes_: usize,
    // Number of input features seen during `fit`.
    n_features_: usize,
    // Number of epochs actually run by the last `fit` call.
    n_iter_: usize,
    // Mean training loss at the end of the last epoch.
    loss_: f32,
}
195
/// Hidden-layer activation functions for the MLP estimators.
#[derive(Clone, Copy, Debug)]
pub enum Activation {
    /// Rectified linear unit: `max(0, x)`.
    ReLU,
    /// Hyperbolic tangent.
    Tanh,
    /// Logistic sigmoid: `1 / (1 + e^-x)`.
    Sigmoid,
    /// Identity (no-op) activation.
    Identity,
}
203
/// Optimizer choices for the MLP estimators.
/// NOTE(review): the visible `fit` implementations always use Adam-style
/// updates; the `SGD` variant is not dispatched on — confirm intent.
#[derive(Clone, Copy, Debug)]
pub enum MLPSolver {
    /// Plain stochastic gradient descent.
    SGD,
    /// Adam adaptive-moment optimizer.
    Adam,
}
209
impl MLPClassifier {
    /// Creates an untrained classifier with the given hidden-layer sizes and
    /// scikit-learn-style defaults (ReLU, Adam, lr 0.001, 200 epochs).
    pub fn new(hidden_layer_sizes: Vec<usize>) -> Self {
        MLPClassifier {
            hidden_layer_sizes,
            activation: Activation::ReLU,
            solver: MLPSolver::Adam,
            alpha: 0.0001,
            learning_rate_init: 0.001,
            max_iter: 200,
            tol: 1e-4,
            batch_size: 32,
            momentum: 0.9,
            weights_: Vec::new(),
            biases_: Vec::new(),
            n_classes_: 0,
            n_features_: 0,
            n_iter_: 0,
            loss_: 0.0,
        }
    }

    /// Builder: sets the hidden-layer activation function.
    pub fn activation(mut self, act: Activation) -> Self {
        self.activation = act;
        self
    }

    /// Builder: sets the solver (see note on `MLPSolver`).
    pub fn solver(mut self, solver: MLPSolver) -> Self {
        self.solver = solver;
        self
    }

    /// Builder: sets the maximum number of training epochs.
    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    /// Builder: sets the learning rate.
    pub fn learning_rate(mut self, lr: f32) -> Self {
        self.learning_rate_init = lr;
        self
    }

    // Applies the configured activation to a single pre-activation value.
    fn activate(&self, x: f32) -> f32 {
        match self.activation {
            Activation::ReLU => x.max(0.0),
            Activation::Tanh => x.tanh(),
            Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
            Activation::Identity => x,
        }
    }

    // Derivative of the activation, evaluated at the PRE-activation value x.
    fn activate_derivative(&self, x: f32) -> f32 {
        match self.activation {
            Activation::ReLU => if x > 0.0 { 1.0 } else { 0.0 },
            Activation::Tanh => 1.0 - x.tanh().powi(2),
            Activation::Sigmoid => {
                let s = 1.0 / (1.0 + (-x).exp());
                s * (1.0 - s)
            }
            Activation::Identity => 1.0,
        }
    }

    // Numerically stable softmax: subtracts the max before exponentiating.
    fn softmax(x: &[f32]) -> Vec<f32> {
        let max_x = x.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
        let exp_x: Vec<f32> = x.iter().map(|&xi| (xi - max_x).exp()).collect();
        let sum: f32 = exp_x.iter().sum();
        exp_x.iter().map(|&e| e / sum).collect()
    }

    // Forward pass for one sample. Returns (activations, pre_activations):
    // `activations[0]` is the input, `activations.last()` the softmax output;
    // `pre_activations[l]` holds z = W·a + b for layer l.
    fn forward(&self, x: &[f32]) -> (Vec<Vec<f32>>, Vec<Vec<f32>>) {
        let mut activations = vec![x.to_vec()];
        let mut pre_activations = Vec::new();

        let mut current = x.to_vec();

        for (layer_idx, (weights, biases)) in self.weights_.iter().zip(self.biases_.iter()).enumerate() {
            let n_in = current.len();
            let n_out = biases.len();

            // z[j] = b[j] + sum_i W[i][j] * a[i]; weights are flattened
            // row-major with stride n_out.
            let mut z = vec![0.0f32; n_out];
            for j in 0..n_out {
                z[j] = biases[j];
                for i in 0..n_in {
                    z[j] += weights[i * n_out + j] * current[i];
                }
            }

            pre_activations.push(z.clone());

            // Softmax on the output layer, configured activation elsewhere.
            let a = if layer_idx == self.weights_.len() - 1 {
                Self::softmax(&z)
            } else {
                z.iter().map(|&zi| self.activate(zi)).collect()
            };

            activations.push(a.clone());
            current = a;
        }

        (activations, pre_activations)
    }

    // Initializes weights uniformly in [-scale, scale] with a Glorot-style
    // scale sqrt(2 / (fan_in + fan_out)); biases start at zero.
    fn initialize_weights(&mut self, n_features: usize, n_classes: usize) {
        let mut rng = thread_rng();

        // Full layer-size chain: input -> hidden... -> output.
        let mut layer_sizes = vec![n_features];
        layer_sizes.extend(&self.hidden_layer_sizes);
        layer_sizes.push(n_classes);

        self.weights_.clear();
        self.biases_.clear();

        for i in 0..layer_sizes.len() - 1 {
            let n_in = layer_sizes[i];
            let n_out = layer_sizes[i + 1];

            let scale = (2.0 / (n_in + n_out) as f32).sqrt();
            let weights: Vec<f32> = (0..n_in * n_out)
                .map(|_| (rng.gen::<f32>() - 0.5) * 2.0 * scale)
                .collect();
            let biases = vec![0.0f32; n_out];

            self.weights_.push(weights);
            self.biases_.push(biases);
        }
    }

    /// Trains the network with mini-batch backpropagation and Adam updates.
    ///
    /// `x` is read as `[n_samples, n_features]`; `y` as `[n_samples]` of
    /// class indices (cast from f32). The number of classes is inferred as
    /// `max(y) + 1`. Stops early when the epoch-to-epoch change in mean
    /// cross-entropy loss drops below `tol`.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;

        // One-hot encode labels for the cross-entropy gradient.
        let mut y_onehot = vec![vec![0.0f32; self.n_classes_]; n_samples];
        for i in 0..n_samples {
            let class = y_data[i] as usize;
            if class < self.n_classes_ {
                y_onehot[i][class] = 1.0;
            }
        }

        self.initialize_weights(n_features, self.n_classes_);

        // Adam first/second moment accumulators, mirroring the weight shapes.
        let mut m_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
        let mut v_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
        let mut m_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
        let mut v_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();

        // Standard Adam hyperparameters.
        let beta1 = 0.9f32;
        let beta2 = 0.999f32;
        let eps = 1e-8f32;

        let mut indices: Vec<usize> = (0..n_samples).collect();
        let mut prev_loss = f32::INFINITY;

        for iter in 0..self.max_iter {
            indices.shuffle(&mut thread_rng());

            let mut total_loss = 0.0f32;

            for batch_start in (0..n_samples).step_by(self.batch_size) {
                let batch_end = (batch_start + self.batch_size).min(n_samples);
                let batch_size = batch_end - batch_start;

                // Gradients accumulated over the batch, summed per sample.
                let mut grad_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
                let mut grad_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();

                for &idx in &indices[batch_start..batch_end] {
                    let xi = &x_data[idx * n_features..(idx + 1) * n_features];
                    let yi = &y_onehot[idx];

                    let (activations, pre_activations) = self.forward(xi);

                    // Cross-entropy loss of the true class, clamped for log(0).
                    let output = activations.last().unwrap();
                    for c in 0..self.n_classes_ {
                        if yi[c] > 0.5 {
                            total_loss -= output[c].max(1e-10).ln();
                        }
                    }

                    // Output-layer delta for softmax + cross-entropy: p - y.
                    let n_layers = self.weights_.len();
                    let mut delta: Vec<f32> = output.iter().zip(yi.iter())
                        .map(|(&o, &y)| o - y)
                        .collect();

                    // Backpropagate through the layers in reverse order.
                    for layer in (0..n_layers).rev() {
                        let a_prev = &activations[layer];
                        let n_in = a_prev.len();
                        let n_out = delta.len();

                        // dL/dW[i][j] += a_prev[i] * delta[j]; dL/db[j] += delta[j].
                        for i in 0..n_in {
                            for j in 0..n_out {
                                grad_weights[layer][i * n_out + j] += a_prev[i] * delta[j];
                            }
                        }
                        for j in 0..n_out {
                            grad_biases[layer][j] += delta[j];
                        }

                        // Propagate delta to the previous layer through W^T,
                        // then apply the activation derivative at its z.
                        if layer > 0 {
                            let mut new_delta = vec![0.0f32; n_in];
                            for i in 0..n_in {
                                for j in 0..n_out {
                                    new_delta[i] += self.weights_[layer][i * n_out + j] * delta[j];
                                }
                                new_delta[i] *= self.activate_derivative(pre_activations[layer - 1][i]);
                            }
                            delta = new_delta;
                        }
                    }
                }

                // Approximate global Adam step count for bias correction.
                // NOTE(review): if batch_size > n_samples, n_samples/batch_size
                // is 0 and t stays small across epochs — confirm acceptable.
                let t = (iter * (n_samples / self.batch_size) + batch_start / self.batch_size + 1) as f32;

                for layer in 0..self.weights_.len() {
                    for i in 0..self.weights_[layer].len() {
                        // Mean gradient plus L2 regularization term.
                        let g = grad_weights[layer][i] / batch_size as f32 + self.alpha * self.weights_[layer][i];

                        m_weights[layer][i] = beta1 * m_weights[layer][i] + (1.0 - beta1) * g;
                        v_weights[layer][i] = beta2 * v_weights[layer][i] + (1.0 - beta2) * g * g;

                        // Bias-corrected first and second moments.
                        let m_hat = m_weights[layer][i] / (1.0 - beta1.powf(t));
                        let v_hat = v_weights[layer][i] / (1.0 - beta2.powf(t));

                        self.weights_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
                    }

                    for i in 0..self.biases_[layer].len() {
                        // Biases are not regularized.
                        let g = grad_biases[layer][i] / batch_size as f32;

                        m_biases[layer][i] = beta1 * m_biases[layer][i] + (1.0 - beta1) * g;
                        v_biases[layer][i] = beta2 * v_biases[layer][i] + (1.0 - beta2) * g * g;

                        let m_hat = m_biases[layer][i] / (1.0 - beta1.powf(t));
                        let v_hat = v_biases[layer][i] / (1.0 - beta2.powf(t));

                        self.biases_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
                    }
                }
            }

            self.loss_ = total_loss / n_samples as f32;
            self.n_iter_ = iter + 1;

            // Early stop on small change in mean loss between epochs.
            if (prev_loss - self.loss_).abs() < self.tol {
                break;
            }
            prev_loss = self.loss_;
        }
    }

    /// Predicts the most probable class index (as f32) for each row of `x`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let proba = self.predict_proba(x);
        let proba_data = proba.data_f32();
        let n_samples = x.dims()[0];

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let start = i * self.n_classes_;
                let probs = &proba_data[start..start + self.n_classes_];
                // Argmax over the class probabilities for this sample.
                probs.iter()
                    .enumerate()
                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
                    .map(|(c, _)| c as f32)
                    .unwrap_or(0.0)
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// Returns class probabilities as a `[n_samples, n_classes]` tensor.
    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let mut probs: Vec<f32> = Vec::with_capacity(n_samples * self.n_classes_);

        for i in 0..n_samples {
            let xi = &x_data[i * n_features..(i + 1) * n_features];
            let (activations, _) = self.forward(xi);
            // Last activation layer is the softmax output.
            probs.extend(activations.last().unwrap());
        }

        Tensor::from_slice(&probs, &[n_samples, self.n_classes_]).unwrap()
    }

    /// Returns mean accuracy of `predict(x)` against `y`.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        // Class indices are whole-number floats, so "within 0.5" is equality.
        let correct: usize = pred_data.iter()
            .zip(y_data.iter())
            .filter(|(&p, &y)| (p - y).abs() < 0.5)
            .count();

        correct as f32 / y_data.len() as f32
    }
}
527
528
/// Multi-layer perceptron regressor trained with mini-batch backpropagation
/// on squared error; the output layer is linear (identity).
///
/// Trailing-underscore fields are populated by `fit`.
pub struct MLPRegressor {
    /// Number of units in each hidden layer, in order.
    pub hidden_layer_sizes: Vec<usize>,
    /// Activation function used by the hidden layers (output is linear).
    pub activation: Activation,
    /// Requested optimizer.
    /// NOTE(review): the visible `fit` always runs Adam regardless of this
    /// setting — confirm intent.
    pub solver: MLPSolver,
    /// L2 regularization strength added to the weight gradients.
    pub alpha: f32,
    /// Initial (and, as written, constant) learning rate.
    pub learning_rate_init: f32,
    /// Maximum number of training epochs.
    pub max_iter: usize,
    /// Loss-change threshold for early stopping.
    pub tol: f32,
    /// Mini-batch size used during training.
    pub batch_size: usize,
    // Per-layer weight matrices, stored row-major as flat `n_in * n_out` vecs.
    weights_: Vec<Vec<f32>>,
    // Per-layer bias vectors.
    biases_: Vec<Vec<f32>>,
    // Number of input features seen during `fit`.
    n_features_: usize,
    // Number of regression targets; the visible `fit` fixes this to 1.
    n_outputs_: usize,
    // Number of epochs actually run by the last `fit` call.
    n_iter_: usize,
    // Mean training loss at the end of the last epoch.
    loss_: f32,
}
546
impl MLPRegressor {
    /// Creates an untrained regressor with the given hidden-layer sizes and
    /// scikit-learn-style defaults (ReLU, Adam, lr 0.001, 200 epochs).
    pub fn new(hidden_layer_sizes: Vec<usize>) -> Self {
        MLPRegressor {
            hidden_layer_sizes,
            activation: Activation::ReLU,
            solver: MLPSolver::Adam,
            alpha: 0.0001,
            learning_rate_init: 0.001,
            max_iter: 200,
            tol: 1e-4,
            batch_size: 32,
            weights_: Vec::new(),
            biases_: Vec::new(),
            n_features_: 0,
            n_outputs_: 1,
            n_iter_: 0,
            loss_: 0.0,
        }
    }

    /// Builder: sets the hidden-layer activation function.
    pub fn activation(mut self, act: Activation) -> Self {
        self.activation = act;
        self
    }

    /// Builder: sets the maximum number of training epochs.
    pub fn max_iter(mut self, n: usize) -> Self {
        self.max_iter = n;
        self
    }

    // Applies the configured activation to a single pre-activation value.
    fn activate(&self, x: f32) -> f32 {
        match self.activation {
            Activation::ReLU => x.max(0.0),
            Activation::Tanh => x.tanh(),
            Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
            Activation::Identity => x,
        }
    }

    // Derivative of the activation, evaluated at the PRE-activation value x.
    fn activate_derivative(&self, x: f32) -> f32 {
        match self.activation {
            Activation::ReLU => if x > 0.0 { 1.0 } else { 0.0 },
            Activation::Tanh => 1.0 - x.tanh().powi(2),
            Activation::Sigmoid => {
                let s = 1.0 / (1.0 + (-x).exp());
                s * (1.0 - s)
            }
            Activation::Identity => 1.0,
        }
    }

    // Forward pass for one sample. Returns (activations, pre_activations);
    // the output layer is linear (raw z), hidden layers use `activate`.
    fn forward(&self, x: &[f32]) -> (Vec<Vec<f32>>, Vec<Vec<f32>>) {
        let mut activations = vec![x.to_vec()];
        let mut pre_activations = Vec::new();
        let mut current = x.to_vec();

        for (layer_idx, (weights, biases)) in self.weights_.iter().zip(self.biases_.iter()).enumerate() {
            let n_in = current.len();
            let n_out = biases.len();

            // z[j] = b[j] + sum_i W[i][j] * a[i]; weights are flattened
            // row-major with stride n_out.
            let mut z = vec![0.0f32; n_out];
            for j in 0..n_out {
                z[j] = biases[j];
                for i in 0..n_in {
                    z[j] += weights[i * n_out + j] * current[i];
                }
            }

            pre_activations.push(z.clone());

            // Identity on the output layer, configured activation elsewhere.
            let a = if layer_idx == self.weights_.len() - 1 {
                z.clone()
            } else {
                z.iter().map(|&zi| self.activate(zi)).collect()
            };

            activations.push(a.clone());
            current = a;
        }

        (activations, pre_activations)
    }

    // Initializes weights uniformly in [-scale, scale] with a Glorot-style
    // scale sqrt(2 / (fan_in + fan_out)); biases start at zero.
    fn initialize_weights(&mut self, n_features: usize, n_outputs: usize) {
        let mut rng = thread_rng();

        // Full layer-size chain: input -> hidden... -> output.
        let mut layer_sizes = vec![n_features];
        layer_sizes.extend(&self.hidden_layer_sizes);
        layer_sizes.push(n_outputs);

        self.weights_.clear();
        self.biases_.clear();

        for i in 0..layer_sizes.len() - 1 {
            let n_in = layer_sizes[i];
            let n_out = layer_sizes[i + 1];

            let scale = (2.0 / (n_in + n_out) as f32).sqrt();
            let weights: Vec<f32> = (0..n_in * n_out)
                .map(|_| (rng.gen::<f32>() - 0.5) * 2.0 * scale)
                .collect();
            let biases = vec![0.0f32; n_out];

            self.weights_.push(weights);
            self.biases_.push(biases);
        }
    }

    /// Trains the network on squared error with mini-batch backprop and
    /// Adam updates. `x` is read as `[n_samples, n_features]`, `y` as a
    /// single target per sample. Stops early when the epoch-to-epoch change
    /// in mean loss drops below `tol`.
    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
        let x_data = x.data_f32();
        let y_data = y.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        self.n_features_ = n_features;
        self.n_outputs_ = 1;

        self.initialize_weights(n_features, self.n_outputs_);

        // Adam first/second moment accumulators, mirroring the weight shapes.
        let mut m_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
        let mut v_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
        let mut m_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
        let mut v_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();

        // Standard Adam hyperparameters.
        let beta1 = 0.9f32;
        let beta2 = 0.999f32;
        let eps = 1e-8f32;

        let mut indices: Vec<usize> = (0..n_samples).collect();
        let mut prev_loss = f32::INFINITY;

        for iter in 0..self.max_iter {
            indices.shuffle(&mut thread_rng());
            let mut total_loss = 0.0f32;

            for batch_start in (0..n_samples).step_by(self.batch_size) {
                let batch_end = (batch_start + self.batch_size).min(n_samples);
                let batch_size = batch_end - batch_start;

                // Gradients accumulated over the batch, summed per sample.
                let mut grad_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
                let mut grad_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();

                for &idx in &indices[batch_start..batch_end] {
                    let xi = &x_data[idx * n_features..(idx + 1) * n_features];
                    let yi = y_data[idx];

                    let (activations, pre_activations) = self.forward(xi);
                    let output = activations.last().unwrap()[0];

                    // Squared-error loss; delta for the linear output is the
                    // raw error (gradient of 0.5 * err^2).
                    let error = output - yi;
                    total_loss += error * error;

                    let n_layers = self.weights_.len();
                    let mut delta = vec![error];

                    // Backpropagate through the layers in reverse order.
                    for layer in (0..n_layers).rev() {
                        let a_prev = &activations[layer];
                        let n_in = a_prev.len();
                        let n_out = delta.len();

                        // dL/dW[i][j] += a_prev[i] * delta[j]; dL/db[j] += delta[j].
                        for i in 0..n_in {
                            for j in 0..n_out {
                                grad_weights[layer][i * n_out + j] += a_prev[i] * delta[j];
                            }
                        }
                        for j in 0..n_out {
                            grad_biases[layer][j] += delta[j];
                        }

                        // Propagate delta to the previous layer through W^T,
                        // then apply the activation derivative at its z.
                        if layer > 0 {
                            let mut new_delta = vec![0.0f32; n_in];
                            for i in 0..n_in {
                                for j in 0..n_out {
                                    new_delta[i] += self.weights_[layer][i * n_out + j] * delta[j];
                                }
                                new_delta[i] *= self.activate_derivative(pre_activations[layer - 1][i]);
                            }
                            delta = new_delta;
                        }
                    }
                }

                // Approximate global Adam step count for bias correction.
                // NOTE(review): if batch_size > n_samples, n_samples/batch_size
                // is 0 and t stays small across epochs — confirm acceptable.
                let t = (iter * (n_samples / self.batch_size) + batch_start / self.batch_size + 1) as f32;

                for layer in 0..self.weights_.len() {
                    for i in 0..self.weights_[layer].len() {
                        // Mean gradient plus L2 regularization term.
                        let g = grad_weights[layer][i] / batch_size as f32 + self.alpha * self.weights_[layer][i];
                        m_weights[layer][i] = beta1 * m_weights[layer][i] + (1.0 - beta1) * g;
                        v_weights[layer][i] = beta2 * v_weights[layer][i] + (1.0 - beta2) * g * g;
                        // Bias-corrected first and second moments.
                        let m_hat = m_weights[layer][i] / (1.0 - beta1.powf(t));
                        let v_hat = v_weights[layer][i] / (1.0 - beta2.powf(t));
                        self.weights_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
                    }

                    for i in 0..self.biases_[layer].len() {
                        // Biases are not regularized.
                        let g = grad_biases[layer][i] / batch_size as f32;
                        m_biases[layer][i] = beta1 * m_biases[layer][i] + (1.0 - beta1) * g;
                        v_biases[layer][i] = beta2 * v_biases[layer][i] + (1.0 - beta2) * g * g;
                        let m_hat = m_biases[layer][i] / (1.0 - beta1.powf(t));
                        let v_hat = v_biases[layer][i] / (1.0 - beta2.powf(t));
                        self.biases_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
                    }
                }
            }

            self.loss_ = total_loss / n_samples as f32;
            self.n_iter_ = iter + 1;

            // Early stop on small change in mean loss between epochs.
            if (prev_loss - self.loss_).abs() < self.tol {
                break;
            }
            prev_loss = self.loss_;
        }
    }

    /// Predicts one target value per row of `x`.
    pub fn predict(&self, x: &Tensor) -> Tensor {
        let x_data = x.data_f32();
        let n_samples = x.dims()[0];
        let n_features = x.dims()[1];

        let predictions: Vec<f32> = (0..n_samples)
            .map(|i| {
                let xi = &x_data[i * n_features..(i + 1) * n_features];
                let (activations, _) = self.forward(xi);
                // Single linear output unit.
                activations.last().unwrap()[0]
            })
            .collect();

        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
    }

    /// Returns the R² (coefficient of determination) of `predict(x)` vs `y`.
    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
        let predictions = self.predict(x);
        let pred_data = predictions.data_f32();
        let y_data = y.data_f32();

        // R² = 1 - SS_res / SS_tot, with SS_tot floored to avoid div-by-zero.
        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();

        1.0 - ss_res / ss_tot.max(1e-10)
    }
}
795
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: perceptron learns (or at least runs on) an AND-style
    // dataset and returns predictions with the expected shape.
    #[test]
    fn test_perceptron() {
        let x = Tensor::from_slice(&[0.0f32, 0.0,
                                     0.0, 1.0,
                                     1.0, 0.0,
                                     1.0, 1.0,
        ], &[4, 2]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut p = Perceptron::new().max_iter(100);
        p.fit(&x, &y);

        let predictions = p.predict(&x);
        assert_eq!(predictions.dims(), &[4]);
    }

    // Smoke test: MLP classifier trains on XOR (not linearly separable)
    // and yields per-sample predictions; only the shape is asserted.
    #[test]
    fn test_mlp_classifier() {
        let x = Tensor::from_slice(&[0.0f32, 0.0,
                                     0.0, 1.0,
                                     1.0, 0.0,
                                     1.0, 1.0,
        ], &[4, 2]).unwrap();

        let y = Tensor::from_slice(&[0.0f32, 1.0, 1.0, 0.0], &[4]).unwrap();

        let mut mlp = MLPClassifier::new(vec![4]).max_iter(100);
        mlp.fit(&x, &y);

        let predictions = mlp.predict(&x);
        assert_eq!(predictions.dims(), &[4]);
    }

    // Smoke test: MLP regressor fits y = 2x and returns one prediction
    // per sample; only the shape is asserted.
    #[test]
    fn test_mlp_regressor() {
        let x = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let y = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        let mut mlp = MLPRegressor::new(vec![10]).max_iter(500);
        mlp.fit(&x, &y);

        let predictions = mlp.predict(&x);
        assert_eq!(predictions.dims(), &[5]);
    }
}
846
847