// ghostflow_ml/neural_network.rs

//! Neural Network models - Perceptron, MLP

use ghostflow_core::Tensor;
use rand::prelude::*;
/// Perceptron classifier
///
/// A linear binary classifier trained with the classic perceptron update
/// rule, with an optional L1/L2/elastic-net weight penalty.
pub struct Perceptron {
    /// Maximum number of passes (epochs) over the training data.
    pub max_iter: usize,
    /// Constant learning rate used for every update.
    pub eta0: f32,
    /// Convergence tolerance. NOTE(review): currently unused by `fit`,
    /// which stops only after a full error-free pass — confirm intent.
    pub tol: f32,
    /// Whether to shuffle sample order at the start of each epoch.
    pub shuffle: bool,
    /// Optional weight penalty applied after each misclassification update.
    pub penalty: Option<PerceptronPenalty>,
    /// Regularization strength used by `penalty`.
    pub alpha: f32,
    // Learned weights, one per feature; `None` until `fit` is called.
    coef_: Option<Vec<f32>>,
    // Learned bias term; `None` until `fit` is called.
    intercept_: Option<f32>,
    // Number of epochs actually run by the last `fit`.
    n_iter_: usize,
}
/// Weight penalty (regularization) variants for [`Perceptron`].
#[derive(Clone, Copy, Debug)]
pub enum PerceptronPenalty {
    /// Lasso-style penalty (soft-thresholding of the weights).
    L1,
    /// Ridge-style penalty (multiplicative weight decay).
    L2,
    /// Combination of half-strength L1 and L2 penalties.
    ElasticNet,
}
26impl Perceptron {
27    pub fn new() -> Self {
28        Perceptron {
29            max_iter: 1000,
30            eta0: 1.0,
31            tol: 1e-3,
32            shuffle: true,
33            penalty: None,
34            alpha: 0.0001,
35            coef_: None,
36            intercept_: None,
37            n_iter_: 0,
38        }
39    }
40
41    pub fn max_iter(mut self, n: usize) -> Self {
42        self.max_iter = n;
43        self
44    }
45
46    pub fn eta0(mut self, eta: f32) -> Self {
47        self.eta0 = eta;
48        self
49    }
50
51    pub fn penalty(mut self, p: PerceptronPenalty) -> Self {
52        self.penalty = Some(p);
53        self
54    }
55
56    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
57        let x_data = x.data_f32();
58        let y_data = y.data_f32();
59        let n_samples = x.dims()[0];
60        let n_features = x.dims()[1];
61
62        // Convert labels to {-1, 1}
63        let y_binary: Vec<f32> = y_data.iter()
64            .map(|&y| if y > 0.5 { 1.0 } else { -1.0 })
65            .collect();
66
67        let mut weights = vec![0.0f32; n_features];
68        let mut bias = 0.0f32;
69        let mut indices: Vec<usize> = (0..n_samples).collect();
70
71        for iter in 0..self.max_iter {
72            if self.shuffle {
73                indices.shuffle(&mut thread_rng());
74            }
75
76            let mut n_errors = 0;
77
78            for &i in &indices {
79                let xi = &x_data[i * n_features..(i + 1) * n_features];
80                
81                // Compute prediction
82                let mut pred = bias;
83                for j in 0..n_features {
84                    pred += weights[j] * xi[j];
85                }
86                let pred_sign = if pred >= 0.0 { 1.0 } else { -1.0 };
87
88                // Update if misclassified
89                if pred_sign != y_binary[i] {
90                    n_errors += 1;
91                    
92                    for j in 0..n_features {
93                        weights[j] += self.eta0 * y_binary[i] * xi[j];
94                    }
95                    bias += self.eta0 * y_binary[i];
96
97                    // Apply penalty
98                    if let Some(penalty) = self.penalty {
99                        match penalty {
100                            PerceptronPenalty::L2 => {
101                                for w in &mut weights {
102                                    *w *= 1.0 - self.alpha * self.eta0;
103                                }
104                            }
105                            PerceptronPenalty::L1 => {
106                                for w in &mut weights {
107                                    let sign = w.signum();
108                                    *w = (*w - self.alpha * self.eta0 * sign).max(0.0) * sign;
109                                }
110                            }
111                            PerceptronPenalty::ElasticNet => {
112                                for w in &mut weights {
113                                    *w *= 1.0 - 0.5 * self.alpha * self.eta0;
114                                    let sign = w.signum();
115                                    *w = (*w - 0.5 * self.alpha * self.eta0 * sign).max(0.0) * sign;
116                                }
117                            }
118                        }
119                    }
120                }
121            }
122
123            self.n_iter_ = iter + 1;
124
125            if n_errors == 0 {
126                break;
127            }
128        }
129
130        self.coef_ = Some(weights);
131        self.intercept_ = Some(bias);
132    }
133
134    pub fn predict(&self, x: &Tensor) -> Tensor {
135        let x_data = x.data_f32();
136        let n_samples = x.dims()[0];
137        let n_features = x.dims()[1];
138
139        let weights = self.coef_.as_ref().expect("Model not fitted");
140        let bias = self.intercept_.unwrap_or(0.0);
141
142        let predictions: Vec<f32> = (0..n_samples)
143            .map(|i| {
144                let xi = &x_data[i * n_features..(i + 1) * n_features];
145                let mut pred = bias;
146                for j in 0..n_features {
147                    pred += weights[j] * xi[j];
148                }
149                if pred >= 0.0 { 1.0 } else { 0.0 }
150            })
151            .collect();
152
153        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
154    }
155
156    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
157        let predictions = self.predict(x);
158        let pred_data = predictions.data_f32();
159        let y_data = y.data_f32();
160
161        let correct: usize = pred_data.iter()
162            .zip(y_data.iter())
163            .filter(|(&p, &y)| (p - y).abs() < 0.5)
164            .count();
165
166        correct as f32 / y_data.len() as f32
167    }
168}
169
170impl Default for Perceptron {
171    fn default() -> Self {
172        Self::new()
173    }
174}
175
176
/// Multi-Layer Perceptron Classifier
///
/// A feed-forward network with a softmax output layer, trained by
/// mini-batch backpropagation with cross-entropy loss.
pub struct MLPClassifier {
    /// Number of neurons in each hidden layer.
    pub hidden_layer_sizes: Vec<usize>,
    /// Activation for hidden layers (the output layer uses softmax).
    pub activation: Activation,
    /// Selected solver. NOTE(review): `fit` currently always performs
    /// Adam-style updates regardless of this setting — confirm intent.
    pub solver: MLPSolver,
    /// L2 regularization strength applied to the weights.
    pub alpha: f32,
    /// Initial (and constant) learning rate.
    pub learning_rate_init: f32,
    /// Maximum number of training epochs.
    pub max_iter: usize,
    /// Convergence tolerance on the change in mean loss between epochs.
    pub tol: f32,
    /// Mini-batch size used during training.
    pub batch_size: usize,
    /// Momentum coefficient. NOTE(review): unused by the current
    /// Adam-based training loop — verify.
    pub momentum: f32,
    // Flattened weight matrices, one per layer (indexed i * n_out + j).
    weights_: Vec<Vec<f32>>,
    // Bias vectors, one per layer.
    biases_: Vec<Vec<f32>>,
    // Number of distinct classes inferred from the training labels.
    n_classes_: usize,
    // Number of input features seen during `fit`.
    n_features_: usize,
    // Number of epochs actually run by the last `fit`.
    n_iter_: usize,
    // Mean training loss from the last epoch.
    loss_: f32,
}
/// Activation functions for MLP hidden layers.
#[derive(Clone, Copy, Debug)]
pub enum Activation {
    /// Rectified linear unit: `max(0, x)`.
    ReLU,
    /// Hyperbolic tangent.
    Tanh,
    /// Logistic sigmoid: `1 / (1 + e^-x)`.
    Sigmoid,
    /// Pass-through (no non-linearity).
    Identity,
}
/// Optimizer choices for MLP training.
///
/// NOTE(review): the training loops currently always apply Adam-style
/// updates, regardless of the selected variant — confirm intent.
#[derive(Clone, Copy, Debug)]
pub enum MLPSolver {
    /// Stochastic gradient descent.
    SGD,
    /// Adam (adaptive moment estimation).
    Adam,
}
210impl MLPClassifier {
211    pub fn new(hidden_layer_sizes: Vec<usize>) -> Self {
212        MLPClassifier {
213            hidden_layer_sizes,
214            activation: Activation::ReLU,
215            solver: MLPSolver::Adam,
216            alpha: 0.0001,
217            learning_rate_init: 0.001,
218            max_iter: 200,
219            tol: 1e-4,
220            batch_size: 32,
221            momentum: 0.9,
222            weights_: Vec::new(),
223            biases_: Vec::new(),
224            n_classes_: 0,
225            n_features_: 0,
226            n_iter_: 0,
227            loss_: 0.0,
228        }
229    }
230
231    pub fn activation(mut self, act: Activation) -> Self {
232        self.activation = act;
233        self
234    }
235
236    pub fn solver(mut self, solver: MLPSolver) -> Self {
237        self.solver = solver;
238        self
239    }
240
241    pub fn max_iter(mut self, n: usize) -> Self {
242        self.max_iter = n;
243        self
244    }
245
246    pub fn learning_rate(mut self, lr: f32) -> Self {
247        self.learning_rate_init = lr;
248        self
249    }
250
251    fn activate(&self, x: f32) -> f32 {
252        match self.activation {
253            Activation::ReLU => x.max(0.0),
254            Activation::Tanh => x.tanh(),
255            Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
256            Activation::Identity => x,
257        }
258    }
259
260    fn activate_derivative(&self, x: f32) -> f32 {
261        match self.activation {
262            Activation::ReLU => if x > 0.0 { 1.0 } else { 0.0 },
263            Activation::Tanh => 1.0 - x.tanh().powi(2),
264            Activation::Sigmoid => {
265                let s = 1.0 / (1.0 + (-x).exp());
266                s * (1.0 - s)
267            }
268            Activation::Identity => 1.0,
269        }
270    }
271
272    fn softmax(x: &[f32]) -> Vec<f32> {
273        let max_x = x.iter().cloned().fold(f32::NEG_INFINITY, f32::max);
274        let exp_x: Vec<f32> = x.iter().map(|&xi| (xi - max_x).exp()).collect();
275        let sum: f32 = exp_x.iter().sum();
276        exp_x.iter().map(|&e| e / sum).collect()
277    }
278
279    fn forward(&self, x: &[f32]) -> (Vec<Vec<f32>>, Vec<Vec<f32>>) {
280        let mut activations = vec![x.to_vec()];
281        let mut pre_activations = Vec::new();
282
283        let mut current = x.to_vec();
284
285        for (layer_idx, (weights, biases)) in self.weights_.iter().zip(self.biases_.iter()).enumerate() {
286            let n_in = current.len();
287            let n_out = biases.len();
288
289            let mut z = vec![0.0f32; n_out];
290            for j in 0..n_out {
291                z[j] = biases[j];
292                for i in 0..n_in {
293                    z[j] += weights[i * n_out + j] * current[i];
294                }
295            }
296
297            pre_activations.push(z.clone());
298
299            // Apply activation (softmax for last layer)
300            let a = if layer_idx == self.weights_.len() - 1 {
301                Self::softmax(&z)
302            } else {
303                z.iter().map(|&zi| self.activate(zi)).collect()
304            };
305
306            activations.push(a.clone());
307            current = a;
308        }
309
310        (activations, pre_activations)
311    }
312
313    fn initialize_weights(&mut self, n_features: usize, n_classes: usize) {
314        let mut rng = thread_rng();
315        
316        let mut layer_sizes = vec![n_features];
317        layer_sizes.extend(&self.hidden_layer_sizes);
318        layer_sizes.push(n_classes);
319
320        self.weights_.clear();
321        self.biases_.clear();
322
323        for i in 0..layer_sizes.len() - 1 {
324            let n_in = layer_sizes[i];
325            let n_out = layer_sizes[i + 1];
326
327            // Xavier initialization
328            let scale = (2.0 / (n_in + n_out) as f32).sqrt();
329            let weights: Vec<f32> = (0..n_in * n_out)
330                .map(|_| (rng.gen::<f32>() - 0.5) * 2.0 * scale)
331                .collect();
332            let biases = vec![0.0f32; n_out];
333
334            self.weights_.push(weights);
335            self.biases_.push(biases);
336        }
337    }
338
339    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
340        let x_data = x.data_f32();
341        let y_data = y.data_f32();
342        let n_samples = x.dims()[0];
343        let n_features = x.dims()[1];
344
345        self.n_features_ = n_features;
346        self.n_classes_ = y_data.iter().map(|&v| v as usize).max().unwrap_or(0) + 1;
347
348        // One-hot encode labels
349        let mut y_onehot = vec![vec![0.0f32; self.n_classes_]; n_samples];
350        for i in 0..n_samples {
351            let class = y_data[i] as usize;
352            if class < self.n_classes_ {
353                y_onehot[i][class] = 1.0;
354            }
355        }
356
357        // Initialize weights
358        self.initialize_weights(n_features, self.n_classes_);
359
360        // Adam optimizer state
361        let mut m_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
362        let mut v_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
363        let mut m_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
364        let mut v_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
365
366        let beta1 = 0.9f32;
367        let beta2 = 0.999f32;
368        let eps = 1e-8f32;
369
370        let mut indices: Vec<usize> = (0..n_samples).collect();
371        let mut prev_loss = f32::INFINITY;
372
373        for iter in 0..self.max_iter {
374            indices.shuffle(&mut thread_rng());
375
376            let mut total_loss = 0.0f32;
377
378            // Mini-batch training
379            for batch_start in (0..n_samples).step_by(self.batch_size) {
380                let batch_end = (batch_start + self.batch_size).min(n_samples);
381                let batch_size = batch_end - batch_start;
382
383                // Accumulate gradients
384                let mut grad_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
385                let mut grad_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
386
387                for &idx in &indices[batch_start..batch_end] {
388                    let xi = &x_data[idx * n_features..(idx + 1) * n_features];
389                    let yi = &y_onehot[idx];
390
391                    // Forward pass
392                    let (activations, pre_activations) = self.forward(xi);
393
394                    // Compute loss (cross-entropy)
395                    let output = activations.last().unwrap();
396                    for c in 0..self.n_classes_ {
397                        if yi[c] > 0.5 {
398                            total_loss -= output[c].max(1e-10).ln();
399                        }
400                    }
401
402                    // Backward pass
403                    let n_layers = self.weights_.len();
404                    let mut delta: Vec<f32> = output.iter().zip(yi.iter())
405                        .map(|(&o, &y)| o - y)
406                        .collect();
407
408                    for layer in (0..n_layers).rev() {
409                        let a_prev = &activations[layer];
410                        let n_in = a_prev.len();
411                        let n_out = delta.len();
412
413                        // Gradient for weights and biases
414                        for i in 0..n_in {
415                            for j in 0..n_out {
416                                grad_weights[layer][i * n_out + j] += a_prev[i] * delta[j];
417                            }
418                        }
419                        for j in 0..n_out {
420                            grad_biases[layer][j] += delta[j];
421                        }
422
423                        // Propagate delta to previous layer
424                        if layer > 0 {
425                            let mut new_delta = vec![0.0f32; n_in];
426                            for i in 0..n_in {
427                                for j in 0..n_out {
428                                    new_delta[i] += self.weights_[layer][i * n_out + j] * delta[j];
429                                }
430                                new_delta[i] *= self.activate_derivative(pre_activations[layer - 1][i]);
431                            }
432                            delta = new_delta;
433                        }
434                    }
435                }
436
437                // Update weights using Adam
438                let t = (iter * (n_samples / self.batch_size) + batch_start / self.batch_size + 1) as f32;
439
440                for layer in 0..self.weights_.len() {
441                    for i in 0..self.weights_[layer].len() {
442                        let g = grad_weights[layer][i] / batch_size as f32 + self.alpha * self.weights_[layer][i];
443                        
444                        m_weights[layer][i] = beta1 * m_weights[layer][i] + (1.0 - beta1) * g;
445                        v_weights[layer][i] = beta2 * v_weights[layer][i] + (1.0 - beta2) * g * g;
446                        
447                        let m_hat = m_weights[layer][i] / (1.0 - beta1.powf(t));
448                        let v_hat = v_weights[layer][i] / (1.0 - beta2.powf(t));
449                        
450                        self.weights_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
451                    }
452
453                    for i in 0..self.biases_[layer].len() {
454                        let g = grad_biases[layer][i] / batch_size as f32;
455                        
456                        m_biases[layer][i] = beta1 * m_biases[layer][i] + (1.0 - beta1) * g;
457                        v_biases[layer][i] = beta2 * v_biases[layer][i] + (1.0 - beta2) * g * g;
458                        
459                        let m_hat = m_biases[layer][i] / (1.0 - beta1.powf(t));
460                        let v_hat = v_biases[layer][i] / (1.0 - beta2.powf(t));
461                        
462                        self.biases_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
463                    }
464                }
465            }
466
467            self.loss_ = total_loss / n_samples as f32;
468            self.n_iter_ = iter + 1;
469
470            // Check convergence
471            if (prev_loss - self.loss_).abs() < self.tol {
472                break;
473            }
474            prev_loss = self.loss_;
475        }
476    }
477
478    pub fn predict(&self, x: &Tensor) -> Tensor {
479        let proba = self.predict_proba(x);
480        let proba_data = proba.data_f32();
481        let n_samples = x.dims()[0];
482
483        let predictions: Vec<f32> = (0..n_samples)
484            .map(|i| {
485                let start = i * self.n_classes_;
486                let probs = &proba_data[start..start + self.n_classes_];
487                probs.iter()
488                    .enumerate()
489                    .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
490                    .map(|(c, _)| c as f32)
491                    .unwrap_or(0.0)
492            })
493            .collect();
494
495        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
496    }
497
498    pub fn predict_proba(&self, x: &Tensor) -> Tensor {
499        let x_data = x.data_f32();
500        let n_samples = x.dims()[0];
501        let n_features = x.dims()[1];
502
503        let mut probs: Vec<f32> = Vec::with_capacity(n_samples * self.n_classes_);
504
505        for i in 0..n_samples {
506            let xi = &x_data[i * n_features..(i + 1) * n_features];
507            let (activations, _) = self.forward(xi);
508            probs.extend(activations.last().unwrap());
509        }
510
511        Tensor::from_slice(&probs, &[n_samples, self.n_classes_]).unwrap()
512    }
513
514    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
515        let predictions = self.predict(x);
516        let pred_data = predictions.data_f32();
517        let y_data = y.data_f32();
518
519        let correct: usize = pred_data.iter()
520            .zip(y_data.iter())
521            .filter(|(&p, &y)| (p - y).abs() < 0.5)
522            .count();
523
524        correct as f32 / y_data.len() as f32
525    }
526}
527
528
/// Multi-Layer Perceptron Regressor
///
/// A feed-forward network with a single linear output unit, trained by
/// mini-batch backpropagation on squared error.
pub struct MLPRegressor {
    /// Number of neurons in each hidden layer.
    pub hidden_layer_sizes: Vec<usize>,
    /// Activation for hidden layers (the output layer is linear).
    pub activation: Activation,
    /// Selected solver. NOTE(review): `fit` currently always performs
    /// Adam-style updates regardless of this setting — confirm intent.
    pub solver: MLPSolver,
    /// L2 regularization strength applied to the weights.
    pub alpha: f32,
    /// Initial (and constant) learning rate.
    pub learning_rate_init: f32,
    /// Maximum number of training epochs.
    pub max_iter: usize,
    /// Convergence tolerance on the change in mean loss between epochs.
    pub tol: f32,
    /// Mini-batch size used during training.
    pub batch_size: usize,
    // Flattened weight matrices, one per layer (indexed i * n_out + j).
    weights_: Vec<Vec<f32>>,
    // Bias vectors, one per layer.
    biases_: Vec<Vec<f32>>,
    // Number of input features seen during `fit`.
    n_features_: usize,
    // Number of output units (currently always 1; see `fit`).
    n_outputs_: usize,
    // Number of epochs actually run by the last `fit`.
    n_iter_: usize,
    // Mean training loss from the last epoch.
    loss_: f32,
}
547impl MLPRegressor {
548    pub fn new(hidden_layer_sizes: Vec<usize>) -> Self {
549        MLPRegressor {
550            hidden_layer_sizes,
551            activation: Activation::ReLU,
552            solver: MLPSolver::Adam,
553            alpha: 0.0001,
554            learning_rate_init: 0.001,
555            max_iter: 200,
556            tol: 1e-4,
557            batch_size: 32,
558            weights_: Vec::new(),
559            biases_: Vec::new(),
560            n_features_: 0,
561            n_outputs_: 1,
562            n_iter_: 0,
563            loss_: 0.0,
564        }
565    }
566
567    pub fn activation(mut self, act: Activation) -> Self {
568        self.activation = act;
569        self
570    }
571
572    pub fn max_iter(mut self, n: usize) -> Self {
573        self.max_iter = n;
574        self
575    }
576
577    fn activate(&self, x: f32) -> f32 {
578        match self.activation {
579            Activation::ReLU => x.max(0.0),
580            Activation::Tanh => x.tanh(),
581            Activation::Sigmoid => 1.0 / (1.0 + (-x).exp()),
582            Activation::Identity => x,
583        }
584    }
585
586    fn activate_derivative(&self, x: f32) -> f32 {
587        match self.activation {
588            Activation::ReLU => if x > 0.0 { 1.0 } else { 0.0 },
589            Activation::Tanh => 1.0 - x.tanh().powi(2),
590            Activation::Sigmoid => {
591                let s = 1.0 / (1.0 + (-x).exp());
592                s * (1.0 - s)
593            }
594            Activation::Identity => 1.0,
595        }
596    }
597
598    fn forward(&self, x: &[f32]) -> (Vec<Vec<f32>>, Vec<Vec<f32>>) {
599        let mut activations = vec![x.to_vec()];
600        let mut pre_activations = Vec::new();
601        let mut current = x.to_vec();
602
603        for (layer_idx, (weights, biases)) in self.weights_.iter().zip(self.biases_.iter()).enumerate() {
604            let n_in = current.len();
605            let n_out = biases.len();
606
607            let mut z = vec![0.0f32; n_out];
608            for j in 0..n_out {
609                z[j] = biases[j];
610                for i in 0..n_in {
611                    z[j] += weights[i * n_out + j] * current[i];
612                }
613            }
614
615            pre_activations.push(z.clone());
616
617            // Identity activation for output layer
618            let a = if layer_idx == self.weights_.len() - 1 {
619                z.clone()
620            } else {
621                z.iter().map(|&zi| self.activate(zi)).collect()
622            };
623
624            activations.push(a.clone());
625            current = a;
626        }
627
628        (activations, pre_activations)
629    }
630
631    fn initialize_weights(&mut self, n_features: usize, n_outputs: usize) {
632        let mut rng = thread_rng();
633        
634        let mut layer_sizes = vec![n_features];
635        layer_sizes.extend(&self.hidden_layer_sizes);
636        layer_sizes.push(n_outputs);
637
638        self.weights_.clear();
639        self.biases_.clear();
640
641        for i in 0..layer_sizes.len() - 1 {
642            let n_in = layer_sizes[i];
643            let n_out = layer_sizes[i + 1];
644
645            let scale = (2.0 / (n_in + n_out) as f32).sqrt();
646            let weights: Vec<f32> = (0..n_in * n_out)
647                .map(|_| (rng.gen::<f32>() - 0.5) * 2.0 * scale)
648                .collect();
649            let biases = vec![0.0f32; n_out];
650
651            self.weights_.push(weights);
652            self.biases_.push(biases);
653        }
654    }
655
656    pub fn fit(&mut self, x: &Tensor, y: &Tensor) {
657        let x_data = x.data_f32();
658        let y_data = y.data_f32();
659        let n_samples = x.dims()[0];
660        let n_features = x.dims()[1];
661
662        self.n_features_ = n_features;
663        self.n_outputs_ = 1;
664
665        self.initialize_weights(n_features, self.n_outputs_);
666
667        // Adam optimizer state
668        let mut m_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
669        let mut v_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
670        let mut m_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
671        let mut v_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
672
673        let beta1 = 0.9f32;
674        let beta2 = 0.999f32;
675        let eps = 1e-8f32;
676
677        let mut indices: Vec<usize> = (0..n_samples).collect();
678        let mut prev_loss = f32::INFINITY;
679
680        for iter in 0..self.max_iter {
681            indices.shuffle(&mut thread_rng());
682            let mut total_loss = 0.0f32;
683
684            for batch_start in (0..n_samples).step_by(self.batch_size) {
685                let batch_end = (batch_start + self.batch_size).min(n_samples);
686                let batch_size = batch_end - batch_start;
687
688                let mut grad_weights: Vec<Vec<f32>> = self.weights_.iter().map(|w| vec![0.0f32; w.len()]).collect();
689                let mut grad_biases: Vec<Vec<f32>> = self.biases_.iter().map(|b| vec![0.0f32; b.len()]).collect();
690
691                for &idx in &indices[batch_start..batch_end] {
692                    let xi = &x_data[idx * n_features..(idx + 1) * n_features];
693                    let yi = y_data[idx];
694
695                    let (activations, pre_activations) = self.forward(xi);
696                    let output = activations.last().unwrap()[0];
697
698                    // MSE loss
699                    let error = output - yi;
700                    total_loss += error * error;
701
702                    // Backward pass
703                    let n_layers = self.weights_.len();
704                    let mut delta = vec![error];
705
706                    for layer in (0..n_layers).rev() {
707                        let a_prev = &activations[layer];
708                        let n_in = a_prev.len();
709                        let n_out = delta.len();
710
711                        for i in 0..n_in {
712                            for j in 0..n_out {
713                                grad_weights[layer][i * n_out + j] += a_prev[i] * delta[j];
714                            }
715                        }
716                        for j in 0..n_out {
717                            grad_biases[layer][j] += delta[j];
718                        }
719
720                        if layer > 0 {
721                            let mut new_delta = vec![0.0f32; n_in];
722                            for i in 0..n_in {
723                                for j in 0..n_out {
724                                    new_delta[i] += self.weights_[layer][i * n_out + j] * delta[j];
725                                }
726                                new_delta[i] *= self.activate_derivative(pre_activations[layer - 1][i]);
727                            }
728                            delta = new_delta;
729                        }
730                    }
731                }
732
733                // Adam update
734                let t = (iter * (n_samples / self.batch_size) + batch_start / self.batch_size + 1) as f32;
735
736                for layer in 0..self.weights_.len() {
737                    for i in 0..self.weights_[layer].len() {
738                        let g = grad_weights[layer][i] / batch_size as f32 + self.alpha * self.weights_[layer][i];
739                        m_weights[layer][i] = beta1 * m_weights[layer][i] + (1.0 - beta1) * g;
740                        v_weights[layer][i] = beta2 * v_weights[layer][i] + (1.0 - beta2) * g * g;
741                        let m_hat = m_weights[layer][i] / (1.0 - beta1.powf(t));
742                        let v_hat = v_weights[layer][i] / (1.0 - beta2.powf(t));
743                        self.weights_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
744                    }
745
746                    for i in 0..self.biases_[layer].len() {
747                        let g = grad_biases[layer][i] / batch_size as f32;
748                        m_biases[layer][i] = beta1 * m_biases[layer][i] + (1.0 - beta1) * g;
749                        v_biases[layer][i] = beta2 * v_biases[layer][i] + (1.0 - beta2) * g * g;
750                        let m_hat = m_biases[layer][i] / (1.0 - beta1.powf(t));
751                        let v_hat = v_biases[layer][i] / (1.0 - beta2.powf(t));
752                        self.biases_[layer][i] -= self.learning_rate_init * m_hat / (v_hat.sqrt() + eps);
753                    }
754                }
755            }
756
757            self.loss_ = total_loss / n_samples as f32;
758            self.n_iter_ = iter + 1;
759
760            if (prev_loss - self.loss_).abs() < self.tol {
761                break;
762            }
763            prev_loss = self.loss_;
764        }
765    }
766
767    pub fn predict(&self, x: &Tensor) -> Tensor {
768        let x_data = x.data_f32();
769        let n_samples = x.dims()[0];
770        let n_features = x.dims()[1];
771
772        let predictions: Vec<f32> = (0..n_samples)
773            .map(|i| {
774                let xi = &x_data[i * n_features..(i + 1) * n_features];
775                let (activations, _) = self.forward(xi);
776                activations.last().unwrap()[0]
777            })
778            .collect();
779
780        Tensor::from_slice(&predictions, &[n_samples]).unwrap()
781    }
782
783    pub fn score(&self, x: &Tensor, y: &Tensor) -> f32 {
784        let predictions = self.predict(x);
785        let pred_data = predictions.data_f32();
786        let y_data = y.data_f32();
787
788        let y_mean: f32 = y_data.iter().sum::<f32>() / y_data.len() as f32;
789        let ss_res: f32 = pred_data.iter().zip(y_data.iter()).map(|(&p, &y)| (y - p).powi(2)).sum();
790        let ss_tot: f32 = y_data.iter().map(|&y| (y - y_mean).powi(2)).sum();
791
792        1.0 - ss_res / ss_tot.max(1e-10)
793    }
794}
795
#[cfg(test)]
mod tests {
    use super::*;

    // Smoke test: perceptron learns AND-style data and returns one label per row.
    #[test]
    fn test_perceptron() {
        let inputs = Tensor::from_slice(
            &[0.0f32, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0],
            &[4, 2],
        ).unwrap();
        let targets = Tensor::from_slice(&[0.0f32, 0.0, 0.0, 1.0], &[4]).unwrap();

        let mut model = Perceptron::new().max_iter(100);
        model.fit(&inputs, &targets);

        let out = model.predict(&inputs);
        assert_eq!(out.dims(), &[4]);
    }

    // Smoke test: MLP classifier trains on XOR-style data and predicts a label per row.
    #[test]
    fn test_mlp_classifier() {
        let inputs = Tensor::from_slice(
            &[0.0f32, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0],
            &[4, 2],
        ).unwrap();
        let targets = Tensor::from_slice(&[0.0f32, 1.0, 1.0, 0.0], &[4]).unwrap();

        let mut model = MLPClassifier::new(vec![4]).max_iter(100);
        model.fit(&inputs, &targets);

        let out = model.predict(&inputs);
        assert_eq!(out.dims(), &[4]);
    }

    // Smoke test: MLP regressor fits a linear relation and outputs one value per row.
    #[test]
    fn test_mlp_regressor() {
        let inputs = Tensor::from_slice(&[1.0f32, 2.0, 3.0, 4.0, 5.0], &[5, 1]).unwrap();
        let targets = Tensor::from_slice(&[2.0f32, 4.0, 6.0, 8.0, 10.0], &[5]).unwrap();

        let mut model = MLPRegressor::new(vec![10]).max_iter(500);
        model.fit(&inputs, &targets);

        let out = model.predict(&inputs);
        assert_eq!(out.dims(), &[5]);
    }
}