rust_ml/model/logistic_regression.rs

use crate::bench::classification_metrics::ClassificationMetrics;
use crate::builders::logistic_regression::LogisticRegressionBuilder;
use crate::core::activations::activation::Activation;
use crate::core::activations::activation_functions::ActivationFn;
use crate::core::activations::leaky_relu::LeakyReLU;
use crate::core::activations::relu::ReLU;
use crate::core::activations::sigmoid::Sigmoid;
use crate::core::activations::tanh::Tanh;
use crate::core::error::ModelError;
use crate::core::types::{Matrix, Vector};
use crate::model::core::base::{BaseModel, OptimizableModel};
use crate::model::core::classification_model::ClassificationModel;
use crate::model::core::param_collection::{GradientCollection, ParamCollection};
use ndarray::{ArrayView, ArrayViewMut, Dimension, IxDyn};

/// A logistic regression model for binary classification.
///
/// This model uses a linear combination of features and weights, followed by an activation function,
/// to predict the probability of an input belonging to the positive class.
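///
/// In math form, the model computes `ŷ = g(wᵀx + b)` for each sample `x`, where `g` is the
/// configured activation function and `ŷ` is interpreted as the probability of the positive class.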
#[derive(Debug)]
pub struct LogisticRegression {
    /// Model weights for each feature
    pub weights: Vector,
    /// Bias term (intercept)
    pub bias: Vector,
    /// Activation function used for prediction
    pub activation_fn: ActivationFn,
    /// Gradient of the loss function with respect to the weights
    dw: Vector,
    /// Gradient of the loss function with respect to the bias
    db: Vector,
    /// Decision threshold used to map probabilities to class labels
    threshold: f64,
}

impl LogisticRegression {
    /// Creates a new LogisticRegression model with the specified number of features and activation function.
    ///
    /// # Arguments
    ///
    /// * `n_features` - The number of input features
    /// * `activation_fn` - The activation function to use
    /// * `threshold` - Decision threshold in `[0, 1]` used by `predict`
    ///
    /// # Returns
    ///
    /// A new LogisticRegression instance with weights initialized to zeros
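    ///
    /// # Panics
    ///
    /// Panics if `threshold` is not in the range `[0, 1]`.
    ///
    /// # Example
    ///
    /// A minimal usage sketch (the `rust_ml` crate path is assumed; adjust to the real crate name):
    ///
    /// ```ignore
    /// use rust_ml::core::activations::activation_functions::ActivationFn;
    /// use rust_ml::model::logistic_regression::LogisticRegression;
    ///
    /// // Two input features, sigmoid activation, 0.5 decision threshold.
    /// let model = LogisticRegression::new(2, ActivationFn::Sigmoid, 0.5);
    /// assert_eq!(model.weights.len(), 2);
    /// ```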
    pub fn new(n_features: usize, activation_fn: ActivationFn, threshold: f64) -> Self {
        let weights = Vector::zeros(n_features);
        let bias = Vector::from_elem(1, 0.0);

        if !(0.0..=1.0).contains(&threshold) {
            panic!("Threshold must be between 0 and 1");
        }

        Self {
            weights,
            bias,
            activation_fn,
            threshold,
            dw: Vector::zeros(n_features),
            db: Vector::from_elem(1, 0.0),
        }
    }

    /// Returns a builder for creating LogisticRegression models with custom configurations.
    ///
    /// # Returns
    ///
    /// A new LogisticRegressionBuilder instance
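    ///
    /// # Example
    ///
    /// A minimal sketch mirroring the builder calls used in this module's tests
    /// (the `rust_ml` crate path and the `Builder` trait import are assumed):
    ///
    /// ```ignore
    /// use rust_ml::builders::builder::Builder;
    /// use rust_ml::core::activations::activation_functions::ActivationFn;
    /// use rust_ml::model::logistic_regression::LogisticRegression;
    ///
    /// let model = LogisticRegression::builder()
    ///     .n_features(3)
    ///     .activation_function(ActivationFn::Sigmoid)
    ///     .build()
    ///     .unwrap();
    /// ```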
    pub fn builder() -> LogisticRegressionBuilder {
        LogisticRegressionBuilder::new()
    }

    /// Computes the activation for the given input vector.
    ///
    /// # Arguments
    ///
    /// * `z` - Input vector
    ///
    /// # Returns
    ///
    /// The result of applying the activation function to the input vector
    fn compute_activation(&self, z: &Vector) -> Result<Vector, ModelError> {
        match self.activation_fn {
            ActivationFn::Sigmoid => Ok(Sigmoid::activate(z)),
            ActivationFn::ReLU => Ok(ReLU::activate(z)),
            ActivationFn::Tanh => Ok(Tanh::activate(z)),
            ActivationFn::LeakyReLU => Ok(LeakyReLU::activate(z)),
        }
    }

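    /// Computes the linear combination `z = wᵀx + b` for every sample (column) of `x`.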
    fn compute_z(&self, x: &Matrix) -> Result<Vector, ModelError> {
        let z = self.weights.t().dot(x) + &self.bias;
        Ok(z)
    }

    /// Computes the derivative of the activation function for the given input vector.
    ///
    /// # Arguments
    ///
    /// * `z` - Input vector
    ///
    /// # Returns
    ///
    /// The derivative of the activation function applied to the input
    fn compute_derivative(&self, z: &Vector) -> Result<Vector, ModelError> {
        match self.activation_fn {
            ActivationFn::Sigmoid => Ok(Sigmoid::derivative(z)),
            ActivationFn::ReLU => Ok(ReLU::derivative(z)),
            ActivationFn::Tanh => Ok(Tanh::derivative(z)),
            ActivationFn::LeakyReLU => Ok(LeakyReLU::derivative(z)),
        }
    }
}

impl ParamCollection for LogisticRegression {
    fn get<D: Dimension>(&self, key: &str) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.weights.view().into_dimensionality::<D>()?),
            "bias" => Ok(self.bias.view().into_dimensionality::<D>()?),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn get_mut<D: Dimension>(&mut self, key: &str) -> Result<ArrayViewMut<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.weights.view_mut().into_dimensionality::<D>()?),
            "bias" => Ok(self.bias.view_mut().into_dimensionality::<D>()?),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn set<D: Dimension>(&mut self, key: &str, value: ArrayView<f64, D>) -> Result<(), ModelError> {
        match key {
            "weights" => {
                self.weights.assign(&value.to_shape(self.weights.shape())?);
                Ok(())
            }
            "bias" => {
                self.bias.assign(&value.to_shape(self.bias.shape())?);
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn param_iter(&self) -> Vec<(&str, ArrayView<f64, IxDyn>)> {
        vec![
            ("weights", self.weights.view().into_dyn()),
            ("bias", self.bias.view().into_dyn()),
        ]
    }
}

impl GradientCollection for LogisticRegression {
    fn get_gradient<D: Dimension>(&self, key: &str) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.dw.view().into_dimensionality::<D>()?),
            "bias" => Ok(self.db.view().into_dimensionality::<D>()?),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn set_gradient<D: Dimension>(
        &mut self,
        key: &str,
        value: ArrayView<f64, D>,
    ) -> Result<(), ModelError> {
        match key {
            "weights" => {
                self.dw.assign(&value.to_shape(self.dw.shape())?);
                Ok(())
            }
            "bias" => {
                self.db.assign(&value.to_shape(self.db.shape())?);
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }
}

impl OptimizableModel<Matrix, Vector> for LogisticRegression {
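    /// Runs the forward pass: `ŷ = g(wᵀx + b)` for every sample (column) of `input`,
    /// with the result clamped to `[ε, 1 - ε]` so downstream log terms stay finite.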
    fn forward(&self, input: &Matrix) -> Result<Vector, ModelError> {
        let z = self.compute_z(input)?;
        let a = self.compute_activation(&z)?;
        // Clamp the activations away from exactly 0 and 1 for numerical safety.
        let epsilon = 1e-15;
        let a_safe = a.mapv(|val| val.clamp(epsilon, 1.0 - epsilon));
        Ok(a_safe)
    }

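    /// Backpropagates through the linear layer: given `dz = dJ/dz`, computes
    /// `dw = X · dz / m` and `db = Σ dz / m` (with `m` the number of samples) and stores them
    /// via `set_gradient`.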
    fn backward(&mut self, input: &Matrix, dz: &Vector) -> Result<(), ModelError> {
        let m = input.shape()[1] as f64;

        // Calculate gradients
        // For matrix dimensions to work correctly:
        // - input has shape (n_features, n_samples)
        // - dz has shape (n_samples,)
        // - to get dw with shape (n_features,), we need to do input.dot(dz)
        let dw = input.dot(dz) / m;
        let db = dz.sum() / m;

        // Set the gradients
        self.set_gradient("weights", dw.view())?;
        self.set_gradient("bias", ArrayView::from(&[db]))?;

        Ok(())
    }

    /// Computes the gradient of the loss with respect to the pre-activation output `z` (dJ/dz).
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// The gradient vector `dz`
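    ///
    /// For the binary cross-entropy loss, the gradient with respect to the prediction is
    /// `dJ/dŷ = (1 - y)/(1 - ŷ) - y/ŷ`, and the chain rule gives `dz = dJ/dŷ * g'(z)`.
    /// With a sigmoid activation this simplifies to `dz = ŷ - y`, which is the identity
    /// checked in `test_compute_output_gradient` below.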
    fn compute_output_gradient(&self, x: &Matrix, y: &Vector) -> Result<Vector, ModelError> {
        // Forward pass to get predictions
        let z = self.compute_z(x)?;
        let y_hat = self.compute_activation(&z)?;

        // Compute the derivative of the activation function
        let g_prime_of_z = self.compute_derivative(&z)?;

        // Compute the gradient of the loss function with respect to the predictions
        let dy = (1.0 - y) / (1.0 - &y_hat) - y / &y_hat;

        // Apply the chain rule to get the gradient of the loss function with respect to z
        // dz = dy * g'(z)
        let dz = dy * g_prime_of_z;
        Ok(dz)
    }
}

#[cfg(test)]
mod optimizable_model_tests {
    use ndarray::{ArrayView1, arr1, arr2};

    use crate::builders::builder::Builder;
    use crate::core::activations::activation::Activation;
    use crate::core::activations::activation_functions::ActivationFn;
    use crate::core::activations::sigmoid::Sigmoid;
    use crate::core::types::{Matrix, Scalar};
    use crate::model::core::base::OptimizableModel;
    use crate::model::core::param_collection::GradientCollection;
    use crate::model::logistic_regression::LogisticRegression;

    #[test]
    fn test_logistic_regression_forward_sigmoid() {
        // Create model
        let mut model = LogisticRegression::builder()
            .n_features(3)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        // Assign weights and bias explicitly.
        let weights = arr1(&[0.5, -0.2, 0.1]);
        let bias = Scalar::from_elem((), 0.2);
        model.weights.assign(&weights);
        model.bias.assign(&bias);
        // Create input data
        let input = Matrix::zeros((3, 3));
        // Compute expected output.
        let z = model.weights.t().dot(&input) + bias;
        let a = Sigmoid::activate(&z);
        let expected_output = a;

        // Forward pass
        let output = model.forward(&input).unwrap();

        assert_eq!(output.shape(), [3]);
        assert_eq!(output, expected_output);
    }

    #[test]
    fn test_compute_output_gradient() {
        // Create model with sigmoid activation
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        let weights = arr1(&[0.5, -0.3]);
        let bias = Scalar::from_elem((), 0.1);
        model.weights.assign(&weights);
        model.bias.assign(&bias);

        // Create test data
        let x = arr2(&[[0.2, 0.7], [0.3, 0.5]]);
        let y = arr1(&[0.5, 1.0]); // Expected outputs

        // Compute forward pass to get predictions
        let y_hat = model.forward(&x).unwrap();
        // Manually compute the gradient using the formula for sigmoid
        // dz = y_hat - y
        let expected_dz = &y_hat - &y;
        // Get the computed gradient
        let dz = model.compute_output_gradient(&x, &y).unwrap();

        // Check dimensions and values (with a small epsilon for floating point comparison)
        assert_eq!(dz.shape(), expected_dz.shape());

        for (a, b) in dz.iter().zip(expected_dz.iter()) {
            assert!((a - b).abs() < 1e-5, "Expected {}, got {}", b, a);
        }
    }

    #[test]
    fn test_backward() {
        // Create model with sigmoid activation
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        let weights = arr1(&[0.5, -0.3]);
        let bias = Scalar::from_elem((), 0.1);
        model.weights.assign(&weights);
        model.bias.assign(&bias);

        // Create test data
        let x = arr2(&[[0.2, 0.7], [0.3, 0.5]]); // 2x2 matrix with 2 features and 2 samples

        // Create a test gradient vector
        let dz = arr1(&[0.1, -0.2]);

        println!("x.shape(): {:?}", x.shape());
        println!("dz.shape(): {:?}", dz.shape());

        // Call backward
        model.backward(&x, &dz).unwrap();

        // Manually compute the expected gradients
        // For weights: dw = (x.dot(dz)) / m where m is number of samples
        let m = x.shape()[1] as f64;
        let expected_dw = x.dot(&dz) / m;

        // For bias: db = sum(dz) / m
        let expected_db = dz.sum() / m;

        // Get the gradients from the model
        let actual_dw: ArrayView1<f64> = model.get_gradient("weights").unwrap();
        let actual_db: ArrayView1<f64> = model.get_gradient("bias").unwrap();
        let actual_db_value = actual_db[0];

        // Check dimensions and values (with a small epsilon for floating point comparison)
        assert!(
            (actual_db_value - expected_db).abs() < 1e-5,
            "Expected {}, got {}",
            expected_db,
            actual_db_value
        );

        for (a, b) in actual_dw.iter().zip(expected_dw.iter()) {
            assert!((a - b).abs() < 1e-5, "Expected {}, got {}", b, a);
        }
    }
}

/// Implementation of BaseModel trait for LogisticRegression
impl BaseModel<Matrix, Vector> for LogisticRegression {
    /// Makes binary predictions for the given input data.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    ///
    /// # Returns
    ///
    /// A vector of predictions (0.0 or 1.0)
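    ///
    /// # Example
    ///
    /// A minimal sketch (the `rust_ml` crate path is assumed); columns of `x` are samples:
    ///
    /// ```ignore
    /// use ndarray::{arr1, arr2};
    /// use rust_ml::core::activations::activation_functions::ActivationFn;
    /// use rust_ml::model::core::base::BaseModel;
    /// use rust_ml::model::logistic_regression::LogisticRegression;
    ///
    /// let mut model = LogisticRegression::new(2, ActivationFn::Sigmoid, 0.5);
    /// model.weights = arr1(&[0.5, -0.5]);
    /// let x = arr2(&[[0.2, 0.8], [0.9, 0.1]]); // shape (n_features, n_samples)
    /// let predictions = model.predict(&x).unwrap(); // e.g. [0.0, 1.0]
    /// ```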
    fn predict(&self, x: &Matrix) -> Result<Vector, ModelError> {
        let bias = self.bias[0];
        let z = self.weights.dot(x) + bias;
        let a = self.compute_activation(&z)?;
        let y_hat = a.mapv(|p| if p >= self.threshold { 1.0 } else { 0.0 });
        Ok(y_hat)
    }

    /// Computes the cost/loss for the given input and expected output.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// The computed loss value
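    ///
    /// The cost is the mean binary cross-entropy:
    /// `J = -(1/m) Σ [ y_i ln(ŷ_i) + (1 - y_i) ln(1 - ŷ_i) ]`.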
    fn compute_cost(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let m = y.len() as f64;
        let y_hat = self.forward(x)?;
        let loss = -(y * y_hat.ln() + (1.0 - y) * (1.0 - &y_hat).ln());
        let cost = loss.sum() / m;
        Ok(cost)
    }
}

#[cfg(test)]
mod base_model_tests {
    use super::*;
    use crate::builders::builder::Builder;
    use crate::model::core::base::BaseModel;
    use ndarray::{arr1, arr2};

    #[test]
    fn test_predict() {
        // Create a model with known weights and bias
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        model.weights = arr1(&[0.5, -0.5]);
        model.bias = arr1(&[0.1]);

        // Create test data
        let x = arr2(&[[0.2, 0.8], [0.9, 0.1]]); // 2 features, 2 examples

        // Predict
        let predictions = model.predict(&x).unwrap();
        // The expected output from the activation function is:
        // [0.4378235, 0.61063923], so the prediction should return:
        // [0.0, 1.0]
        let expected = arr1(&[0.0, 1.0]);

        // Assert predictions are as expected
        assert_eq!(predictions.len(), 2);
        assert_eq!(predictions, expected);
    }

    #[test]
    fn test_compute_cost() {
        // Create a model
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();

        model.weights = arr1(&[1.0, 1.0]);
        model.bias = arr1(&[0.0]);

        // Create test data
        let x = arr2(&[[10.0, -10.0], [10.0, -10.0]]); // 2 features, 2 examples
        let y = arr1(&[0.0, 1.0]); // Target values

        // First manually calculate the expected output
        let y_hat = model.forward(&x).unwrap();
        let loss = -(&y * &y_hat.ln() + (1.0 - &y) * (1.0 - &y_hat).ln());
        println!("loss: {:?}", loss);
        let expected_cost = loss.sum() / 2.0; // Average over the number of samples

        // Get the cost from the model
        let cost = model.compute_cost(&x, &y).unwrap();

        // Allow for small floating-point differences
        assert!(
            (cost - expected_cost).abs() < 1e-5,
            "Expected cost {}, got {}",
            expected_cost,
            cost
        );
    }

    #[test]
    fn test_predict_all_classes() {
        // Create model with weights that will produce both 0 and 1 predictions
        let mut model = LogisticRegression::builder()
            .n_features(2)
            .activation_function(ActivationFn::Sigmoid)
            .build()
            .unwrap();
        model.weights = arr1(&[2.0, -2.0]);
        model.bias = arr1(&[0.0]);

        // Create test data to generate both classes
        // The matrix is (n_features, n_samples): column 0 gives z = 1.8 (predict 1),
        // column 1 gives z = -1.8 (predict 0).
        let x = arr2(&[
            [1.0, 0.1], // feature 0 for each sample
            [0.1, 1.0], // feature 1 for each sample
        ]);

        // Get predictions
        let predictions = model.predict(&x).unwrap();

        // Check that we have both classes
        assert_eq!(predictions[0], 1.0);
        assert_eq!(predictions[1], 0.0);
    }
}

/// Implementation of ClassificationModel trait for LogisticRegression
impl ClassificationModel<Matrix, Vector> for LogisticRegression {
    /// Calculates the accuracy of the model on the given data.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// The accuracy as a value between 0.0 and 1.0
    fn accuracy(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_pred = self.predict(x)?;
        let y_pred_binary = y_pred.mapv(|val| if val >= self.threshold { 1.0 } else { 0.0 });
        let correct = y_pred_binary
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| (pred - actual).abs() < f64::EPSILON)
            .count();
        Ok(correct as f64 / y.len() as f64)
    }

    /// Calculates the loss (binary cross-entropy) of the model on the given data.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// The computed loss value
    fn loss(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        // Binary cross-entropy loss, computed on the predicted probabilities from the
        // forward pass rather than on the thresholded class labels.
        let y_pred = self.forward(x)?;
        let epsilon = 1e-15; // prevent ln(0)
        let y_pred = y_pred.mapv(|val| val.clamp(epsilon, 1.0 - epsilon));
        let loss = y
            .iter()
            .zip(y_pred.iter())
            .map(|(y_i, y_pred_i)| -y_i * y_pred_i.ln() - (1.0 - y_i) * (1.0 - y_pred_i).ln())
            .sum::<f64>()
            / y.len() as f64;
        Ok(loss)
    }

    /// Calculates the recall (sensitivity) of the model on the given data.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// The recall as a value between 0.0 and 1.0
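    ///
    /// Computed as `TP / (TP + FN)`; returns 0.0 when there are no actual positives.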
    fn recall(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_pred = self.predict(x)?;

        let true_positives = y_pred
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| *pred > 0.5 && *actual > 0.5)
            .count();

        let actual_positives = y.iter().filter(|&&actual| actual > 0.5).count();

        if actual_positives == 0 {
            return Ok(0.0);
        }

        Ok(true_positives as f64 / actual_positives as f64)
    }

    /// Calculates the F1 score of the model on the given data.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// The F1 score as a value between 0.0 and 1.0
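    ///
    /// Computed as `F1 = 2 * precision * recall / (precision + recall)`, with
    /// `precision = TP / (TP + FP)` and `recall = TP / (TP + FN)`; returns 0.0 when
    /// `precision + recall` is zero.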
    fn f1_score(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_pred = self.predict(x)?;
        let y_pred_binary = y_pred.mapv(|val| if val >= 0.5 { 1.0 } else { 0.0 });

        let true_positives = y_pred_binary
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| *pred > 0.5 && *actual > 0.5)
            .count() as f64;

        let false_positives = y_pred_binary
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| *pred > 0.5 && *actual <= 0.5)
            .count() as f64;

        let false_negatives = y_pred_binary
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| *pred <= 0.5 && *actual > 0.5)
            .count() as f64;

        let precision = if true_positives + false_positives == 0.0 {
            0.0
        } else {
            true_positives / (true_positives + false_positives)
        };

        let recall = if true_positives + false_negatives == 0.0 {
            0.0
        } else {
            true_positives / (true_positives + false_negatives)
        };

        if precision + recall == 0.0 {
            return Ok(0.0);
        }

        Ok(2.0 * precision * recall / (precision + recall))
    }

    /// Computes all classification metrics for the model on the given data.
    ///
    /// # Arguments
    ///
    /// * `x` - Input feature matrix
    /// * `y` - Expected output vector
    ///
    /// # Returns
    ///
    /// A ClassificationMetrics struct containing accuracy, loss, precision, recall and F1 score
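    ///
    /// # Example
    ///
    /// A minimal sketch (the `rust_ml` crate path is assumed):
    ///
    /// ```ignore
    /// use ndarray::{arr1, arr2};
    /// use rust_ml::core::activations::activation_functions::ActivationFn;
    /// use rust_ml::model::core::classification_model::ClassificationModel;
    /// use rust_ml::model::logistic_regression::LogisticRegression;
    ///
    /// let mut model = LogisticRegression::new(2, ActivationFn::Sigmoid, 0.5);
    /// model.weights = arr1(&[0.5, -0.5]);
    /// let x = arr2(&[[0.2, 0.8], [0.9, 0.1]]); // (n_features, n_samples)
    /// let y = arr1(&[0.0, 1.0]);
    /// let metrics = model.compute_metrics(&x, &y).unwrap();
    /// println!("accuracy = {}, f1 = {}", metrics.accuracy, metrics.f1_score);
    /// ```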
    fn compute_metrics(&self, x: &Matrix, y: &Vector) -> Result<ClassificationMetrics, ModelError> {
        let accuracy = self.accuracy(x, y)?;
        let loss = self.loss(x, y)?;
        let recall = self.recall(x, y)?;
        let f1 = self.f1_score(x, y)?;

        // Calculate precision
        let y_pred = self.predict(x)?;
        let y_pred_binary = y_pred.mapv(|val| if val >= 0.5 { 1.0 } else { 0.0 });

        let true_positives = y_pred_binary
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| *pred > 0.5 && *actual > 0.5)
            .count() as f64;

        let false_positives = y_pred_binary
            .iter()
            .zip(y.iter())
            .filter(|&(pred, actual)| *pred > 0.5 && *actual <= 0.5)
            .count() as f64;

        let precision = if true_positives + false_positives == 0.0 {
            0.0
        } else {
            true_positives / (true_positives + false_positives)
        };

        Ok(ClassificationMetrics {
            accuracy,
            loss,
            precision,
            recall,
            f1_score: f1,
        })
    }
}