rust_ml/model/linear_regression.rs

use crate::bench::regression_metrics::RegressionMetrics;
use crate::builders::linear_regression::LinearRegressionBuilder;
use crate::core::error::ModelError;
use crate::core::types::{Matrix, Scalar, Vector};
use crate::model::core::base::{BaseModel, OptimizableModel};
use crate::model::core::param_collection::{GradientCollection, ParamCollection};
use crate::model::core::regression_model::RegressionModel;
use ndarray::{Array0, Array1, ArrayView, ArrayView0, ArrayView1};

/// A Linear Regression model that fits the equation y = W.T @ x + b
///
/// The model predicts a continuous output from input features using a linear
/// relationship, where W is the weight vector and b is the bias. Linear
/// regression is one of the most fundamental machine learning algorithms:
/// it predicts numerical values by establishing a linear relationship between
/// the independent variables (features) and the dependent variable (target).
///
/// The model minimizes the Mean Squared Error (MSE) between predictions and actual values.
#[derive(Debug)]
pub struct LinearRegression {
    /// The weights vector (W) for the linear model, holding the coefficient
    /// for each feature in the input data
    pub w: Vector,
    /// The bias term (b) for the linear model, the y-intercept of the
    /// linear equation
    pub b: Scalar,
    /// Gradient of the cost function J with respect to the weights
    dw: Vector,
    /// Gradient of the cost function J with respect to the bias
    db: Scalar,
}

impl LinearRegression {
    /// Creates a new LinearRegression model with zero-initialized weights and bias
    ///
    /// Initializes a linear regression model with all weights and the bias set to
    /// zero. This creates an untrained model that can later be optimized using
    /// various training algorithms.
    ///
    /// # Arguments
    /// * `n_x` - Number of input features in the dataset
    ///
    /// # Returns
    /// * `Result<Self, ModelError>` - A new LinearRegression instance, or an error if initialization fails
    ///
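    /// # Examples
    /// A minimal usage sketch; the `rust_ml` crate path is assumed from this
    /// file's location, so the example is left unchecked (`ignore`):
    /// ```ignore
    /// use rust_ml::model::linear_regression::LinearRegression;
    ///
    /// let model = LinearRegression::new(3).unwrap();
    /// assert_eq!(model.w.len(), 3); // one zero-initialized weight per feature
    /// ```
    ///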
    pub fn new(n_x: usize) -> Result<Self, ModelError> {
        let weights = Array1::<f64>::zeros(n_x);
        let bias = Array0::<f64>::from_elem((), 0.0);
        Ok(Self {
            w: weights,
            b: bias,
            dw: Array1::<f64>::zeros(n_x),
            db: Array0::<f64>::from_elem((), 0.0),
        })
    }

    /// Returns a builder for creating a LinearRegression with custom configuration
    ///
    /// The builder pattern allows for more flexible initialization of the model
    /// with various optional parameters and configurations.
    ///
    /// # Returns
    /// * `LinearRegressionBuilder` - A builder for LinearRegression with a fluent API
    ///
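    /// # Examples
    /// A sketch mirroring the builder usage in the test modules below (left
    /// unchecked since the crate path is assumed):
    /// ```ignore
    /// use rust_ml::builders::builder::Builder;
    /// use rust_ml::model::linear_regression::LinearRegression;
    ///
    /// let model = LinearRegression::builder()
    ///     .n_input_features(2)
    ///     .build()
    ///     .unwrap();
    /// ```
    ///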
    pub fn builder() -> LinearRegressionBuilder {
        LinearRegressionBuilder::new()
    }
}

impl ParamCollection for LinearRegression {
    /// Returns a typed view of the parameter stored under `key`.
    ///
    /// Note: `into_dimensionality::<D>().unwrap()` panics if the requested
    /// dimensionality does not match the stored one ("weights" is 1-D, "bias" is 0-D).
    fn get<D: ndarray::Dimension>(&self, key: &str) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.w.view().into_dimensionality::<D>().unwrap()),
            "bias" => Ok(self.b.view().into_dimensionality::<D>().unwrap()),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    /// Returns a mutable typed view of the parameter stored under `key`.
    fn get_mut<D: ndarray::Dimension>(
        &mut self,
        key: &str,
    ) -> Result<ndarray::ArrayViewMut<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.w.view_mut().into_dimensionality::<D>().unwrap()),
            "bias" => Ok(self.b.view_mut().into_dimensionality::<D>().unwrap()),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    /// Overwrites the parameter stored under `key` with `value`.
    fn set<D: ndarray::Dimension>(
        &mut self,
        key: &str,
        value: ArrayView<f64, D>,
    ) -> Result<(), ModelError> {
        match key {
            "weights" => {
                self.w
                    .assign(&value.into_dimensionality::<ndarray::Ix1>().unwrap());
                Ok(())
            }
            "bias" => {
                self.b
                    .assign(&value.into_dimensionality::<ndarray::Ix0>().unwrap());
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    /// Iterates over all parameters as dynamically-dimensioned views.
    fn param_iter(&self) -> Vec<(&str, ArrayView<f64, ndarray::IxDyn>)> {
        vec![
            ("weights", self.w.view().into_dyn()),
            ("bias", self.b.view().into_dyn()),
        ]
    }
}

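// A minimal round-trip sketch for the `ParamCollection` accessors above,
// following the same test-module pattern used elsewhere in this file.
#[cfg(test)]
mod lr_param_collection_tests {
    use crate::model::core::param_collection::ParamCollection;
    use crate::model::linear_regression::LinearRegression;
    use ndarray::{ArrayView1, arr1};

    #[test]
    fn test_param_get_set_roundtrip() {
        let mut model = LinearRegression::new(2).unwrap();
        // Setting "weights" and reading them back should preserve the values.
        let new_w = arr1(&[0.5, -1.5]);
        model.set("weights", new_w.view()).unwrap();
        let w: ArrayView1<f64> = model.get("weights").unwrap();
        assert_eq!(w, new_w.view());
        // Unknown keys surface as `ModelError::KeyError` rather than panicking.
        assert!(model.get::<ndarray::Ix1>("no_such_param").is_err());
    }
}
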
impl GradientCollection for LinearRegression {
    /// Returns a typed view of the gradient stored under `key`.
    fn get_gradient<D: ndarray::Dimension>(
        &self,
        key: &str,
    ) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.dw.view().into_dimensionality::<D>().unwrap()),
            "bias" => Ok(self.db.view().into_dimensionality::<D>().unwrap()),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    /// Overwrites the gradient stored under `key` with `value`.
    fn set_gradient<D: ndarray::Dimension>(
        &mut self,
        key: &str,
        value: ArrayView<f64, D>,
    ) -> Result<(), ModelError> {
        match key {
            "weights" => {
                self.dw
                    .assign(&value.into_dimensionality::<ndarray::Ix1>().unwrap());
                Ok(())
            }
            "bias" => {
                self.db
                    .assign(&value.into_dimensionality::<ndarray::Ix0>().unwrap());
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }
}

impl BaseModel<Matrix, Vector> for LinearRegression {
    /// Predicts output values for given input features
    ///
    /// Applies the linear regression model to make predictions on new data.
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    ///
    /// # Returns
    /// * `Result<Vector, ModelError>` - Predicted values of shape (m, )
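    ///
    /// # Examples
    /// A sketch of the expected shapes (left unchecked; crate paths assumed):
    /// ```ignore
    /// use rust_ml::model::core::base::BaseModel;
    /// use rust_ml::model::linear_regression::LinearRegression;
    ///
    /// let model = LinearRegression::new(2).unwrap();
    /// let x = ndarray::arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]); // (n_x, m) = (2, 3)
    /// let y_hat = model.predict(&x).unwrap();
    /// assert_eq!(y_hat.len(), 3); // one prediction per example
    /// ```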
    fn predict(&self, x: &Matrix) -> Result<Vector, ModelError> {
        let w: ArrayView1<f64> = self.get("weights")?;
        let b: ArrayView0<f64> = self.get("bias")?;
        // `t()` is a no-op on a 1-D array: `w.dot(x)` contracts over the feature
        // axis to give a vector of shape (m,), and the 0-D bias broadcasts over it.
        let y_hat = w.t().dot(x) + b;
        Ok(y_hat)
    }

    /// Computes the Mean Squared Error cost between predictions and target values
    ///
    /// Calculates the cost using the formula: J = (1/2m) * Σ(y_hat - y)²
    /// where m is the number of examples
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The computed cost value
    fn compute_cost(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_hat = self.predict(x)?;
        // m is the number of examples (columns of x), not the total element count.
        let m = x.shape()[1] as f64;
        let cost = (&y_hat - y).mapv(|v| v.powi(2)).sum() / (2.0 * m);
        Ok(cost)
    }
}

#[cfg(test)]
mod lr_base_model_tests {
    use crate::model::core::base::BaseModel;
    use crate::model::linear_regression::LinearRegression;
    use ndarray::{arr0, arr1, arr2};

    #[test]
    fn test_predict() {
        let mut lr = LinearRegression::new(2).unwrap();
        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let weights = arr1(&[1.0, 2.0]);
        let bias = arr0(0.0);
        // Set weights and bias explicitly.
        lr.w = weights;
        lr.b = bias;
        // y = [1.0 * 1.0 + 2.0 * 4.0, 1.0 * 2.0 + 2.0 * 5.0, 1.0 * 3.0 + 2.0 * 6.0]
        // y = [1 + 8, 2 + 10, 3 + 12]
        // y = [9, 12, 15]
        let y = arr1(&[9.0, 12.0, 15.0]);

        let y_hat = lr.predict(&x).unwrap();

        assert_eq!(&y, &y_hat);
    }
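
    // A sketch test for `compute_cost` with the same data: against
    // y = [8, 11, 14] every residual is 1, so J = (1/(2m)) * Σr² = 3 / 6 = 0.5.
    #[test]
    fn test_compute_cost() {
        let mut lr = LinearRegression::new(2).unwrap();
        lr.w = arr1(&[1.0, 2.0]);
        lr.b = arr0(0.0);
        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);
        let cost = lr.compute_cost(&x, &y).unwrap();
        assert!((cost - 0.5).abs() < 1e-12);
    }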
}

/// Implementation of the `OptimizableModel` trait for `LinearRegression`
impl OptimizableModel<Matrix, Vector> for LinearRegression {
    /// Performs forward propagation to compute predictions
    ///
    /// Calculates the linear function y_hat = W.T @ x + b for a batch of examples.
    ///
    /// # Arguments
    /// * `input` - Input features of shape (n_x, m) where n_x is the number of features and m is the batch size
    ///
    /// # Returns
    /// * `Result<Vector, ModelError>` - The predicted values as a vector of shape (m, )
    fn forward(&self, input: &Matrix) -> Result<Vector, ModelError> {
        let y_hat = &self.w.t().dot(input) + &self.b;
        Ok(y_hat)
    }

    /// Performs backward propagation to compute gradients
    ///
    /// Calculates the gradients of the cost function with respect to the model parameters (weights and bias).
    /// For linear regression with MSE loss, the gradients are:
    /// dw = (1/m) * X @ (y_hat - y).T
    /// db = (1/m) * sum(y_hat - y)
    ///
    /// # Arguments
    /// * `input` - Input features of shape (n_x, m)
    /// * `output_grad` - Gradient of the cost with respect to the output, typically (y_hat - y)
    ///
    /// # Returns
    /// * `Result<(), ModelError>` - Success or error status of the operation
    fn backward(&mut self, input: &Matrix, output_grad: &Vector) -> Result<(), ModelError> {
        let m = input.shape()[1] as f64;

        // Compute the weight gradient dw = (1/m) * X @ (y_hat - y).
        // (`t()` is a no-op on the 1-D `output_grad`.)
        let dw: Vector = input.dot(&output_grad.t()) / m;
        let dw: ArrayView1<f64> = dw.view();

        // Compute the bias gradient db. `output_grad` holds one entry per example,
        // so db is the mean of its entries: db = (1/m) * sum(y_hat - y).
        let db = output_grad.sum() / m;
        let binding = Scalar::from_elem((), db);
        let db: ArrayView0<f64> = binding.view();

        // Store the gradients on the model
        self.set_gradient("weights", dw)?;
        self.set_gradient("bias", db)?;

        Ok(())
    }

    /// Computes the gradient of the cost with respect to the output predictions
    ///
    /// For Mean Squared Error, the gradient dJ/dy_hat is: (y_hat - y)
    /// where y_hat is the model's prediction and y is the ground truth.
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<Vector, ModelError>` - The gradient of the cost function with respect to outputs
    fn compute_output_gradient(&self, x: &Matrix, y: &Vector) -> Result<Vector, ModelError> {
        let y_hat = self.forward(x)?;
        // The elementwise residual y_hat - y is the per-example gradient, shape (m,).
        let dy = &y_hat - y;
        Ok(dy)
    }
}

#[cfg(test)]
mod lr_optimizable_model_tests {
    use ndarray::{ArrayView0, ArrayView1, arr0, arr1, arr2};

    use crate::{
        builders::builder::Builder,
        model::core::{base::OptimizableModel, param_collection::GradientCollection},
    };

    use super::LinearRegression;

    /// Test the forward propagation of the LinearRegression model
    #[test]
    fn test_forward_propagation() {
        let n_features = 2;
        let mut model = LinearRegression::builder()
            .n_input_features(n_features)
            .build()
            .unwrap();

        // Initialize weights and bias
        let weights = arr1(&[1.0, 2.0]);
        model.w.assign(&weights);
        let bias = arr0(0.0);
        model.b.assign(&bias);

        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        // y = [1.0 * 1.0 + 2.0 * 4.0, 1.0 * 2.0 + 2.0 * 5.0, 1.0 * 3.0 + 2.0 * 6.0]
        // y = [1 + 8, 2 + 10, 3 + 12]
        let y = arr1(&[9.0, 12.0, 15.0]);
        // Perform forward propagation
        let y_hat = model.forward(&x).unwrap();
        // Check if the predicted values match the expected values
        assert_eq!(y_hat, y);
        // Check if the weights and bias are unchanged
        assert_eq!(model.w, weights);
        assert_eq!(model.b, bias);
    }

    #[test]
    fn test_compute_output_grad() {
        let n_features = 2;
        let mut model = LinearRegression::builder()
            .n_input_features(n_features)
            .build()
            .unwrap();

        // Initialize weights and bias
        let weights = arr1(&[1.0, 2.0]);
        model.w.assign(&weights);
        let bias = arr0(0.0);
        model.b.assign(&bias);

        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);
        // y_hat = [9, 12, 15], so the elementwise gradient is
        // dy = y_hat - y = [1.0, 1.0, 1.0]
        let expected_grad = arr1(&[1.0, 1.0, 1.0]);
        // Compute the output gradient
        let output_grad = model.compute_output_gradient(&x, &y).unwrap();
        // Check if the output gradient is computed correctly
        assert_eq!(output_grad, expected_grad);
    }

    #[test]
    fn test_backward_propagation() {
        let n_features = 2;
        let mut model = LinearRegression::builder()
            .n_input_features(n_features)
            .build()
            .unwrap();

        // Initialize weights and bias
        let weights = arr1(&[1.0, 2.0]);
        model.w.assign(&weights);
        let bias = arr0(0.0);
        model.b.assign(&bias);

        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);
        // Compute the output gradient
        let output_grad = model.compute_output_gradient(&x, &y).unwrap();
        // Perform backward propagation
        model.backward(&x, &output_grad).unwrap();
        // Compute expected gradients
        // dy = (y_hat - y) = [1.0, 1.0, 1.0]
        // expected_dw = (1/m) * (x @ dy.T)
        // expected_dw = (1/3) * ([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] @ [1.0, 1.0, 1.0].T)
        // expected_dw = (1/3) * [1.0 + 2.0 + 3.0, 4.0 + 5.0 + 6.0]
        // expected_dw = (1/3) * [6.0, 15.0]
        let expected_dw = arr1(&[2.0, 5.0]);
        let expected_db = arr0(1.0);

        // Check if the gradients are computed correctly
        let dw: ArrayView1<f64> = model.get_gradient("weights").unwrap();
        let db: ArrayView0<f64> = model.get_gradient("bias").unwrap();
        assert_eq!(dw, expected_dw);
        assert_eq!(db, expected_db);
    }
}

impl RegressionModel<Matrix, Vector> for LinearRegression {
    /// Calculates the Mean Squared Error between predictions and target values
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The MSE value
    fn mse(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_hat = self.predict(x)?;
        // m is the number of examples (columns of x), not the total element count.
        let m = x.shape()[1] as f64;
        Ok((&y_hat - y).mapv(|v| v.powi(2)).sum() / m)
    }

    /// Calculates the Root Mean Squared Error between predictions and target values
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The RMSE value (square root of MSE)
    fn rmse(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_hat = self.predict(x)?;
        let m = x.shape()[1] as f64;
        let rmse = ((&y_hat - y).mapv(|v| v.powi(2)).sum() / m).sqrt();
        Ok(rmse)
    }

    /// Calculates the R-squared (coefficient of determination) for the model
    ///
    /// R² represents the proportion of variance in the dependent variable
    /// that is predictable from the independent variables.
    /// R² = 1 - (MSE / Variance of y)
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The R² value (at most 1.0; negative when the
    ///   model fits worse than predicting the mean of `y`)
    fn r2(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let mse = self.mse(x, y)?;
        // Variance of y: mean squared deviation of the targets from their mean.
        let y_mean = y.mean().unwrap();
        let var_y = y.mapv(|v| (v - y_mean).powi(2)).sum() / y.len() as f64;
        Ok(1.0 - mse / var_y)
    }

    /// Computes a complete set of regression metrics for model evaluation
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m,)
    ///
    /// # Returns
    /// * `Result<RegressionMetrics, ModelError>` - Struct containing MSE, RMSE, and R²
    fn compute_metrics(&self, x: &Matrix, y: &Vector) -> Result<RegressionMetrics, ModelError> {
        let mse = self.mse(x, y)?;
        let rmse = self.rmse(x, y)?;
        let r2 = self.r2(x, y)?;
        Ok(RegressionMetrics { mse, rmse, r2 })
    }
}
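
// Sketch tests for the regression metrics above, following the same
// test-module pattern as the rest of this file.
#[cfg(test)]
mod lr_regression_model_tests {
    use crate::model::core::regression_model::RegressionModel;
    use crate::model::linear_regression::LinearRegression;
    use ndarray::{arr0, arr1, arr2};

    #[test]
    fn test_metrics() {
        let mut lr = LinearRegression::new(2).unwrap();
        lr.w = arr1(&[1.0, 2.0]);
        lr.b = arr0(0.0);
        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);
        // y_hat = [9, 12, 15]: every residual is 1.0, so MSE = RMSE = 1.0.
        assert!((lr.mse(&x, &y).unwrap() - 1.0).abs() < 1e-12);
        assert!((lr.rmse(&x, &y).unwrap() - 1.0).abs() < 1e-12);
        // Var(y): ȳ = 11, mean squared deviation = (9 + 0 + 9) / 3 = 6,
        // so R² = 1 - MSE / Var(y) = 1 - 1/6.
        let r2 = lr.r2(&x, &y).unwrap();
        assert!((r2 - (1.0 - 1.0 / 6.0)).abs() < 1e-12);
    }
}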