rust_ml/model/linear_regression.rs
use crate::bench::regression_metrics::RegressionMetrics;
use crate::builders::linear_regression::LinearRegressionBuilder;
use crate::core::error::ModelError;
use crate::core::types::{Matrix, Scalar, Vector};
use crate::model::core::base::{BaseModel, OptimizableModel};
use crate::model::core::param_collection::{GradientCollection, ParamCollection};
use crate::model::core::regression_model::RegressionModel;
use ndarray::{Array0, Array1, ArrayView, ArrayView0, ArrayView1};

/// A Linear Regression model that fits the equation y = W.T @ x + b
///
/// This model predicts a continuous output from input features using a linear
/// relationship, where W is the weight vector and b is the bias.
/// Linear regression is one of the most fundamental machine learning algorithms,
/// predicting numerical values by establishing a linear relationship
/// between the independent variables (features) and the dependent variable (target).
///
/// The model minimizes the Mean Squared Error (MSE) between predictions and actual values.
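///
/// # Examples
///
/// A minimal usage sketch; the crate name `rust_ml` is assumed from the file
/// path, so adjust the import to your crate layout:
///
/// ```ignore
/// use rust_ml::model::linear_regression::LinearRegression;
///
/// // Two input features; weights and bias start at zero.
/// let model = LinearRegression::new(2).unwrap();
/// assert_eq!(model.w.len(), 2);
/// ```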
#[derive(Debug)]
pub struct LinearRegression {
    /// The weights vector (W) for the linear model, holding the coefficient
    /// for each feature in the input data
    pub w: Vector,
    /// The bias term (b) for the linear model, representing the y-intercept
    /// of the linear equation
    pub b: Scalar,
    /// Gradient of the cost function J with respect to the weights
    dw: Vector,
    /// Gradient of the cost function J with respect to the bias
    db: Scalar,
}

impl LinearRegression {
    /// Creates a new LinearRegression model with zero-initialized weights and bias
    ///
    /// Initializes a linear regression model with all weights and the bias set
    /// to zero. This creates an untrained model that can later be optimized
    /// using various training algorithms.
    ///
    /// # Arguments
    /// * `n_x` - Number of input features in the dataset
    ///
    /// # Returns
    /// * `Result<Self, ModelError>` - A new LinearRegression instance, or an error if initialization fails
    ///
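    /// # Examples
    ///
    /// A small sketch of the zero initialization (field access as in the test
    /// modules further down in this file):
    ///
    /// ```ignore
    /// let model = LinearRegression::new(3).unwrap();
    /// // Every parameter starts at zero, so every prediction is initially 0.0.
    /// assert!(model.w.iter().all(|&w| w == 0.0));
    /// assert_eq!(model.b[()], 0.0);
    /// ```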
    pub fn new(n_x: usize) -> Result<Self, ModelError> {
        let weights = Array1::<f64>::zeros(n_x);
        let bias = Array0::<f64>::from_elem((), 0.0);
        Ok(Self {
            w: weights,
            b: bias,
            dw: Array1::<f64>::zeros(n_x),
            db: Array0::<f64>::from_elem((), 0.0),
        })
    }

    /// Returns a builder for creating a LinearRegression with custom configuration
    ///
    /// The builder pattern allows more flexible initialization of the model
    /// with various optional parameters and configurations.
    ///
    /// # Returns
    /// * `LinearRegressionBuilder` - A builder for LinearRegression with a fluent API
    ///
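    /// # Examples
    ///
    /// A sketch of the builder flow, mirroring the test modules below
    /// (`n_input_features` comes from `LinearRegressionBuilder`):
    ///
    /// ```ignore
    /// let model = LinearRegression::builder()
    ///     .n_input_features(2)
    ///     .build()
    ///     .unwrap();
    /// ```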
    pub fn builder() -> LinearRegressionBuilder {
        LinearRegressionBuilder::new()
    }
}

impl ParamCollection for LinearRegression {
    fn get<D: ndarray::Dimension>(&self, key: &str) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.w.view().into_dimensionality::<D>().unwrap()),
            "bias" => Ok(self.b.view().into_dimensionality::<D>().unwrap()),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn get_mut<D: ndarray::Dimension>(
        &mut self,
        key: &str,
    ) -> Result<ndarray::ArrayViewMut<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.w.view_mut().into_dimensionality::<D>().unwrap()),
            "bias" => Ok(self.b.view_mut().into_dimensionality::<D>().unwrap()),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn set<D: ndarray::Dimension>(
        &mut self,
        key: &str,
        value: ArrayView<f64, D>,
    ) -> Result<(), ModelError> {
        match key {
            "weights" => {
                self.w
                    .assign(&value.into_dimensionality::<ndarray::Ix1>().unwrap());
                Ok(())
            }
            "bias" => {
                self.b
                    .assign(&value.into_dimensionality::<ndarray::Ix0>().unwrap());
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn param_iter(&self) -> Vec<(&str, ArrayView<f64, ndarray::IxDyn>)> {
        vec![
            ("weights", self.w.view().into_dyn()),
            ("bias", self.b.view().into_dyn()),
        ]
    }
}
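
// A minimal round-trip sketch for the `ParamCollection` accessors above:
// values written through `set` should come back unchanged through `get`,
// and unknown keys should surface as `KeyError` rather than panicking.
#[cfg(test)]
mod lr_param_collection_tests {
    use crate::model::core::param_collection::ParamCollection;
    use ndarray::{ArrayView0, ArrayView1, arr0, arr1};

    use super::LinearRegression;

    #[test]
    fn test_param_roundtrip() {
        let mut model = LinearRegression::new(2).unwrap();
        let weights = arr1(&[0.5, -1.5]);
        let bias = arr0(2.0);

        model.set("weights", weights.view()).unwrap();
        model.set("bias", bias.view()).unwrap();

        let w: ArrayView1<f64> = model.get("weights").unwrap();
        let b: ArrayView0<f64> = model.get("bias").unwrap();
        assert_eq!(w, weights);
        assert_eq!(b, bias);

        // Both parameters should also show up in the iterator.
        assert_eq!(model.param_iter().len(), 2);
        assert!(model.get::<ndarray::Ix1>("no_such_param").is_err());
    }
}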

impl GradientCollection for LinearRegression {
    fn get_gradient<D: ndarray::Dimension>(
        &self,
        key: &str,
    ) -> Result<ArrayView<f64, D>, ModelError> {
        match key {
            "weights" => Ok(self.dw.view().into_dimensionality::<D>().unwrap()),
            "bias" => Ok(self.db.view().into_dimensionality::<D>().unwrap()),
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }

    fn set_gradient<D: ndarray::Dimension>(
        &mut self,
        key: &str,
        value: ArrayView<f64, D>,
    ) -> Result<(), ModelError> {
        match key {
            "weights" => {
                self.dw
                    .assign(&value.into_dimensionality::<ndarray::Ix1>().unwrap());
                Ok(())
            }
            "bias" => {
                self.db
                    .assign(&value.into_dimensionality::<ndarray::Ix0>().unwrap());
                Ok(())
            }
            _ => Err(ModelError::KeyError(key.to_string())),
        }
    }
}
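
// A matching sketch for the `GradientCollection` accessors: gradients written
// through `set_gradient` should be readable back through `get_gradient`.
#[cfg(test)]
mod lr_gradient_collection_tests {
    use crate::model::core::param_collection::GradientCollection;
    use ndarray::{ArrayView1, arr1};

    use super::LinearRegression;

    #[test]
    fn test_gradient_roundtrip() {
        let mut model = LinearRegression::new(2).unwrap();
        let grad = arr1(&[0.1, -0.2]);

        model.set_gradient("weights", grad.view()).unwrap();

        let dw: ArrayView1<f64> = model.get_gradient("weights").unwrap();
        assert_eq!(dw, grad);
        assert!(model.get_gradient::<ndarray::Ix1>("no_such_grad").is_err());
    }
}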

impl BaseModel<Matrix, Vector> for LinearRegression {
    /// Predicts output values for given input features
    ///
    /// Applies the linear regression model to make predictions on new data.
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    ///
    /// # Returns
    /// * `Result<Vector, ModelError>` - Predicted values of shape (m, )
    fn predict(&self, x: &Matrix) -> Result<Vector, ModelError> {
        let w: ArrayView1<f64> = self.get("weights")?;
        let b: ArrayView0<f64> = self.get("bias")?;
        // `w` is one-dimensional, so `t()` is a no-op and `dot` computes the
        // vector-matrix product, yielding one prediction per column of `x`.
        let y_hat = w.t().dot(x) + b;
        Ok(y_hat)
    }

    /// Computes the Mean Squared Error cost between predictions and target values
    ///
    /// Calculates the cost using the formula: J = (1/2m) * Σ(y_hat - y)²
    /// where m is the number of examples
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The computed cost value
    fn compute_cost(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_hat = self.predict(x)?;
        // m is the number of examples, i.e. the number of columns of `x`
        // (`x.len()` would count every element, n_x * m).
        let m = x.shape()[1] as f64;
        let cost = (&y_hat - y).mapv(|v| v.powi(2)).sum() / (2.0 * m);
        Ok(cost)
    }
}

#[cfg(test)]
mod lr_base_model_tests {
    use crate::model::core::base::BaseModel;
    use crate::model::linear_regression::LinearRegression;
    use ndarray::{arr0, arr1, arr2};

    #[test]
    fn test_predict() {
        let mut lr = LinearRegression::new(2).unwrap();
        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let weights = arr1(&[1.0, 2.0]);
        let bias = arr0(0.0);
        // Set weights and bias explicitly.
        lr.w = weights;
        lr.b = bias;
        // y = [1.0 * 1.0 + 2.0 * 4.0, 1.0 * 2.0 + 2.0 * 5.0, 1.0 * 3.0 + 2.0 * 6.0]
        // y = [1 + 8, 2 + 10, 3 + 12]
        // y = [9, 12, 15]
        let y = arr1(&[9.0, 12.0, 15.0]);

        let y_hat = lr.predict(&x).unwrap();

        assert_eq!(&y, &y_hat);
    }
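
    /// A worked cost check: with w = [1, 2] and b = 0 the predictions are
    /// [9, 12, 15]; shifting each target down by one makes every residual 1,
    /// so J = (1 + 1 + 1) / (2 * 3) = 0.5.
    #[test]
    fn test_compute_cost() {
        let mut lr = LinearRegression::new(2).unwrap();
        lr.w = arr1(&[1.0, 2.0]);
        lr.b = arr0(0.0);
        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);

        let cost = lr.compute_cost(&x, &y).unwrap();
        assert_eq!(cost, 0.5);
    }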
}

/// Implementation of the `OptimizableModel` trait for `LinearRegression`
impl OptimizableModel<Matrix, Vector> for LinearRegression {
    /// Performs forward propagation to compute predictions
    ///
    /// Calculates the linear function y_hat = W.T @ x + b for a batch of examples.
    ///
    /// # Arguments
    /// * `input` - Input features of shape (n_x, m) where n_x is the number of features and m is the batch size
    ///
    /// # Returns
    /// * `Result<Vector, ModelError>` - The predicted values as a vector of shape (m, )
    fn forward(&self, input: &Matrix) -> Result<Vector, ModelError> {
        let y_hat = &self.w.t().dot(input) + &self.b;
        Ok(y_hat)
    }

    /// Performs backward propagation to compute gradients
    ///
    /// Calculates the gradients of the cost function with respect to the model parameters (weights and bias).
    /// For linear regression with MSE loss, the gradients are:
    /// dw = (1/m) * X @ (y_hat - y).T
    /// db = (1/m) * sum(y_hat - y)
    ///
    /// # Arguments
    /// * `input` - Input features of shape (n_x, m)
    /// * `output_grad` - Gradient of the cost with respect to the output, typically (y_hat - y)
    ///
    /// # Returns
    /// * `Result<(), ModelError>` - Success or error status of the operation
    fn backward(&mut self, input: &Matrix, output_grad: &Vector) -> Result<(), ModelError> {
        let m = input.shape()[1] as f64;

        // Compute the weight gradients dw = (1/m) * X @ (y_hat - y).T
        let dw: Vector = input.dot(&output_grad.t()) / m;
        let dw: ArrayView1<f64> = dw.view();

        // Compute the bias gradient db. `output_grad` holds one residual per
        // example, so db is its mean over the batch.
        let db = output_grad.sum() / m;
        let binding = Scalar::from_elem((), db);
        let db: ArrayView0<f64> = binding.view();

        // Update gradients
        self.set_gradient("weights", dw)?;
        self.set_gradient("bias", db)?;

        Ok(())
    }

    /// Computes the gradient of the cost with respect to the output predictions
    ///
    /// For Mean Squared Error, this returns the residual (y_hat - y), where
    /// y_hat is the model's prediction and y is the ground truth; the 1/m
    /// scaling is applied later in `backward`.
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<Vector, ModelError>` - The gradient of the cost function with respect to outputs
    fn compute_output_gradient(&self, x: &Matrix, y: &Vector) -> Result<Vector, ModelError> {
        let y_hat = self.forward(x)?;
        let dy = &y_hat - y;
        Ok(dy)
    }
}

#[cfg(test)]
mod lr_optimizable_model_tests {
    use ndarray::{ArrayView0, ArrayView1, arr0, arr1, arr2};

    use crate::{
        builders::builder::Builder,
        model::core::{base::OptimizableModel, param_collection::GradientCollection},
    };

    use super::LinearRegression;

    /// Test the forward propagation of the LinearRegression model
    #[test]
    fn test_forward_propagation() {
        let n_features = 2;
        let mut model = LinearRegression::builder()
            .n_input_features(n_features)
            .build()
            .unwrap();

        // Initialize weights and bias
        let weights = arr1(&[1.0, 2.0]);
        model.w.assign(&weights);
        let bias = arr0(0.0);
        model.b.assign(&bias);

        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        // y = [1.0 * 1.0 + 2.0 * 4.0, 1.0 * 2.0 + 2.0 * 5.0, 1.0 * 3.0 + 2.0 * 6.0]
        // y = [1 + 8, 2 + 10, 3 + 12]
        let y = arr1(&[9.0, 12.0, 15.0]);
        // Perform forward propagation
        let y_hat = model.forward(&x).unwrap();
        // Check that the predicted values match the expected values
        assert_eq!(y_hat, y);
        // Check that the weights and bias are unchanged
        assert_eq!(model.w, weights);
        assert_eq!(model.b, bias);
    }

    #[test]
    fn test_compute_output_grad() {
        let n_features = 2;
        let mut model = LinearRegression::builder()
            .n_input_features(n_features)
            .build()
            .unwrap();

        // Initialize weights and bias
        let weights = arr1(&[1.0, 2.0]);
        model.w.assign(&weights);
        let bias = arr0(0.0);
        model.b.assign(&bias);

        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);
        // y_hat = [9.0, 12.0, 15.0], so the element-wise residual is
        // dy = y_hat - y = [1.0, 1.0, 1.0]
        let expected_grad = arr1(&[1.0, 1.0, 1.0]);
        // Compute the output gradient
        let output_grad = model.compute_output_gradient(&x, &y).unwrap();
        // Check that the output gradient is computed correctly
        assert_eq!(output_grad, expected_grad);
    }

    #[test]
    fn test_backward_propagation() {
        let n_features = 2;
        let mut model = LinearRegression::builder()
            .n_input_features(n_features)
            .build()
            .unwrap();

        // Initialize weights and bias
        let weights = arr1(&[1.0, 2.0]);
        model.w.assign(&weights);
        let bias = arr0(0.0);
        model.b.assign(&bias);

        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        let y = arr1(&[8.0, 11.0, 14.0]);
        // Compute the output gradient
        let output_grad = model.compute_output_gradient(&x, &y).unwrap();
        // Perform backward propagation
        model.backward(&x, &output_grad).unwrap();
        // Compute expected gradients:
        // dy = (y_hat - y) = [1.0, 1.0, 1.0]
        // expected_dw = (1/m) * (x @ dy.T)
        // expected_dw = (1/3) * ([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] @ [1.0, 1.0, 1.0].T)
        // expected_dw = (1/3) * [1.0 + 2.0 + 3.0, 4.0 + 5.0 + 6.0]
        // expected_dw = (1/3) * [6.0, 15.0]
        let expected_dw = arr1(&[2.0, 5.0]);
        // expected_db = (1/m) * sum(dy) = 3.0 / 3.0 = 1.0
        let expected_db = arr0(1.0);

        // Check that the gradients are computed correctly
        let dw: ArrayView1<f64> = model.get_gradient("weights").unwrap();
        let db: ArrayView0<f64> = model.get_gradient("bias").unwrap();
        assert_eq!(dw, expected_dw);
        assert_eq!(db, expected_db);
    }
}

impl RegressionModel<Matrix, Vector> for LinearRegression {
    /// Calculates the Mean Squared Error between predictions and target values
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The MSE value
    fn mse(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let y_hat = self.predict(x)?;
        // m is the number of examples (columns of `x`), not `x.len()`.
        let m = x.shape()[1] as f64;
        Ok((&y_hat - y).mapv(|v| v.powi(2)).sum() / m)
    }

    /// Calculates the Root Mean Squared Error between predictions and target values
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The RMSE value (square root of MSE)
    fn rmse(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        // RMSE is simply the square root of the MSE.
        Ok(self.mse(x, y)?.sqrt())
    }

    /// Calculates the R-squared (coefficient of determination) for the model
    ///
    /// R² represents the proportion of variance in the dependent variable
    /// that is predictable from the independent variables.
    /// R² = 1 - (MSE / Variance of y)
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<f64, ModelError>` - The R² value, at most 1 (negative when the
    ///   model fits worse than predicting the mean of y)
    fn r2(&self, x: &Matrix, y: &Vector) -> Result<f64, ModelError> {
        let m = x.shape()[1] as f64;
        let numerator = self.mse(x, y)?;
        // Variance of the targets: mean squared deviation of y from its mean.
        let denominator = (y - y.mean().unwrap()).mapv(|v| v.powi(2)).sum() / m;
        Ok(1.0 - numerator / denominator)
    }

    /// Computes a complete set of regression metrics for model evaluation
    ///
    /// # Arguments
    /// * `x` - Input features of shape (n_x, m)
    /// * `y` - Target values of shape (m, )
    ///
    /// # Returns
    /// * `Result<RegressionMetrics, ModelError>` - Struct containing MSE, RMSE, and R²
    fn compute_metrics(&self, x: &Matrix, y: &Vector) -> Result<RegressionMetrics, ModelError> {
        let mse = self.mse(x, y)?;
        let rmse = self.rmse(x, y)?;
        let r2 = self.r2(x, y)?;
        Ok(RegressionMetrics { mse, rmse, r2 })
    }
}
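
// A numeric sketch for the regression metrics above, reusing the fixture from
// the other test modules: every residual is 1, so MSE = RMSE = 1, and with
// Var(y) = 6 the R² comes out as 1 - 1/6.
#[cfg(test)]
mod lr_regression_model_tests {
    use crate::model::core::regression_model::RegressionModel;
    use ndarray::{arr0, arr1, arr2};

    use super::LinearRegression;

    #[test]
    fn test_regression_metrics() {
        let mut model = LinearRegression::new(2).unwrap();
        model.w = arr1(&[1.0, 2.0]);
        model.b = arr0(0.0);
        let x = arr2(&[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]);
        // Predictions are [9, 12, 15]; residuals are [1, 1, 1].
        let y = arr1(&[8.0, 11.0, 14.0]);

        let metrics = model.compute_metrics(&x, &y).unwrap();
        assert_eq!(metrics.mse, 1.0);
        assert_eq!(metrics.rmse, 1.0);
        // Var(y) = ((-3)² + 0² + 3²) / 3 = 6, so R² = 1 - 1/6.
        assert!((metrics.r2 - (1.0 - 1.0 / 6.0)).abs() < 1e-12);
    }
}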
449}