rustyml 0.11.0

A high-performance machine learning & deep learning library in pure Rust, offering ML algorithms and neural network support
pub use super::RegularizationType;
use super::helper_function::{
    preliminary_check, validate_learning_rate, validate_max_iterations, validate_regulation_type,
    validate_tolerance,
};
use crate::error::ModelError;
use crate::math::sum_of_squared_errors;
use crate::{Deserialize, Serialize};
use indicatif::{ProgressBar, ProgressStyle};
use ndarray::{Array1, ArrayBase, Data, Ix1, Ix2};
use rayon::prelude::{
    IndexedParallelIterator, IntoParallelRefIterator, IntoParallelRefMutIterator, ParallelBridge,
    ParallelIterator,
};

/// Threshold for using parallel computation in Linear Regression.
/// When the number of features is below this threshold, sequential computation is used.
const LINEAR_REGRESSION_PARALLEL_THRESHOLD: usize = 200;

/// Linear Regression model implementation
///
/// Trains a linear regression model using the gradient descent algorithm. This implementation
/// supports multivariate regression and an optional intercept term, and allows adjustment of the
/// learning rate, maximum number of iterations, and convergence tolerance.
///
/// # Fields
///
/// - `coefficients` - Model coefficients (slopes), None before training
/// - `intercept` - Model intercept, None before training
/// - `fit_intercept` - Whether to include an intercept term in the model
/// - `learning_rate` - Learning rate for gradient descent
/// - `max_iter` - Maximum number of iterations for gradient descent
/// - `tol` - Convergence tolerance
/// - `n_iter` - Number of iterations actually performed; `None` before fitting
/// - `regularization_type` - Regularization type and strength
///
/// # Examples
/// ```rust
/// use rustyml::machine_learning::linear_regression::*;
/// use ndarray::{Array1, Array2, array};
///
/// // Create a linear regression model
/// let mut model = LinearRegression::new(true, 0.01, 1000, 1e-6, None).unwrap();
///
/// // Prepare training data
/// let raw_x = vec![vec![1.0, 2.0], vec![2.0, 3.0], vec![3.0, 4.0]];
/// let raw_y = vec![6.0, 9.0, 12.0];
///
/// // Convert Vec to ndarray types
/// let x = Array2::from_shape_vec((3, 2), raw_x.into_iter().flatten().collect()).unwrap();
/// let y = Array1::from_vec(raw_y);
///
/// // Train the model
/// model.fit(&x, &y).unwrap();
///
/// // Make predictions
/// let new_data = Array2::from_shape_vec((1, 2), vec![4.0, 5.0]).unwrap();
/// let predictions = model.predict(&new_data).unwrap();
///
/// // Save the trained model to a file
/// model.save_to_path("linear_regression_model.json").unwrap();
///
/// // Load the model from the file
/// let loaded_model = LinearRegression::load_from_path("linear_regression_model.json").unwrap();
///
/// // Use the loaded model for predictions
/// let loaded_predictions = loaded_model.predict(&new_data).unwrap();
///
/// // Since Clone is implemented, the model can be easily cloned
/// let model_copy = model.clone();
///
/// // Since Debug is implemented, detailed model information can be printed
/// println!("{:?}", model);
///
/// // Clean up: remove the created file
/// std::fs::remove_file("linear_regression_model.json").unwrap();
/// ```
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct LinearRegression {
    coefficients: Option<Array1<f64>>,
    intercept: Option<f64>,
    fit_intercept: bool,
    learning_rate: f64,
    max_iter: usize,
    tol: f64,
    n_iter: Option<usize>,
    regularization_type: Option<RegularizationType>,
}

impl Default for LinearRegression {
    /// Creates a new LinearRegression instance with default parameter values.
    ///
    /// # Default Values
    ///
    /// - `coefficients` - `None` - Model coefficients are not initialized until training
    /// - `intercept` - `None` - Model intercept is not initialized until training
    /// - `fit_intercept` - `true` - Include an intercept term in the linear model
    /// - `learning_rate` - `0.01` - Learning rate for gradient descent optimization
    /// - `max_iter` - `1000` - Maximum number of iterations for gradient descent
    /// - `tol` - `1e-5` - Convergence tolerance (0.00001) for stopping criteria
    /// - `n_iter` - `None` - Number of actual iterations performed (set after training)
    /// - `regularization_type` - `None` - No regularization applied by default
    ///
    /// # Returns
    ///
    /// - `LinearRegression` - A new instance with sensible default parameters for most use cases
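    ///
    /// # Examples
    ///
    /// A minimal sketch showing construction with the defaults listed above:
    /// ```rust
    /// use rustyml::machine_learning::linear_regression::LinearRegression;
    ///
    /// let model = LinearRegression::default();
    /// // Debug output shows fit_intercept: true, learning_rate: 0.01, max_iter: 1000, tol: 1e-5
    /// println!("{:?}", model);
    /// ```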
    fn default() -> Self {
        Self {
            coefficients: None,
            intercept: None,
            fit_intercept: true,
            learning_rate: 0.01,
            max_iter: 1000,
            tol: 1e-5,
            n_iter: None,
            regularization_type: None,
        }
    }
}

impl LinearRegression {
    /// Creates a new linear regression model with custom parameters.
    ///
    /// This constructor validates all input parameters to ensure they are within
    /// acceptable numerical ranges before returning the model instance.
    ///
    /// # Parameters
    ///
    /// - `fit_intercept` - Whether to calculate the intercept for this model
    /// - `learning_rate` - The learning rate for gradient descent optimization
    /// - `max_iterations` - Maximum number of iterations for gradient descent
    /// - `tolerance` - The tolerance for stopping criteria
    /// - `regularization_type` - Optional regularization to prevent overfitting (L1, L2, or None)
    ///
    /// # Returns
    ///
    /// - `Result<Self, ModelError>` - A new instance of LinearRegression, or an error if parameters are invalid
    ///
    /// # Errors
    ///
    /// - `ModelError::InputValidationError` - If `learning_rate`, `max_iterations`, `tolerance`, or `regularization_type` is invalid
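    ///
    /// # Examples
    ///
    /// A minimal sketch of construction and validation; the failing case assumes the
    /// validators reject non-positive learning rates:
    /// ```rust
    /// use rustyml::machine_learning::linear_regression::LinearRegression;
    ///
    /// // Valid parameters yield a model
    /// let model = LinearRegression::new(true, 0.01, 1000, 1e-6, None).unwrap();
    ///
    /// // A negative learning rate is rejected
    /// assert!(LinearRegression::new(true, -0.01, 1000, 1e-6, None).is_err());
    /// ```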
    pub fn new(
        fit_intercept: bool,
        learning_rate: f64,
        max_iterations: usize,
        tolerance: f64,
        regularization_type: Option<RegularizationType>,
    ) -> Result<Self, ModelError> {
        // Input validation
        validate_learning_rate(learning_rate)?;
        validate_max_iterations(max_iterations)?;
        validate_tolerance(tolerance)?;
        validate_regulation_type(regularization_type)?;

        Ok(LinearRegression {
            coefficients: None,
            intercept: None,
            fit_intercept,
            learning_rate,
            max_iter: max_iterations,
            tol: tolerance,
            n_iter: None,
            regularization_type,
        })
    }

    // Getters
    get_field!(get_fit_intercept, fit_intercept, bool);
    get_field!(get_learning_rate, learning_rate, f64);
    get_field!(get_max_iter, max_iter, usize);
    get_field!(get_tolerance, tol, f64);
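    // `get_max_iterations` is an alias of `get_max_iter`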
    get_field!(get_max_iterations, max_iter, usize);
    get_field!(get_actual_iterations, n_iter, Option<usize>);
    get_field!(
        get_regularization_type,
        regularization_type,
        Option<RegularizationType>
    );
    get_field_as_ref!(get_coefficients, coefficients, Option<&Array1<f64>>);
    get_field!(get_intercept, intercept, Option<f64>);

    /// Fits the linear regression model using gradient descent.
    ///
    /// Iteratively updates the model's coefficients and intercept to minimize the cost function.
    /// The process includes real-time progress tracking and early stopping if convergence is reached.
    ///
    /// # Parameters
    ///
    /// - `x` - Feature matrix, each row is a sample, each column is a feature
    /// - `y` - Target variable vector
    ///
    /// # Returns
    ///
    /// - `Result<&mut Self, ModelError>` - Returns mutable reference to self for method chaining
    ///
    /// # Errors
    ///
    /// - `ModelError::ProcessingError` - If numerical issues like NaN or infinity occur during training
    /// - `ModelError::InputValidationError` - If input dimensions are inconsistent
    ///
    /// # Performance
    ///
    /// Parallel computation is automatically used for the L1 penalty term and its gradient update
    /// when the number of features is at least `LINEAR_REGRESSION_PARALLEL_THRESHOLD` (200).
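    ///
    /// # Examples
    ///
    /// A minimal sketch on a small synthetic dataset (same shapes as the struct-level example):
    /// ```rust
    /// use rustyml::machine_learning::linear_regression::LinearRegression;
    /// use ndarray::{array, Array1, Array2};
    ///
    /// let x: Array2<f64> = array![[1.0, 2.0], [2.0, 3.0], [3.0, 4.0]];
    /// let y: Array1<f64> = array![6.0, 9.0, 12.0];
    ///
    /// let mut model = LinearRegression::new(true, 0.01, 1000, 1e-6, None).unwrap();
    /// model.fit(&x, &y).unwrap();
    /// assert!(model.get_coefficients().is_some());
    /// ```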
    pub fn fit<S>(
        &mut self,
        x: &ArrayBase<S, Ix2>,
        y: &ArrayBase<S, Ix1>,
    ) -> Result<&mut Self, ModelError>
    where
        S: Data<Elem = f64>,
    {
        // Use preliminary_check for input validation
        preliminary_check(x, Some(y))?;

        let n_samples = x.nrows();
        let n_features = x.ncols();

        // Initialize parameters
        let mut weights = Array1::<f64>::zeros(n_features); // Initialize weights to zero
        let mut intercept = 0.0; // Initialize intercept to zero

        let mut prev_cost = f64::INFINITY;
        let mut convergence_count = 0; // Track consecutive convergences for stability
        const CONVERGENCE_THRESHOLD: usize = 3; // Require 3 consecutive convergences

        let mut n_iter = 0;

        // Pre-allocate arrays to avoid repeated memory allocation
        let mut predictions = Array1::<f64>::zeros(n_samples);
        let mut error_vec = Array1::<f64>::zeros(n_samples);

        // Create progress bar for training iterations
        let progress_bar = ProgressBar::new(self.max_iter as u64);
        progress_bar.set_style(
            ProgressStyle::default_bar()
                .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos}/{len} | Cost: {msg}")
                .expect("Failed to set progress bar template")
                .progress_chars("█▓░"),
        );
        progress_bar.set_message(format!(
            "{:.6} | Convergence: 0/{}",
            f64::INFINITY,
            CONVERGENCE_THRESHOLD
        ));

        // Gradient descent iterations
        while n_iter < self.max_iter {
            n_iter += 1;

            // Calculate predictions - vectorized operation
            predictions.assign(&x.dot(&weights));
            if self.fit_intercept {
                predictions += intercept;
            }

            // Calculate errors - use borrowing to avoid moving predictions
            error_vec.assign(&(&predictions - y));

            // Calculate cost using math module's sum_of_squared_errors
            let sse = sum_of_squared_errors(&predictions, y);

            let regularization_term = match &self.regularization_type {
                None => 0.0,
                Some(RegularizationType::L1(alpha)) => {
                    // Use parallel computation for L1 regularization when feature count is large
                    if n_features >= LINEAR_REGRESSION_PARALLEL_THRESHOLD {
                        alpha * weights.iter().par_bridge().map(|w| w.abs()).sum::<f64>()
                    } else {
                        alpha * weights.iter().map(|w| w.abs()).sum::<f64>()
                    }
                }
                // Penalty (alpha / 2) * ||w||^2, whose gradient alpha * w matches the L2 update below
                Some(RegularizationType::L2(alpha)) => 0.5 * alpha * weights.dot(&weights),
            };

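            // Half mean squared error plus penalty: J = SSE / (2 * n_samples) + regularization_term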
            let cost = sse / (2.0 * n_samples as f64) + regularization_term;

            // Update progress bar with current cost and convergence status
            progress_bar.set_message(format!(
                "{:.6} | Convergence: {}/{}",
                cost, convergence_count, CONVERGENCE_THRESHOLD
            ));
            progress_bar.inc(1);

            // Check for numerical issues in cost
            if !cost.is_finite() {
                progress_bar.finish_with_message("Error: NaN or infinite cost");
                return Err(ModelError::ProcessingError(
                    "Cost calculation resulted in NaN or infinite value".to_string(),
                ));
            }

            // Calculate gradients using matrix operations
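            // weight gradient = X^T * error / n_samples; intercept gradient = mean(error)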
            let mut weight_gradients = x.t().dot(&error_vec) / (n_samples as f64);
            let intercept_gradient = if self.fit_intercept {
                error_vec.sum() / (n_samples as f64)
            } else {
                0.0
            };

            // Check for numerical issues in gradients
            if weight_gradients.iter().any(|&val| !val.is_finite())
                || !intercept_gradient.is_finite()
            {
                progress_bar.finish_with_message("Error: NaN or infinite gradients");
                return Err(ModelError::ProcessingError(
                    "Gradient calculation resulted in NaN or infinite values".to_string(),
                ));
            }

            // Add regularization terms to gradients
            match &self.regularization_type {
                None => {}
                Some(RegularizationType::L1(alpha)) => {
                    // Use parallel computation for L1 gradient when feature count is large
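                    // Subgradient of alpha * |w| is alpha * sign(w); note that f64::signum
                    // returns 1.0 for a weight of exactly +0.0 in Rust.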
                    let alpha_val = *alpha;
                    if n_features >= LINEAR_REGRESSION_PARALLEL_THRESHOLD {
                        let weights_slice = weights.as_slice().unwrap();
                        let gradients_slice = weight_gradients.as_slice_mut().unwrap();

                        gradients_slice
                            .par_iter_mut()
                            .zip(weights_slice.par_iter())
                            .for_each(|(grad, w)| {
                                *grad += alpha_val * w.signum();
                            });
                    } else {
                        weight_gradients
                            .iter_mut()
                            .zip(weights.iter())
                            .for_each(|(grad, w)| {
                                *grad += alpha_val * w.signum();
                            });
                    }
                }
                Some(RegularizationType::L2(alpha)) => {
                    weight_gradients.scaled_add(*alpha, &weights);
                }
            }

            // Update parameters
            weights.scaled_add(-self.learning_rate, &weight_gradients);
            if self.fit_intercept {
                intercept -= self.learning_rate * intercept_gradient;
            }

            // Check for numerical issues in updated parameters
            if weights.iter().any(|&val| !val.is_finite()) || !intercept.is_finite() {
                progress_bar.finish_with_message("Error: NaN or infinite parameters");
                return Err(ModelError::ProcessingError(
                    "Parameter update resulted in NaN or infinite values".to_string(),
                ));
            }

            // Enhanced convergence check with stability requirement
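            // A single sub-tolerance step may be a fluke (e.g. a momentarily flat cost),
            // so several consecutive small changes are required before stopping.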
            let cost_change = (prev_cost - cost).abs();
            if cost_change < self.tol {
                convergence_count += 1;
                if convergence_count >= CONVERGENCE_THRESHOLD {
                    break;
                }
            } else {
                convergence_count = 0; // Reset if not converged
            }

            prev_cost = cost;
        }

        // Finish progress bar with final statistics
        let convergence_status = if n_iter < self.max_iter {
            "Converged"
        } else {
            "Max iterations"
        };
        progress_bar.finish_with_message(format!(
            "{:.6} | {} | Iterations: {}",
            prev_cost, convergence_status, n_iter
        ));

        // Save training results
        self.coefficients = Some(weights);
        self.intercept = Some(if self.fit_intercept { intercept } else { 0.0 });
        self.n_iter = Some(n_iter);

        println!(
            "\nLinear Regression training completed: {} samples, {} features, {} iterations, final cost: {:.6}",
            n_samples, n_features, n_iter, prev_cost
        );

        Ok(self)
    }

    /// Makes predictions using the trained model.
    ///
    /// Applies the learned coefficients and intercept to the provided feature matrix.
    ///
    /// # Parameters
    ///
    /// - `x` - Prediction data, each row is a sample, each column is a feature
    ///
    /// # Returns
    ///
    /// - `Result<Array1<f64>, ModelError>` - A vector of predictions
    ///
    /// # Errors
    ///
    /// - `ModelError::NotFitted` - If the model has not been trained yet
    /// - `ModelError::InputValidationError` - If the feature count doesn't match the training data or the data contains invalid values
    /// - `ModelError::ProcessingError` - If prediction results in non-finite values
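    ///
    /// # Examples
    ///
    /// A minimal sketch; calling `predict` on an unfitted model returns `ModelError::NotFitted`:
    /// ```rust
    /// use rustyml::machine_learning::linear_regression::LinearRegression;
    /// use ndarray::array;
    ///
    /// let model = LinearRegression::default();
    /// let x = array![[1.0, 2.0]];
    /// assert!(model.predict(&x).is_err()); // not fitted yet
    /// ```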
    pub fn predict<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array1<f64>, ModelError>
    where
        S: Data<Elem = f64>,
    {
        // Check if model has been fitted
        if self.coefficients.is_none() {
            return Err(ModelError::NotFitted);
        }

        let coeffs = self.coefficients.as_ref().unwrap();
        let intercept = self.intercept.unwrap_or(0.0);

        // Check for empty input data
        if x.is_empty() {
            return Err(ModelError::InputValidationError(
                "Cannot predict on empty dataset".to_string(),
            ));
        }

        // Check feature dimension match
        if x.ncols() != coeffs.len() {
            return Err(ModelError::InputValidationError(format!(
                "Number of features does not match training data, x columns: {}, coefficients: {}",
                x.ncols(),
                coeffs.len()
            )));
        }

        // Check for invalid values in input data
        if x.iter().any(|&val| !val.is_finite()) {
            return Err(ModelError::InputValidationError(
                "Input data contains NaN or infinite values".to_string(),
            ));
        }

        // Calculate predictions using matrix operations
        let mut predictions = x.dot(coeffs);
        if self.fit_intercept {
            predictions += intercept;
        }

        // Check if predictions contain invalid values
        if predictions.iter().any(|&val| !val.is_finite()) {
            return Err(ModelError::ProcessingError(
                "Prediction calculation resulted in NaN or infinite values".to_string(),
            ));
        }

        Ok(predictions)
    }

    /// Fits the model to the training data and then makes predictions on the same data.
    ///
    /// A convenience method that sequentially executes `fit` and then `predict`.
    ///
    /// # Parameters
    ///
    /// - `x` - The input features matrix
    /// - `y` - The target values corresponding to each training example
    ///
    /// # Returns
    ///
    /// - `Result<Array1<f64>, ModelError>` - The predicted values for the input data
    ///
    /// # Errors
    ///
    /// - `ModelError::InputValidationError` - If input data is invalid
    /// - `ModelError::ProcessingError` - If an error occurs during fitting or prediction
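    ///
    /// # Examples
    ///
    /// A minimal sketch combining training and in-sample prediction:
    /// ```rust
    /// use rustyml::machine_learning::linear_regression::LinearRegression;
    /// use ndarray::{array, Array1, Array2};
    ///
    /// let x: Array2<f64> = array![[1.0], [2.0], [3.0]];
    /// let y: Array1<f64> = array![2.0, 4.0, 6.0];
    ///
    /// let mut model = LinearRegression::default();
    /// let predictions = model.fit_predict(&x, &y).unwrap();
    /// assert_eq!(predictions.len(), y.len());
    /// ```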
    pub fn fit_predict<S>(
        &mut self,
        x: &ArrayBase<S, Ix2>,
        y: &ArrayBase<S, Ix1>,
    ) -> Result<Array1<f64>, ModelError>
    where
        S: Data<Elem = f64>,
    {
        self.fit(x, y)?;
        self.predict(x)
    }

    model_save_and_load_methods!(LinearRegression);
}