sklears_python/linear/
linear_regression.rs

1//! Python bindings for Linear Regression
2//!
3//! This module provides Python bindings for Linear Regression,
4//! offering scikit-learn compatible interfaces with high-performance OLS implementation
5//! using the sklears-linear crate.
6
7use super::common::*;
8use numpy::IntoPyArray;
9use pyo3::types::PyDict;
10use pyo3::Bound;
11use sklears_core::traits::{Fit, Predict, Score, Trained};
12use sklears_linear::{LinearRegression, LinearRegressionConfig};
13
/// Python-facing hyper-parameters of the `LinearRegression` binding.
///
/// The field set matches the keyword arguments of the Python constructor.
/// Only `fit_intercept` is carried over when this value is converted into a
/// `LinearRegressionConfig`; the remaining fields are kept so they can be
/// reported back to Python unchanged.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PyLinearRegressionConfig {
    /// Whether the model should estimate an intercept term.
    pub fit_intercept: bool,
    /// Value of scikit-learn's `copy_X` option as supplied by the caller.
    pub copy_x: bool,
    /// Value of scikit-learn's `n_jobs` option; `None` when not supplied.
    pub n_jobs: Option<i32>,
    /// Value of scikit-learn's `positive` option as supplied by the caller.
    pub positive: bool,
}
22
23impl Default for PyLinearRegressionConfig {
24    fn default() -> Self {
25        Self {
26            fit_intercept: true,
27            copy_x: true,
28            n_jobs: None,
29            positive: false,
30        }
31    }
32}
33
34impl From<PyLinearRegressionConfig> for LinearRegressionConfig {
35    fn from(py_config: PyLinearRegressionConfig) -> Self {
36        let mut config = LinearRegressionConfig::default();
37        config.fit_intercept = py_config.fit_intercept;
38        // Note: copy_x, n_jobs, and positive are Python-specific and handled at the Python level
39        config
40    }
41}
42
43/// Ordinary least squares Linear Regression.
44///
45/// LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
46/// to minimize the residual sum of squares between the observed targets in
47/// the dataset, and the targets predicted by the linear approximation.
48///
49/// Parameters
50/// ----------
51/// fit_intercept : bool, default=True
52///     Whether to calculate the intercept for this model. If set
53///     to False, no intercept will be used in calculations
54///     (i.e. data is expected to be centered).
55///
56/// copy_X : bool, default=True
57///     If True, X will be copied; else, it may be overwritten.
58///
59/// n_jobs : int, default=None
60///     The number of jobs to use for the computation. This will only provide
61///     speedup in case of sufficiently large problems, that is if firstly
62///     `n_targets > 1` and secondly `X` is sparse or if `positive` is set
63///     to `True`. ``None`` means 1 unless in a
64///     :obj:`joblib.parallel_backend` context. ``-1`` means using all
65///     processors.
66///
67/// positive : bool, default=False
68///     When set to ``True``, forces the coefficients to be positive. This
69///     option is only supported for dense arrays.
70///
71/// Attributes
72/// ----------
73/// coef_ : array of shape (n_features,) or (n_targets, n_features)
74///     Estimated coefficients for the linear regression problem.
75///     If multiple targets are passed during the fit (y 2D), this
76///     is a 2D array of shape (n_targets, n_features), while if only
77///     one target is passed, this is a 1D array of length n_features.
78///
79/// intercept_ : float or array of shape (n_targets,)
80///     Independent term in the linear model. Set to 0.0 if
81///     `fit_intercept = False`.
82///
83/// n_features_in_ : int
84///     Number of features seen during :term:`fit`.
85///
86/// Examples
87/// --------
88/// >>> import numpy as np
89/// >>> from sklears_python import LinearRegression
90/// >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
91/// >>> # y = 1 * x_0 + 2 * x_1 + 3
92/// >>> y = np.dot(X, [1, 2]) + 3
93/// >>> reg = LinearRegression().fit(X, y)
94/// >>> reg.score(X, y)
95/// 1.0
96/// >>> reg.coef_
97/// array([1., 2.])
98/// >>> reg.intercept_
99/// 3.0...
100/// >>> reg.predict(np.array([[3, 5]]))
101/// array([16.])
102///
103/// Notes
104/// -----
105/// From the implementation point of view, this is just plain Ordinary
106/// Least Squares (scipy.linalg.lstsq) or Non Negative Least Squares
107/// (scipy.optimize.nnls) wrapped as a predictor object.
108#[pyclass(name = "LinearRegression")]
109pub struct PyLinearRegression {
110    py_config: PyLinearRegressionConfig,
111    fitted_model: Option<LinearRegression<Trained>>,
112}
113
114#[pymethods]
115impl PyLinearRegression {
116    #[new]
117    #[pyo3(signature = (fit_intercept=true, copy_x=true, n_jobs=None, positive=false))]
118    fn new(fit_intercept: bool, copy_x: bool, n_jobs: Option<i32>, positive: bool) -> Self {
119        let py_config = PyLinearRegressionConfig {
120            fit_intercept,
121            copy_x,
122            n_jobs,
123            positive,
124        };
125
126        Self {
127            py_config,
128            fitted_model: None,
129        }
130    }
131
132    /// Fit linear model.
133    ///
134    /// Parameters
135    /// ----------
136    /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
137    ///     Training data.
138    ///
139    /// y : array-like of shape (n_samples,) or (n_samples, n_targets)
140    ///     Target values. Will be cast to X's dtype if necessary.
141    ///
142    /// sample_weight : array-like of shape (n_samples,), default=None
143    ///     Individual weights for each sample
144    ///
145    /// Returns
146    /// -------
147    /// self : object
148    ///     Fitted Estimator.
149    fn fit(&mut self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<()> {
150        let x_array = x.as_array().to_owned();
151        let y_array = y.as_array().to_owned();
152
153        // Validate input arrays
154        validate_fit_arrays(&x_array, &y_array)?;
155
156        // Create sklears-linear model with configuration
157        let config = LinearRegressionConfig::from(self.py_config.clone());
158        let model = LinearRegression::new().fit_intercept(config.fit_intercept);
159
160        // Fit the model using sklears-linear's implementation
161        match model.fit(&x_array, &y_array) {
162            Ok(fitted_model) => {
163                self.fitted_model = Some(fitted_model);
164                Ok(())
165            }
166            Err(e) => Err(PyValueError::new_err(format!(
167                "Failed to fit model: {:?}",
168                e
169            ))),
170        }
171    }
172
173    /// Predict using the linear model.
174    ///
175    /// Parameters
176    /// ----------
177    /// X : array-like or sparse matrix, shape (n_samples, n_features)
178    ///     Samples.
179    ///
180    /// Returns
181    /// -------
182    /// C : array, shape (n_samples,)
183    ///     Returns predicted values.
184    fn predict(&self, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray1<f64>>> {
185        let fitted = self
186            .fitted_model
187            .as_ref()
188            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
189
190        let x_array = x.as_array().to_owned();
191        validate_predict_array(&x_array)?;
192
193        match fitted.predict(&x_array) {
194            Ok(predictions) => {
195                let py = unsafe { Python::assume_attached() };
196                Ok(predictions.into_pyarray(py).into())
197            }
198            Err(e) => Err(PyValueError::new_err(format!("Prediction failed: {:?}", e))),
199        }
200    }
201
202    /// Get model coefficients
203    #[getter]
204    fn coef_(&self) -> PyResult<Py<PyArray1<f64>>> {
205        let fitted = self
206            .fitted_model
207            .as_ref()
208            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
209
210        let py = unsafe { Python::assume_attached() };
211        Ok(fitted.coef().clone().into_pyarray(py).into())
212    }
213
214    /// Get model intercept
215    #[getter]
216    fn intercept_(&self) -> PyResult<f64> {
217        let fitted = self
218            .fitted_model
219            .as_ref()
220            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
221
222        Ok(fitted.intercept().unwrap_or(0.0))
223    }
224
225    /// Return the coefficient of determination of the prediction.
226    ///
227    /// The coefficient of determination :math:`R^2` is defined as
228    /// :math:`(1 - \\frac{SS_{res}}{SS_{tot}})` where :math:`SS_{res} = \\sum_i (y_i - y(x_i))^2`
229    /// is the residual sum of squares, and :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2`
230    /// is the total sum of squares.
231    ///
232    /// The best possible score is 1.0 and it can be negative (because the
233    /// model can be arbitrarily worse). A constant model that always predicts
234    /// the expected value of `y`, disregarding the input features, would get
235    /// a :math:`R^2` score of 0.0.
236    ///
237    /// Parameters
238    /// ----------
239    /// X : array-like of shape (n_samples, n_features)
240    ///     Test samples. For some estimators this may be a precomputed
241    ///     kernel matrix or a list of generic objects instead with shape
242    ///     ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
243    ///     is the number of samples used in the fitting for the estimator.
244    ///
245    /// y : array-like of shape (n_samples,) or (n_samples, n_outputs)
246    ///     True values for `X`.
247    ///
248    /// sample_weight : array-like of shape (n_samples,), default=None
249    ///     Sample weights.
250    ///
251    /// Returns
252    /// -------
253    /// score : float
254    ///     :math:`R^2` of ``self.predict(X)`` w.r.t. `y`.
255    ///
256    /// Notes
257    /// -----
258    /// The :math:`R^2` score used when calling ``score`` on a regressor uses
259    /// ``multioutput='uniform_average'`` from version 0.23 to keep consistent
260    /// with default value of :func:`~sklearn.metrics.r2_score`.
261    /// This influences the ``score`` method of all the multioutput
262    /// regressors (except for
263    /// :class:`~sklearn.multioutput.MultiOutputRegressor`).
264    fn score(&self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<f64> {
265        let fitted = self
266            .fitted_model
267            .as_ref()
268            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
269
270        let x_array = x.as_array().to_owned();
271        let y_array = y.as_array().to_owned();
272
273        match fitted.score(&x_array, &y_array) {
274            Ok(score) => Ok(score),
275            Err(e) => Err(PyValueError::new_err(format!(
276                "Score calculation failed: {:?}",
277                e
278            ))),
279        }
280    }
281
282    /// Get number of features
283    #[getter]
284    fn n_features_in_(&self) -> PyResult<usize> {
285        let fitted = self
286            .fitted_model
287            .as_ref()
288            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
289
290        // Infer number of features from coefficient array length
291        Ok(fitted.coef().len())
292    }
293
294    /// Return parameters for this estimator (sklearn compatibility)
295    fn get_params(&self, deep: Option<bool>) -> PyResult<Py<PyDict>> {
296        let _deep = deep.unwrap_or(true);
297
298        let py = unsafe { Python::assume_attached() };
299        let dict = PyDict::new(py);
300
301        dict.set_item("fit_intercept", self.py_config.fit_intercept)?;
302        dict.set_item("copy_X", self.py_config.copy_x)?;
303        dict.set_item("n_jobs", self.py_config.n_jobs)?;
304        dict.set_item("positive", self.py_config.positive)?;
305
306        Ok(dict.into())
307    }
308
309    /// Set parameters for this estimator (sklearn compatibility)
310    fn set_params(&mut self, kwargs: &Bound<'_, PyDict>) -> PyResult<()> {
311        // Update configuration parameters
312        if let Some(fit_intercept) = kwargs.get_item("fit_intercept")? {
313            self.py_config.fit_intercept = fit_intercept.extract()?;
314        }
315        if let Some(copy_x) = kwargs.get_item("copy_X")? {
316            self.py_config.copy_x = copy_x.extract()?;
317        }
318        if let Some(n_jobs) = kwargs.get_item("n_jobs")? {
319            self.py_config.n_jobs = n_jobs.extract()?;
320        }
321        if let Some(positive) = kwargs.get_item("positive")? {
322            self.py_config.positive = positive.extract()?;
323        }
324
325        // Clear fitted model since config changed
326        self.fitted_model = None;
327
328        Ok(())
329    }
330
331    /// String representation
332    fn __repr__(&self) -> String {
333        format!(
334            "LinearRegression(fit_intercept={}, copy_X={}, n_jobs={:?}, positive={})",
335            self.py_config.fit_intercept,
336            self.py_config.copy_x,
337            self.py_config.n_jobs,
338            self.py_config.positive
339        )
340    }
341}