sklears_python/linear/
linear_regression.rs

1//! Python bindings for Linear Regression
2//!
3//! This module provides Python bindings for Linear Regression,
4//! offering scikit-learn compatible interfaces with high-performance OLS implementation
5//! using the sklears-linear crate.
6
7use super::common::*;
8use pyo3::types::PyDict;
9use pyo3::Bound;
10use sklears_core::traits::{Fit, Predict, Score, Trained};
11use sklears_linear::{LinearRegression, LinearRegressionConfig};
12
/// Constructor options of the Python-facing `LinearRegression` estimator.
///
/// The fields mirror the keyword arguments accepted by the Python
/// constructor. Only `fit_intercept` is carried over when this value is
/// converted into the native `LinearRegressionConfig`; the remaining fields
/// are kept so they can be reported back to Python.
#[derive(Debug, Clone)]
pub struct PyLinearRegressionConfig {
    /// Whether the model should estimate an intercept term.
    pub fit_intercept: bool,
    /// Value of the scikit-learn style `copy_X` option.
    pub copy_x: bool,
    /// Value of the scikit-learn style `n_jobs` option (`None` when unset).
    pub n_jobs: Option<i32>,
    /// Value of the scikit-learn style `positive` option.
    pub positive: bool,
}

impl Default for PyLinearRegressionConfig {
    /// Returns the same defaults as the Python constructor signature:
    /// intercept fitting and `copy_x` enabled, no `n_jobs`, `positive` off.
    fn default() -> Self {
        PyLinearRegressionConfig {
            positive: false,
            n_jobs: None,
            copy_x: true,
            fit_intercept: true,
        }
    }
}
32
33impl From<PyLinearRegressionConfig> for LinearRegressionConfig {
34    fn from(py_config: PyLinearRegressionConfig) -> Self {
35        // Note: copy_x, n_jobs, and positive are Python-specific and handled at the Python level
36        LinearRegressionConfig {
37            fit_intercept: py_config.fit_intercept,
38            ..Default::default()
39        }
40    }
41}
42
/// Ordinary least squares Linear Regression.
///
/// LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
/// to minimize the residual sum of squares between the observed targets in
/// the dataset, and the targets predicted by the linear approximation.
///
/// Parameters
/// ----------
/// fit_intercept : bool, default=True
///     Whether to calculate the intercept for this model. If set
///     to False, no intercept will be used in calculations
///     (i.e. data is expected to be centered).
///
/// copy_X : bool, default=True
///     Kept for scikit-learn API compatibility. The value is stored and
///     returned by ``get_params``; this binding does not pass it on to the
///     underlying sklears-linear estimator.
///
/// n_jobs : int, default=None
///     Kept for scikit-learn API compatibility. The value is stored and
///     returned by ``get_params``; this binding does not pass it on to the
///     underlying sklears-linear estimator.
///
/// positive : bool, default=False
///     Kept for scikit-learn API compatibility. The value is stored and
///     returned by ``get_params``; this binding does not pass it on to the
///     underlying sklears-linear estimator.
///
/// Attributes
/// ----------
/// coef_ : array of shape (n_features,)
///     Estimated coefficients for the linear regression problem.
///
/// intercept_ : float
///     Independent term in the linear model. Reported as 0.0 when the
///     fitted model does not provide an intercept.
///
/// n_features_in_ : int
///     Number of features seen during :term:`fit` (the length of ``coef_``).
///
/// Examples
/// --------
/// >>> import numpy as np
/// >>> from sklears_python import LinearRegression
/// >>> X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]])
/// >>> # y = 1 * x_0 + 2 * x_1 + 3
/// >>> y = np.dot(X, [1.0, 2.0]) + 3.0
/// >>> reg = LinearRegression()
/// >>> _ = reg.fit(X, y)
/// >>> reg.score(X, y)
/// 1.0
/// >>> reg.coef_
/// array([1., 2.])
/// >>> reg.intercept_
/// 3.0...
/// >>> reg.predict(np.array([[3.0, 5.0]]))
/// array([16.])
///
/// Notes
/// -----
/// This class is a thin wrapper: fitting, prediction and scoring are
/// delegated to the ``LinearRegression`` estimator of the sklears-linear
/// crate. ``predict``, ``score`` and the fitted attributes raise
/// ``ValueError`` until ``fit`` has completed successfully.
// NOTE(review): the constructor's `#[pyo3(signature = ...)]` spells this
// keyword `copy_x`, while `get_params`/`set_params` use the key `copy_X`.
// Confirm which spelling Python callers are expected to use.
#[pyclass(name = "LinearRegression")]
pub struct PyLinearRegression {
    // Options captured at construction time (and updated by `set_params`).
    py_config: PyLinearRegressionConfig,
    // `None` until `fit` succeeds; reset to `None` by `set_params`.
    fitted_model: Option<LinearRegression<Trained>>,
}
113
114#[pymethods]
115impl PyLinearRegression {
116    #[new]
117    #[pyo3(signature = (fit_intercept=true, copy_x=true, n_jobs=None, positive=false))]
118    fn new(fit_intercept: bool, copy_x: bool, n_jobs: Option<i32>, positive: bool) -> Self {
119        let py_config = PyLinearRegressionConfig {
120            fit_intercept,
121            copy_x,
122            n_jobs,
123            positive,
124        };
125
126        Self {
127            py_config,
128            fitted_model: None,
129        }
130    }
131
132    /// Fit linear model.
133    ///
134    /// Parameters
135    /// ----------
136    /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
137    ///     Training data.
138    ///
139    /// y : array-like of shape (n_samples,) or (n_samples, n_targets)
140    ///     Target values. Will be cast to X's dtype if necessary.
141    ///
142    /// sample_weight : array-like of shape (n_samples,), default=None
143    ///     Individual weights for each sample
144    ///
145    /// Returns
146    /// -------
147    /// self : object
148    ///     Fitted Estimator.
149    fn fit(&mut self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<()> {
150        let x_array = pyarray_to_core_array2(x)?;
151        let y_array = pyarray_to_core_array1(y)?;
152
153        // Validate input arrays
154        validate_fit_arrays(&x_array, &y_array)?;
155
156        // Create sklears-linear model with configuration
157        let config = LinearRegressionConfig::from(self.py_config.clone());
158        let model = LinearRegression::new().fit_intercept(config.fit_intercept);
159
160        // Fit the model using sklears-linear's implementation
161        match model.fit(&x_array, &y_array) {
162            Ok(fitted_model) => {
163                self.fitted_model = Some(fitted_model);
164                Ok(())
165            }
166            Err(e) => Err(PyValueError::new_err(format!(
167                "Failed to fit model: {:?}",
168                e
169            ))),
170        }
171    }
172
173    /// Predict using the linear model.
174    ///
175    /// Parameters
176    /// ----------
177    /// X : array-like or sparse matrix, shape (n_samples, n_features)
178    ///     Samples.
179    ///
180    /// Returns
181    /// -------
182    /// C : array, shape (n_samples,)
183    ///     Returns predicted values.
184    fn predict(&self, py: Python<'_>, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray1<f64>>> {
185        let fitted = self
186            .fitted_model
187            .as_ref()
188            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
189
190        let x_array = pyarray_to_core_array2(x)?;
191        validate_predict_array(&x_array)?;
192
193        match fitted.predict(&x_array) {
194            Ok(predictions) => Ok(core_array1_to_py(py, &predictions)),
195            Err(e) => Err(PyValueError::new_err(format!("Prediction failed: {:?}", e))),
196        }
197    }
198
199    /// Get model coefficients
200    #[getter]
201    fn coef_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
202        let fitted = self
203            .fitted_model
204            .as_ref()
205            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
206
207        Ok(core_array1_to_py(py, fitted.coef()))
208    }
209
210    /// Get model intercept
211    #[getter]
212    fn intercept_(&self) -> PyResult<f64> {
213        let fitted = self
214            .fitted_model
215            .as_ref()
216            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
217
218        Ok(fitted.intercept().unwrap_or(0.0))
219    }
220
221    /// Return the coefficient of determination of the prediction.
222    ///
223    /// The coefficient of determination :math:`R^2` is defined as
224    /// :math:`(1 - \\frac{SS_{res}}{SS_{tot}})` where :math:`SS_{res} = \\sum_i (y_i - y(x_i))^2`
225    /// is the residual sum of squares, and :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2`
226    /// is the total sum of squares.
227    ///
228    /// The best possible score is 1.0 and it can be negative (because the
229    /// model can be arbitrarily worse). A constant model that always predicts
230    /// the expected value of `y`, disregarding the input features, would get
231    /// a :math:`R^2` score of 0.0.
232    ///
233    /// Parameters
234    /// ----------
235    /// X : array-like of shape (n_samples, n_features)
236    ///     Test samples. For some estimators this may be a precomputed
237    ///     kernel matrix or a list of generic objects instead with shape
238    ///     ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
239    ///     is the number of samples used in the fitting for the estimator.
240    ///
241    /// y : array-like of shape (n_samples,) or (n_samples, n_outputs)
242    ///     True values for `X`.
243    ///
244    /// sample_weight : array-like of shape (n_samples,), default=None
245    ///     Sample weights.
246    ///
247    /// Returns
248    /// -------
249    /// score : float
250    ///     :math:`R^2` of ``self.predict(X)`` w.r.t. `y`.
251    ///
252    /// Notes
253    /// -----
254    /// The :math:`R^2` score used when calling ``score`` on a regressor uses
255    /// ``multioutput='uniform_average'`` from version 0.23 to keep consistent
256    /// with default value of :func:`~sklearn.metrics.r2_score`.
257    /// This influences the ``score`` method of all the multioutput
258    /// regressors (except for
259    /// :class:`~sklearn.multioutput.MultiOutputRegressor`).
260    fn score(&self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<f64> {
261        let fitted = self
262            .fitted_model
263            .as_ref()
264            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
265
266        let x_array = pyarray_to_core_array2(x)?;
267        let y_array = pyarray_to_core_array1(y)?;
268
269        match fitted.score(&x_array, &y_array) {
270            Ok(score) => Ok(score),
271            Err(e) => Err(PyValueError::new_err(format!(
272                "Score calculation failed: {:?}",
273                e
274            ))),
275        }
276    }
277
278    /// Get number of features
279    #[getter]
280    fn n_features_in_(&self) -> PyResult<usize> {
281        let fitted = self
282            .fitted_model
283            .as_ref()
284            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
285
286        // Infer number of features from coefficient array length
287        Ok(fitted.coef().len())
288    }
289
290    /// Return parameters for this estimator (sklearn compatibility)
291    fn get_params(&self, py: Python<'_>, deep: Option<bool>) -> PyResult<Py<PyDict>> {
292        let _deep = deep.unwrap_or(true);
293
294        let dict = PyDict::new(py);
295
296        dict.set_item("fit_intercept", self.py_config.fit_intercept)?;
297        dict.set_item("copy_X", self.py_config.copy_x)?;
298        dict.set_item("n_jobs", self.py_config.n_jobs)?;
299        dict.set_item("positive", self.py_config.positive)?;
300
301        Ok(dict.into())
302    }
303
304    /// Set parameters for this estimator (sklearn compatibility)
305    fn set_params(&mut self, kwargs: &Bound<'_, PyDict>) -> PyResult<()> {
306        // Update configuration parameters
307        if let Some(fit_intercept) = kwargs.get_item("fit_intercept")? {
308            self.py_config.fit_intercept = fit_intercept.extract()?;
309        }
310        if let Some(copy_x) = kwargs.get_item("copy_X")? {
311            self.py_config.copy_x = copy_x.extract()?;
312        }
313        if let Some(n_jobs) = kwargs.get_item("n_jobs")? {
314            self.py_config.n_jobs = n_jobs.extract()?;
315        }
316        if let Some(positive) = kwargs.get_item("positive")? {
317            self.py_config.positive = positive.extract()?;
318        }
319
320        // Clear fitted model since config changed
321        self.fitted_model = None;
322
323        Ok(())
324    }
325
326    /// String representation
327    fn __repr__(&self) -> String {
328        format!(
329            "LinearRegression(fit_intercept={}, copy_X={}, n_jobs={:?}, positive={})",
330            self.py_config.fit_intercept,
331            self.py_config.copy_x,
332            self.py_config.n_jobs,
333            self.py_config.positive
334        )
335    }
336}