sklears_python/linear/linear_regression.rs
//! Python bindings for Linear Regression
//!
//! This module provides Python bindings for Linear Regression, offering a
//! scikit-learn compatible interface backed by the high-performance OLS
//! implementation in the sklears-linear crate.

use super::common::*;
use pyo3::types::PyDict;
use pyo3::{Bound, PyRefMut};
use sklears_core::traits::{Fit, Predict, Score, Trained};
use sklears_linear::{LinearRegression, LinearRegressionConfig};

/// Python-specific configuration wrapper
#[derive(Debug, Clone)]
pub struct PyLinearRegressionConfig {
    pub fit_intercept: bool,
    pub copy_x: bool,
    pub n_jobs: Option<i32>,
    pub positive: bool,
}

impl Default for PyLinearRegressionConfig {
    fn default() -> Self {
        Self {
            fit_intercept: true,
            copy_x: true,
            n_jobs: None,
            positive: false,
        }
    }
}

impl From<PyLinearRegressionConfig> for LinearRegressionConfig {
    fn from(py_config: PyLinearRegressionConfig) -> Self {
        // Note: copy_x, n_jobs, and positive are Python-level options kept for
        // scikit-learn API compatibility (get_params/set_params/__repr__);
        // they do not affect the solver configuration.
        LinearRegressionConfig {
            fit_intercept: py_config.fit_intercept,
            ..Default::default()
        }
    }
}

/// Ordinary least squares Linear Regression.
///
/// LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
/// to minimize the residual sum of squares between the observed targets in
/// the dataset, and the targets predicted by the linear approximation.
///
/// Parameters
/// ----------
/// fit_intercept : bool, default=True
///     Whether to calculate the intercept for this model. If set
///     to False, no intercept will be used in calculations
///     (i.e. data is expected to be centered).
///
/// copy_X : bool, default=True
///     If True, X will be copied; else, it may be overwritten.
///
/// n_jobs : int, default=None
///     The number of jobs to use for the computation. This will only provide
///     speedup in case of sufficiently large problems, that is if firstly
///     `n_targets > 1` and secondly `X` is sparse or if `positive` is set
///     to `True`. ``None`` means 1 unless in a
///     :obj:`joblib.parallel_backend` context. ``-1`` means using all
///     processors.
///
/// positive : bool, default=False
///     When set to ``True``, forces the coefficients to be positive. This
///     option is only supported for dense arrays.
///
/// Attributes
/// ----------
/// coef_ : array of shape (n_features,) or (n_targets, n_features)
///     Estimated coefficients for the linear regression problem.
///     If multiple targets are passed during the fit (y 2D), this
///     is a 2D array of shape (n_targets, n_features), while if only
///     one target is passed, this is a 1D array of length n_features.
///
/// intercept_ : float or array of shape (n_targets,)
///     Independent term in the linear model. Set to 0.0 if
///     `fit_intercept = False`.
///
/// n_features_in_ : int
///     Number of features seen during :term:`fit`.
///
/// Examples
/// --------
/// >>> import numpy as np
/// >>> from sklears_python import LinearRegression
/// >>> X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
/// >>> # y = 1 * x_0 + 2 * x_1 + 3
/// >>> y = np.dot(X, [1, 2]) + 3
/// >>> reg = LinearRegression().fit(X, y)
/// >>> reg.score(X, y)
/// 1.0
/// >>> reg.coef_
/// array([1., 2.])
/// >>> reg.intercept_
/// 3.0...
/// >>> reg.predict(np.array([[3, 5]]))
/// array([16.])
///
/// Notes
/// -----
/// From the implementation point of view, this is plain Ordinary Least
/// Squares solved by the sklears-linear OLS routine and wrapped as a
/// predictor object, analogous to scikit-learn's use of scipy.linalg.lstsq.
#[pyclass(name = "LinearRegression")]
pub struct PyLinearRegression {
    py_config: PyLinearRegressionConfig,
    fitted_model: Option<LinearRegression<Trained>>,
}

#[pymethods]
impl PyLinearRegression {
    #[new]
    #[pyo3(signature = (fit_intercept=true, copy_x=true, n_jobs=None, positive=false))]
    fn new(fit_intercept: bool, copy_x: bool, n_jobs: Option<i32>, positive: bool) -> Self {
        let py_config = PyLinearRegressionConfig {
            fit_intercept,
            copy_x,
            n_jobs,
            positive,
        };

        Self {
            py_config,
            fitted_model: None,
        }
    }

    /// Fit linear model.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     Training data.
    ///
    /// y : array-like of shape (n_samples,)
    ///     Target values. Will be cast to X's dtype if necessary.
    ///
    /// Returns
    /// -------
    /// self : object
    ///     Fitted estimator.
    fn fit<'py>(
        mut slf: PyRefMut<'py, Self>,
        x: PyReadonlyArray2<'py, f64>,
        y: PyReadonlyArray1<'py, f64>,
    ) -> PyResult<PyRefMut<'py, Self>> {
        let x_array = pyarray_to_core_array2(x)?;
        let y_array = pyarray_to_core_array1(y)?;

        // Validate input arrays
        validate_fit_arrays(&x_array, &y_array)?;

        // Create the sklears-linear model with the configured options
        let config = LinearRegressionConfig::from(slf.py_config.clone());
        let model = LinearRegression::new().fit_intercept(config.fit_intercept);

        // Fit the model using sklears-linear's implementation and return self,
        // so calls can be chained as in scikit-learn: LinearRegression().fit(X, y)
        match model.fit(&x_array, &y_array) {
            Ok(fitted_model) => {
                slf.fitted_model = Some(fitted_model);
                Ok(slf)
            }
            Err(e) => Err(PyValueError::new_err(format!(
                "Failed to fit model: {:?}",
                e
            ))),
        }
    }

    /// Predict using the linear model.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     Samples.
    ///
    /// Returns
    /// -------
    /// C : array, shape (n_samples,)
    ///     Returns predicted values.
    fn predict(&self, py: Python<'_>, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray1<f64>>> {
        let fitted = self
            .fitted_model
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;

        let x_array = pyarray_to_core_array2(x)?;
        validate_predict_array(&x_array)?;

        match fitted.predict(&x_array) {
            Ok(predictions) => Ok(core_array1_to_py(py, &predictions)),
            Err(e) => Err(PyValueError::new_err(format!("Prediction failed: {:?}", e))),
        }
    }

    /// Get model coefficients
    #[getter]
    fn coef_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
        let fitted = self
            .fitted_model
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;

        Ok(core_array1_to_py(py, fitted.coef()))
    }

    /// Get model intercept
    #[getter]
    fn intercept_(&self) -> PyResult<f64> {
        let fitted = self
            .fitted_model
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;

        Ok(fitted.intercept().unwrap_or(0.0))
    }

    /// Return the coefficient of determination of the prediction.
    ///
    /// The coefficient of determination :math:`R^2` is defined as
    /// :math:`(1 - \\frac{SS_{res}}{SS_{tot}})`, where
    /// :math:`SS_{res} = \\sum_i (y_i - \\hat{y}_i)^2` is the residual sum of
    /// squares and :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2` is the total
    /// sum of squares.
    ///
    /// The best possible score is 1.0 and it can be negative (because the
    /// model can be arbitrarily worse). A constant model that always predicts
    /// the expected value of `y`, disregarding the input features, would get
    /// a :math:`R^2` score of 0.0.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     Test samples. For some estimators this may be a precomputed
    ///     kernel matrix or a list of generic objects instead with shape
    ///     ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
    ///     is the number of samples used in the fitting for the estimator.
    ///
    /// y : array-like of shape (n_samples,)
    ///     True values for `X`.
    ///
    /// Returns
    /// -------
    /// score : float
    ///     :math:`R^2` of ``self.predict(X)`` w.r.t. `y`.
    ///
    /// Notes
    /// -----
    /// The :math:`R^2` score used when calling ``score`` on a regressor uses
    /// ``multioutput='uniform_average'`` from version 0.23 to keep consistent
    /// with the default value of :func:`~sklearn.metrics.r2_score`.
    /// This influences the ``score`` method of all the multioutput
    /// regressors (except for
    /// :class:`~sklearn.multioutput.MultiOutputRegressor`).
    fn score(&self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<f64> {
        let fitted = self
            .fitted_model
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;

        let x_array = pyarray_to_core_array2(x)?;
        let y_array = pyarray_to_core_array1(y)?;

        match fitted.score(&x_array, &y_array) {
            Ok(score) => Ok(score),
            Err(e) => Err(PyValueError::new_err(format!(
                "Score calculation failed: {:?}",
                e
            ))),
        }
    }

    /// Get number of features
    #[getter]
    fn n_features_in_(&self) -> PyResult<usize> {
        let fitted = self
            .fitted_model
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;

        // Infer number of features from coefficient array length
        Ok(fitted.coef().len())
    }

    /// Return parameters for this estimator (sklearn compatibility)
    fn get_params(&self, py: Python<'_>, deep: Option<bool>) -> PyResult<Py<PyDict>> {
        let _deep = deep.unwrap_or(true);

        let dict = PyDict::new(py);

        dict.set_item("fit_intercept", self.py_config.fit_intercept)?;
        dict.set_item("copy_X", self.py_config.copy_x)?;
        dict.set_item("n_jobs", self.py_config.n_jobs)?;
        dict.set_item("positive", self.py_config.positive)?;

        Ok(dict.into())
    }

    /// Set parameters for this estimator (sklearn compatibility)
    fn set_params(&mut self, kwargs: &Bound<'_, PyDict>) -> PyResult<()> {
        // Update configuration parameters
        if let Some(fit_intercept) = kwargs.get_item("fit_intercept")? {
            self.py_config.fit_intercept = fit_intercept.extract()?;
        }
        if let Some(copy_x) = kwargs.get_item("copy_X")? {
            self.py_config.copy_x = copy_x.extract()?;
        }
        if let Some(n_jobs) = kwargs.get_item("n_jobs")? {
            self.py_config.n_jobs = n_jobs.extract()?;
        }
        if let Some(positive) = kwargs.get_item("positive")? {
            self.py_config.positive = positive.extract()?;
        }

        // Clear fitted model since config changed
        self.fitted_model = None;

        Ok(())
    }

    /// String representation
    fn __repr__(&self) -> String {
        format!(
            "LinearRegression(fit_intercept={}, copy_X={}, n_jobs={:?}, positive={})",
            self.py_config.fit_intercept,
            self.py_config.copy_x,
            self.py_config.n_jobs,
            self.py_config.positive
        )
    }
}