sklears_python/linear/linear_regression.rs
1//! Python bindings for Linear Regression
2//!
3//! This module provides Python bindings for Linear Regression,
4//! offering scikit-learn compatible interfaces with high-performance OLS implementation
5//! using the sklears-linear crate.
6
7use super::common::*;
8use numpy::IntoPyArray;
9use pyo3::types::PyDict;
10use pyo3::Bound;
11use sklears_core::traits::{Fit, Predict, Score, Trained};
12use sklears_linear::{LinearRegression, LinearRegressionConfig};
13
/// Hyper-parameters of the Python-facing linear regression estimator.
///
/// The fields mirror the keyword arguments accepted by the Python
/// constructor and are kept verbatim so they can be reported back to Python.
#[derive(Clone, Debug)]
pub struct PyLinearRegressionConfig {
    /// Whether the model should fit an intercept term.
    pub fit_intercept: bool,
    /// Value of the scikit-learn style `copy_X` option.
    pub copy_x: bool,
    /// Requested number of jobs; `None` when the caller did not specify one.
    pub n_jobs: Option<i32>,
    /// Value of the scikit-learn style `positive` option.
    pub positive: bool,
}
22
23impl Default for PyLinearRegressionConfig {
24 fn default() -> Self {
25 Self {
26 fit_intercept: true,
27 copy_x: true,
28 n_jobs: None,
29 positive: false,
30 }
31 }
32}
33
34impl From<PyLinearRegressionConfig> for LinearRegressionConfig {
35 fn from(py_config: PyLinearRegressionConfig) -> Self {
36 let mut config = LinearRegressionConfig::default();
37 config.fit_intercept = py_config.fit_intercept;
38 // Note: copy_x, n_jobs, and positive are Python-specific and handled at the Python level
39 config
40 }
41}
42
/// Ordinary least squares Linear Regression.
///
/// LinearRegression fits a linear model with coefficients w = (w1, ..., wp)
/// to minimize the residual sum of squares between the observed targets in
/// the dataset, and the targets predicted by the linear approximation.
///
/// Parameters
/// ----------
/// fit_intercept : bool, default=True
///     Whether to calculate the intercept for this model. If set
///     to False, no intercept will be used in calculations
///     (i.e. data is expected to be centered).
///
/// copy_X : bool, default=True
///     If True, X will be copied; else, it may be overwritten.
///
/// n_jobs : int, default=None
///     The number of jobs to use for the computation.
///
/// positive : bool, default=False
///     When set to ``True``, forces the coefficients to be positive.
///
/// Attributes
/// ----------
/// coef_ : array of shape (n_features,)
///     Estimated coefficients for the linear regression problem.
///
/// intercept_ : float
///     Independent term in the linear model. Set to 0.0 if
///     `fit_intercept = False`.
///
/// n_features_in_ : int
///     Number of features seen during :term:`fit`.
///
/// Examples
/// --------
/// >>> import numpy as np
/// >>> from sklears_python import LinearRegression
/// >>> X = np.array([[1.0, 1.0], [1.0, 2.0], [2.0, 2.0], [2.0, 3.0]])
/// >>> # y = 1 * x_0 + 2 * x_1 + 3
/// >>> y = np.dot(X, [1.0, 2.0]) + 3.0
/// >>> reg = LinearRegression()
/// >>> _ = reg.fit(X, y)
/// >>> reg.score(X, y)
/// 1.0
/// >>> reg.coef_
/// array([1., 2.])
/// >>> reg.intercept_
/// 3.0...
/// >>> reg.predict(np.array([[3.0, 5.0]]))
/// array([16.])
///
/// Notes
/// -----
/// Fitting, prediction and scoring are delegated to the ``LinearRegression``
/// model of the ``sklears-linear`` crate; this class is a thin wrapper that
/// exposes it with a scikit-learn style interface.
//
// NOTE(review): the constructor signature exposes the keyword as `copy_x`, while
// `get_params`/`set_params` use the key `copy_X` — confirm which spelling is intended.
// NOTE(review): `copy_X`, `n_jobs` and `positive` are stored and reported back, but
// only `fit_intercept` is forwarded to the native model in this file — confirm whether
// `positive=True` should be rejected instead of silently fitting plain OLS.
#[pyclass(name = "LinearRegression")]
pub struct PyLinearRegression {
    // Options exactly as supplied from Python (constructor / `set_params`).
    py_config: PyLinearRegressionConfig,
    // Trained native model; `None` until `fit` succeeds and again after `set_params`.
    fitted_model: Option<LinearRegression<Trained>>,
}
113
114#[pymethods]
115impl PyLinearRegression {
116 #[new]
117 #[pyo3(signature = (fit_intercept=true, copy_x=true, n_jobs=None, positive=false))]
118 fn new(fit_intercept: bool, copy_x: bool, n_jobs: Option<i32>, positive: bool) -> Self {
119 let py_config = PyLinearRegressionConfig {
120 fit_intercept,
121 copy_x,
122 n_jobs,
123 positive,
124 };
125
126 Self {
127 py_config,
128 fitted_model: None,
129 }
130 }
131
132 /// Fit linear model.
133 ///
134 /// Parameters
135 /// ----------
136 /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
137 /// Training data.
138 ///
139 /// y : array-like of shape (n_samples,) or (n_samples, n_targets)
140 /// Target values. Will be cast to X's dtype if necessary.
141 ///
142 /// sample_weight : array-like of shape (n_samples,), default=None
143 /// Individual weights for each sample
144 ///
145 /// Returns
146 /// -------
147 /// self : object
148 /// Fitted Estimator.
149 fn fit(&mut self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<()> {
150 let x_array = x.as_array().to_owned();
151 let y_array = y.as_array().to_owned();
152
153 // Validate input arrays
154 validate_fit_arrays(&x_array, &y_array)?;
155
156 // Create sklears-linear model with configuration
157 let config = LinearRegressionConfig::from(self.py_config.clone());
158 let model = LinearRegression::new().fit_intercept(config.fit_intercept);
159
160 // Fit the model using sklears-linear's implementation
161 match model.fit(&x_array, &y_array) {
162 Ok(fitted_model) => {
163 self.fitted_model = Some(fitted_model);
164 Ok(())
165 }
166 Err(e) => Err(PyValueError::new_err(format!(
167 "Failed to fit model: {:?}",
168 e
169 ))),
170 }
171 }
172
173 /// Predict using the linear model.
174 ///
175 /// Parameters
176 /// ----------
177 /// X : array-like or sparse matrix, shape (n_samples, n_features)
178 /// Samples.
179 ///
180 /// Returns
181 /// -------
182 /// C : array, shape (n_samples,)
183 /// Returns predicted values.
184 fn predict(&self, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray1<f64>>> {
185 let fitted = self
186 .fitted_model
187 .as_ref()
188 .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
189
190 let x_array = x.as_array().to_owned();
191 validate_predict_array(&x_array)?;
192
193 match fitted.predict(&x_array) {
194 Ok(predictions) => {
195 let py = unsafe { Python::assume_attached() };
196 Ok(predictions.into_pyarray(py).into())
197 }
198 Err(e) => Err(PyValueError::new_err(format!("Prediction failed: {:?}", e))),
199 }
200 }
201
202 /// Get model coefficients
203 #[getter]
204 fn coef_(&self) -> PyResult<Py<PyArray1<f64>>> {
205 let fitted = self
206 .fitted_model
207 .as_ref()
208 .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
209
210 let py = unsafe { Python::assume_attached() };
211 Ok(fitted.coef().clone().into_pyarray(py).into())
212 }
213
214 /// Get model intercept
215 #[getter]
216 fn intercept_(&self) -> PyResult<f64> {
217 let fitted = self
218 .fitted_model
219 .as_ref()
220 .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
221
222 Ok(fitted.intercept().unwrap_or(0.0))
223 }
224
225 /// Return the coefficient of determination of the prediction.
226 ///
227 /// The coefficient of determination :math:`R^2` is defined as
228 /// :math:`(1 - \\frac{SS_{res}}{SS_{tot}})` where :math:`SS_{res} = \\sum_i (y_i - y(x_i))^2`
229 /// is the residual sum of squares, and :math:`SS_{tot} = \\sum_i (y_i - \\bar{y})^2`
230 /// is the total sum of squares.
231 ///
232 /// The best possible score is 1.0 and it can be negative (because the
233 /// model can be arbitrarily worse). A constant model that always predicts
234 /// the expected value of `y`, disregarding the input features, would get
235 /// a :math:`R^2` score of 0.0.
236 ///
237 /// Parameters
238 /// ----------
239 /// X : array-like of shape (n_samples, n_features)
240 /// Test samples. For some estimators this may be a precomputed
241 /// kernel matrix or a list of generic objects instead with shape
242 /// ``(n_samples, n_samples_fitted)``, where ``n_samples_fitted``
243 /// is the number of samples used in the fitting for the estimator.
244 ///
245 /// y : array-like of shape (n_samples,) or (n_samples, n_outputs)
246 /// True values for `X`.
247 ///
248 /// sample_weight : array-like of shape (n_samples,), default=None
249 /// Sample weights.
250 ///
251 /// Returns
252 /// -------
253 /// score : float
254 /// :math:`R^2` of ``self.predict(X)`` w.r.t. `y`.
255 ///
256 /// Notes
257 /// -----
258 /// The :math:`R^2` score used when calling ``score`` on a regressor uses
259 /// ``multioutput='uniform_average'`` from version 0.23 to keep consistent
260 /// with default value of :func:`~sklearn.metrics.r2_score`.
261 /// This influences the ``score`` method of all the multioutput
262 /// regressors (except for
263 /// :class:`~sklearn.multioutput.MultiOutputRegressor`).
264 fn score(&self, x: PyReadonlyArray2<f64>, y: PyReadonlyArray1<f64>) -> PyResult<f64> {
265 let fitted = self
266 .fitted_model
267 .as_ref()
268 .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
269
270 let x_array = x.as_array().to_owned();
271 let y_array = y.as_array().to_owned();
272
273 match fitted.score(&x_array, &y_array) {
274 Ok(score) => Ok(score),
275 Err(e) => Err(PyValueError::new_err(format!(
276 "Score calculation failed: {:?}",
277 e
278 ))),
279 }
280 }
281
282 /// Get number of features
283 #[getter]
284 fn n_features_in_(&self) -> PyResult<usize> {
285 let fitted = self
286 .fitted_model
287 .as_ref()
288 .ok_or_else(|| PyValueError::new_err("Model not fitted. Call fit() first."))?;
289
290 // Infer number of features from coefficient array length
291 Ok(fitted.coef().len())
292 }
293
294 /// Return parameters for this estimator (sklearn compatibility)
295 fn get_params(&self, deep: Option<bool>) -> PyResult<Py<PyDict>> {
296 let _deep = deep.unwrap_or(true);
297
298 let py = unsafe { Python::assume_attached() };
299 let dict = PyDict::new(py);
300
301 dict.set_item("fit_intercept", self.py_config.fit_intercept)?;
302 dict.set_item("copy_X", self.py_config.copy_x)?;
303 dict.set_item("n_jobs", self.py_config.n_jobs)?;
304 dict.set_item("positive", self.py_config.positive)?;
305
306 Ok(dict.into())
307 }
308
309 /// Set parameters for this estimator (sklearn compatibility)
310 fn set_params(&mut self, kwargs: &Bound<'_, PyDict>) -> PyResult<()> {
311 // Update configuration parameters
312 if let Some(fit_intercept) = kwargs.get_item("fit_intercept")? {
313 self.py_config.fit_intercept = fit_intercept.extract()?;
314 }
315 if let Some(copy_x) = kwargs.get_item("copy_X")? {
316 self.py_config.copy_x = copy_x.extract()?;
317 }
318 if let Some(n_jobs) = kwargs.get_item("n_jobs")? {
319 self.py_config.n_jobs = n_jobs.extract()?;
320 }
321 if let Some(positive) = kwargs.get_item("positive")? {
322 self.py_config.positive = positive.extract()?;
323 }
324
325 // Clear fitted model since config changed
326 self.fitted_model = None;
327
328 Ok(())
329 }
330
331 /// String representation
332 fn __repr__(&self) -> String {
333 format!(
334 "LinearRegression(fit_intercept={}, copy_X={}, n_jobs={:?}, positive={})",
335 self.py_config.fit_intercept,
336 self.py_config.copy_x,
337 self.py_config.n_jobs,
338 self.py_config.positive
339 )
340 }
341}