sklears_python/preprocessing/
minmax_scaler.rs

//! Python bindings for MinMaxScaler
//!
//! This module provides Python bindings for MinMaxScaler,
//! offering scikit-learn compatible min-max normalization.

use super::common::*;
use scirs2_core::ndarray::Array1;

/// MinMaxScaler state after fitting
#[derive(Debug, Clone)]
struct MinMaxScalerState {
    data_min: Array1<f64>,
    data_max: Array1<f64>,
    data_range: Array1<f64>,
    scale: Array1<f64>,
    min_: Array1<f64>,
    n_features: usize,
    n_samples_seen: usize,
    feature_range: (f64, f64),
}

/// Transform features by scaling each feature to a given range.
///
/// This estimator scales and translates each feature individually such
/// that it is in the given range on the training set, e.g. between
/// zero and one.
///
/// The transformation is given by:
///
///     X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
///     X_scaled = X_std * (max - min) + min
///
/// where min, max = feature_range.
///
/// This transformation is often used as an alternative to zero mean,
/// unit variance scaling.
///
/// Parameters
/// ----------
/// feature_range : tuple (min, max), default=(0, 1)
///     Desired range of transformed data.
///
/// copy : bool, default=True
///     Set to False to request in-place transformation and avoid a copy.
///     Kept for scikit-learn API compatibility; this binding currently
///     always returns a new array regardless of this setting.
///
/// clip : bool, default=False
///     Set to True to clip transformed values of held-out data to the
///     provided ``feature_range``.
///
/// Attributes
/// ----------
/// min_ : ndarray of shape (n_features,)
///     Per feature adjustment for minimum. Equivalent to
///     ``min - X.min(axis=0) * self.scale_``
///
/// scale_ : ndarray of shape (n_features,)
///     Per feature relative scaling of the data. Equivalent to
///     ``(max - min) / (X.max(axis=0) - X.min(axis=0))``
///
/// data_min_ : ndarray of shape (n_features,)
///     Per feature minimum seen in the data
///
/// data_max_ : ndarray of shape (n_features,)
///     Per feature maximum seen in the data
///
/// data_range_ : ndarray of shape (n_features,)
///     Per feature range ``(data_max_ - data_min_)`` seen in the data
///
/// n_features_in_ : int
///     Number of features seen during :term:`fit`.
///
/// n_samples_seen_ : int
///     The number of samples processed by the estimator. It is reset
///     on each new call to ``fit``.
///
/// Examples
/// --------
/// >>> from sklears_python import MinMaxScaler
/// >>> import numpy as np
/// >>> data = np.array([[-1, 2], [-0.5, 6], [0, 10], [1, 18]])
/// >>> scaler = MinMaxScaler()
/// >>> scaler.fit(data)
/// >>> print(scaler.data_max_)
/// [ 1. 18.]
/// >>> print(scaler.transform(data))
/// [[0.   0.  ]
///  [0.25 0.25]
///  [0.5  0.5 ]
///  [1.   1.  ]]
/// >>> print(scaler.transform(np.array([[2.0, 2.0]])))
/// [[1.5 0. ]]
#[pyclass(name = "MinMaxScaler")]
pub struct PyMinMaxScaler {
    feature_range: (f64, f64),
    copy: bool,
    clip: bool,
    state: Option<MinMaxScalerState>,
}

#[pymethods]
impl PyMinMaxScaler {
    #[new]
    #[pyo3(signature = (feature_range=(0.0, 1.0), copy=true, clip=false))]
    fn new(feature_range: (f64, f64), copy: bool, clip: bool) -> Self {
        Self {
            feature_range,
            copy,
            clip,
            state: None,
        }
    }

    /// Compute the minimum and maximum to be used for later scaling.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     The data used to compute the per-feature minimum and maximum
    ///     used for later scaling along the features axis.
    ///
    /// Returns
    /// -------
    /// None
    ///     The scaler is fitted in place; the learned statistics are
    ///     exposed through the ``data_min_``, ``data_max_``, ``scale_``
    ///     and ``min_`` attributes.
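    ///
    /// Examples
    /// --------
    /// A minimal usage sketch (illustrative; assumes a float64 numpy
    /// array as input):
    ///
    /// >>> import numpy as np
    /// >>> from sklears_python import MinMaxScaler
    /// >>> scaler = MinMaxScaler()
    /// >>> scaler.fit(np.array([[-1.0, 2.0], [-0.5, 6.0], [0.0, 10.0], [1.0, 18.0]]))
    /// >>> scaler.n_features_in_
    /// 2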
    fn fit(&mut self, x: PyReadonlyArray2<f64>) -> PyResult<()> {
        let x_array = pyarray_to_core_array2(&x)?;
        validate_fit_array(&x_array)?;

        let n_samples = x_array.nrows();
        let n_features = x_array.ncols();

        // Compute min and max for each feature
        let mut data_min = Array1::zeros(n_features);
        let mut data_max = Array1::zeros(n_features);

        for j in 0..n_features {
            let col = x_array.column(j);
            data_min[j] = col.iter().cloned().fold(f64::INFINITY, |a, b| a.min(b));
            data_max[j] = col.iter().cloned().fold(f64::NEG_INFINITY, |a, b| a.max(b));
        }

        // Compute data range
        let data_range = &data_max - &data_min;

        // Compute scale and min_
        let (feature_min, feature_max) = self.feature_range;
        let feature_range = feature_max - feature_min;

        let mut scale = Array1::zeros(n_features);
        let mut min_ = Array1::zeros(n_features);

        for j in 0..n_features {
            if data_range[j].abs() < 1e-10 {
                // Handle constant features
                scale[j] = 1.0;
                min_[j] = feature_min - data_min[j];
            } else {
                scale[j] = feature_range / data_range[j];
                min_[j] = feature_min - data_min[j] * scale[j];
            }
        }

        self.state = Some(MinMaxScalerState {
            data_min,
            data_max,
            data_range,
            scale,
            min_,
            n_features,
            n_samples_seen: n_samples,
            feature_range: self.feature_range,
        });

        Ok(())
    }

    /// Scale features of X according to feature_range.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     Input data that will be transformed.
    ///
    /// Returns
    /// -------
    /// Xt : ndarray of shape (n_samples, n_features)
    ///     Transformed data.
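    ///
    /// Examples
    /// --------
    /// Illustrative sketch of out-of-range handling with ``clip=True``
    /// (assumes float64 numpy input; values outside the training range
    /// are clamped to ``feature_range``):
    ///
    /// >>> import numpy as np
    /// >>> from sklears_python import MinMaxScaler
    /// >>> scaler = MinMaxScaler(clip=True)
    /// >>> scaler.fit(np.array([[-1.0, 2.0], [-0.5, 6.0], [0.0, 10.0], [1.0, 18.0]]))
    /// >>> print(scaler.transform(np.array([[2.0, 2.0]])))
    /// [[1. 0.]]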
    fn transform(&self, py: Python<'_>, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray2<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        let x_array = pyarray_to_core_array2(&x)?;
        validate_transform_array(&x_array, state.n_features)?;

        let mut transformed = x_array.clone();

        // Apply scaling: X_scaled = X * scale + min_
        for j in 0..state.n_features {
            for i in 0..transformed.nrows() {
                transformed[[i, j]] = transformed[[i, j]] * state.scale[j] + state.min_[j];

                // Clip values if requested
                if self.clip {
                    let (min_val, max_val) = state.feature_range;
                    transformed[[i, j]] = transformed[[i, j]].clamp(min_val, max_val);
                }
            }
        }

        core_array2_to_py(py, &transformed)
    }

    /// Fit to data, then transform it.
    ///
    /// Fits the transformer to `X` and returns a transformed version of `X`.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     Input samples.
    ///
    /// Returns
    /// -------
    /// X_new : ndarray of shape (n_samples, n_features)
    ///     Transformed array.
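    ///
    /// Examples
    /// --------
    /// A minimal usage sketch (illustrative; equivalent to calling
    /// ``fit`` followed by ``transform`` on the same float64 numpy array):
    ///
    /// >>> import numpy as np
    /// >>> from sklears_python import MinMaxScaler
    /// >>> scaler = MinMaxScaler()
    /// >>> print(scaler.fit_transform(np.array([[-1.0, 2.0], [-0.5, 6.0], [0.0, 10.0], [1.0, 18.0]])))
    /// [[0.   0.  ]
    ///  [0.25 0.25]
    ///  [0.5  0.5 ]
    ///  [1.   1.  ]]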
    fn fit_transform(
        &mut self,
        py: Python<'_>,
        x: PyReadonlyArray2<f64>,
    ) -> PyResult<Py<PyArray2<f64>>> {
        let x_array = pyarray_to_core_array2(&x)?;
        self.fit(x)?;

        // `x` is consumed by `fit`, so scale the pre-converted `x_array`
        // here instead of going through `transform` again.
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        let mut transformed = x_array.clone();

        // Apply scaling: X_scaled = X * scale + min_
        for j in 0..state.n_features {
            for i in 0..transformed.nrows() {
                transformed[[i, j]] = transformed[[i, j]] * state.scale[j] + state.min_[j];

                // Clip values if requested
                if self.clip {
                    let (min_val, max_val) = state.feature_range;
                    transformed[[i, j]] = transformed[[i, j]].clamp(min_val, max_val);
                }
            }
        }

        core_array2_to_py(py, &transformed)
    }

    /// Undo the scaling of X according to feature_range.
    ///
    /// Parameters
    /// ----------
    /// X : array-like of shape (n_samples, n_features)
    ///     Input data that will be transformed back to the original scale.
    ///
    /// Returns
    /// -------
    /// Xt : ndarray of shape (n_samples, n_features)
    ///     Transformed data.
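    ///
    /// Examples
    /// --------
    /// A minimal usage sketch (illustrative; maps scaled values back to
    /// the original feature space of the fitted data):
    ///
    /// >>> import numpy as np
    /// >>> from sklears_python import MinMaxScaler
    /// >>> scaler = MinMaxScaler()
    /// >>> scaler.fit(np.array([[-1.0, 2.0], [-0.5, 6.0], [0.0, 10.0], [1.0, 18.0]]))
    /// >>> print(scaler.inverse_transform(np.array([[0.5, 0.5]])))
    /// [[ 0. 10.]]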
    fn inverse_transform(
        &self,
        py: Python<'_>,
        x: PyReadonlyArray2<f64>,
    ) -> PyResult<Py<PyArray2<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        let x_array = pyarray_to_core_array2(&x)?;
        validate_transform_array(&x_array, state.n_features)?;

        let mut inverse = x_array.clone();

        // Reverse scaling: X = (X_scaled - min_) / scale
        for j in 0..state.n_features {
            for i in 0..inverse.nrows() {
                inverse[[i, j]] = (inverse[[i, j]] - state.min_[j]) / state.scale[j];
            }
        }

        core_array2_to_py(py, &inverse)
    }

    /// Per feature minimum seen in the data
    #[getter]
    fn data_min_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(core_array1_to_py(py, &state.data_min))
    }

    /// Per feature maximum seen in the data
    #[getter]
    fn data_max_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(core_array1_to_py(py, &state.data_max))
    }

    /// Per feature range (data_max_ - data_min_) seen in the data
    #[getter]
    fn data_range_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(core_array1_to_py(py, &state.data_range))
    }

    /// Per feature relative scaling of the data
    #[getter]
    fn scale_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(core_array1_to_py(py, &state.scale))
    }

    /// Per feature adjustment for minimum
    #[getter]
    fn min_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(core_array1_to_py(py, &state.min_))
    }

    /// Number of features seen during fit.
    #[getter]
    fn n_features_in_(&self) -> PyResult<usize> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(state.n_features)
    }

    /// The number of samples processed by the estimator.
    #[getter]
    fn n_samples_seen_(&self) -> PyResult<usize> {
        let state = self
            .state
            .as_ref()
            .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;

        Ok(state.n_samples_seen)
    }

    /// String representation
    fn __repr__(&self) -> String {
        format!(
            "MinMaxScaler(feature_range=({}, {}), copy={}, clip={})",
            self.feature_range.0, self.feature_range.1, self.copy, self.clip
        )
    }
}