// sklears_python/preprocessing/minmax_scaler.rs
1//! Python bindings for MinMaxScaler
2//!
3//! This module provides Python bindings for MinMaxScaler,
4//! offering scikit-learn compatible min-max normalization.
5
use super::common::*;
use scirs2_core::ndarray::{Array1, Array2};
8
/// MinMaxScaler state after fitting.
///
/// Holds the per-feature statistics computed by `fit` that are required to
/// (inverse-)transform data. Created only by `PyMinMaxScaler::fit`; a value of
/// `None` in the owning scaler means "not fitted yet".
#[derive(Debug, Clone)]
struct MinMaxScalerState {
    // Per-feature minimum observed in the training data (data_min_).
    data_min: Array1<f64>,
    // Per-feature maximum observed in the training data (data_max_).
    data_max: Array1<f64>,
    // Per-feature range data_max - data_min (data_range_).
    data_range: Array1<f64>,
    // Per-feature multiplicative factor applied during transform (scale_).
    scale: Array1<f64>,
    // Per-feature additive offset applied during transform (min_).
    min_: Array1<f64>,
    // Number of columns seen during fit; transform inputs must match.
    n_features: usize,
    // Number of rows processed during fit (n_samples_seen_).
    n_samples_seen: usize,
    // Target (min, max) range captured at fit time, used for clipping.
    feature_range: (f64, f64),
}
21
22/// Transform features by scaling each feature to a given range.
23///
24/// This estimator scales and translates each feature individually such
25/// that it is in the given range on the training set, e.g. between
26/// zero and one.
27///
28/// The transformation is given by:
29///
30/// X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
31/// X_scaled = X_std * (max - min) + min
32///
33/// where min, max = feature_range.
34///
35/// This transformation is often used as an alternative to zero mean,
36/// unit variance scaling.
37///
38/// Parameters
39/// ----------
40/// feature_range : tuple (min, max), default=(0, 1)
41/// Desired range of transformed data.
42///
43/// copy : bool, default=True
44/// Set to False to perform inplace row normalization and avoid a
45/// copy (if the input is already a numpy array).
46///
47/// clip : bool, default=False
48/// Set to True to clip transformed values of held-out data to
49/// provided `feature range`.
50///
51/// Attributes
52/// ----------
53/// min_ : ndarray of shape (n_features,)
54/// Per feature adjustment for minimum. Equivalent to
55/// ``min - X.min(axis=0) * self.scale_``
56///
57/// scale_ : ndarray of shape (n_features,)
58/// Per feature relative scaling of the data. Equivalent to
59/// ``(max - min) / (X.max(axis=0) - X.min(axis=0))``
60///
61/// data_min_ : ndarray of shape (n_features,)
62/// Per feature minimum seen in the data
63///
64/// data_max_ : ndarray of shape (n_features,)
65/// Per feature maximum seen in the data
66///
67/// data_range_ : ndarray of shape (n_features,)
68/// Per feature range ``(data_max_ - data_min_)`` seen in the data
69///
70/// n_features_in_ : int
71/// Number of features seen during :term:`fit`.
72///
73/// n_samples_seen_ : int
74/// The number of samples processed by the estimator.
75/// It will be reset on new calls to fit, but increments across
76/// ``partial_fit`` calls.
77///
78/// Examples
79/// --------
80/// >>> from sklears_python import MinMaxScaler
81/// >>> import numpy as np
82/// >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
83/// >>> scaler = MinMaxScaler()
84/// >>> scaler.fit(data)
85/// MinMaxScaler()
86/// >>> print(scaler.data_max_)
87/// [ 1. 18.]
88/// >>> print(scaler.transform(data))
89/// [[0. 0. ]
90/// [0.25 0.25]
91/// [0.5 0.5 ]
92/// [1. 1. ]]
93/// >>> print(scaler.transform([[2, 2]]))
94/// [[1.5 0. ]]
#[pyclass(name = "MinMaxScaler")]
pub struct PyMinMaxScaler {
    // Desired (min, max) range of the transformed data.
    feature_range: (f64, f64),
    // Kept for scikit-learn API compatibility; only echoed in __repr__ —
    // this implementation always works on a copy of the input.
    copy: bool,
    // When true, transform() clamps outputs into `feature_range`.
    clip: bool,
    // Fitted statistics; `None` until fit() has been called.
    state: Option<MinMaxScalerState>,
}
102
103#[pymethods]
104impl PyMinMaxScaler {
105 #[new]
106 #[pyo3(signature = (feature_range=(0.0, 1.0), copy=true, clip=false))]
107 fn new(feature_range: (f64, f64), copy: bool, clip: bool) -> Self {
108 Self {
109 feature_range,
110 copy,
111 clip,
112 state: None,
113 }
114 }
115
116 /// Compute the minimum and maximum to be used for later scaling.
117 ///
118 /// Parameters
119 /// ----------
120 /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
121 /// The data used to compute the per-feature minimum and maximum
122 /// used for later scaling along the features axis.
123 ///
124 /// y : None
125 /// Ignored.
126 ///
127 /// Returns
128 /// -------
129 /// self : object
130 /// Fitted scaler.
131 fn fit(&mut self, x: PyReadonlyArray2<f64>) -> PyResult<()> {
132 let x_array = pyarray_to_core_array2(&x)?;
133 validate_fit_array(&x_array)?;
134
135 let n_samples = x_array.nrows();
136 let n_features = x_array.ncols();
137
138 // Compute min and max for each feature
139 let mut data_min = Array1::zeros(n_features);
140 let mut data_max = Array1::zeros(n_features);
141
142 for j in 0..n_features {
143 let col = x_array.column(j);
144 data_min[j] = col.iter().cloned().fold(f64::INFINITY, |a, b| a.min(b));
145 data_max[j] = col.iter().cloned().fold(f64::NEG_INFINITY, |a, b| a.max(b));
146 }
147
148 // Compute data range
149 let data_range = &data_max - &data_min;
150
151 // Compute scale and min_
152 let (feature_min, feature_max) = self.feature_range;
153 let feature_range = feature_max - feature_min;
154
155 let mut scale = Array1::zeros(n_features);
156 let mut min_ = Array1::zeros(n_features);
157
158 for j in 0..n_features {
159 if data_range[j].abs() < 1e-10 {
160 // Handle constant features
161 scale[j] = 1.0;
162 min_[j] = feature_min - data_min[j];
163 } else {
164 scale[j] = feature_range / data_range[j];
165 min_[j] = feature_min - data_min[j] * scale[j];
166 }
167 }
168
169 self.state = Some(MinMaxScalerState {
170 data_min,
171 data_max,
172 data_range,
173 scale,
174 min_,
175 n_features,
176 n_samples_seen: n_samples,
177 feature_range: self.feature_range,
178 });
179
180 Ok(())
181 }
182
183 /// Scale features of X according to feature_range.
184 ///
185 /// Parameters
186 /// ----------
187 /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
188 /// Input data that will be transformed.
189 ///
190 /// Returns
191 /// -------
192 /// Xt : ndarray of shape (n_samples, n_features)
193 /// Transformed data.
194 fn transform(&self, py: Python<'_>, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray2<f64>>> {
195 let state = self
196 .state
197 .as_ref()
198 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
199
200 let x_array = pyarray_to_core_array2(&x)?;
201 validate_transform_array(&x_array, state.n_features)?;
202
203 let mut transformed = x_array.clone();
204
205 // Apply scaling: X_scaled = X * scale + min_
206 for j in 0..state.n_features {
207 for i in 0..transformed.nrows() {
208 transformed[[i, j]] = transformed[[i, j]] * state.scale[j] + state.min_[j];
209
210 // Clip values if requested
211 if self.clip {
212 let (min_val, max_val) = state.feature_range;
213 transformed[[i, j]] = transformed[[i, j]].clamp(min_val, max_val);
214 }
215 }
216 }
217
218 core_array2_to_py(py, &transformed)
219 }
220
221 /// Fit to data, then transform it.
222 ///
223 /// Fits transformer to `X` and returns a transformed version of `X`.
224 ///
225 /// Parameters
226 /// ----------
227 /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
228 /// Input samples.
229 ///
230 /// y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None
231 /// Target values (None for unsupervised transformations).
232 ///
233 /// Returns
234 /// -------
235 /// X_new : ndarray array of shape (n_samples, n_features_new)
236 /// Transformed array.
237 fn fit_transform(
238 &mut self,
239 py: Python<'_>,
240 x: PyReadonlyArray2<f64>,
241 ) -> PyResult<Py<PyArray2<f64>>> {
242 let x_array = pyarray_to_core_array2(&x)?;
243 self.fit(x)?;
244
245 // Transform using the saved x_array
246 let state = self
247 .state
248 .as_ref()
249 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
250
251 let mut transformed = x_array.clone();
252
253 // Apply scaling: X_scaled = X * scale + min_
254 for j in 0..state.n_features {
255 for i in 0..transformed.nrows() {
256 transformed[[i, j]] = transformed[[i, j]] * state.scale[j] + state.min_[j];
257
258 // Clip values if requested
259 if self.clip {
260 let (min_val, max_val) = state.feature_range;
261 transformed[[i, j]] = transformed[[i, j]].clamp(min_val, max_val);
262 }
263 }
264 }
265
266 core_array2_to_py(py, &transformed)
267 }
268
269 /// Undo the scaling of X according to feature_range.
270 ///
271 /// Parameters
272 /// ----------
273 /// X : {array-like, sparse matrix} of shape (n_samples, n_features)
274 /// Input data that will be transformed. It cannot be sparse.
275 ///
276 /// Returns
277 /// -------
278 /// Xt : ndarray of shape (n_samples, n_features)
279 /// Transformed data.
280 fn inverse_transform(
281 &self,
282 py: Python<'_>,
283 x: PyReadonlyArray2<f64>,
284 ) -> PyResult<Py<PyArray2<f64>>> {
285 let state = self
286 .state
287 .as_ref()
288 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
289
290 let x_array = pyarray_to_core_array2(&x)?;
291 validate_transform_array(&x_array, state.n_features)?;
292
293 let mut inverse = x_array.clone();
294
295 // Reverse scaling: X = (X_scaled - min_) / scale
296 for j in 0..state.n_features {
297 for i in 0..inverse.nrows() {
298 inverse[[i, j]] = (inverse[[i, j]] - state.min_[j]) / state.scale[j];
299 }
300 }
301
302 core_array2_to_py(py, &inverse)
303 }
304
305 /// Per feature minimum seen in the data
306 #[getter]
307 fn data_min_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
308 let state = self
309 .state
310 .as_ref()
311 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
312
313 Ok(core_array1_to_py(py, &state.data_min))
314 }
315
316 /// Per feature maximum seen in the data
317 #[getter]
318 fn data_max_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
319 let state = self
320 .state
321 .as_ref()
322 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
323
324 Ok(core_array1_to_py(py, &state.data_max))
325 }
326
327 /// Per feature range (data_max_ - data_min_) seen in the data
328 #[getter]
329 fn data_range_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
330 let state = self
331 .state
332 .as_ref()
333 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
334
335 Ok(core_array1_to_py(py, &state.data_range))
336 }
337
338 /// Per feature relative scaling of the data
339 #[getter]
340 fn scale_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
341 let state = self
342 .state
343 .as_ref()
344 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
345
346 Ok(core_array1_to_py(py, &state.scale))
347 }
348
349 /// Per feature adjustment for minimum
350 #[getter]
351 fn min_(&self, py: Python<'_>) -> PyResult<Py<PyArray1<f64>>> {
352 let state = self
353 .state
354 .as_ref()
355 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
356
357 Ok(core_array1_to_py(py, &state.min_))
358 }
359
360 /// Number of features seen during fit.
361 #[getter]
362 fn n_features_in_(&self) -> PyResult<usize> {
363 let state = self
364 .state
365 .as_ref()
366 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
367
368 Ok(state.n_features)
369 }
370
371 /// The number of samples processed by the estimator.
372 #[getter]
373 fn n_samples_seen_(&self) -> PyResult<usize> {
374 let state = self
375 .state
376 .as_ref()
377 .ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
378
379 Ok(state.n_samples_seen)
380 }
381
382 /// String representation
383 fn __repr__(&self) -> String {
384 format!(
385 "MinMaxScaler(feature_range=({}, {}), copy={}, clip={})",
386 self.feature_range.0, self.feature_range.1, self.copy, self.clip
387 )
388 }
389}