use super::common::*;
use scirs2_core::ndarray::Array1;
/// Statistics and derived coefficients captured by a successful `fit` call.
///
/// All `Array1` fields hold one entry per input column (feature).
#[derive(Debug, Clone)]
struct MinMaxScalerState {
/// Smallest value found in each column of the fitted data.
data_min: Array1<f64>,
/// Largest value found in each column of the fitted data.
data_max: Array1<f64>,
/// `data_max - data_min`, element-wise.
data_range: Array1<f64>,
/// Per-feature multiplier: feature-range width divided by `data_range`,
/// or `1.0` when `|data_range| < 1e-10` (treated as a constant feature).
scale: Array1<f64>,
/// Per-feature additive offset: `feature_min - data_min * scale`.
min_: Array1<f64>,
/// Number of columns in the array passed to `fit`.
n_features: usize,
/// Number of rows in the array passed to `fit`.
n_samples_seen: usize,
/// Copy of the scaler's `(min, max)` target range taken at fit time;
/// used as the clamp bounds when clipping is enabled.
feature_range: (f64, f64),
}
/// Min-max feature scaler exposed to Python under the class name `MinMaxScaler`.
///
/// Holds the constructor options plus the fitted state, which is absent
/// until `fit` (or `fit_transform`) has succeeded.
#[pyclass(name = "MinMaxScaler")]
pub struct PyMinMaxScaler {
/// Desired `(min, max)` of the transformed data.
feature_range: (f64, f64),
/// Constructor option; in the code visible here it is only stored and
/// echoed by `__repr__`.
copy: bool,
/// When true, `transform` / `fit_transform` clamp every output value to
/// the fitted feature range.
clip: bool,
/// `None` before fitting; `Some` with the learned statistics afterwards.
state: Option<MinMaxScalerState>,
}
#[pymethods]
impl PyMinMaxScaler {
#[new]
#[pyo3(signature = (feature_range=(0.0, 1.0), copy=true, clip=false))]
fn new(feature_range: (f64, f64), copy: bool, clip: bool) -> Self {
Self {
feature_range,
copy,
clip,
state: None,
}
}
fn fit(&mut self, x: PyReadonlyArray2<f64>) -> PyResult<()> {
let x_array = pyarray_to_core_array2(&x)?;
validate_fit_array(&x_array)?;
let n_samples = x_array.nrows();
let n_features = x_array.ncols();
let mut data_min = Array1::zeros(n_features);
let mut data_max = Array1::zeros(n_features);
for j in 0..n_features {
let col = x_array.column(j);
data_min[j] = col.iter().cloned().fold(f64::INFINITY, |a, b| a.min(b));
data_max[j] = col.iter().cloned().fold(f64::NEG_INFINITY, |a, b| a.max(b));
}
let data_range = &data_max - &data_min;
let (feature_min, feature_max) = self.feature_range;
let feature_range = feature_max - feature_min;
let mut scale = Array1::zeros(n_features);
let mut min_ = Array1::zeros(n_features);
for j in 0..n_features {
if data_range[j].abs() < 1e-10 {
scale[j] = 1.0;
min_[j] = feature_min - data_min[j];
} else {
scale[j] = feature_range / data_range[j];
min_[j] = feature_min - data_min[j] * scale[j];
}
}
self.state = Some(MinMaxScalerState {
data_min,
data_max,
data_range,
scale,
min_,
n_features,
n_samples_seen: n_samples,
feature_range: self.feature_range,
});
Ok(())
}
fn transform<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<f64>) -> PyResult<Py<PyArray2<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
let x_array = pyarray_to_core_array2(&x)?;
validate_transform_array(&x_array, state.n_features)?;
let mut transformed = x_array.clone();
for j in 0..state.n_features {
for i in 0..transformed.nrows() {
transformed[[i, j]] = transformed[[i, j]] * state.scale[j] + state.min_[j];
if self.clip {
let (min_val, max_val) = state.feature_range;
transformed[[i, j]] = transformed[[i, j]].clamp(min_val, max_val);
}
}
}
core_array2_to_py(py, &transformed)
}
fn fit_transform<'py>(
&mut self,
py: Python<'py>,
x: PyReadonlyArray2<f64>,
) -> PyResult<Py<PyArray2<f64>>> {
let x_array = pyarray_to_core_array2(&x)?;
self.fit(x)?;
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
let mut transformed = x_array.clone();
for j in 0..state.n_features {
for i in 0..transformed.nrows() {
transformed[[i, j]] = transformed[[i, j]] * state.scale[j] + state.min_[j];
if self.clip {
let (min_val, max_val) = state.feature_range;
transformed[[i, j]] = transformed[[i, j]].clamp(min_val, max_val);
}
}
}
core_array2_to_py(py, &transformed)
}
fn inverse_transform<'py>(
&self,
py: Python<'py>,
x: PyReadonlyArray2<f64>,
) -> PyResult<Py<PyArray2<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
let x_array = pyarray_to_core_array2(&x)?;
validate_transform_array(&x_array, state.n_features)?;
let mut inverse = x_array.clone();
for j in 0..state.n_features {
for i in 0..inverse.nrows() {
inverse[[i, j]] = (inverse[[i, j]] - state.min_[j]) / state.scale[j];
}
}
core_array2_to_py(py, &inverse)
}
#[getter]
fn data_min_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(core_array1_to_py(py, &state.data_min))
}
#[getter]
fn data_max_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(core_array1_to_py(py, &state.data_max))
}
#[getter]
fn data_range_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(core_array1_to_py(py, &state.data_range))
}
#[getter]
fn scale_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(core_array1_to_py(py, &state.scale))
}
#[getter]
fn min_<'py>(&self, py: Python<'py>) -> PyResult<Py<PyArray1<f64>>> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(core_array1_to_py(py, &state.min_))
}
#[getter]
fn n_features_in_(&self) -> PyResult<usize> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(state.n_features)
}
#[getter]
fn n_samples_seen_(&self) -> PyResult<usize> {
let state = self
.state
.as_ref()
.ok_or_else(|| PyValueError::new_err("Scaler not fitted. Call fit() first."))?;
Ok(state.n_samples_seen)
}
fn __repr__(&self) -> String {
format!(
"MinMaxScaler(feature_range=({}, {}), copy={}, clip={})",
self.feature_range.0, self.feature_range.1, self.copy, self.clip
)
}
}