use learning::error::{Error, ErrorKind};
use linalg::{Matrix, BaseMatrix, BaseMatrixMut, Vector};
use super::{Invertible, Transformer};
use rulinalg::utils;
use libnum::Float;
/// Linearly rescales each feature column into a fixed output range.
///
/// After fitting, every column is mapped by `x -> x * scale + offset` so that
/// the column's observed minimum lands on `scaled_min` and its observed
/// maximum lands on `scaled_max`.
#[derive(Debug)]
pub struct MinMaxScaler<T: Float> {
    // Per-column multiplicative factors; `None` until `fit` has succeeded.
    scale_factors: Option<Vector<T>>,
    // Per-column additive offsets; `None` until `fit` has succeeded.
    const_factors: Option<Vector<T>>,
    // Lower bound of the target output range.
    scaled_min: T,
    // Upper bound of the target output range.
    scaled_max: T,
}
impl<T: Float> Default for MinMaxScaler<T> {
fn default() -> MinMaxScaler<T> {
MinMaxScaler::new(T::zero(), T::one())
}
}
impl<T: Float> MinMaxScaler<T> {
    /// Constructs an unfitted `MinMaxScaler` that will map each feature
    /// column onto the range `[min, max]`.
    ///
    /// The scale and offset factors are left unset until `fit` is called.
    pub fn new(min: T, max: T) -> MinMaxScaler<T> {
        MinMaxScaler {
            scaled_min: min,
            scaled_max: max,
            scale_factors: None,
            const_factors: None,
        }
    }
}
impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
fn fit(&mut self, inputs: &Matrix<T>) -> Result<(), Error> {
let features = inputs.cols();
let mut input_min_max = vec![(T::max_value(), T::min_value()); features];
for row in inputs.iter_rows() {
for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() {
if !feature.is_finite() {
return Err(Error::new(ErrorKind::InvalidData,
format!("Data point in column {} cannot be \
processed",
idx)));
}
if *feature < min_max.0 {
min_max.0 = *feature;
}
if *feature > min_max.1 {
min_max.1 = *feature;
}
}
}
let scales = try!(input_min_max.iter()
.map(|&(x, y)| {
let s = (self.scaled_max - self.scaled_min) / (y - x);
if s.is_finite() {
Ok(s)
} else {
Err(Error::new(ErrorKind::InvalidData,
"Constant feature columns not supported."))
}
})
.collect::<Result<Vec<_>, _>>());
let consts = input_min_max.iter()
.zip(scales.iter())
.map(|(&(_, x), &s)| self.scaled_max - x * s)
.collect::<Vec<_>>();
self.scale_factors = Some(Vector::new(scales));
self.const_factors = Some(Vector::new(consts));
Ok(())
}
fn transform(&mut self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
if let (&None, &None) = (&self.scale_factors, &self.const_factors) {
try!(self.fit(&inputs));
}
if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) {
if scales.size() != inputs.cols() {
Err(Error::new(ErrorKind::InvalidData,
"Input data has different number of columns from fitted data."))
} else {
for row in inputs.iter_rows_mut() {
utils::in_place_vec_bin_op(row, scales.data(), |x, &y| {
*x = *x * y;
});
utils::in_place_vec_bin_op(row, consts.data(), |x, &y| {
*x = *x + y;
});
}
Ok(inputs)
}
} else {
Err(Error::new(ErrorKind::InvalidState, "Transformer has not been fitted."))
}
}
}
impl<T: Float> Invertible<Matrix<T>> for MinMaxScaler<T> {
fn inv_transform(&self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) {
let features = scales.size();
if inputs.cols() != features {
return Err(Error::new(ErrorKind::InvalidData,
"Inputs have different feature count than transformer."));
}
for row in inputs.iter_rows_mut() {
for i in 0..features {
row[i] = (row[i] - consts[i]) / scales[i];
}
}
Ok(inputs)
} else {
Err(Error::new(ErrorKind::InvalidState, "Transformer has not been fitted."))
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use super::super::{Transformer, Invertible};
    use linalg::Matrix;
    use std::f64;

    #[test]
    fn nan_data_test() {
        // NaN input must be rejected during the implicit fit.
        let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]);
        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);
        assert!(res.is_err());
    }

    #[test]
    fn infinity_data_test() {
        // Non-finite input must be rejected during the implicit fit.
        let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]);
        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);
        assert!(res.is_err());
    }

    #[test]
    fn basic_scale_test() {
        // Columns are (-1, 0) and (2, 3); mins map to 0, maxes to 1.
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);
        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let transformed = scaler.transform(inputs).unwrap();
        assert!(transformed.data().iter().all(|&x| x >= 0.0));
        assert!(transformed.data().iter().all(|&x| x <= 1.0));
        // BUG FIX: these comparisons previously discarded their boolean
        // result, asserting nothing — wrap them in `assert!`.
        assert!(transformed[[0, 0]].abs() < 1e-10);
        assert!(transformed[[0, 1]].abs() < 1e-10);
        assert!((transformed[[1, 0]] - 1.0).abs() < 1e-10);
        assert!((transformed[[1, 1]] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn custom_scale_test() {
        // Same data as above, but mapped onto [1, 3].
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);
        let mut scaler = MinMaxScaler::new(1.0, 3.0);
        let transformed = scaler.transform(inputs).unwrap();
        assert!(transformed.data().iter().all(|&x| x >= 1.0));
        assert!(transformed.data().iter().all(|&x| x <= 3.0));
        // BUG FIX: these comparisons previously discarded their boolean
        // result, asserting nothing — wrap them in `assert!`.
        assert!((transformed[[0, 0]] - 1.0).abs() < 1e-10);
        assert!((transformed[[0, 1]] - 1.0).abs() < 1e-10);
        assert!((transformed[[1, 0]] - 3.0).abs() < 1e-10);
        assert!((transformed[[1, 1]] - 3.0).abs() < 1e-10);
    }

    #[test]
    fn constant_feature_test() {
        // Column 0 is constant (1.0, 1.0): no finite scale exists.
        let inputs = Matrix::new(2, 2, vec![1.0, 2.0, 1.0, 3.0]);
        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);
        assert!(res.is_err());
    }

    #[test]
    fn inv_transform_identity_test() {
        // inv_transform(transform(x)) should reproduce x up to rounding.
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);
        let mut scaler = MinMaxScaler::new(1.0, 3.0);
        let transformed = scaler.transform(inputs.clone()).unwrap();
        let original = scaler.inv_transform(transformed).unwrap();
        assert!((inputs - original).data().iter().all(|x| x.abs() < 1e-5));
    }
}