rusty_machine/data/transforms/
minmax.rs

1//! The Min-Max transformer
2//!
3//! This module contains the `MinMaxScaler` transformer.
4//!
5//! The `MinMaxScaler` transformer is used to transform input data
6//! so that the minimum and maximum of each column are as specified.
7//! This is commonly used to transform the data to have a minimum of
8//! `0` and a maximum of `1`.
9//!
10//! # Examples
11//!
12//! ```
13//! use rusty_machine::data::transforms::{Transformer, MinMaxScaler};
14//! use rusty_machine::linalg::Matrix;
15//!
16//! // Constructs a new `MinMaxScaler` to map minimum to 0 and maximum
17//! // to 1.
18//! let mut transformer = MinMaxScaler::default();
19//!
20//! let inputs = Matrix::new(2, 2, vec![-1.0, 2.0, 1.5, 3.0]);
21//!
22//! // Transform the inputs to get output data with required minimum
23//! // and maximum.
24//! let transformed = transformer.transform(inputs).unwrap();
25//! ```
26
27use learning::error::{Error, ErrorKind};
28use linalg::{Matrix, BaseMatrix, BaseMatrixMut, Vector};
29use super::{Invertible, Transformer};
30
31use rulinalg::utils;
32
33use libnum::Float;
34
35/// The `MinMaxScaler`
36///
37/// The `MinMaxScaler` provides an implementation of `Transformer`
38/// which allows us to transform the input data to have a new minimum
39/// and maximum per column.
40///
41/// See the module description for more information.
42#[derive(Debug)]
43pub struct MinMaxScaler<T: Float> {
44    /// Values to scale each column by
45    scale_factors: Option<Vector<T>>,
46    /// Values to add to each column after scaling
47    const_factors: Option<Vector<T>>,
48    /// The min of the new data (default 0)
49    scaled_min: T,
50    /// The max of the new data (default 1)
51    scaled_max: T,
52}
53
54/// Create a default `MinMaxScaler` with minimum of `0` and
55/// maximum of `1`.
56impl<T: Float> Default for MinMaxScaler<T> {
57    fn default() -> MinMaxScaler<T> {
58        MinMaxScaler::new(T::zero(), T::one())
59    }
60}
61
62impl<T: Float> MinMaxScaler<T> {
63    /// Constructs a new MinMaxScaler with the specified scale.
64    ///
65    /// # Examples
66    ///
67    /// ```
68    /// use rusty_machine::data::transforms::{MinMaxScaler, Transformer};
69    ///
70    /// // Constructs a new `MinMaxScaler` which will give the data
71    /// // minimum `0` and maximum `2`.
72    /// let transformer = MinMaxScaler::new(0.0, 2.0);
73    /// ```
74    pub fn new(min: T, max: T) -> MinMaxScaler<T> {
75        MinMaxScaler {
76            scale_factors: None,
77            const_factors: None,
78            scaled_min: min,
79            scaled_max: max,
80        }
81    }
82}
83
84impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
85
86    fn fit(&mut self, inputs: &Matrix<T>) -> Result<(), Error> {
87        let features = inputs.cols();
88
89        // ToDo: can use min, max
90        // https://github.com/AtheMathmo/rulinalg/pull/115
91        let mut input_min_max = vec![(T::max_value(), T::min_value()); features];
92
93        for row in inputs.iter_rows() {
94            for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() {
95                if !feature.is_finite() {
96                    return Err(Error::new(ErrorKind::InvalidData,
97                                          format!("Data point in column {} cannot be \
98                                                   processed",
99                                                  idx)));
100                }
101                // Update min
102                if *feature < min_max.0 {
103                    min_max.0 = *feature;
104                }
105                // Update max
106                if *feature > min_max.1 {
107                    min_max.1 = *feature;
108                }
109            }
110        }
111
112        // We'll scale each feature by a * x + b.
113        // Where scales holds `a` per column and consts
114        // holds `b`.
115        let scales = try!(input_min_max.iter()
116            .map(|&(x, y)| {
117                let s = (self.scaled_max - self.scaled_min) / (y - x);
118                if s.is_finite() {
119                    Ok(s)
120                } else {
121                    Err(Error::new(ErrorKind::InvalidData,
122                                   "Constant feature columns not supported."))
123                }
124            })
125            .collect::<Result<Vec<_>, _>>());
126
127        let consts = input_min_max.iter()
128            .zip(scales.iter())
129            .map(|(&(_, x), &s)| self.scaled_max - x * s)
130            .collect::<Vec<_>>();
131
132        self.scale_factors = Some(Vector::new(scales));
133        self.const_factors = Some(Vector::new(consts));
134        Ok(())
135    }
136
137    fn transform(&mut self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
138        if let (&None, &None) = (&self.scale_factors, &self.const_factors) {
139            // if Transformer is not fitted to the data, fit for backward-compat.
140            try!(self.fit(&inputs));
141        }
142
143        if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) {
144            if scales.size() != inputs.cols() {
145                Err(Error::new(ErrorKind::InvalidData,
146                               "Input data has different number of columns from fitted data."))
147            } else {
148                for row in inputs.iter_rows_mut() {
149                    utils::in_place_vec_bin_op(row, scales.data(), |x, &y| {
150                        *x = *x * y;
151                    });
152
153                    utils::in_place_vec_bin_op(row, consts.data(), |x, &y| {
154                        *x = *x + y;
155                    });
156                }
157                Ok(inputs)
158            }
159        } else {
160            // can't happen
161            Err(Error::new(ErrorKind::InvalidState, "Transformer has not been fitted."))
162        }
163    }
164}
165
166impl<T: Float> Invertible<Matrix<T>> for MinMaxScaler<T> {
167
168    fn inv_transform(&self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
169        if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) {
170
171            let features = scales.size();
172            if inputs.cols() != features {
173                return Err(Error::new(ErrorKind::InvalidData,
174                                      "Inputs have different feature count than transformer."));
175            }
176
177            for row in inputs.iter_rows_mut() {
178                for i in 0..features {
179                    row[i] = (row[i] - consts[i]) / scales[i];
180                }
181            }
182
183            Ok(inputs)
184        } else {
185            Err(Error::new(ErrorKind::InvalidState, "Transformer has not been fitted."))
186        }
187    }
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use super::super::{Transformer, Invertible};
    use linalg::Matrix;
    use std::f64;

    /// NaN inputs must be rejected rather than scaled.
    #[test]
    fn nan_data_test() {
        let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);

        assert!(res.is_err());
    }

    /// Infinite inputs must be rejected rather than scaled.
    #[test]
    fn infinity_data_test() {
        let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);

        assert!(res.is_err());
    }

    /// Scaling to the default `[0, 1]` range.
    #[test]
    fn basic_scale_test() {
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let transformed = scaler.transform(inputs).unwrap();

        assert!(transformed.data().iter().all(|&x| x >= 0.0));
        assert!(transformed.data().iter().all(|&x| x <= 1.0));

        // First row scales to 0 and second to 1
        assert!(transformed[[0, 0]].abs() < 1e-10);
        assert!(transformed[[0, 1]].abs() < 1e-10);
        assert!((transformed[[1, 0]] - 1.0).abs() < 1e-10);
        assert!((transformed[[1, 1]] - 1.0).abs() < 1e-10);
    }

    /// Scaling to a caller-chosen `[1, 3]` range.
    #[test]
    fn custom_scale_test() {
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);

        let mut scaler = MinMaxScaler::new(1.0, 3.0);
        let transformed = scaler.transform(inputs).unwrap();

        assert!(transformed.data().iter().all(|&x| x >= 1.0));
        assert!(transformed.data().iter().all(|&x| x <= 3.0));

        // First row scales to 1 and second to 3
        assert!((transformed[[0, 0]] - 1.0).abs() < 1e-10);
        assert!((transformed[[0, 1]] - 1.0).abs() < 1e-10);
        assert!((transformed[[1, 0]] - 3.0).abs() < 1e-10);
        assert!((transformed[[1, 1]] - 3.0).abs() < 1e-10);
    }

    /// The first column is constant (`1.0` in both rows), which is unsupported.
    #[test]
    fn constant_feature_test() {
        let inputs = Matrix::new(2, 2, vec![1.0, 2.0, 1.0, 3.0]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);

        assert!(res.is_err());
    }

    /// `inv_transform` after `transform` must recover the original data.
    #[test]
    fn inv_transform_identity_test() {
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);

        let mut scaler = MinMaxScaler::new(1.0, 3.0);
        let transformed = scaler.transform(inputs.clone()).unwrap();

        let original = scaler.inv_transform(transformed).unwrap();

        assert!((inputs - original).data().iter().all(|x| x.abs() < 1e-5));
    }
}