// rusty_machine/data/transforms/minmax.rs
use learning::error::{Error, ErrorKind};
28use linalg::{Matrix, BaseMatrix, BaseMatrixMut, Vector};
29use super::{Invertible, Transformer};
30
31use rulinalg::utils;
32
33use libnum::Float;
34
/// Linearly rescales each feature column into a fixed target range.
///
/// After fitting, every column of the input is mapped so that its observed
/// minimum lands on `scaled_min` and its observed maximum on `scaled_max`
/// (defaults `[0, 1]` via `Default`). The mapping is `x * scale + const`
/// per column.
#[derive(Debug)]
pub struct MinMaxScaler<T: Float> {
    // Per-column multiplicative factors; `None` until `fit` has run.
    scale_factors: Option<Vector<T>>,
    // Per-column additive offsets; `None` until `fit` has run.
    const_factors: Option<Vector<T>>,
    // Lower bound of the target output range.
    scaled_min: T,
    // Upper bound of the target output range.
    scaled_max: T,
}
53
54impl<T: Float> Default for MinMaxScaler<T> {
57 fn default() -> MinMaxScaler<T> {
58 MinMaxScaler::new(T::zero(), T::one())
59 }
60}
61
62impl<T: Float> MinMaxScaler<T> {
63 pub fn new(min: T, max: T) -> MinMaxScaler<T> {
75 MinMaxScaler {
76 scale_factors: None,
77 const_factors: None,
78 scaled_min: min,
79 scaled_max: max,
80 }
81 }
82}
83
84impl<T: Float> Transformer<Matrix<T>> for MinMaxScaler<T> {
85
86 fn fit(&mut self, inputs: &Matrix<T>) -> Result<(), Error> {
87 let features = inputs.cols();
88
89 let mut input_min_max = vec![(T::max_value(), T::min_value()); features];
92
93 for row in inputs.iter_rows() {
94 for (idx, (feature, min_max)) in row.into_iter().zip(input_min_max.iter_mut()).enumerate() {
95 if !feature.is_finite() {
96 return Err(Error::new(ErrorKind::InvalidData,
97 format!("Data point in column {} cannot be \
98 processed",
99 idx)));
100 }
101 if *feature < min_max.0 {
103 min_max.0 = *feature;
104 }
105 if *feature > min_max.1 {
107 min_max.1 = *feature;
108 }
109 }
110 }
111
112 let scales = try!(input_min_max.iter()
116 .map(|&(x, y)| {
117 let s = (self.scaled_max - self.scaled_min) / (y - x);
118 if s.is_finite() {
119 Ok(s)
120 } else {
121 Err(Error::new(ErrorKind::InvalidData,
122 "Constant feature columns not supported."))
123 }
124 })
125 .collect::<Result<Vec<_>, _>>());
126
127 let consts = input_min_max.iter()
128 .zip(scales.iter())
129 .map(|(&(_, x), &s)| self.scaled_max - x * s)
130 .collect::<Vec<_>>();
131
132 self.scale_factors = Some(Vector::new(scales));
133 self.const_factors = Some(Vector::new(consts));
134 Ok(())
135 }
136
137 fn transform(&mut self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
138 if let (&None, &None) = (&self.scale_factors, &self.const_factors) {
139 try!(self.fit(&inputs));
141 }
142
143 if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) {
144 if scales.size() != inputs.cols() {
145 Err(Error::new(ErrorKind::InvalidData,
146 "Input data has different number of columns from fitted data."))
147 } else {
148 for row in inputs.iter_rows_mut() {
149 utils::in_place_vec_bin_op(row, scales.data(), |x, &y| {
150 *x = *x * y;
151 });
152
153 utils::in_place_vec_bin_op(row, consts.data(), |x, &y| {
154 *x = *x + y;
155 });
156 }
157 Ok(inputs)
158 }
159 } else {
160 Err(Error::new(ErrorKind::InvalidState, "Transformer has not been fitted."))
162 }
163 }
164}
165
166impl<T: Float> Invertible<Matrix<T>> for MinMaxScaler<T> {
167
168 fn inv_transform(&self, mut inputs: Matrix<T>) -> Result<Matrix<T>, Error> {
169 if let (&Some(ref scales), &Some(ref consts)) = (&self.scale_factors, &self.const_factors) {
170
171 let features = scales.size();
172 if inputs.cols() != features {
173 return Err(Error::new(ErrorKind::InvalidData,
174 "Inputs have different feature count than transformer."));
175 }
176
177 for row in inputs.iter_rows_mut() {
178 for i in 0..features {
179 row[i] = (row[i] - consts[i]) / scales[i];
180 }
181 }
182
183 Ok(inputs)
184 } else {
185 Err(Error::new(ErrorKind::InvalidState, "Transformer has not been fitted."))
186 }
187 }
188}
189
#[cfg(test)]
mod tests {
    use super::*;
    use super::super::{Transformer, Invertible};
    use linalg::Matrix;
    use std::f64;

    // NaN entries must be rejected during fitting.
    #[test]
    fn nan_data_test() {
        let inputs = Matrix::new(2, 2, vec![f64::NAN; 4]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);

        assert!(res.is_err());
    }

    // Infinite entries must be rejected during fitting.
    #[test]
    fn infinity_data_test() {
        let inputs = Matrix::new(2, 2, vec![f64::INFINITY; 4]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);

        assert!(res.is_err());
    }

    #[test]
    fn basic_scale_test() {
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let transformed = scaler.transform(inputs).unwrap();

        assert!(transformed.data().iter().all(|&x| x >= 0.0));
        assert!(transformed.data().iter().all(|&x| x <= 1.0));

        // BUG FIX: these comparisons were previously bare expressions whose
        // results were discarded, so the exact-value checks never ran.
        assert!(transformed[[0, 0]].abs() < 1e-10);
        assert!(transformed[[0, 1]].abs() < 1e-10);
        assert!((transformed[[1, 0]] - 1.0).abs() < 1e-10);
        assert!((transformed[[1, 1]] - 1.0).abs() < 1e-10);
    }

    #[test]
    fn custom_scale_test() {
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);

        let mut scaler = MinMaxScaler::new(1.0, 3.0);
        let transformed = scaler.transform(inputs).unwrap();

        assert!(transformed.data().iter().all(|&x| x >= 1.0));
        assert!(transformed.data().iter().all(|&x| x <= 3.0));

        // BUG FIX: formerly unasserted comparisons — now enforced.
        assert!((transformed[[0, 0]] - 1.0).abs() < 1e-10);
        assert!((transformed[[0, 1]] - 1.0).abs() < 1e-10);
        assert!((transformed[[1, 0]] - 3.0).abs() < 1e-10);
        assert!((transformed[[1, 1]] - 3.0).abs() < 1e-10);
    }

    // A constant column makes the scale factor non-finite and must error.
    #[test]
    fn constant_feature_test() {
        let inputs = Matrix::new(2, 2, vec![1.0, 2.0, 1.0, 3.0]);

        let mut scaler = MinMaxScaler::new(0.0, 1.0);
        let res = scaler.transform(inputs);

        assert!(res.is_err());
    }

    // inv_transform(transform(x)) should reproduce x up to float error.
    #[test]
    fn inv_transform_identity_test() {
        let inputs = Matrix::new(2, 2, vec![-1.0f32, 2.0, 0.0, 3.0]);

        let mut scaler = MinMaxScaler::new(1.0, 3.0);
        let transformed = scaler.transform(inputs.clone()).unwrap();

        let original = scaler.inv_transform(transformed).unwrap();

        assert!((inputs - original).data().iter().all(|x| x.abs() < 1e-5));
    }
}