use core::f64;
use augurs_core::{FloatIterExt, NanMinMaxResult};
use super::{Error, Transformer};
/// A pair of lower/upper bounds describing a range of `f64` values.
///
/// The same type is used for the range of the input data and for the
/// range the scaled output should occupy.
#[derive(Debug, Clone, Copy)]
struct MinMax {
    /// Lower bound of the range.
    min: f64,
    /// Upper bound of the range.
    max: f64,
}

impl MinMax {
    /// Returns the range `[0 + EPSILON, 1 - EPSILON]`: the unit interval
    /// pulled in by one `f64::EPSILON` on each side.
    fn zero_one() -> Self {
        Self {
            min: 0.0 + f64::EPSILON,
            max: 1.0 - f64::EPSILON,
        }
    }
}

/// Coefficients of the affine map `y = x * scale_factor + offset` that takes
/// `input_scale` onto a chosen output range.
#[derive(Debug, Clone)]
struct FittedMinMaxScalerParams {
    /// Range of the input data the coefficients were derived from.
    input_scale: MinMax,
    /// Multiplicative coefficient: output width divided by input width.
    scale_factor: f64,
    /// Additive coefficient, chosen so that `input_scale.min` maps to the
    /// minimum of the output range.
    offset: f64,
}

impl FittedMinMaxScalerParams {
    /// Derives the coefficients that map `input_scale` onto `output_scale`.
    ///
    /// A zero-width input range (`input_scale.max == input_scale.min`, e.g.
    /// constant data) is treated as having width `1.0`. Dividing by the zero
    /// width would otherwise make `scale_factor` and `offset` non-finite and
    /// turn every transformed value into NaN; with the substitution, the
    /// constant input value maps to `output_scale.min` and maps back to
    /// itself under the inverse. Non-degenerate ranges are unaffected.
    fn new(input_scale: MinMax, output_scale: MinMax) -> Self {
        let input_width = input_scale.max - input_scale.min;
        // Guard against division by zero for constant input. NaN widths fall
        // through unchanged because `NaN == 0.0` is false.
        let divisor = if input_width == 0.0 { 1.0 } else { input_width };
        let scale_factor = (output_scale.max - output_scale.min) / divisor;
        Self {
            input_scale,
            scale_factor,
            offset: output_scale.min - (input_scale.min * scale_factor),
        }
    }
}
/// A scaler that linearly maps data from its observed range onto a target
/// output range.
///
/// The output range starts as the `zero_one` range and can be replaced with
/// [`MinMaxScaler::with_scaled_range`]. The input range is either learned
/// when the scaler is fitted or supplied up front with
/// [`MinMaxScaler::with_data_range`].
#[derive(Debug, Clone)]
pub struct MinMaxScaler {
    /// Range the transformed data should occupy.
    output_scale: MinMax,
    /// Fitted coefficients; `None` until an input range is known.
    params: Option<FittedMinMaxScalerParams>,
}

impl Default for MinMaxScaler {
    /// Equivalent to [`MinMaxScaler::new`].
    fn default() -> Self {
        Self {
            output_scale: MinMax::zero_one(),
            params: None,
        }
    }
}

impl MinMaxScaler {
    /// Creates an unfitted scaler whose output range is the `zero_one` range.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the output range to `[min, max]`.
    ///
    /// If an input range is already known, the fitted coefficients are
    /// recomputed against the new output range; otherwise only the output
    /// range is stored.
    pub fn with_scaled_range(mut self, min: f64, max: f64) -> Self {
        let output_scale = MinMax { min, max };
        self.output_scale = output_scale;
        if let Some(fitted) = self.params.as_mut() {
            // Keep the known input range, re-derive everything else.
            *fitted = FittedMinMaxScalerParams::new(fitted.input_scale, output_scale);
        }
        self
    }

    /// Supplies the input data range `[min, max]` directly.
    ///
    /// The coefficients are computed against the output range configured at
    /// the time of the call, replacing any previously stored coefficients.
    pub fn with_data_range(mut self, min: f64, max: f64) -> Self {
        let fitted = FittedMinMaxScalerParams::new(MinMax { min, max }, self.output_scale);
        self.params = Some(fitted);
        self
    }
}
impl Transformer for MinMaxScaler {
fn fit(&mut self, data: &[f64]) -> Result<(), Error> {
let params = match data.iter().copied().nanminmax(true) {
NanMinMaxResult::NaN => unreachable!(),
e @ NanMinMaxResult::NoElements | e @ NanMinMaxResult::OneElement(_) => {
return Err(e.into())
}
NanMinMaxResult::MinMax(min, max) => {
FittedMinMaxScalerParams::new(MinMax { min, max }, self.output_scale)
}
};
self.params = Some(params);
Ok(())
}
fn transform(&self, data: &mut [f64]) -> Result<(), Error> {
let params = self.params.as_ref().ok_or(Error::NotFitted)?;
data.iter_mut()
.for_each(|x| *x = *x * params.scale_factor + params.offset);
Ok(())
}
fn inverse_transform(&self, data: &mut [f64]) -> Result<(), Error> {
let params = self.params.as_ref().ok_or(Error::NotFitted)?;
data.iter_mut()
.for_each(|x| *x = (*x - params.offset) / params.scale_factor);
Ok(())
}
}
/// The mean and standard deviation used to standardize data.
#[derive(Debug, Clone)]
pub struct StandardScaleParams {
    /// Value subtracted from each data point.
    pub mean: f64,
    /// Value each centered data point is divided by.
    pub std_dev: f64,
}

impl StandardScaleParams {
    /// Builds parameters from an explicit mean and standard deviation.
    pub fn new(mean: f64, std_dev: f64) -> Self {
        Self { mean, std_dev }
    }

    /// Computes the mean and population standard deviation of `data` in a
    /// single pass.
    ///
    /// An empty iterator yields a mean of `0.0` and a standard deviation of
    /// `1.0`. NaN values are not filtered and therefore propagate into the
    /// result.
    pub fn from_data<T>(data: T) -> Self
    where
        T: Iterator<Item = f64>,
    {
        // Running (count, mean, sum of squared deviations); each step updates
        // the mean first and then accumulates the product of the deviation
        // from the old mean and the deviation from the new mean.
        let (n, mean, sum_sq_dev) =
            data.fold((0_u64, 0.0_f64, 0.0_f64), |(seen, mean, acc), x| {
                let seen = seen + 1;
                let before = x - mean;
                let mean = mean + before / seen as f64;
                let after = x - mean;
                (seen, mean, acc + before * after)
            });
        if n == 0 {
            return Self::new(0.0, 1.0);
        }
        // Divide by `n` (not `n - 1`): population standard deviation.
        Self::new(mean, (sum_sq_dev / n as f64).sqrt())
    }

    /// Same as [`StandardScaleParams::from_data`], but NaN values are dropped
    /// before the statistics are computed.
    pub fn from_data_ignoring_nans<T: Iterator<Item = f64>>(data: T) -> Self {
        let without_nans = data.filter(|x| !x.is_nan());
        Self::from_data(without_nans)
    }
}
/// A scaler that standardizes data by subtracting a mean and dividing by a
/// standard deviation.
///
/// The parameters are either computed when the scaler is fitted or supplied
/// with [`StandardScaler::with_parameters`].
#[derive(Debug, Clone, Default)]
pub struct StandardScaler {
    /// Parameters in use; `None` until fitted or explicitly provided.
    params: Option<StandardScaleParams>,
    /// Whether NaN values are dropped when computing parameters during fit.
    ignore_nans: bool,
}

impl StandardScaler {
    /// Creates a scaler with no parameters that does not ignore NaNs.
    pub fn new() -> Self {
        Self {
            params: None,
            ignore_nans: false,
        }
    }

    /// Uses the given parameters, replacing any previously stored ones.
    pub fn with_parameters(self, params: StandardScaleParams) -> Self {
        Self {
            params: Some(params),
            ..self
        }
    }

    /// Chooses whether NaN values are skipped when parameters are computed
    /// from data during fitting.
    pub fn ignore_nans(self, ignore_nans: bool) -> Self {
        Self {
            ignore_nans,
            ..self
        }
    }
}
impl Transformer for StandardScaler {
fn fit(&mut self, data: &[f64]) -> Result<(), Error> {
self.params = Some(if self.ignore_nans {
StandardScaleParams::from_data_ignoring_nans(data.iter().copied())
} else {
StandardScaleParams::from_data(data.iter().copied())
});
Ok(())
}
fn transform(&self, data: &mut [f64]) -> Result<(), Error> {
let params = self.params.as_ref().ok_or(Error::NotFitted)?;
data.iter_mut()
.for_each(|x| *x = (*x - params.mean) / params.std_dev);
Ok(())
}
fn inverse_transform(&self, data: &mut [f64]) -> Result<(), Error> {
let params = self.params.as_ref().ok_or(Error::NotFitted)?;
data.iter_mut()
.for_each(|x| *x = (*x * params.std_dev) + params.mean);
Ok(())
}
}
#[cfg(test)]
mod test {
    use augurs_testing::{assert_all_close, assert_approx_eq};

    use super::*;

    /// Fitting with the default output range maps `[1, 2, 3]` close to `[0, 0.5, 1]`.
    #[test]
    fn min_max_scale() {
        let want = vec![0.0, 0.5, 1.0];
        let mut series = vec![1.0, 2.0, 3.0];
        let mut scaler = MinMaxScaler::new();
        scaler.fit_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// A custom output range is honoured when fitting.
    #[test]
    fn min_max_scale_custom() {
        let want = vec![0.0, 5.0, 10.0];
        let mut series = vec![1.0, 2.0, 3.0];
        let mut scaler = MinMaxScaler::new().with_scaled_range(0.0, 10.0);
        scaler.fit_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// The inverse works from an explicitly supplied data range.
    #[test]
    fn inverse_min_max_scale() {
        let want = vec![1.0, 2.0, 3.0];
        let mut series = vec![0.0, 0.5, 1.0];
        let scaler = MinMaxScaler::new().with_data_range(1.0, 3.0);
        scaler.inverse_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// The inverse works with both a custom output range and a data range.
    #[test]
    fn inverse_min_max_scale_custom() {
        let want = vec![1.0, 2.0, 3.0];
        let mut series = vec![0.0, 5.0, 10.0];
        let scaler = MinMaxScaler::new()
            .with_scaled_range(0.0, 10.0)
            .with_data_range(1.0, 3.0);
        scaler.inverse_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// Fitting and transforming standardizes `[1, 2, 3]`.
    #[test]
    fn standard_scale() {
        let want = vec![-1.224744871391589, 0.0, 1.224744871391589];
        let mut series = vec![1.0, 2.0, 3.0];
        let mut scaler = StandardScaler::new();
        scaler.fit_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// Explicit parameters are used without fitting.
    #[test]
    fn standard_scale_custom() {
        let want = vec![-1.0, 0.0, 1.0];
        let mut series = vec![1.0, 2.0, 3.0];
        let scaler = StandardScaler::new().with_parameters(StandardScaleParams::new(2.0, 1.0));
        scaler.transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// The inverse works from explicit parameters.
    #[test]
    fn inverse_standard_scale() {
        let want = vec![1.0, 2.0, 3.0];
        let mut series = vec![-1.0, 0.0, 1.0];
        let scaler = StandardScaler::new().with_parameters(StandardScaleParams::new(2.0, 1.0));
        scaler.inverse_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// `from_data` on a short series, a longer series, no data and a single value.
    #[test]
    fn standard_scale_params_from_data() {
        let short = vec![1.0, 2.0, 3.0];
        let fitted = StandardScaleParams::from_data(short.into_iter());
        assert_approx_eq!(fitted.mean, 2.0);
        assert_approx_eq!(fitted.std_dev, 0.816496580927726);

        let longer = vec![2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0];
        let fitted = StandardScaleParams::from_data(longer.into_iter());
        assert_approx_eq!(fitted.mean, 5.0);
        assert_approx_eq!(fitted.std_dev, 2.0);

        let empty: Vec<f64> = vec![];
        let fitted = StandardScaleParams::from_data(empty.into_iter());
        assert_approx_eq!(fitted.mean, 0.0);
        assert_approx_eq!(fitted.std_dev, 1.0);

        let single = vec![42.0];
        let fitted = StandardScaleParams::from_data(single.into_iter());
        assert_approx_eq!(fitted.mean, 42.0);
        assert_approx_eq!(fitted.std_dev, 0.0);
    }

    /// NaNs in the input stay NaN in the min-max scaled output.
    #[test]
    fn min_max_scale_with_nan() {
        let want = vec![0.0, f64::NAN, 0.5, 1.0, f64::NAN];
        let mut series = vec![1.0, f64::NAN, 2.0, 3.0, f64::NAN];
        let mut scaler = MinMaxScaler::new();
        scaler.fit_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// NaNs in the input stay NaN in the min-max inverse output.
    #[test]
    fn inverse_min_max_scale_with_nan() {
        let want = vec![1.0, f64::NAN, 2.0, 3.0, f64::NAN];
        let mut series = vec![0.0, f64::NAN, 0.5, 1.0, f64::NAN];
        let scaler = MinMaxScaler::new().with_data_range(1.0, 3.0);
        scaler.inverse_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// With `ignore_nans(true)`, NaNs are skipped when fitting and stay NaN in the output.
    #[test]
    fn standard_scale_with_nan() {
        let want = vec![
            -1.224744871391589,
            f64::NAN,
            0.0,
            1.224744871391589,
            f64::NAN,
        ];
        let mut series = vec![1.0, f64::NAN, 2.0, 3.0, f64::NAN];
        let mut scaler = StandardScaler::new().ignore_nans(true);
        scaler.fit_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }

    /// `from_data_ignoring_nans` computes statistics over the non-NaN values only.
    #[test]
    fn standard_scale_params_from_data_with_nan() {
        let series = vec![1.0, f64::NAN, 2.0, 3.0, f64::NAN];
        let fitted = StandardScaleParams::from_data_ignoring_nans(series.into_iter());
        assert_approx_eq!(fitted.mean, 2.0);
        assert_approx_eq!(fitted.std_dev, 0.816496580927726);
    }

    /// NaNs in the input stay NaN in the standard inverse output.
    #[test]
    fn inverse_standard_scale_with_nan() {
        let want = vec![1.0, f64::NAN, 2.0, 3.0, f64::NAN];
        let mut series = vec![-1.0, f64::NAN, 0.0, 1.0, f64::NAN];
        let scaler = StandardScaler::new().with_parameters(StandardScaleParams::new(2.0, 1.0));
        scaler.inverse_transform(&mut series).unwrap();
        assert_all_close(&want, &series);
    }
}