use crate::core::traits::Transformer;
/// Options controlling how [`StandardScaler`] standardizes each sample.
#[derive(Debug, Clone, Copy)]
pub struct StandardScalerConfig {
    /// When `true`, the sample mean is subtracted from every value.
    pub with_mean: bool,
    /// When `true`, values are multiplied by the inverse of the sample's
    /// standard deviation.
    pub with_std: bool,
}

impl Default for StandardScalerConfig {
    /// Enables both centering and scaling.
    fn default() -> Self {
        StandardScalerConfig {
            with_mean: true,
            with_std: true,
        }
    }
}

impl StandardScalerConfig {
    /// Creates the default configuration: centering and scaling both enabled.
    pub fn new() -> Self {
        Self::default()
    }
}
pub struct StandardScaler;
impl Transformer for StandardScaler {
type Config = StandardScalerConfig;
fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
assert!(!x.is_empty(), "Input must have at least one sample");
assert!(
x.iter().all(|s| s.len() == x[0].len()),
"All samples must have same length"
);
let with_mean = config.with_mean;
let with_std = config.with_std;
#[cfg(feature = "parallel")]
{
use rayon::prelude::*;
return x
.par_iter()
.map(|sample| standard_scale_single(sample, with_mean, with_std))
.collect();
}
#[cfg(not(feature = "parallel"))]
x.iter()
.map(|sample| standard_scale_single(sample, with_mean, with_std))
.collect()
}
}
/// Standardizes one sample: optionally subtracts its mean and optionally
/// divides by its population standard deviation (variance divided by `n`).
///
/// * `x` - the sample values; an empty slice yields an empty vector.
/// * `with_mean` - subtract the sample mean when `true`.
/// * `with_std` - divide by the sample standard deviation when `true`.
///
/// A sample whose standard deviation is zero, or no larger than the rounding
/// error the mean computation itself can introduce, is treated as constant
/// and is left unscaled (scale factor `1.0`). The deviations are always
/// measured from the true mean, even when `with_mean` is `false`.
fn standard_scale_single(x: &[f64], with_mean: bool, with_std: bool) -> Vec<f64> {
    let n = x.len() as f64;
    let actual_mean = x.iter().sum::<f64>() / n;
    let center = if with_mean { actual_mean } else { 0.0 };

    let inv_std = if with_std {
        let variance = x
            .iter()
            .map(|&v| {
                let deviation = v - actual_mean;
                deviation * deviation
            })
            .sum::<f64>()
            / n;
        let s = variance.sqrt();
        // A constant sample such as [0.1, 0.1, 0.1] can produce a mean that is
        // one ulp away from the values, which makes `s` tiny but non-zero.
        // Dividing by that noise would blow the output up to +/-1, so any
        // spread within the accumulated rounding error of the mean counts as
        // zero. NaN compares false here and still propagates through `1.0 / s`.
        let rounding_floor = n * f64::EPSILON * actual_mean.abs();
        if s <= rounding_floor {
            1.0
        } else {
            1.0 / s
        }
    } else {
        1.0
    };

    x.iter().map(|&v| (v - center) * inv_std).collect()
}
/// Options for [`MinMaxScaler`].
#[derive(Debug, Clone, Copy)]
pub struct MinMaxScalerConfig {
    /// Target interval `(min, max)` that each sample is mapped onto.
    pub sample_range: (f64, f64),
}

impl Default for MinMaxScalerConfig {
    /// Targets the unit interval `(0.0, 1.0)`.
    fn default() -> Self {
        MinMaxScalerConfig {
            sample_range: (0.0, 1.0),
        }
    }
}

impl MinMaxScalerConfig {
    /// Creates the default configuration, which targets `(0.0, 1.0)`.
    pub fn new() -> Self {
        Self::default()
    }
}
/// Per-sample min-max scaling: every inner `Vec` is mapped linearly onto the
/// configured target interval using its own minimum and maximum.
pub struct MinMaxScaler;

impl Transformer for MinMaxScaler {
    type Config = MinMaxScalerConfig;

    /// Rescales each sample in `x` independently onto `config.sample_range`.
    ///
    /// A sample whose minimum equals its maximum is mapped entirely to the
    /// lower bound of the target interval. With the `parallel` feature
    /// enabled the samples are processed through rayon's parallel iterator.
    ///
    /// # Panics
    ///
    /// Panics if `x` is empty or if the lower bound of `sample_range` is not
    /// strictly less than the upper bound.
    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");
        let (range_min, range_max) = config.sample_range;
        assert!(
            range_min < range_max,
            "sample_range min must be less than max"
        );

        let rescale = |sample: &Vec<f64>| -> Vec<f64> {
            // Single pass collecting both extremes of the sample.
            let mut lowest = f64::INFINITY;
            let mut highest = f64::NEG_INFINITY;
            for &value in sample {
                lowest = lowest.min(value);
                highest = highest.max(value);
            }

            let spread = highest - lowest;
            if spread == 0.0 {
                // No spread to stretch: pin everything to the lower bound.
                return vec![range_min; sample.len()];
            }

            let factor = (range_max - range_min) / spread;
            sample
                .iter()
                .map(|&value| (value - lowest) * factor + range_min)
                .collect()
        };

        #[cfg(feature = "parallel")]
        {
            use rayon::prelude::*;
            return x.par_iter().map(rescale).collect();
        }

        #[cfg(not(feature = "parallel"))]
        x.iter().map(rescale).collect()
    }
}
/// Per-sample max-abs scaling: every inner `Vec` is divided by the largest
/// absolute value it contains.
pub struct MaxAbsScaler;

impl Transformer for MaxAbsScaler {
    type Config = ();

    /// Divides each sample in `x` by its own maximum absolute value.
    ///
    /// A sample whose maximum absolute value is zero is divided by `1.0`,
    /// i.e. returned unchanged. The configuration is unused.
    ///
    /// # Panics
    ///
    /// Panics if `x` is empty.
    fn transform(_config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");

        let mut scaled: Vec<Vec<f64>> = Vec::with_capacity(x.len());
        for sample in x {
            let mut peak = 0.0_f64;
            for &value in sample {
                peak = peak.max(value.abs());
            }
            let divisor = if peak != 0.0 { peak } else { 1.0 };
            scaled.push(sample.iter().map(|&value| value / divisor).collect());
        }
        scaled
    }
}
/// Options for [`RobustScaler`].
#[derive(Debug, Clone, Copy)]
pub struct RobustScalerConfig {
    /// When `true`, the sample median (50th percentile) is subtracted.
    pub with_centering: bool,
    /// When `true`, values are divided by the spread between the two
    /// percentiles in `quantile_range`.
    pub with_scaling: bool,
    /// Lower and upper percentiles, expressed on a 0-100 scale.
    pub quantile_range: (f64, f64),
}

impl Default for RobustScalerConfig {
    /// Centering and scaling enabled, using the 25th-75th percentile range.
    fn default() -> Self {
        RobustScalerConfig {
            with_centering: true,
            with_scaling: true,
            quantile_range: (25.0, 75.0),
        }
    }
}

impl RobustScalerConfig {
    /// Creates the default configuration: centering and scaling enabled with
    /// a `(25.0, 75.0)` quantile range.
    pub fn new() -> Self {
        Self::default()
    }
}
/// Per-sample robust scaling based on the median and an inter-percentile
/// range of each inner `Vec`.
pub struct RobustScaler;

impl Transformer for RobustScaler {
    type Config = RobustScalerConfig;

    /// Scales each sample in `x` independently.
    ///
    /// The median is subtracted when `config.with_centering` is set, and the
    /// result is divided by the spread between the two configured
    /// percentiles when `config.with_scaling` is set. A zero spread is
    /// replaced by `1.0` so the sample is not divided by zero.
    ///
    /// # Panics
    ///
    /// Panics if `x` is empty, if the lower quantile is not strictly less
    /// than the upper one, or if the range is not within `[0, 100]`.
    /// `percentile` also panics on an empty sample whenever centering or
    /// scaling is enabled.
    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");
        let (q_lower, q_upper) = config.quantile_range;
        assert!(
            q_lower < q_upper,
            "quantile_range lower must be less than upper"
        );
        assert!(
            q_lower >= 0.0 && q_upper <= 100.0,
            "quantile_range must be in [0, 100]"
        );

        let mut scaled: Vec<Vec<f64>> = Vec::with_capacity(x.len());
        for sample in x {
            let center = if config.with_centering {
                percentile(sample, 50.0)
            } else {
                0.0
            };

            let scale = if config.with_scaling {
                let low = percentile(sample, q_lower);
                let high = percentile(sample, q_upper);
                match high - low {
                    // Degenerate spread: fall back to a neutral divisor.
                    spread if spread == 0.0 => 1.0,
                    spread => spread,
                }
            } else {
                1.0
            };

            scaled.push(sample.iter().map(|&v| (v - center) / scale).collect());
        }
        scaled
    }
}
/// Returns the `p`-th percentile of `data`, linearly interpolating between
/// the two closest ranks of the sorted values.
///
/// * `data` - the values to examine; they are copied and sorted, so the input
///   is left untouched and need not be ordered.
/// * `p` - the percentile on a 0-100 scale.
///
/// When `p` lands exactly on a rank, or the two neighbouring values are
/// equal, that value is returned directly. This keeps infinite neighbours
/// from turning the result into NaN through `0.0 * inf` or `inf - inf`.
///
/// # Panics
///
/// Panics if `data` is empty, if `p` is outside `[0, 100]` (including NaN),
/// or if `data` holds two or more values and any of them is NaN.
pub fn percentile(data: &[f64], p: f64) -> f64 {
    assert!(!data.is_empty(), "Cannot compute percentile of empty slice");
    assert!((0.0..=100.0).contains(&p), "Percentile must be in [0, 100]");

    let mut sorted: Vec<f64> = data.to_vec();
    sorted.sort_by(|a, b| {
        a.partial_cmp(b)
            .expect("percentile input must not contain NaN")
    });

    let last = sorted.len() - 1;
    // Fractional rank in [0, last]; its integer part selects the lower neighbour.
    let idx = p / 100.0 * last as f64;
    let lo = idx.floor() as usize;
    if lo >= last {
        return sorted[last];
    }

    let frac = idx - lo as f64;
    let (below, above) = (sorted[lo], sorted[lo + 1]);
    if frac == 0.0 || below == above {
        // Nothing to interpolate; skipping the arithmetic avoids NaN when a
        // neighbour is infinite.
        return below;
    }
    below + frac * (above - below)
}
// Unit tests for the per-sample scalers and the `percentile` helper.
#[cfg(test)]
mod tests {
use super::*;
// Asserts that two slices have equal length and that every pair of
// elements differs by strictly less than `eps`.
fn assert_close(actual: &[f64], expected: &[f64], eps: f64) {
assert_eq!(actual.len(), expected.len());
for (i, (a, e)) in actual.iter().zip(expected.iter()).enumerate() {
assert!(
(a - e).abs() < eps,
"[{i}]: actual={a}, expected={e}, eps={eps}"
);
}
}
// Default config: 1..=5 has mean 3 and population variance 2.
#[test]
fn test_standard_scaler_basic() {
let config = StandardScalerConfig::new();
let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
let result = StandardScaler::transform(&config, &x);
let std = 2.0_f64.sqrt();
let expected: Vec<f64> = (1..=5).map(|v| (v as f64 - 3.0) / std).collect();
assert_close(&result[0], &expected, 1e-10);
}
// Without centering the values are only divided by the standard deviation,
// which is still measured around the true mean (variance 2/3 here).
#[test]
fn test_standard_scaler_no_mean() {
let config = StandardScalerConfig {
with_mean: false,
with_std: true,
};
let x = vec![vec![1.0, 2.0, 3.0]];
let result = StandardScaler::transform(&config, &x);
let var: f64 = 2.0 / 3.0;
let std = var.sqrt();
let expected: Vec<f64> = vec![1.0 / std, 2.0 / std, 3.0 / std];
assert_close(&result[0], &expected, 1e-10);
}
// A constant sample has zero variance and must come out as all zeros.
#[test]
fn test_standard_scaler_constant() {
let config = StandardScalerConfig::new();
let x = vec![vec![5.0, 5.0, 5.0]];
let result = StandardScaler::transform(&config, &x);
assert_close(&result[0], &[0.0, 0.0, 0.0], 1e-10);
}
// Default target range (0, 1) on evenly spaced input.
#[test]
fn test_minmax_scaler_basic() {
let config = MinMaxScalerConfig::new();
let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
let result = MinMaxScaler::transform(&config, &x);
assert_close(&result[0], &[0.0, 0.25, 0.5, 0.75, 1.0], 1e-10);
}
// A custom target range (-1, 1) maps min to -1, midpoint to 0, max to 1.
#[test]
fn test_minmax_scaler_custom_range() {
let config = MinMaxScalerConfig {
sample_range: (-1.0, 1.0),
};
let x = vec![vec![0.0, 5.0, 10.0]];
let result = MinMaxScaler::transform(&config, &x);
assert_close(&result[0], &[-1.0, 0.0, 1.0], 1e-10);
}
// A constant sample collapses to the lower bound of the target range.
#[test]
fn test_minmax_scaler_constant() {
let config = MinMaxScalerConfig::new();
let x = vec![vec![3.0, 3.0, 3.0]];
let result = MinMaxScaler::transform(&config, &x);
assert_close(&result[0], &[0.0, 0.0, 0.0], 1e-10);
}
// The largest absolute value (3) becomes magnitude 1 and signs are kept.
#[test]
fn test_maxabs_scaler_basic() {
let x = vec![vec![-3.0, 1.0, 2.0]];
let result = MaxAbsScaler::transform(&(), &x);
assert_close(&result[0], &[-1.0, 1.0 / 3.0, 2.0 / 3.0], 1e-10);
}
// An all-zero sample must not be divided by zero.
#[test]
fn test_maxabs_scaler_zero() {
let x = vec![vec![0.0, 0.0, 0.0]];
let result = MaxAbsScaler::transform(&(), &x);
assert_close(&result[0], &[0.0, 0.0, 0.0], 1e-10);
}
// Default config: centre on the median and divide by the 25-75 spread.
// The expected quartiles come from `percentile`, whose values are pinned
// separately in `test_percentile_basic`.
#[test]
fn test_robust_scaler_basic() {
let config = RobustScalerConfig::new();
let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
let result = RobustScaler::transform(&config, &x);
let median = 3.0;
let q25 = percentile(&x[0], 25.0);
let q75 = percentile(&x[0], 75.0);
let iqr = q75 - q25;
let expected: Vec<f64> = x[0].iter().map(|&v| (v - median) / iqr).collect();
assert_close(&result[0], &expected, 1e-10);
}
// Percentiles that land exactly on an element of five sorted values.
#[test]
fn test_percentile_basic() {
let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
assert!((percentile(&data, 0.0) - 1.0).abs() < 1e-10);
assert!((percentile(&data, 50.0) - 3.0).abs() < 1e-10);
assert!((percentile(&data, 100.0) - 5.0).abs() < 1e-10);
assert!((percentile(&data, 25.0) - 2.0).abs() < 1e-10);
assert!((percentile(&data, 75.0) - 4.0).abs() < 1e-10);
}
// Percentiles that fall between two elements are linearly interpolated.
#[test]
fn test_percentile_interpolation() {
let data = vec![0.0, 10.0];
assert!((percentile(&data, 50.0) - 5.0).abs() < 1e-10);
assert!((percentile(&data, 25.0) - 2.5).abs() < 1e-10);
}
// Samples are scaled independently: two samples that differ only by a
// constant factor standardize to the same values.
#[test]
fn test_multiple_samples() {
let config = StandardScalerConfig::new();
let x = vec![vec![1.0, 2.0, 3.0], vec![10.0, 20.0, 30.0]];
let result = StandardScaler::transform(&config, &x);
assert_eq!(result.len(), 2);
assert_close(&result[0], &result[1], 1e-10);
}
}