// oxits 0.1.0 - Docs.rs

use crate::core::traits::Transformer;

// --- StandardScaler ---

/// Options for [`StandardScaler`].
///
/// Both switches are enabled by default.
#[derive(Debug, Clone, Copy)]
pub struct StandardScalerConfig {
    /// When `true`, each sample's mean is subtracted from its values.
    pub with_mean: bool,
    /// When `true`, each sample is divided by its population standard deviation.
    pub with_std: bool,
}

impl Default for StandardScalerConfig {
    /// Centering and scaling both switched on.
    fn default() -> Self {
        Self {
            with_mean: true,
            with_std: true,
        }
    }
}

impl StandardScalerConfig {
    /// Builds the default configuration (same value as [`Default::default`]).
    pub fn new() -> Self {
        Self::default()
    }
}

pub struct StandardScaler;

impl Transformer for StandardScaler {
    type Config = StandardScalerConfig;

    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");
        assert!(
            x.iter().all(|s| s.len() == x[0].len()),
            "All samples must have same length"
        );

        let with_mean = config.with_mean;
        let with_std = config.with_std;

        #[cfg(feature = "parallel")]
        {
            use rayon::prelude::*;
            return x
                .par_iter()
                .map(|sample| standard_scale_single(sample, with_mean, with_std))
                .collect();
        }

        #[cfg(not(feature = "parallel"))]
        x.iter()
            .map(|sample| standard_scale_single(sample, with_mean, with_std))
            .collect()
    }
}

/// Standardizes one sample: optionally subtracts its mean and optionally
/// divides by its population standard deviation.
///
/// * `with_mean` - subtract the sample mean from every value.
/// * `with_std` - divide by the population standard deviation (variance is
///   always taken around the true mean, even when `with_mean` is `false`).
///
/// A sample whose variance is zero, or indistinguishable from zero given
/// floating-point rounding, is treated as constant and is not rescaled.
/// Without that tolerance a row such as `[0.1, 0.1, 0.1]` (whose computed
/// mean is one ulp off) would be divided by a ~1e-17 "standard deviation"
/// and blown up to values of magnitude 1 or larger.
///
/// Returns an empty vector for an empty sample.
fn standard_scale_single(x: &[f64], with_mean: bool, with_std: bool) -> Vec<f64> {
    // Nothing to scale; also avoids the 0/0 mean below.
    if x.is_empty() {
        return Vec::new();
    }

    let n = x.len() as f64;
    // Always compute the actual mean: the variance needs it even when the
    // output is not centered.
    let actual_mean = x.iter().sum::<f64>() / n;
    let center = if with_mean { actual_mean } else { 0.0 };

    let inv_std = if with_std {
        let variance = x
            .iter()
            .map(|&v| (v - actual_mean) * (v - actual_mean))
            .sum::<f64>()
            / n;
        // Largest variance that accumulated rounding error alone can produce
        // for a truly constant sample (same bound scikit-learn applies when
        // detecting constant features).
        let mean_noise = n * actual_mean * f64::EPSILON;
        let rounding_floor = n * f64::EPSILON * variance + mean_noise * mean_noise;
        if variance.is_finite() && variance <= rounding_floor {
            // Constant sample: leave the scale untouched.
            1.0
        } else {
            1.0 / variance.sqrt()
        }
    } else {
        1.0
    };

    x.iter().map(|&v| (v - center) * inv_std).collect()
}

// --- MinMaxScaler ---

/// Options for [`MinMaxScaler`].
#[derive(Debug, Clone, Copy)]
pub struct MinMaxScalerConfig {
    /// Target `(min, max)` interval each sample is mapped onto; defaults to `(0.0, 1.0)`.
    pub sample_range: (f64, f64),
}

impl Default for MinMaxScalerConfig {
    /// Maps samples onto the unit interval.
    fn default() -> Self {
        let unit_interval = (0.0, 1.0);
        Self {
            sample_range: unit_interval,
        }
    }
}

impl MinMaxScalerConfig {
    /// Builds the default configuration (same value as [`Default::default`]).
    pub fn new() -> Self {
        Self::default()
    }
}

/// Stateless scaler that linearly maps every sample onto a configured interval.
pub struct MinMaxScaler;

impl Transformer for MinMaxScaler {
    type Config = MinMaxScalerConfig;

    /// Rescales each sample of `x` independently so that its minimum lands on
    /// `sample_range.0` and its maximum on `sample_range.1`.
    ///
    /// A sample whose minimum equals its maximum is filled with `sample_range.0`.
    ///
    /// # Panics
    ///
    /// Panics when `x` is empty or when `sample_range.0 < sample_range.1`
    /// does not hold.
    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        // Maps one row onto [lo, hi].
        fn rescale(row: &[f64], lo: f64, hi: f64) -> Vec<f64> {
            let smallest = row.iter().copied().fold(f64::INFINITY, f64::min);
            let largest = row.iter().copied().fold(f64::NEG_INFINITY, f64::max);
            let spread = largest - smallest;
            if spread == 0.0 {
                // Constant row: there is no span to stretch.
                return vec![lo; row.len()];
            }
            let factor = (hi - lo) / spread;
            row.iter().map(|&v| (v - smallest) * factor + lo).collect()
        }

        assert!(!x.is_empty(), "Input must have at least one sample");
        let (lo, hi) = config.sample_range;
        assert!(lo < hi, "sample_range min must be less than max");

        #[cfg(feature = "parallel")]
        {
            use rayon::prelude::*;
            return x.par_iter().map(|row| rescale(row, lo, hi)).collect();
        }

        #[cfg(not(feature = "parallel"))]
        x.iter().map(|row| rescale(row, lo, hi)).collect()
    }
}

// --- MaxAbsScaler ---

/// Stateless scaler that divides every sample by its largest absolute value.
pub struct MaxAbsScaler;

impl Transformer for MaxAbsScaler {
    type Config = ();

    /// Divides each sample of `x` by the maximum absolute value found in that
    /// sample. A sample whose maximum absolute value is zero is left unscaled.
    ///
    /// # Panics
    ///
    /// Panics when `x` is empty.
    fn transform(_config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");

        let mut scaled: Vec<Vec<f64>> = Vec::with_capacity(x.len());
        for row in x {
            let peak = row.iter().map(|v| v.abs()).fold(0.0_f64, f64::max);
            // Dividing by 1.0 keeps an all-zero row as it is.
            let divisor = if peak != 0.0 { peak } else { 1.0 };
            scaled.push(row.iter().map(|&v| v / divisor).collect());
        }
        scaled
    }
}

// --- RobustScaler ---

/// Options for [`RobustScaler`].
#[derive(Debug, Clone, Copy)]
pub struct RobustScalerConfig {
    /// When `true`, each sample's median is subtracted from its values.
    pub with_centering: bool,
    /// When `true`, each sample is divided by its inter-quantile range.
    pub with_scaling: bool,
    /// Lower and upper percentiles (each within `[0, 100]`) whose difference
    /// is used as the scale; defaults to `(25.0, 75.0)`.
    pub quantile_range: (f64, f64),
}

impl Default for RobustScalerConfig {
    /// Centering and scaling enabled, using the 25th-75th percentile range.
    fn default() -> Self {
        Self {
            with_centering: true,
            with_scaling: true,
            quantile_range: (25.0, 75.0),
        }
    }
}

impl RobustScalerConfig {
    /// Builds the default configuration (same value as [`Default::default`]).
    pub fn new() -> Self {
        Self::default()
    }
}

pub struct RobustScaler;

impl Transformer for RobustScaler {
    type Config = RobustScalerConfig;

    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");
        assert!(
            config.quantile_range.0 < config.quantile_range.1,
            "quantile_range lower must be less than upper"
        );
        assert!(
            config.quantile_range.0 >= 0.0 && config.quantile_range.1 <= 100.0,
            "quantile_range must be in [0, 100]"
        );

        x.iter()
            .map(|sample| {
                let center = if config.with_centering {
                    percentile(sample, 50.0)
                } else {
                    0.0
                };
                let scale = if config.with_scaling {
                    let q_lo = percentile(sample, config.quantile_range.0);
                    let q_hi = percentile(sample, config.quantile_range.1);
                    let iqr = q_hi - q_lo;
                    if iqr == 0.0 {
                        1.0
                    } else {
                        iqr
                    }
                } else {
                    1.0
                };
                sample.iter().map(|&v| (v - center) / scale).collect()
            })
            .collect()
    }
}

/// Compute the p-th percentile (0-100) of a slice using linear interpolation.
///
/// The data is copied and sorted, the fractional position
/// `p / 100 * (n - 1)` is located, and the two neighbouring order statistics
/// are linearly interpolated (the position rule of numpy's default
/// `interpolation='linear'`).
///
/// When the position falls exactly on an element, that element is returned
/// as is. This keeps the result exact and avoids `0 * inf = NaN` when the
/// next element is infinite (e.g. the median of `[1, 2, inf]` is `2`).
///
/// # Panics
///
/// Panics if `data` is empty, if `p` is outside `[0, 100]`, or if `data` has
/// two or more elements and contains a NaN (the values cannot be ordered).
pub fn percentile(data: &[f64], p: f64) -> f64 {
    assert!(!data.is_empty(), "Cannot compute percentile of empty slice");
    assert!((0.0..=100.0).contains(&p), "Percentile must be in [0, 100]");

    let mut sorted: Vec<f64> = data.to_vec();
    sorted.sort_by(|a, b| {
        a.partial_cmp(b)
            .expect("Cannot compute percentile of data containing NaN")
    });

    let last = sorted.len() - 1;
    let idx = p / 100.0 * last as f64;
    let lo = idx.floor() as usize;
    let frac = idx - lo as f64;

    // Exact hit (this also covers p == 100 and single-element input): no
    // interpolation needed, and none attempted so an infinite neighbour
    // cannot poison the result.
    if frac == 0.0 || lo >= last {
        return sorted[lo.min(last)];
    }

    sorted[lo] + frac * (sorted[lo + 1] - sorted[lo])
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that two slices have equal length and agree element-wise
    /// within the absolute tolerance `eps`.
    fn assert_close(actual: &[f64], expected: &[f64], eps: f64) {
        assert_eq!(actual.len(), expected.len());
        for (i, a) in actual.iter().enumerate() {
            let e = &expected[i];
            assert!(
                (a - e).abs() < eps,
                "[{i}]: actual={a}, expected={e}, eps={eps}"
            );
        }
    }

    #[test]
    fn test_standard_scaler_basic() {
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
        let result = StandardScaler::transform(&StandardScalerConfig::default(), &x);
        // Mean is 3.0 and the population variance is 2.0.
        let sigma = 2.0_f64.sqrt();
        let expected: Vec<f64> = x[0].iter().map(|v| (v - 3.0) / sigma).collect();
        assert_close(&result[0], &expected, 1e-10);
    }

    #[test]
    fn test_standard_scaler_no_mean() {
        let config = StandardScalerConfig {
            with_mean: false,
            ..StandardScalerConfig::default()
        };
        let x = vec![vec![1.0, 2.0, 3.0]];
        let result = StandardScaler::transform(&config, &x);
        // The variance (2/3) is still taken around the real mean of 2.0,
        // but the output itself is not centered.
        let sigma = (2.0_f64 / 3.0).sqrt();
        let expected: Vec<f64> = x[0].iter().map(|v| v / sigma).collect();
        assert_close(&result[0], &expected, 1e-10);
    }

    #[test]
    fn test_standard_scaler_constant() {
        let x = vec![vec![5.0; 3]];
        let result = StandardScaler::transform(&StandardScalerConfig::new(), &x);
        assert_close(&result[0], &[0.0; 3], 1e-10);
    }

    #[test]
    fn test_minmax_scaler_basic() {
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
        let result = MinMaxScaler::transform(&MinMaxScalerConfig::default(), &x);
        assert_close(&result[0], &[0.0, 0.25, 0.5, 0.75, 1.0], 1e-10);
    }

    #[test]
    fn test_minmax_scaler_custom_range() {
        let symmetric = MinMaxScalerConfig {
            sample_range: (-1.0, 1.0),
        };
        let result = MinMaxScaler::transform(&symmetric, &[vec![0.0, 5.0, 10.0]]);
        assert_close(&result[0], &[-1.0, 0.0, 1.0], 1e-10);
    }

    #[test]
    fn test_minmax_scaler_constant() {
        let x = vec![vec![3.0; 3]];
        let result = MinMaxScaler::transform(&MinMaxScalerConfig::new(), &x);
        // A constant row collapses onto the lower bound of the range (0.0).
        assert_close(&result[0], &[0.0; 3], 1e-10);
    }

    #[test]
    fn test_maxabs_scaler_basic() {
        let result = MaxAbsScaler::transform(&(), &[vec![-3.0, 1.0, 2.0]]);
        let expected = [-1.0, 1.0 / 3.0, 2.0 / 3.0];
        assert_close(&result[0], &expected, 1e-10);
    }

    #[test]
    fn test_maxabs_scaler_zero() {
        let result = MaxAbsScaler::transform(&(), &[vec![0.0; 3]]);
        assert_close(&result[0], &[0.0; 3], 1e-10);
    }

    #[test]
    fn test_robust_scaler_basic() {
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
        let result = RobustScaler::transform(&RobustScalerConfig::default(), &x);

        let row = &x[0];
        let median = 3.0;
        let iqr = percentile(row, 75.0) - percentile(row, 25.0);
        let expected: Vec<f64> = row.iter().map(|&v| (v - median) / iqr).collect();
        assert_close(&result[0], &expected, 1e-10);
    }

    #[test]
    fn test_percentile_basic() {
        let data = vec![1.0, 2.0, 3.0, 4.0, 5.0];
        let cases = [
            (0.0, 1.0),
            (50.0, 3.0),
            (100.0, 5.0),
            (25.0, 2.0),
            (75.0, 4.0),
        ];
        for (p, want) in cases {
            assert!((percentile(&data, p) - want).abs() < 1e-10);
        }
    }

    #[test]
    fn test_percentile_interpolation() {
        let data = vec![0.0, 10.0];
        for (p, want) in [(50.0, 5.0), (25.0, 2.5)] {
            assert!((percentile(&data, p) - want).abs() < 1e-10);
        }
    }

    #[test]
    fn test_multiple_samples() {
        let x = vec![vec![1.0, 2.0, 3.0], vec![10.0, 20.0, 30.0]];
        let result = StandardScaler::transform(&StandardScalerConfig::new(), &x);
        assert_eq!(result.len(), 2);
        // Rows are scaled independently, so both end up with the same shape.
        let (first, second) = (&result[0], &result[1]);
        assert_close(first, second, 1e-10);
    }
}