oxits 0.1.0 - Docs.rs

use crate::core::config::PowerMethod;
use crate::core::traits::Transformer;

#[derive(Debug, Clone, Copy)]
pub struct PowerTransformerConfig {
    pub method: PowerMethod,
    pub standardize: bool,
}

impl PowerTransformerConfig {
    pub fn new() -> Self {
        Self {
            method: PowerMethod::YeoJohnson,
            standardize: true,
        }
    }
}

impl Default for PowerTransformerConfig {
    fn default() -> Self {
        Self::new()
    }
}

pub struct PowerTransformer;

impl Transformer for PowerTransformer {
    type Config = PowerTransformerConfig;

    fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
        assert!(!x.is_empty(), "Input must have at least one sample");

        let mut result: Vec<Vec<f64>> = x
            .iter()
            .map(|sample| {
                let lambda = fit_lambda(sample, config.method);
                apply_transform(sample, lambda, config.method)
            })
            .collect();

        if config.standardize {
            for row in &mut result {
                let n = row.len() as f64;
                let mean = row.iter().sum::<f64>() / n;
                let var = row.iter().map(|&v| (v - mean) * (v - mean)).sum::<f64>() / n;
                let std = if var == 0.0 { 1.0 } else { var.sqrt() };
                for v in row.iter_mut() {
                    *v = (*v - mean) / std;
                }
            }
        }

        result
    }
}

/// Apply the power transform with a given lambda.
fn apply_transform(x: &[f64], lambda: f64, method: PowerMethod) -> Vec<f64> {
    x.iter()
        .map(|&v| match method {
            PowerMethod::BoxCox => box_cox(v, lambda),
            PowerMethod::YeoJohnson => yeo_johnson(v, lambda),
        })
        .collect()
}

/// Box-Cox transform for a single value. Requires x > 0.
fn box_cox(x: f64, lambda: f64) -> f64 {
    assert!(x > 0.0, "Box-Cox requires strictly positive values");
    if lambda.abs() < 1e-10 {
        x.ln()
    } else {
        (x.powf(lambda) - 1.0) / lambda
    }
}

/// Yeo-Johnson transform for a single value. Works for all reals.
fn yeo_johnson(x: f64, lambda: f64) -> f64 {
    if x >= 0.0 {
        if (lambda).abs() < 1e-10 {
            (x + 1.0).ln()
        } else {
            ((x + 1.0).powf(lambda) - 1.0) / lambda
        }
    } else if (2.0 - lambda).abs() < 1e-10 {
        -((-x + 1.0).ln())
    } else {
        -((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda)
    }
}

/// Fit the optimal lambda via MLE using Brent's method.
/// We search lambda in [-2, 2] (scipy's default range).
fn fit_lambda(x: &[f64], method: PowerMethod) -> f64 {
    let n = x.len() as f64;

    // Brent's method to maximize log-likelihood
    let neg_log_likelihood = |lambda: f64| -> f64 {
        let transformed: Vec<f64> = x
            .iter()
            .map(|&v| match method {
                PowerMethod::BoxCox => box_cox(v, lambda),
                PowerMethod::YeoJohnson => yeo_johnson(v, lambda),
            })
            .collect();

        let mean = transformed.iter().sum::<f64>() / n;
        let var = transformed
            .iter()
            .map(|&v| (v - mean) * (v - mean))
            .sum::<f64>()
            / n;

        // Log-likelihood (up to constant)
        let ll_var = if var > 0.0 { -n / 2.0 * var.ln() } else { 0.0 };

        // Jacobian contribution
        let ll_jac = match method {
            PowerMethod::BoxCox => (lambda - 1.0) * x.iter().map(|&v| v.ln()).sum::<f64>(),
            PowerMethod::YeoJohnson => x
                .iter()
                .map(|&v| {
                    if v >= 0.0 {
                        (lambda - 1.0) * (v + 1.0).ln()
                    } else {
                        (1.0 - lambda) * (-v + 1.0).ln()
                    }
                })
                .sum::<f64>(),
        };

        -(ll_var + ll_jac)
    };

    brent_minimize(neg_log_likelihood, -2.0, 2.0, 1e-8, 500)
}

/// State for Brent's minimization algorithm.
struct BrentState {
    x: f64,
    w: f64,
    v: f64,
    fx: f64,
    fw: f64,
    fv: f64,
    d: f64,
    e: f64,
    lo: f64,
    hi: f64,
}

/// Try parabolic interpolation. Returns Some(step) if successful, None to use golden section.
fn try_parabolic(s: &BrentState, tol1: f64) -> Option<f64> {
    if s.e.abs() <= tol1 {
        return None;
    }
    let r = (s.x - s.w) * (s.fx - s.fv);
    let q = (s.x - s.v) * (s.fx - s.fw);
    let p = (s.x - s.v) * q - (s.x - s.w) * r;
    let q = 2.0 * (q - r);
    let (p, q) = if q > 0.0 { (-p, q) } else { (p, -q) };

    if p.abs() < (0.5 * q * s.e).abs() && p > q * (s.lo - s.x) && p < q * (s.hi - s.x) {
        Some(p / q)
    } else {
        None
    }
}

/// Compute the trial point, ensuring minimum step of tol1.
fn trial_point(x: f64, d: f64, tol1: f64) -> f64 {
    if d.abs() >= tol1 {
        x + d
    } else if d > 0.0 {
        x + tol1
    } else {
        x - tol1
    }
}

/// Update Brent state after evaluating f(u) = fu.
fn brent_update(s: &mut BrentState, u: f64, fu: f64) {
    if fu <= s.fx {
        if u < s.x {
            s.hi = s.x
        } else {
            s.lo = s.x
        }
        s.v = s.w;
        s.fv = s.fw;
        s.w = s.x;
        s.fw = s.fx;
        s.x = u;
        s.fx = fu;
    } else {
        if u < s.x {
            s.lo = u
        } else {
            s.hi = u
        }
        if fu <= s.fw || s.w == s.x {
            s.v = s.w;
            s.fv = s.fw;
            s.w = u;
            s.fw = fu;
        } else if fu <= s.fv || s.v == s.x || s.v == s.w {
            s.v = u;
            s.fv = fu;
        }
    }
}

/// Brent's method for minimization on `[a, b]`.
/// Returns the x that minimizes f.
fn brent_minimize<F: Fn(f64) -> f64>(f: F, a: f64, b: f64, tol: f64, max_iter: usize) -> f64 {
    let golden = 0.381966011250105; // (3 - sqrt(5)) / 2
    let x0 = a + golden * (b - a);
    let fx0 = f(x0);

    let mut s = BrentState {
        x: x0,
        w: x0,
        v: x0,
        fx: fx0,
        fw: fx0,
        fv: fx0,
        d: 0.0,
        e: 0.0,
        lo: a,
        hi: b,
    };

    for _ in 0..max_iter {
        let mid = 0.5 * (s.lo + s.hi);
        let tol1 = tol * s.x.abs() + 1e-10;
        let tol2 = 2.0 * tol1;

        if (s.x - mid).abs() <= tol2 - 0.5 * (s.hi - s.lo) {
            return s.x;
        }

        let use_golden = if let Some(step) = try_parabolic(&s, tol1) {
            s.d = step;
            false
        } else {
            s.e = if s.x < mid { s.hi - s.x } else { s.lo - s.x };
            s.d = golden * s.e;
            true
        };

        let u = trial_point(s.x, s.d, tol1);
        let fu = f(u);
        brent_update(&mut s, u, fu);
        s.e = if use_golden { s.e } else { s.d };
    }

    s.x
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_yeo_johnson_identity_at_lambda_1() {
        // When lambda=1, yeo_johnson(x) = x for x >= 0
        for &v in &[0.0, 1.0, 5.0, 100.0] {
            let result = yeo_johnson(v, 1.0);
            assert!((result - v).abs() < 1e-10, "yj({v}, 1.0) = {result}");
        }
    }

    #[test]
    fn test_yeo_johnson_log_at_lambda_0() {
        // When lambda=0, yeo_johnson(x) = ln(x+1) for x >= 0
        for &v in &[0.0, 1.0, 5.0] {
            let result = yeo_johnson(v, 0.0);
            let expected = (v + 1.0).ln();
            assert!(
                (result - expected).abs() < 1e-10,
                "yj({v}, 0.0) = {result}, expected {expected}"
            );
        }
    }

    #[test]
    fn test_box_cox_log_at_lambda_0() {
        for &v in &[0.5, 1.0, 5.0] {
            let result = box_cox(v, 0.0);
            let expected = v.ln();
            assert!((result - expected).abs() < 1e-10);
        }
    }

    #[test]
    fn test_box_cox_square_root_approx() {
        // lambda=0.5: (x^0.5 - 1) / 0.5 = 2*(sqrt(x) - 1)
        let x = 4.0;
        let result = box_cox(x, 0.5);
        let expected = 2.0 * (x.sqrt() - 1.0);
        assert!((result - expected).abs() < 1e-10);
    }

    #[test]
    fn test_power_transformer_standardize() {
        let config = PowerTransformerConfig::new();
        let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
        let result = PowerTransformer::transform(&config, &x);
        // With standardize, output should have zero mean
        let mean: f64 = result[0].iter().sum::<f64>() / result[0].len() as f64;
        assert!(mean.abs() < 1e-8, "mean = {mean}");
    }

    #[test]
    #[should_panic(expected = "Box-Cox requires strictly positive")]
    fn test_box_cox_negative_value() {
        box_cox(-1.0, 1.0);
    }
}