use crate::core::config::PowerMethod;
use crate::core::traits::Transformer;
#[derive(Debug, Clone, Copy)]
pub struct PowerTransformerConfig {
pub method: PowerMethod,
pub standardize: bool,
}
impl PowerTransformerConfig {
pub fn new() -> Self {
Self {
method: PowerMethod::YeoJohnson,
standardize: true,
}
}
}
impl Default for PowerTransformerConfig {
fn default() -> Self {
Self::new()
}
}
pub struct PowerTransformer;
impl Transformer for PowerTransformer {
type Config = PowerTransformerConfig;
fn transform(config: &Self::Config, x: &[Vec<f64>]) -> Vec<Vec<f64>> {
assert!(!x.is_empty(), "Input must have at least one sample");
let mut result: Vec<Vec<f64>> = x
.iter()
.map(|sample| {
let lambda = fit_lambda(sample, config.method);
apply_transform(sample, lambda, config.method)
})
.collect();
if config.standardize {
for row in &mut result {
let n = row.len() as f64;
let mean = row.iter().sum::<f64>() / n;
let var = row.iter().map(|&v| (v - mean) * (v - mean)).sum::<f64>() / n;
let std = if var == 0.0 { 1.0 } else { var.sqrt() };
for v in row.iter_mut() {
*v = (*v - mean) / std;
}
}
}
result
}
}
fn apply_transform(x: &[f64], lambda: f64, method: PowerMethod) -> Vec<f64> {
x.iter()
.map(|&v| match method {
PowerMethod::BoxCox => box_cox(v, lambda),
PowerMethod::YeoJohnson => yeo_johnson(v, lambda),
})
.collect()
}
fn box_cox(x: f64, lambda: f64) -> f64 {
assert!(x > 0.0, "Box-Cox requires strictly positive values");
if lambda.abs() < 1e-10 {
x.ln()
} else {
(x.powf(lambda) - 1.0) / lambda
}
}
fn yeo_johnson(x: f64, lambda: f64) -> f64 {
if x >= 0.0 {
if (lambda).abs() < 1e-10 {
(x + 1.0).ln()
} else {
((x + 1.0).powf(lambda) - 1.0) / lambda
}
} else if (2.0 - lambda).abs() < 1e-10 {
-((-x + 1.0).ln())
} else {
-((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda)
}
}
fn fit_lambda(x: &[f64], method: PowerMethod) -> f64 {
let n = x.len() as f64;
let neg_log_likelihood = |lambda: f64| -> f64 {
let transformed: Vec<f64> = x
.iter()
.map(|&v| match method {
PowerMethod::BoxCox => box_cox(v, lambda),
PowerMethod::YeoJohnson => yeo_johnson(v, lambda),
})
.collect();
let mean = transformed.iter().sum::<f64>() / n;
let var = transformed
.iter()
.map(|&v| (v - mean) * (v - mean))
.sum::<f64>()
/ n;
let ll_var = if var > 0.0 { -n / 2.0 * var.ln() } else { 0.0 };
let ll_jac = match method {
PowerMethod::BoxCox => (lambda - 1.0) * x.iter().map(|&v| v.ln()).sum::<f64>(),
PowerMethod::YeoJohnson => x
.iter()
.map(|&v| {
if v >= 0.0 {
(lambda - 1.0) * (v + 1.0).ln()
} else {
(1.0 - lambda) * (-v + 1.0).ln()
}
})
.sum::<f64>(),
};
-(ll_var + ll_jac)
};
brent_minimize(neg_log_likelihood, -2.0, 2.0, 1e-8, 500)
}
struct BrentState {
x: f64,
w: f64,
v: f64,
fx: f64,
fw: f64,
fv: f64,
d: f64,
e: f64,
lo: f64,
hi: f64,
}
fn try_parabolic(s: &BrentState, tol1: f64) -> Option<f64> {
if s.e.abs() <= tol1 {
return None;
}
let r = (s.x - s.w) * (s.fx - s.fv);
let q = (s.x - s.v) * (s.fx - s.fw);
let p = (s.x - s.v) * q - (s.x - s.w) * r;
let q = 2.0 * (q - r);
let (p, q) = if q > 0.0 { (-p, q) } else { (p, -q) };
if p.abs() < (0.5 * q * s.e).abs() && p > q * (s.lo - s.x) && p < q * (s.hi - s.x) {
Some(p / q)
} else {
None
}
}
fn trial_point(x: f64, d: f64, tol1: f64) -> f64 {
if d.abs() >= tol1 {
x + d
} else if d > 0.0 {
x + tol1
} else {
x - tol1
}
}
fn brent_update(s: &mut BrentState, u: f64, fu: f64) {
if fu <= s.fx {
if u < s.x {
s.hi = s.x
} else {
s.lo = s.x
}
s.v = s.w;
s.fv = s.fw;
s.w = s.x;
s.fw = s.fx;
s.x = u;
s.fx = fu;
} else {
if u < s.x {
s.lo = u
} else {
s.hi = u
}
if fu <= s.fw || s.w == s.x {
s.v = s.w;
s.fv = s.fw;
s.w = u;
s.fw = fu;
} else if fu <= s.fv || s.v == s.x || s.v == s.w {
s.v = u;
s.fv = fu;
}
}
}
fn brent_minimize<F: Fn(f64) -> f64>(f: F, a: f64, b: f64, tol: f64, max_iter: usize) -> f64 {
let golden = 0.381966011250105; let x0 = a + golden * (b - a);
let fx0 = f(x0);
let mut s = BrentState {
x: x0,
w: x0,
v: x0,
fx: fx0,
fw: fx0,
fv: fx0,
d: 0.0,
e: 0.0,
lo: a,
hi: b,
};
for _ in 0..max_iter {
let mid = 0.5 * (s.lo + s.hi);
let tol1 = tol * s.x.abs() + 1e-10;
let tol2 = 2.0 * tol1;
if (s.x - mid).abs() <= tol2 - 0.5 * (s.hi - s.lo) {
return s.x;
}
let use_golden = if let Some(step) = try_parabolic(&s, tol1) {
s.d = step;
false
} else {
s.e = if s.x < mid { s.hi - s.x } else { s.lo - s.x };
s.d = golden * s.e;
true
};
let u = trial_point(s.x, s.d, tol1);
let fu = f(u);
brent_update(&mut s, u, fu);
s.e = if use_golden { s.e } else { s.d };
}
s.x
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_yeo_johnson_identity_at_lambda_1() {
for &v in &[0.0, 1.0, 5.0, 100.0] {
let result = yeo_johnson(v, 1.0);
assert!((result - v).abs() < 1e-10, "yj({v}, 1.0) = {result}");
}
}
#[test]
fn test_yeo_johnson_log_at_lambda_0() {
for &v in &[0.0, 1.0, 5.0] {
let result = yeo_johnson(v, 0.0);
let expected = (v + 1.0).ln();
assert!(
(result - expected).abs() < 1e-10,
"yj({v}, 0.0) = {result}, expected {expected}"
);
}
}
#[test]
fn test_box_cox_log_at_lambda_0() {
for &v in &[0.5, 1.0, 5.0] {
let result = box_cox(v, 0.0);
let expected = v.ln();
assert!((result - expected).abs() < 1e-10);
}
}
#[test]
fn test_box_cox_square_root_approx() {
let x = 4.0;
let result = box_cox(x, 0.5);
let expected = 2.0 * (x.sqrt() - 1.0);
assert!((result - expected).abs() < 1e-10);
}
#[test]
fn test_power_transformer_standardize() {
let config = PowerTransformerConfig::new();
let x = vec![vec![1.0, 2.0, 3.0, 4.0, 5.0]];
let result = PowerTransformer::transform(&config, &x);
let mean: f64 = result[0].iter().sum::<f64>() / result[0].len() as f64;
assert!(mean.abs() < 1e-8, "mean = {mean}");
}
#[test]
#[should_panic(expected = "Box-Cox requires strictly positive")]
fn test_box_cox_negative_value() {
box_cox(-1.0, 1.0);
}
}