Skip to main content

anofox_ml_preprocessing/
power_transformer.rs

1use anofox_ml_core::{FitUnsupervised, Float, InverseTransform, Result, RustMlError, Transform};
2use ndarray::{Array1, Array2};
3
4/// Parameters for PowerTransformer (unfitted state).
5///
6/// Applies a Yeo-Johnson power transform to each feature to make the data
7/// more Gaussian-like, then optionally standardizes to zero mean and unit
8/// variance.
9///
10/// The optimal lambda per feature is found via grid search (maximizing the
11/// log-likelihood of the resulting normal distribution).
12#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
13pub struct PowerTransformer {
14    /// If true, standardize the transformed data to zero mean, unit variance.
15    pub standardize: bool,
16}
17
18impl PowerTransformer {
19    /// Create a new `PowerTransformer` with defaults (standardization enabled).
20    pub fn new() -> Self {
21        Self { standardize: true }
22    }
23
24    /// Set whether to standardize after the power transform.
25    pub fn standardize(mut self, standardize: bool) -> Self {
26        self.standardize = standardize;
27        self
28    }
29}
30
31impl Default for PowerTransformer {
32    fn default() -> Self {
33        Self::new()
34    }
35}
36
37/// Fitted PowerTransformer -- holds learned lambdas per feature and
38/// optional standardization parameters (mean and std).
39#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
40#[serde(bound(deserialize = "F: serde::de::DeserializeOwned"))]
41pub struct FittedPowerTransformer<F: Float> {
42    lambdas: Array1<F>,
43    means: Array1<F>,
44    stds: Array1<F>,
45    standardize: bool,
46}
47
48/// Apply the Yeo-Johnson transform to a single value.
49fn yeo_johnson<F: Float>(x: F, lam: F) -> F {
50    let zero = F::zero();
51    let one = F::one();
52    let two = F::from_f64(2.0).unwrap();
53    let eps = F::from_f64(1e-10).unwrap();
54
55    if x >= zero {
56        if (lam - zero).abs() > eps {
57            // ((x + 1)^lambda - 1) / lambda
58            ((x + one).powf(lam) - one) / lam
59        } else {
60            // ln(x + 1)
61            (x + one).ln()
62        }
63    } else {
64        // x < 0
65        if (lam - two).abs() > eps {
66            // -((-x + 1)^(2 - lambda) - 1) / (2 - lambda)
67            -((-x + one).powf(two - lam) - one) / (two - lam)
68        } else {
69            // -ln(-x + 1)
70            -(-x + one).ln()
71        }
72    }
73}
74
75/// Apply the inverse Yeo-Johnson transform to a single value.
76fn yeo_johnson_inverse<F: Float>(y: F, lam: F) -> F {
77    let zero = F::zero();
78    let one = F::one();
79    let two = F::from_f64(2.0).unwrap();
80    let eps = F::from_f64(1e-10).unwrap();
81
82    if y >= zero {
83        if (lam - zero).abs() > eps {
84            // x = (y * lambda + 1)^(1/lambda) - 1
85            (y * lam + one).powf(one / lam) - one
86        } else {
87            // x = exp(y) - 1
88            y.exp() - one
89        }
90    } else {
91        // y < 0
92        if (lam - two).abs() > eps {
93            // x = 1 - (-(2 - lambda) * y + 1)^(1/(2-lambda))
94            one - (-(two - lam) * y + one).powf(one / (two - lam))
95        } else {
96            // x = 1 - exp(-y)
97            one - (-y).exp()
98        }
99    }
100}
101
102/// Compute the negative log-likelihood for a candidate lambda on a column.
103/// The log-likelihood of the Yeo-Johnson transformed data under a normal
104/// model includes the Jacobian term.
105fn neg_log_likelihood<F: Float>(col: &[F], lam: F) -> f64 {
106    let n = col.len() as f64;
107    // Transform all values
108    let transformed: Vec<f64> = col
109        .iter()
110        .map(|&x| yeo_johnson(x, lam).to_f64().unwrap())
111        .collect();
112
113    // Mean
114    let mean: f64 = transformed.iter().sum::<f64>() / n;
115
116    // Variance
117    let var: f64 = transformed
118        .iter()
119        .map(|&t| (t - mean) * (t - mean))
120        .sum::<f64>()
121        / n;
122    let var = var.max(1e-30); // avoid log(0)
123
124    // Log-likelihood = -n/2 * ln(2*pi*var) + (lam - 1) * sum(sign(x) * ln(|x| + 1))
125    // We only need to maximize, so we can drop the constant -n/2 * ln(2*pi) part
126    // nll = n/2 * ln(var) - (lam - 1) * sum(sign(x) * ln(|x| + 1))
127    let lam_f64 = lam.to_f64().unwrap();
128    let jacobian_sum: f64 = col
129        .iter()
130        .map(|&x| {
131            let x_f64 = x.to_f64().unwrap();
132            let sign = if x_f64 >= 0.0 { 1.0 } else { -1.0 };
133            sign * (x_f64.abs() + 1.0).ln()
134        })
135        .sum();
136
137    n / 2.0 * var.ln() - (lam_f64 - 1.0) * jacobian_sum
138}
139
140impl<F: Float> FitUnsupervised<F> for PowerTransformer {
141    type Fitted = FittedPowerTransformer<F>;
142
143    fn fit(&self, x: &Array2<F>) -> Result<Self::Fitted> {
144        if x.is_empty() {
145            return Err(RustMlError::EmptyInput("input array is empty".into()));
146        }
147
148        let ncols = x.ncols();
149        let mut lambdas = Array1::<F>::zeros(ncols);
150
151        // For each feature, find the best lambda by grid search
152        for j in 0..ncols {
153            let col: Vec<F> = x.column(j).to_vec();
154            let mut best_lam = F::zero();
155            let mut best_nll = f64::INFINITY;
156
157            // Grid search over lambda in [-2.0, 2.0] with step 0.1
158            let mut lam_val = -20i32; // represents -2.0
159            while lam_val <= 20 {
160                let lam = F::from_f64(lam_val as f64 / 10.0).unwrap();
161                let nll = neg_log_likelihood(&col, lam);
162                if nll < best_nll {
163                    best_nll = nll;
164                    best_lam = lam;
165                }
166                lam_val += 1;
167            }
168
169            lambdas[j] = best_lam;
170        }
171
172        // Transform the data to compute standardization parameters
173        let n = F::from_usize(x.nrows()).unwrap();
174        let mut means = Array1::<F>::zeros(ncols);
175        let mut stds = Array1::<F>::ones(ncols);
176
177        if self.standardize {
178            // Compute the transformed data's mean and std per column
179            for j in 0..ncols {
180                let lam = lambdas[j];
181                let mut sum = F::zero();
182                for &val in x.column(j).iter() {
183                    sum = sum + yeo_johnson(val, lam);
184                }
185                let mean = sum / n;
186                means[j] = mean;
187
188                let mut var_sum = F::zero();
189                for &val in x.column(j).iter() {
190                    let t = yeo_johnson(val, lam) - mean;
191                    var_sum = var_sum + t * t;
192                }
193                stds[j] = (var_sum / n).sqrt();
194            }
195        }
196
197        Ok(FittedPowerTransformer {
198            lambdas,
199            means,
200            stds,
201            standardize: self.standardize,
202        })
203    }
204}
205
206impl<F: Float> Transform<F> for FittedPowerTransformer<F> {
207    fn transform(&self, x: &Array2<F>) -> Result<Array2<F>> {
208        if x.ncols() != self.lambdas.len() {
209            return Err(RustMlError::ShapeMismatch(format!(
210                "expected {} features, got {}",
211                self.lambdas.len(),
212                x.ncols()
213            )));
214        }
215
216        let mut result = Array2::<F>::zeros(x.raw_dim());
217        for i in 0..x.nrows() {
218            for j in 0..x.ncols() {
219                let mut val = yeo_johnson(x[[i, j]], self.lambdas[j]);
220                if self.standardize {
221                    val = val - self.means[j];
222                    if self.stds[j] > F::from_f64(1e-15).unwrap() {
223                        val = val / self.stds[j];
224                    }
225                }
226                result[[i, j]] = val;
227            }
228        }
229        Ok(result)
230    }
231}
232
233impl<F: Float> InverseTransform<F> for FittedPowerTransformer<F> {
234    fn inverse_transform(&self, x: &Array2<F>) -> Result<Array2<F>> {
235        if x.ncols() != self.lambdas.len() {
236            return Err(RustMlError::ShapeMismatch(format!(
237                "expected {} features, got {}",
238                self.lambdas.len(),
239                x.ncols()
240            )));
241        }
242
243        let mut result = Array2::<F>::zeros(x.raw_dim());
244        for i in 0..x.nrows() {
245            for j in 0..x.ncols() {
246                let mut val = x[[i, j]];
247                // Undo standardization
248                if self.standardize {
249                    if self.stds[j] > F::from_f64(1e-15).unwrap() {
250                        val = val * self.stds[j];
251                    }
252                    val = val + self.means[j];
253                }
254                // Undo Yeo-Johnson
255                result[[i, j]] = yeo_johnson_inverse(val, self.lambdas[j]);
256            }
257        }
258        Ok(result)
259    }
260}
261
262impl<F: Float> FittedPowerTransformer<F> {
263    /// Return the fitted lambda per feature.
264    pub fn lambdas(&self) -> &Array1<F> {
265        &self.lambdas
266    }
267
268    /// Return the mean per feature (after Yeo-Johnson, before standardization).
269    pub fn means(&self) -> &Array1<F> {
270        &self.means
271    }
272
273    /// Return the std per feature (after Yeo-Johnson, before standardization).
274    pub fn stds(&self) -> &Array1<F> {
275        &self.stds
276    }
277}
278
279#[cfg(test)]
280mod tests {
281    use super::*;
282    use approx::assert_abs_diff_eq;
283    use ndarray::array;
284
285    #[test]
286    fn test_fit_transform_basic() {
287        let x = array![
288            [1.0, -1.0],
289            [2.0, -2.0],
290            [3.0, -3.0],
291            [4.0, -4.0],
292            [5.0, -5.0],
293        ];
294        let pt = PowerTransformer::default();
295        let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
296        let transformed = fitted.transform(&x).unwrap();
297
298        // Each column should have mean ~0 and std ~1
299        let n = x.nrows() as f64;
300        for j in 0..x.ncols() {
301            let col_mean: f64 = transformed.column(j).sum() / n;
302            assert_abs_diff_eq!(col_mean, 0.0, epsilon = 1e-8);
303
304            let col_std: f64 = (transformed
305                .column(j)
306                .iter()
307                .map(|&v| (v - col_mean).powi(2))
308                .sum::<f64>()
309                / n)
310                .sqrt();
311            assert_abs_diff_eq!(col_std, 1.0, epsilon = 1e-6);
312        }
313    }
314
315    #[test]
316    fn test_inverse_transform_roundtrip() {
317        let x = array![[0.5, 1.0], [1.5, 2.0], [2.5, 3.0], [3.5, 4.0], [4.5, 5.0],];
318        let pt = PowerTransformer::default();
319        let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
320        let transformed = fitted.transform(&x).unwrap();
321        let recovered = fitted.inverse_transform(&transformed).unwrap();
322
323        for (a, b) in x.iter().zip(recovered.iter()) {
324            assert_abs_diff_eq!(a, b, epsilon = 1e-6);
325        }
326    }
327
328    #[test]
329    fn test_without_standardize() {
330        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
331        let pt = PowerTransformer::new().standardize(false);
332        let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
333        let transformed = fitted.transform(&x).unwrap();
334
335        // Without standardization, the transform is just Yeo-Johnson
336        let lam = fitted.lambdas()[0];
337        for i in 0..x.nrows() {
338            let expected = yeo_johnson(x[[i, 0]], lam);
339            assert_abs_diff_eq!(transformed[[i, 0]], expected, epsilon = 1e-10);
340        }
341    }
342
343    #[test]
344    fn test_yeo_johnson_identity_at_lambda_one() {
345        // When lambda = 1, Yeo-Johnson for x >= 0 is ((x+1)^1 - 1)/1 = x
346        let one = 1.0_f64;
347        for &x in &[0.0, 1.0, 2.0, 5.0, 10.0] {
348            let result = yeo_johnson(x, one);
349            assert_abs_diff_eq!(result, x, epsilon = 1e-10);
350        }
351    }
352
353    #[test]
354    fn test_yeo_johnson_lambda_zero() {
355        // When lambda = 0, Yeo-Johnson for x >= 0 is ln(x + 1)
356        let zero = 0.0_f64;
357        for &x in &[0.0, 1.0, 2.0, 5.0] {
358            let result = yeo_johnson(x, zero);
359            let expected = (x + 1.0).ln();
360            assert_abs_diff_eq!(result, expected, epsilon = 1e-10);
361        }
362    }
363
364    #[test]
365    fn test_yeo_johnson_negative_values() {
366        // Verify transform and inverse for negative values
367        let lam = 0.5_f64;
368        for &x in &[-1.0, -2.0, -5.0, -0.5] {
369            let y = yeo_johnson(x, lam);
370            let x_back = yeo_johnson_inverse(y, lam);
371            assert_abs_diff_eq!(x, x_back, epsilon = 1e-8);
372        }
373    }
374
375    #[test]
376    fn test_lambda_two_negative_branch() {
377        // When lambda = 2 and x < 0: -ln(-x + 1)
378        let two = 2.0_f64;
379        for &x in &[-1.0, -2.0, -0.5] {
380            let result = yeo_johnson(x, two);
381            let expected = -(-x + 1.0).ln();
382            assert_abs_diff_eq!(result, expected, epsilon = 1e-10);
383        }
384    }
385
386    #[test]
387    fn test_empty_input() {
388        let x: Array2<f64> = Array2::zeros((0, 0));
389        let pt = PowerTransformer::default();
390        assert!(FitUnsupervised::<f64>::fit(&pt, &x).is_err());
391    }
392
393    #[test]
394    fn test_shape_mismatch() {
395        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
396        let pt = PowerTransformer::default();
397        let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
398
399        let x_wrong = array![[1.0, 2.0, 3.0]];
400        assert!(fitted.transform(&x_wrong).is_err());
401        assert!(fitted.inverse_transform(&x_wrong).is_err());
402    }
403
404    #[test]
405    fn test_mixed_positive_negative() {
406        let x = array![
407            [-3.0, 10.0],
408            [-1.0, 20.0],
409            [0.0, 30.0],
410            [1.0, 40.0],
411            [3.0, 50.0],
412        ];
413        let pt = PowerTransformer::default();
414        let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
415        let transformed = fitted.transform(&x).unwrap();
416        let recovered = fitted.inverse_transform(&transformed).unwrap();
417
418        for (a, b) in x.iter().zip(recovered.iter()) {
419            assert_abs_diff_eq!(a, b, epsilon = 1e-5);
420        }
421    }
422}