// ferrolearn_preprocess/power_transformer.rs
//! Power transformer: apply a power transform to make data more Gaussian.
//!
//! Implements the **Yeo-Johnson** transformation, which works for both positive
//! and negative values. An optimal lambda per feature is estimated via a simple
//! grid search that maximises the log-likelihood of the transformed column
//! following a normal distribution.
//!
//! After transformation, the data can optionally be standardized (zero mean,
//! unit variance). Standardization is enabled by default, matching the
//! scikit-learn default.
//!
//! # Yeo-Johnson definition
//!
//! ```text
//! y ≥ 0, λ ≠ 0:  ((y + 1)^λ - 1) / λ
//! y ≥ 0, λ = 0:  ln(y + 1)
//! y < 0, λ ≠ 2:  -((1 - y)^(2-λ) - 1) / (2 - λ)
//! y < 0, λ = 2:  -ln(1 - y)
//! ```

use ferrolearn_core::error::FerroError;
use ferrolearn_core::pipeline::{FittedPipelineTransformer, PipelineTransformer};
use ferrolearn_core::traits::{Fit, FitTransform, Transform};
use ndarray::{Array1, Array2};
use num_traits::Float;

27// ---------------------------------------------------------------------------
28// Helpers
29// ---------------------------------------------------------------------------
30
31/// Apply the Yeo-Johnson transform to a single value with parameter `lambda`.
32fn yeo_johnson<F: Float>(y: F, lambda: F) -> F {
33    let zero = F::zero();
34    let one = F::one();
35    let two = one + one;
36    let eps = F::from(1e-10_f64).unwrap_or(F::epsilon());
37
38    if y >= zero {
39        if (lambda - zero).abs() < eps {
40            // λ ≈ 0: ln(y + 1)
41            (y + one).ln()
42        } else {
43            // ((y + 1)^λ - 1) / λ
44            ((y + one).powf(lambda) - one) / lambda
45        }
46    } else {
47        // y < 0
48        let two_minus_lambda = two - lambda;
49        if (two_minus_lambda).abs() < eps {
50            // λ ≈ 2: -ln(1 - y)
51            -(one - y).ln()
52        } else {
53            // -((1 - y)^(2-λ) - 1) / (2 - λ)
54            -((one - y).powf(two_minus_lambda) - one) / two_minus_lambda
55        }
56    }
57}
58
59/// Compute the log-likelihood of a zero-mean, unit-variance normal distribution
60/// for the transformed data. This is used as the optimisation criterion for
61/// finding the optimal lambda.
62///
63/// For a column `col` transformed with `lambda`, the log-likelihood contribution
64/// from the Yeo-Johnson Jacobian is:
65/// `(λ - 1) * sum(sign(y) * ln(|y| + 1))` for each sample.
66/// We then add the normal log-likelihood of the transformed values.
67fn log_likelihood_yj<F: Float>(col: &[F], lambda: F) -> F {
68    let n = F::from(col.len()).unwrap_or(F::one());
69    let one = F::one();
70    let two = one + one;
71    let pi2 = F::from(std::f64::consts::TAU).unwrap_or(F::one()); // 2π
72
73    // Transform each value
74    let transformed: Vec<F> = col
75        .iter()
76        .copied()
77        .map(|v| yeo_johnson(v, lambda))
78        .collect();
79
80    // Compute mean and variance of transformed values
81    let mean = transformed
82        .iter()
83        .copied()
84        .fold(F::zero(), |acc, v| acc + v)
85        / n;
86    let variance = transformed
87        .iter()
88        .copied()
89        .map(|v| (v - mean) * (v - mean))
90        .fold(F::zero(), |acc, v| acc + v)
91        / n;
92
93    if variance <= F::zero() {
94        return F::neg_infinity();
95    }
96
97    // Normal log-likelihood: -n/2 * ln(2π) - n/2 * ln(var) - 1/(2*var)*sum((t-mean)^2)
98    // Simplified: -n/2 * ln(2π*var) - n/2
99    let normal_ll = -n / two * (pi2 * variance).ln() - n / two;
100
101    // Jacobian contribution from Yeo-Johnson
102    // For both y >= 0 and y < 0, the Jacobian term is ln(|y| + 1).
103    let lambda_minus_1 = lambda - one;
104    let jacobian: F = col
105        .iter()
106        .copied()
107        .fold(F::zero(), |acc, y| acc + (y.abs() + one).ln());
108    let jacobian_ll = lambda_minus_1 * jacobian;
109
110    normal_ll + jacobian_ll
111}
112
// ---------------------------------------------------------------------------
// PowerTransformer (unfitted)
// ---------------------------------------------------------------------------

/// Unfitted Yeo-Johnson power transformer.
///
/// [`Fit::fit`] runs a per-feature grid search for the lambda that maximises
/// the normal log-likelihood of the transformed column and produces a
/// [`FittedPowerTransformer`] ready to transform new data.
///
/// # Examples
///
/// ```
/// use ferrolearn_preprocess::PowerTransformer;
/// use ferrolearn_core::traits::{Fit, Transform};
/// use ndarray::array;
///
/// let pt = PowerTransformer::<f64>::new();
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
/// let fitted = pt.fit(&x, &()).unwrap();
/// let transformed = fitted.transform(&x).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct PowerTransformer<F> {
    /// When `true`, transformed output is rescaled to zero mean / unit variance.
    pub(crate) standardize: bool,
    _marker: std::marker::PhantomData<F>,
}
142impl<F: Float + Send + Sync + 'static> PowerTransformer<F> {
143    /// Create a new `PowerTransformer` with standardization enabled (default).
144    #[must_use]
145    pub fn new() -> Self {
146        Self {
147            standardize: true,
148            _marker: std::marker::PhantomData,
149        }
150    }
151
152    /// Create a new `PowerTransformer` with standardization disabled.
153    #[must_use]
154    pub fn without_standardize() -> Self {
155        Self {
156            standardize: false,
157            _marker: std::marker::PhantomData,
158        }
159    }
160
161    /// Whether standardization is enabled.
162    #[must_use]
163    pub fn standardize(&self) -> bool {
164        self.standardize
165    }
166}
167
168impl<F: Float + Send + Sync + 'static> Default for PowerTransformer<F> {
169    fn default() -> Self {
170        Self::new()
171    }
172}
173
174// ---------------------------------------------------------------------------
175// FittedPowerTransformer
176// ---------------------------------------------------------------------------
177
178/// A fitted power transformer holding per-column lambda values and optional
179/// standardisation parameters.
180///
181/// Created by calling [`Fit::fit`] on a [`PowerTransformer`].
182#[derive(Debug, Clone)]
183pub struct FittedPowerTransformer<F> {
184    /// Per-column optimal lambda values.
185    pub(crate) lambdas: Array1<F>,
186    /// Per-column means of the transformed data (used for standardization).
187    pub(crate) means: Option<Array1<F>>,
188    /// Per-column standard deviations of the transformed data (used for standardization).
189    pub(crate) stds: Option<Array1<F>>,
190}
191
192impl<F: Float + Send + Sync + 'static> FittedPowerTransformer<F> {
193    /// Return the per-column lambda values learned during fitting.
194    #[must_use]
195    pub fn lambdas(&self) -> &Array1<F> {
196        &self.lambdas
197    }
198}
199
200// ---------------------------------------------------------------------------
201// Trait implementations
202// ---------------------------------------------------------------------------
203
204impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, ()> for PowerTransformer<F> {
205    type Fitted = FittedPowerTransformer<F>;
206    type Error = FerroError;
207
208    /// Fit the transformer by estimating the optimal lambda per feature.
209    ///
210    /// Uses a grid search over lambda values in `[-3, 3]` with 201 candidate
211    /// values, selecting the lambda that maximises the log-likelihood of the
212    /// Yeo-Johnson transformed column following a normal distribution.
213    ///
214    /// # Errors
215    ///
216    /// Returns [`FerroError::InsufficientSamples`] if the input has zero rows.
217    fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedPowerTransformer<F>, FerroError> {
218        let n_samples = x.nrows();
219        if n_samples == 0 {
220            return Err(FerroError::InsufficientSamples {
221                required: 1,
222                actual: 0,
223                context: "PowerTransformer::fit".into(),
224            });
225        }
226
227        let n_features = x.ncols();
228        let mut lambdas = Array1::zeros(n_features);
229
230        // Grid search: 201 candidate lambdas in [-3, 3]
231        let n_candidates = 201_usize;
232        let lambda_min = F::from(-3.0_f64).unwrap_or(F::zero());
233        let lambda_max = F::from(3.0_f64).unwrap_or(F::one());
234        let step = (lambda_max - lambda_min) / F::from(n_candidates - 1).unwrap_or(F::one());
235
236        for j in 0..n_features {
237            let col: Vec<F> = x.column(j).iter().copied().collect();
238
239            let mut best_ll = F::neg_infinity();
240            let mut best_lambda = F::one(); // default lambda
241
242            for k in 0..n_candidates {
243                let lambda = lambda_min + step * F::from(k).unwrap_or(F::zero());
244                let ll = log_likelihood_yj(&col, lambda);
245                if ll > best_ll {
246                    best_ll = ll;
247                    best_lambda = lambda;
248                }
249            }
250
251            lambdas[j] = best_lambda;
252        }
253
254        // If standardize, compute mean and std of transformed data
255        let (means, stds) = if self.standardize {
256            let n = F::from(n_samples).unwrap_or(F::one());
257            let mut means_arr = Array1::zeros(n_features);
258            let mut stds_arr = Array1::zeros(n_features);
259            for j in 0..n_features {
260                let lambda = lambdas[j];
261                let transformed: Vec<F> = x
262                    .column(j)
263                    .iter()
264                    .copied()
265                    .map(|v| yeo_johnson(v, lambda))
266                    .collect();
267                let mean = transformed
268                    .iter()
269                    .copied()
270                    .fold(F::zero(), |acc, v| acc + v)
271                    / n;
272                let variance = transformed
273                    .iter()
274                    .copied()
275                    .map(|v| (v - mean) * (v - mean))
276                    .fold(F::zero(), |acc, v| acc + v)
277                    / n;
278                means_arr[j] = mean;
279                stds_arr[j] = variance.sqrt();
280            }
281            (Some(means_arr), Some(stds_arr))
282        } else {
283            (None, None)
284        };
285
286        Ok(FittedPowerTransformer {
287            lambdas,
288            means,
289            stds,
290        })
291    }
292}
293
294impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPowerTransformer<F> {
295    type Output = Array2<F>;
296    type Error = FerroError;
297
298    /// Apply the Yeo-Johnson transform and optionally standardize.
299    ///
300    /// # Errors
301    ///
302    /// Returns [`FerroError::ShapeMismatch`] if the number of columns does not
303    /// match the number of features seen during fitting.
304    fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
305        let n_features = self.lambdas.len();
306        if x.ncols() != n_features {
307            return Err(FerroError::ShapeMismatch {
308                expected: vec![x.nrows(), n_features],
309                actual: vec![x.nrows(), x.ncols()],
310                context: "FittedPowerTransformer::transform".into(),
311            });
312        }
313
314        let mut out = x.to_owned();
315        for (j, mut col) in out.columns_mut().into_iter().enumerate() {
316            let lambda = self.lambdas[j];
317            for v in col.iter_mut() {
318                *v = yeo_johnson(*v, lambda);
319            }
320
321            // Standardize if requested
322            if let (Some(means), Some(stds)) = (&self.means, &self.stds) {
323                let m = means[j];
324                let s = stds[j];
325                if s > F::zero() {
326                    for v in col.iter_mut() {
327                        *v = (*v - m) / s;
328                    }
329                }
330            }
331        }
332        Ok(out)
333    }
334}
335
336/// Implement `Transform` on the unfitted transformer to satisfy the
337/// `FitTransform: Transform` supertrait bound.
338impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for PowerTransformer<F> {
339    type Output = Array2<F>;
340    type Error = FerroError;
341
342    /// Always returns an error — the transformer must be fitted first.
343    fn transform(&self, _x: &Array2<F>) -> Result<Array2<F>, FerroError> {
344        Err(FerroError::InvalidParameter {
345            name: "PowerTransformer".into(),
346            reason: "transformer must be fitted before calling transform; use fit() first".into(),
347        })
348    }
349}
350
351impl<F: Float + Send + Sync + 'static> FitTransform<Array2<F>> for PowerTransformer<F> {
352    type FitError = FerroError;
353
354    /// Fit the transformer on `x` and return the transformed output in one step.
355    ///
356    /// # Errors
357    ///
358    /// Returns an error if fitting fails.
359    fn fit_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
360        let fitted = self.fit(x, &())?;
361        fitted.transform(x)
362    }
363}
364
365// ---------------------------------------------------------------------------
366// Pipeline integration (generic)
367// ---------------------------------------------------------------------------
368
369impl<F: Float + Send + Sync + 'static> PipelineTransformer<F> for PowerTransformer<F> {
370    /// Fit the transformer using the pipeline interface.
371    ///
372    /// # Errors
373    ///
374    /// Propagates errors from [`Fit::fit`].
375    fn fit_pipeline(
376        &self,
377        x: &Array2<F>,
378        _y: &Array1<F>,
379    ) -> Result<Box<dyn FittedPipelineTransformer<F>>, FerroError> {
380        let fitted = self.fit(x, &())?;
381        Ok(Box::new(fitted))
382    }
383}
384
385impl<F: Float + Send + Sync + 'static> FittedPipelineTransformer<F> for FittedPowerTransformer<F> {
386    /// Transform data using the pipeline interface.
387    ///
388    /// # Errors
389    ///
390    /// Propagates errors from [`Transform::transform`].
391    fn transform_pipeline(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
392        self.transform(x)
393    }
394}
395
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    #[test]
    fn test_yeo_johnson_identity_at_lambda_one() {
        // λ = 1 on y ≥ 0 reduces to ((y+1)^1 - 1)/1 = y, i.e. the identity.
        for v in [0.0_f64, 0.5, 1.0, 2.0, 5.0] {
            assert_abs_diff_eq!(yeo_johnson(v, 1.0), v, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_yeo_johnson_log_at_lambda_zero() {
        // λ = 0 on y ≥ 0 reduces to ln(y + 1).
        for v in [0.0_f64, 0.5, 1.0, 2.0] {
            assert_abs_diff_eq!(yeo_johnson(v, 0.0), (v + 1.0).ln(), epsilon = 1e-10);
        }
    }

    #[test]
    fn test_yeo_johnson_negative_at_lambda_two() {
        // λ = 2 on y < 0 reduces to -ln(1 - y).
        for v in [-0.5_f64, -1.0, -2.0] {
            assert_abs_diff_eq!(yeo_johnson(v, 2.0), -(1.0 - v).ln(), epsilon = 1e-10);
        }
    }

    #[test]
    fn test_power_transformer_fit_basic() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        // The chosen lambda must lie inside the search grid [-3, 3].
        let lambda = fitted.lambdas()[0];
        assert!((-3.0..=3.0).contains(&lambda));
    }

    #[test]
    fn test_power_transformer_transform_shape() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let out = pt.fit(&x, &()).unwrap().transform(&x).unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    #[test]
    fn test_standardize_produces_zero_mean() {
        // Default construction keeps standardize = true.
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let out = pt.fit(&x, &()).unwrap().transform(&x).unwrap();
        let mean: f64 = out.column(0).iter().sum::<f64>() / out.nrows() as f64;
        assert_abs_diff_eq!(mean, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_without_standardize() {
        let pt = PowerTransformer::<f64>::without_standardize();
        assert!(!pt.standardize());
        let x = array![[1.0], [2.0], [3.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        // No standardization statistics should be recorded.
        assert!(fitted.means.is_none());
        assert!(fitted.stds.is_none());
        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    #[test]
    fn test_fit_transform_equivalence() {
        // fit_transform must match fit followed by transform exactly.
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let via_ft = pt.fit_transform(&x).unwrap();
        let via_sep = pt.fit(&x, &()).unwrap().transform(&x).unwrap();
        for (a, b) in via_ft.iter().zip(via_sep.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-12);
        }
    }

    #[test]
    fn test_shape_mismatch_error() {
        let pt = PowerTransformer::<f64>::new();
        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
        let fitted = pt.fit(&x_train, &()).unwrap();
        // Three columns against a two-column fit must be rejected.
        let x_bad = array![[1.0, 2.0, 3.0]];
        assert!(fitted.transform(&x_bad).is_err());
    }

    #[test]
    fn test_insufficient_samples_error() {
        let pt = PowerTransformer::<f64>::new();
        let x: Array2<f64> = Array2::zeros((0, 2));
        assert!(pt.fit(&x, &()).is_err());
    }

    #[test]
    fn test_unfitted_transform_error() {
        // Transform on an unfitted transformer always fails.
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0]];
        assert!(pt.transform(&x).is_err());
    }

    #[test]
    fn test_negative_values_supported() {
        // Yeo-Johnson is defined for negative inputs as well.
        let pt = PowerTransformer::<f64>::without_standardize();
        let x = array![[-2.0], [-1.0], [0.0], [1.0], [2.0]];
        let out = pt.fit(&x, &()).unwrap().transform(&x).unwrap();
        for v in out.iter() {
            assert!(v.is_finite(), "got non-finite value: {v}");
        }
    }

    #[test]
    fn test_pipeline_integration() {
        use ferrolearn_core::pipeline::PipelineTransformer;
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let y = Array1::zeros(3);
        let fitted = pt.fit_pipeline(&x, &y).unwrap();
        let result = fitted.transform_pipeline(&x).unwrap();
        assert_eq!(result.shape(), x.shape());
    }
}