// ferrolearn_preprocess/power_transformer.rs

//! Power transformer: apply a power transform to make data more Gaussian.
//!
//! Implements the **Yeo-Johnson** transformation, which works for both positive
//! and negative values. An optimal lambda per feature is estimated with a
//! bounded Brent search over `[-3, 3]` that maximises the log-likelihood of the
//! transformed column following a normal distribution.
//!
//! After transformation, the data can optionally be standardized (zero mean,
//! unit variance). Standardization is enabled by default, matching the
//! scikit-learn default.
//!
//! # Yeo-Johnson definition
//!
//! ```text
//! y ≥ 0, λ ≠ 0:  ((y + 1)^λ - 1) / λ
//! y ≥ 0, λ = 0:  ln(y + 1)
//! y < 0, λ ≠ 2:  -((1 - y)^(2-λ) - 1) / (2 - λ)
//! y < 0, λ = 2:  -ln(1 - y)
//! ```
20
21use ferrolearn_core::error::FerroError;
22use ferrolearn_core::pipeline::{FittedPipelineTransformer, PipelineTransformer};
23use ferrolearn_core::traits::{Fit, FitTransform, Transform};
24use ndarray::{Array1, Array2};
25use num_traits::Float;
26
27// ---------------------------------------------------------------------------
28// Helpers
29// ---------------------------------------------------------------------------
30
31/// Apply the Yeo-Johnson transform to a single value with parameter `lambda`.
32fn yeo_johnson<F: Float>(y: F, lambda: F) -> F {
33    let zero = F::zero();
34    let one = F::one();
35    let two = one + one;
36    let eps = F::from(1e-10_f64).unwrap_or(F::epsilon());
37
38    if y >= zero {
39        if (lambda - zero).abs() < eps {
40            // λ ≈ 0: ln(y + 1)
41            (y + one).ln()
42        } else {
43            // ((y + 1)^λ - 1) / λ
44            ((y + one).powf(lambda) - one) / lambda
45        }
46    } else {
47        // y < 0
48        let two_minus_lambda = two - lambda;
49        if (two_minus_lambda).abs() < eps {
50            // λ ≈ 2: -ln(1 - y)
51            -(one - y).ln()
52        } else {
53            // -((1 - y)^(2-λ) - 1) / (2 - λ)
54            -((one - y).powf(two_minus_lambda) - one) / two_minus_lambda
55        }
56    }
57}
58
59/// Compute the log-likelihood of a zero-mean, unit-variance normal distribution
60/// for the transformed data. This is used as the optimisation criterion for
61/// finding the optimal lambda.
62///
63/// For a column `col` transformed with `lambda`, the log-likelihood contribution
64/// from the Yeo-Johnson Jacobian is:
65/// `(λ - 1) * sum(sign(y) * ln(|y| + 1))` for each sample.
66/// We then add the normal log-likelihood of the transformed values.
67fn log_likelihood_yj<F: Float>(col: &[F], lambda: F) -> F {
68    let n = F::from(col.len()).unwrap_or(F::one());
69    let one = F::one();
70    let two = one + one;
71    let pi2 = F::from(std::f64::consts::TAU).unwrap_or(F::one()); // 2π
72
73    // Transform each value
74    let transformed: Vec<F> = col
75        .iter()
76        .copied()
77        .map(|v| yeo_johnson(v, lambda))
78        .collect();
79
80    // Compute mean and variance of transformed values
81    let mean = transformed
82        .iter()
83        .copied()
84        .fold(F::zero(), |acc, v| acc + v)
85        / n;
86    let variance = transformed
87        .iter()
88        .copied()
89        .map(|v| (v - mean) * (v - mean))
90        .fold(F::zero(), |acc, v| acc + v)
91        / n;
92
93    if variance <= F::zero() {
94        return F::neg_infinity();
95    }
96
97    // Normal log-likelihood: -n/2 * ln(2π) - n/2 * ln(var) - 1/(2*var)*sum((t-mean)^2)
98    // Simplified: -n/2 * ln(2π*var) - n/2
99    let normal_ll = -n / two * (pi2 * variance).ln() - n / two;
100
101    // Jacobian contribution from Yeo-Johnson
102    // For both y >= 0 and y < 0, the Jacobian term is ln(|y| + 1).
103    let lambda_minus_1 = lambda - one;
104    let jacobian: F = col
105        .iter()
106        .copied()
107        .fold(F::zero(), |acc, y| acc + (y.abs() + one).ln());
108    let jacobian_ll = lambda_minus_1 * jacobian;
109
110    normal_ll + jacobian_ll
111}
112
113// ---------------------------------------------------------------------------
114// PowerTransformer (unfitted)
115// ---------------------------------------------------------------------------
116
/// An unfitted power transformer using the Yeo-Johnson method.
///
/// Calling [`Fit::fit`] estimates an optimal lambda per feature by maximising
/// the Yeo-Johnson log-likelihood (via a bounded Brent search over `[-3, 3]`)
/// and returns a [`FittedPowerTransformer`] that can transform new data.
///
/// # Examples
///
/// ```
/// use ferrolearn_preprocess::PowerTransformer;
/// use ferrolearn_core::traits::{Fit, Transform};
/// use ndarray::array;
///
/// let pt = PowerTransformer::<f64>::new();
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
/// let fitted = pt.fit(&x, &()).unwrap();
/// let transformed = fitted.transform(&x).unwrap();
/// ```
#[derive(Debug, Clone)]
pub struct PowerTransformer<F> {
    /// Whether to standardize the output (zero mean, unit variance) after the
    /// power transform; `true` by default, matching scikit-learn.
    pub(crate) standardize: bool,
    // Zero-sized marker tying the struct to its float type `F`.
    _marker: std::marker::PhantomData<F>,
}
141
142impl<F: Float + Send + Sync + 'static> PowerTransformer<F> {
143    /// Create a new `PowerTransformer` with standardization enabled (default).
144    #[must_use]
145    pub fn new() -> Self {
146        Self {
147            standardize: true,
148            _marker: std::marker::PhantomData,
149        }
150    }
151
152    /// Create a new `PowerTransformer` with standardization disabled.
153    #[must_use]
154    pub fn without_standardize() -> Self {
155        Self {
156            standardize: false,
157            _marker: std::marker::PhantomData,
158        }
159    }
160
161    /// Whether standardization is enabled.
162    #[must_use]
163    pub fn standardize(&self) -> bool {
164        self.standardize
165    }
166}
167
168impl<F: Float + Send + Sync + 'static> Default for PowerTransformer<F> {
169    fn default() -> Self {
170        Self::new()
171    }
172}
173
174// ---------------------------------------------------------------------------
175// FittedPowerTransformer
176// ---------------------------------------------------------------------------
177
/// A fitted power transformer holding per-column lambda values and optional
/// standardisation parameters.
///
/// Created by calling [`Fit::fit`] on a [`PowerTransformer`]. The `means` and
/// `stds` fields are populated only when the transformer was fitted with
/// standardization enabled.
#[derive(Debug, Clone)]
pub struct FittedPowerTransformer<F> {
    /// Per-column optimal lambda values (one entry per feature).
    pub(crate) lambdas: Array1<F>,
    /// Per-column means of the transformed data (used for standardization);
    /// `None` when standardization was disabled at fit time.
    pub(crate) means: Option<Array1<F>>,
    /// Per-column standard deviations of the transformed data (used for
    /// standardization); `None` when standardization was disabled at fit time.
    pub(crate) stds: Option<Array1<F>>,
}
191
impl<F: Float + Send + Sync + 'static> FittedPowerTransformer<F> {
    /// Return the per-column lambda values learned during fitting.
    ///
    /// The array has one entry per feature, in column order.
    #[must_use]
    pub fn lambdas(&self) -> &Array1<F> {
        &self.lambdas
    }
}
199
200// ---------------------------------------------------------------------------
201// Trait implementations
202// ---------------------------------------------------------------------------
203
204impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, ()> for PowerTransformer<F> {
205    type Fitted = FittedPowerTransformer<F>;
206    type Error = FerroError;
207
208    /// Fit the transformer by estimating the optimal lambda per feature.
209    ///
210    /// Uses Brent's method to minimize the negative log-likelihood on the
211    /// interval `[-3, 3]`, selecting the lambda that maximises the
212    /// log-likelihood of the Yeo-Johnson transformed column following a
213    /// normal distribution.
214    ///
215    /// # Errors
216    ///
217    /// Returns [`FerroError::InsufficientSamples`] if the input has zero rows.
218    fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedPowerTransformer<F>, FerroError> {
219        let n_samples = x.nrows();
220        if n_samples == 0 {
221            return Err(FerroError::InsufficientSamples {
222                required: 1,
223                actual: 0,
224                context: "PowerTransformer::fit".into(),
225            });
226        }
227
228        let n_features = x.ncols();
229        let mut lambdas = Array1::zeros(n_features);
230
231        for j in 0..n_features {
232            // Convert column to f64 for the Brent optimizer.
233            let col_f64: Vec<f64> = x
234                .column(j)
235                .iter()
236                .copied()
237                .map(|v| v.to_f64().unwrap_or(0.0))
238                .collect();
239
240            // Minimize the negative log-likelihood using Brent's method.
241            let result = ferrolearn_numerical::optimize::brent_bounded(
242                |lambda| {
243                    let lam = F::from(lambda).unwrap_or(F::one());
244                    // Convert column back to generic F for the log-likelihood.
245                    let col_f: Vec<F> = col_f64
246                        .iter()
247                        .map(|&v| F::from(v).unwrap_or(F::zero()))
248                        .collect();
249                    let ll = log_likelihood_yj(&col_f, lam);
250                    // Negate: minimize negative log-likelihood = maximize log-likelihood.
251                    -ll.to_f64().unwrap_or(f64::INFINITY)
252                },
253                -3.0,
254                3.0,
255                1e-8,
256                500,
257            );
258
259            lambdas[j] = F::from(result.x).unwrap_or(F::one());
260        }
261
262        // If standardize, compute mean and std of transformed data
263        let (means, stds) = if self.standardize {
264            let n = F::from(n_samples).unwrap_or(F::one());
265            let mut means_arr = Array1::zeros(n_features);
266            let mut stds_arr = Array1::zeros(n_features);
267            for j in 0..n_features {
268                let lambda = lambdas[j];
269                let transformed: Vec<F> = x
270                    .column(j)
271                    .iter()
272                    .copied()
273                    .map(|v| yeo_johnson(v, lambda))
274                    .collect();
275                let mean = transformed
276                    .iter()
277                    .copied()
278                    .fold(F::zero(), |acc, v| acc + v)
279                    / n;
280                let variance = transformed
281                    .iter()
282                    .copied()
283                    .map(|v| (v - mean) * (v - mean))
284                    .fold(F::zero(), |acc, v| acc + v)
285                    / n;
286                means_arr[j] = mean;
287                stds_arr[j] = variance.sqrt();
288            }
289            (Some(means_arr), Some(stds_arr))
290        } else {
291            (None, None)
292        };
293
294        Ok(FittedPowerTransformer {
295            lambdas,
296            means,
297            stds,
298        })
299    }
300}
301
302impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPowerTransformer<F> {
303    type Output = Array2<F>;
304    type Error = FerroError;
305
306    /// Apply the Yeo-Johnson transform and optionally standardize.
307    ///
308    /// # Errors
309    ///
310    /// Returns [`FerroError::ShapeMismatch`] if the number of columns does not
311    /// match the number of features seen during fitting.
312    fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
313        let n_features = self.lambdas.len();
314        if x.ncols() != n_features {
315            return Err(FerroError::ShapeMismatch {
316                expected: vec![x.nrows(), n_features],
317                actual: vec![x.nrows(), x.ncols()],
318                context: "FittedPowerTransformer::transform".into(),
319            });
320        }
321
322        let mut out = x.to_owned();
323        for (j, mut col) in out.columns_mut().into_iter().enumerate() {
324            let lambda = self.lambdas[j];
325            for v in col.iter_mut() {
326                *v = yeo_johnson(*v, lambda);
327            }
328
329            // Standardize if requested
330            if let (Some(means), Some(stds)) = (&self.means, &self.stds) {
331                let m = means[j];
332                let s = stds[j];
333                if s > F::zero() {
334                    for v in col.iter_mut() {
335                        *v = (*v - m) / s;
336                    }
337                }
338            }
339        }
340        Ok(out)
341    }
342}
343
344/// Implement `Transform` on the unfitted transformer to satisfy the
345/// `FitTransform: Transform` supertrait bound.
346impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for PowerTransformer<F> {
347    type Output = Array2<F>;
348    type Error = FerroError;
349
350    /// Always returns an error — the transformer must be fitted first.
351    fn transform(&self, _x: &Array2<F>) -> Result<Array2<F>, FerroError> {
352        Err(FerroError::InvalidParameter {
353            name: "PowerTransformer".into(),
354            reason: "transformer must be fitted before calling transform; use fit() first".into(),
355        })
356    }
357}
358
359impl<F: Float + Send + Sync + 'static> FitTransform<Array2<F>> for PowerTransformer<F> {
360    type FitError = FerroError;
361
362    /// Fit the transformer on `x` and return the transformed output in one step.
363    ///
364    /// # Errors
365    ///
366    /// Returns an error if fitting fails.
367    fn fit_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
368        let fitted = self.fit(x, &())?;
369        fitted.transform(x)
370    }
371}
372
373// ---------------------------------------------------------------------------
374// Pipeline integration (generic)
375// ---------------------------------------------------------------------------
376
377impl<F: Float + Send + Sync + 'static> PipelineTransformer<F> for PowerTransformer<F> {
378    /// Fit the transformer using the pipeline interface.
379    ///
380    /// # Errors
381    ///
382    /// Propagates errors from [`Fit::fit`].
383    fn fit_pipeline(
384        &self,
385        x: &Array2<F>,
386        _y: &Array1<F>,
387    ) -> Result<Box<dyn FittedPipelineTransformer<F>>, FerroError> {
388        let fitted = self.fit(x, &())?;
389        Ok(Box::new(fitted))
390    }
391}
392
393impl<F: Float + Send + Sync + 'static> FittedPipelineTransformer<F> for FittedPowerTransformer<F> {
394    /// Transform data using the pipeline interface.
395    ///
396    /// # Errors
397    ///
398    /// Propagates errors from [`Transform::transform`].
399    fn transform_pipeline(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
400        self.transform(x)
401    }
402}
403
404// ---------------------------------------------------------------------------
405// Tests
406// ---------------------------------------------------------------------------
407
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    #[test]
    fn test_yeo_johnson_identity_at_lambda_one() {
        // λ = 1 on non-negative input: ((y+1)^1 - 1)/1 = y, i.e. the identity.
        for &y in &[0.0_f64, 0.5, 1.0, 2.0, 5.0] {
            assert_abs_diff_eq!(yeo_johnson(y, 1.0), y, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_yeo_johnson_log_at_lambda_zero() {
        // λ = 0 on non-negative input degenerates to ln(y + 1).
        for &y in &[0.0_f64, 0.5, 1.0, 2.0] {
            assert_abs_diff_eq!(yeo_johnson(y, 0.0), (y + 1.0).ln(), epsilon = 1e-10);
        }
    }

    #[test]
    fn test_yeo_johnson_negative_at_lambda_two() {
        // λ = 2 on negative input degenerates to -ln(1 - y).
        for &y in &[-0.5_f64, -1.0, -2.0] {
            assert_abs_diff_eq!(yeo_johnson(y, 2.0), -(1.0 - y).ln(), epsilon = 1e-10);
        }
    }

    #[test]
    fn test_power_transformer_fit_basic() {
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = PowerTransformer::<f64>::new().fit(&x, &()).unwrap();
        // The optimizer searches [-3, 3], so the fitted lambda must lie inside.
        let lam = fitted.lambdas()[0];
        assert!((-3.0..=3.0).contains(&lam));
    }

    #[test]
    fn test_power_transformer_transform_shape() {
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let out = PowerTransformer::<f64>::new()
            .fit(&x, &())
            .unwrap()
            .transform(&x)
            .unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    #[test]
    fn test_standardize_produces_zero_mean() {
        // standardize = true by default
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let out = PowerTransformer::<f64>::new()
            .fit(&x, &())
            .unwrap()
            .transform(&x)
            .unwrap();
        let col_mean: f64 = out.column(0).iter().sum::<f64>() / out.nrows() as f64;
        assert_abs_diff_eq!(col_mean, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_without_standardize() {
        let pt = PowerTransformer::<f64>::without_standardize();
        assert!(!pt.standardize());
        let x = array![[1.0], [2.0], [3.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        // No standardization parameters should be recorded.
        assert!(fitted.means.is_none());
        assert!(fitted.stds.is_none());
        assert_eq!(fitted.transform(&x).unwrap().shape(), x.shape());
    }

    #[test]
    fn test_fit_transform_equivalence() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let combined = pt.fit_transform(&x).unwrap();
        let separate = pt.fit(&x, &()).unwrap().transform(&x).unwrap();
        for (lhs, rhs) in combined.iter().zip(separate.iter()) {
            assert_abs_diff_eq!(lhs, rhs, epsilon = 1e-12);
        }
    }

    #[test]
    fn test_shape_mismatch_error() {
        let fitted = PowerTransformer::<f64>::new()
            .fit(&array![[1.0, 2.0], [3.0, 4.0]], &())
            .unwrap();
        // Three columns instead of the two seen at fit time.
        assert!(fitted.transform(&array![[1.0, 2.0, 3.0]]).is_err());
    }

    #[test]
    fn test_insufficient_samples_error() {
        let empty: Array2<f64> = Array2::zeros((0, 2));
        assert!(PowerTransformer::<f64>::new().fit(&empty, &()).is_err());
    }

    #[test]
    fn test_unfitted_transform_error() {
        let pt = PowerTransformer::<f64>::new();
        assert!(pt.transform(&array![[1.0, 2.0]]).is_err());
    }

    #[test]
    fn test_negative_values_supported() {
        // Yeo-Johnson is defined for negative inputs as well.
        let x = array![[-2.0], [-1.0], [0.0], [1.0], [2.0]];
        let out = PowerTransformer::<f64>::without_standardize()
            .fit(&x, &())
            .unwrap()
            .transform(&x)
            .unwrap();
        for v in out.iter() {
            assert!(v.is_finite(), "got non-finite value: {v}");
        }
    }

    #[test]
    fn test_pipeline_integration() {
        use ferrolearn_core::pipeline::PipelineTransformer;
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let y = Array1::zeros(3);
        let fitted = pt.fit_pipeline(&x, &y).unwrap();
        assert_eq!(fitted.transform_pipeline(&x).unwrap().shape(), x.shape());
    }
}