1use ferrolearn_core::error::FerroError;
22use ferrolearn_core::pipeline::{FittedPipelineTransformer, PipelineTransformer};
23use ferrolearn_core::traits::{Fit, FitTransform, Transform};
24use ndarray::{Array1, Array2};
25use num_traits::Float;
26
27fn yeo_johnson<F: Float>(y: F, lambda: F) -> F {
33 let zero = F::zero();
34 let one = F::one();
35 let two = one + one;
36 let eps = F::from(1e-10_f64).unwrap_or(F::epsilon());
37
38 if y >= zero {
39 if (lambda - zero).abs() < eps {
40 (y + one).ln()
42 } else {
43 ((y + one).powf(lambda) - one) / lambda
45 }
46 } else {
47 let two_minus_lambda = two - lambda;
49 if (two_minus_lambda).abs() < eps {
50 -(one - y).ln()
52 } else {
53 -((one - y).powf(two_minus_lambda) - one) / two_minus_lambda
55 }
56 }
57}
58
59fn log_likelihood_yj<F: Float>(col: &[F], lambda: F) -> F {
68 let n = F::from(col.len()).unwrap_or(F::one());
69 let one = F::one();
70 let two = one + one;
71 let pi2 = F::from(std::f64::consts::TAU).unwrap_or(F::one()); let transformed: Vec<F> = col
75 .iter()
76 .copied()
77 .map(|v| yeo_johnson(v, lambda))
78 .collect();
79
80 let mean = transformed
82 .iter()
83 .copied()
84 .fold(F::zero(), |acc, v| acc + v)
85 / n;
86 let variance = transformed
87 .iter()
88 .copied()
89 .map(|v| (v - mean) * (v - mean))
90 .fold(F::zero(), |acc, v| acc + v)
91 / n;
92
93 if variance <= F::zero() {
94 return F::neg_infinity();
95 }
96
97 let normal_ll = -n / two * (pi2 * variance).ln() - n / two;
100
101 let lambda_minus_1 = lambda - one;
104 let jacobian: F = col
105 .iter()
106 .copied()
107 .fold(F::zero(), |acc, y| acc + (y.abs() + one).ln());
108 let jacobian_ll = lambda_minus_1 * jacobian;
109
110 normal_ll + jacobian_ll
111}
112
/// Yeo-Johnson power transformer (unfitted configuration).
///
/// During `fit`, a per-feature exponent `lambda` is chosen to make each
/// feature more Gaussian-like. Unlike Box-Cox, Yeo-Johnson accepts
/// negative input values.
#[derive(Debug, Clone)]
pub struct PowerTransformer<F> {
    // When true (the default), each transformed feature is additionally
    // standardized to zero mean / unit variance using fit-time statistics.
    pub(crate) standardize: bool,
    // Ties the otherwise-unused element type parameter `F` to the struct.
    _marker: std::marker::PhantomData<F>,
}
141
142impl<F: Float + Send + Sync + 'static> PowerTransformer<F> {
143 #[must_use]
145 pub fn new() -> Self {
146 Self {
147 standardize: true,
148 _marker: std::marker::PhantomData,
149 }
150 }
151
152 #[must_use]
154 pub fn without_standardize() -> Self {
155 Self {
156 standardize: false,
157 _marker: std::marker::PhantomData,
158 }
159 }
160
161 #[must_use]
163 pub fn standardize(&self) -> bool {
164 self.standardize
165 }
166}
167
168impl<F: Float + Send + Sync + 'static> Default for PowerTransformer<F> {
169 fn default() -> Self {
170 Self::new()
171 }
172}
173
/// State produced by fitting a [`PowerTransformer`].
#[derive(Debug, Clone)]
pub struct FittedPowerTransformer<F> {
    // Optimal Yeo-Johnson lambda per feature, selected by grid search.
    pub(crate) lambdas: Array1<F>,
    // Per-feature means of the transformed training data; `Some` only when
    // the transformer was configured to standardize.
    pub(crate) means: Option<Array1<F>>,
    // Per-feature (population) standard deviations of the transformed
    // training data; `Some` only when standardizing.
    pub(crate) stds: Option<Array1<F>>,
}
191
impl<F: Float + Send + Sync + 'static> FittedPowerTransformer<F> {
    /// The fitted per-feature Yeo-Johnson lambdas.
    #[must_use]
    pub fn lambdas(&self) -> &Array1<F> {
        &self.lambdas
    }
}
199
200impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, ()> for PowerTransformer<F> {
205 type Fitted = FittedPowerTransformer<F>;
206 type Error = FerroError;
207
208 fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedPowerTransformer<F>, FerroError> {
218 let n_samples = x.nrows();
219 if n_samples == 0 {
220 return Err(FerroError::InsufficientSamples {
221 required: 1,
222 actual: 0,
223 context: "PowerTransformer::fit".into(),
224 });
225 }
226
227 let n_features = x.ncols();
228 let mut lambdas = Array1::zeros(n_features);
229
230 let n_candidates = 201_usize;
232 let lambda_min = F::from(-3.0_f64).unwrap_or(F::zero());
233 let lambda_max = F::from(3.0_f64).unwrap_or(F::one());
234 let step = (lambda_max - lambda_min) / F::from(n_candidates - 1).unwrap_or(F::one());
235
236 for j in 0..n_features {
237 let col: Vec<F> = x.column(j).iter().copied().collect();
238
239 let mut best_ll = F::neg_infinity();
240 let mut best_lambda = F::one(); for k in 0..n_candidates {
243 let lambda = lambda_min + step * F::from(k).unwrap_or(F::zero());
244 let ll = log_likelihood_yj(&col, lambda);
245 if ll > best_ll {
246 best_ll = ll;
247 best_lambda = lambda;
248 }
249 }
250
251 lambdas[j] = best_lambda;
252 }
253
254 let (means, stds) = if self.standardize {
256 let n = F::from(n_samples).unwrap_or(F::one());
257 let mut means_arr = Array1::zeros(n_features);
258 let mut stds_arr = Array1::zeros(n_features);
259 for j in 0..n_features {
260 let lambda = lambdas[j];
261 let transformed: Vec<F> = x
262 .column(j)
263 .iter()
264 .copied()
265 .map(|v| yeo_johnson(v, lambda))
266 .collect();
267 let mean = transformed
268 .iter()
269 .copied()
270 .fold(F::zero(), |acc, v| acc + v)
271 / n;
272 let variance = transformed
273 .iter()
274 .copied()
275 .map(|v| (v - mean) * (v - mean))
276 .fold(F::zero(), |acc, v| acc + v)
277 / n;
278 means_arr[j] = mean;
279 stds_arr[j] = variance.sqrt();
280 }
281 (Some(means_arr), Some(stds_arr))
282 } else {
283 (None, None)
284 };
285
286 Ok(FittedPowerTransformer {
287 lambdas,
288 means,
289 stds,
290 })
291 }
292}
293
294impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPowerTransformer<F> {
295 type Output = Array2<F>;
296 type Error = FerroError;
297
298 fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
305 let n_features = self.lambdas.len();
306 if x.ncols() != n_features {
307 return Err(FerroError::ShapeMismatch {
308 expected: vec![x.nrows(), n_features],
309 actual: vec![x.nrows(), x.ncols()],
310 context: "FittedPowerTransformer::transform".into(),
311 });
312 }
313
314 let mut out = x.to_owned();
315 for (j, mut col) in out.columns_mut().into_iter().enumerate() {
316 let lambda = self.lambdas[j];
317 for v in col.iter_mut() {
318 *v = yeo_johnson(*v, lambda);
319 }
320
321 if let (Some(means), Some(stds)) = (&self.means, &self.stds) {
323 let m = means[j];
324 let s = stds[j];
325 if s > F::zero() {
326 for v in col.iter_mut() {
327 *v = (*v - m) / s;
328 }
329 }
330 }
331 }
332 Ok(out)
333 }
334}
335
336impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for PowerTransformer<F> {
339 type Output = Array2<F>;
340 type Error = FerroError;
341
342 fn transform(&self, _x: &Array2<F>) -> Result<Array2<F>, FerroError> {
344 Err(FerroError::InvalidParameter {
345 name: "PowerTransformer".into(),
346 reason: "transformer must be fitted before calling transform; use fit() first".into(),
347 })
348 }
349}
350
351impl<F: Float + Send + Sync + 'static> FitTransform<Array2<F>> for PowerTransformer<F> {
352 type FitError = FerroError;
353
354 fn fit_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
360 let fitted = self.fit(x, &())?;
361 fitted.transform(x)
362 }
363}
364
365impl<F: Float + Send + Sync + 'static> PipelineTransformer<F> for PowerTransformer<F> {
370 fn fit_pipeline(
376 &self,
377 x: &Array2<F>,
378 _y: &Array1<F>,
379 ) -> Result<Box<dyn FittedPipelineTransformer<F>>, FerroError> {
380 let fitted = self.fit(x, &())?;
381 Ok(Box::new(fitted))
382 }
383}
384
impl<F: Float + Send + Sync + 'static> FittedPipelineTransformer<F> for FittedPowerTransformer<F> {
    /// Pipeline adapter: delegates to the inherent `transform`.
    fn transform_pipeline(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
        self.transform(x)
    }
}
395
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    // lambda = 1 leaves non-negative inputs unchanged: ((y+1)^1 - 1)/1 = y.
    #[test]
    fn test_yeo_johnson_identity_at_lambda_one() {
        let one = 1.0_f64;
        for v in [0.0, 0.5, 1.0, 2.0, 5.0] {
            let out = yeo_johnson(v, one);
            assert_abs_diff_eq!(out, v, epsilon = 1e-10);
        }
    }

    // lambda = 0 hits the log branch for non-negative inputs: ln(y + 1).
    #[test]
    fn test_yeo_johnson_log_at_lambda_zero() {
        let zero = 0.0_f64;
        for v in [0.0, 0.5, 1.0, 2.0] {
            let expected = (v + 1.0).ln();
            assert_abs_diff_eq!(yeo_johnson(v, zero), expected, epsilon = 1e-10);
        }
    }

    // lambda = 2 hits the log branch for negative inputs: -ln(1 - y).
    #[test]
    fn test_yeo_johnson_negative_at_lambda_two() {
        let two = 2.0_f64;
        for v in [-0.5, -1.0, -2.0] {
            let expected = -(1.0 - v).ln();
            assert_abs_diff_eq!(yeo_johnson(v, two), expected, epsilon = 1e-10);
        }
    }

    // The selected lambda must come from the search grid [-3, 3].
    #[test]
    fn test_power_transformer_fit_basic() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let lambda = fitted.lambdas()[0];
        assert!(lambda >= -3.0 && lambda <= 3.0);
    }

    // transform preserves the input's (rows, cols) shape.
    #[test]
    fn test_power_transformer_transform_shape() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    // With standardization enabled, the training data transforms to mean ~0.
    #[test]
    fn test_standardize_produces_zero_mean() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let out = fitted.transform(&x).unwrap();
        let mean: f64 = out.column(0).iter().sum::<f64>() / out.nrows() as f64;
        assert_abs_diff_eq!(mean, 0.0, epsilon = 1e-6);
    }

    // without_standardize() must skip storing means/stds entirely.
    #[test]
    fn test_without_standardize() {
        let pt = PowerTransformer::<f64>::without_standardize();
        assert!(!pt.standardize());
        let x = array![[1.0], [2.0], [3.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        assert!(fitted.means.is_none());
        assert!(fitted.stds.is_none());
        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    // fit_transform must equal fit followed by transform on the same data.
    #[test]
    fn test_fit_transform_equivalence() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let via_ft = pt.fit_transform(&x).unwrap();
        let fitted = pt.fit(&x, &()).unwrap();
        let via_sep = fitted.transform(&x).unwrap();
        for (a, b) in via_ft.iter().zip(via_sep.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-12);
        }
    }

    // Transforming data with the wrong column count is a ShapeMismatch error.
    #[test]
    fn test_shape_mismatch_error() {
        let pt = PowerTransformer::<f64>::new();
        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
        let fitted = pt.fit(&x_train, &()).unwrap();
        let x_bad = array![[1.0, 2.0, 3.0]];
        assert!(fitted.transform(&x_bad).is_err());
    }

    // Fitting on zero rows must fail with InsufficientSamples.
    #[test]
    fn test_insufficient_samples_error() {
        let pt = PowerTransformer::<f64>::new();
        let x: Array2<f64> = Array2::zeros((0, 2));
        assert!(pt.fit(&x, &()).is_err());
    }

    // Calling transform on an unfitted transformer is an error, not a panic.
    #[test]
    fn test_unfitted_transform_error() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0]];
        assert!(pt.transform(&x).is_err());
    }

    // Yeo-Johnson (unlike Box-Cox) must handle negative inputs, producing
    // finite output everywhere.
    #[test]
    fn test_negative_values_supported() {
        let pt = PowerTransformer::<f64>::without_standardize();
        let x = array![[-2.0], [-1.0], [0.0], [1.0], [2.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let out = fitted.transform(&x).unwrap();
        for v in out.iter() {
            assert!(v.is_finite(), "got non-finite value: {v}");
        }
    }

    // End-to-end through the trait-object pipeline adapters.
    #[test]
    fn test_pipeline_integration() {
        use ferrolearn_core::pipeline::PipelineTransformer;
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let y = Array1::zeros(3);
        let fitted = pt.fit_pipeline(&x, &y).unwrap();
        let result = fitted.transform_pipeline(&x).unwrap();
        assert_eq!(result.shape(), x.shape());
    }
}