1use ferrolearn_core::error::FerroError;
22use ferrolearn_core::pipeline::{FittedPipelineTransformer, PipelineTransformer};
23use ferrolearn_core::traits::{Fit, FitTransform, Transform};
24use ndarray::{Array1, Array2};
25use num_traits::Float;
26
27fn yeo_johnson<F: Float>(y: F, lambda: F) -> F {
33 let zero = F::zero();
34 let one = F::one();
35 let two = one + one;
36 let eps = F::from(1e-10_f64).unwrap_or(F::epsilon());
37
38 if y >= zero {
39 if (lambda - zero).abs() < eps {
40 (y + one).ln()
42 } else {
43 ((y + one).powf(lambda) - one) / lambda
45 }
46 } else {
47 let two_minus_lambda = two - lambda;
49 if (two_minus_lambda).abs() < eps {
50 -(one - y).ln()
52 } else {
53 -((one - y).powf(two_minus_lambda) - one) / two_minus_lambda
55 }
56 }
57}
58
59fn log_likelihood_yj<F: Float>(col: &[F], lambda: F) -> F {
68 let n = F::from(col.len()).unwrap_or(F::one());
69 let one = F::one();
70 let two = one + one;
71 let pi2 = F::from(std::f64::consts::TAU).unwrap_or(F::one()); let transformed: Vec<F> = col
75 .iter()
76 .copied()
77 .map(|v| yeo_johnson(v, lambda))
78 .collect();
79
80 let mean = transformed
82 .iter()
83 .copied()
84 .fold(F::zero(), |acc, v| acc + v)
85 / n;
86 let variance = transformed
87 .iter()
88 .copied()
89 .map(|v| (v - mean) * (v - mean))
90 .fold(F::zero(), |acc, v| acc + v)
91 / n;
92
93 if variance <= F::zero() {
94 return F::neg_infinity();
95 }
96
97 let normal_ll = -n / two * (pi2 * variance).ln() - n / two;
100
101 let lambda_minus_1 = lambda - one;
104 let jacobian: F = col
105 .iter()
106 .copied()
107 .fold(F::zero(), |acc, y| acc + (y.abs() + one).ln());
108 let jacobian_ll = lambda_minus_1 * jacobian;
109
110 normal_ll + jacobian_ll
111}
112
/// Per-column Yeo-Johnson power transformer.
///
/// Fitting selects one `lambda` per feature by maximum likelihood; by default
/// the transformed output is also standardized to zero mean and unit variance
/// using statistics captured at fit time.
#[derive(Debug, Clone)]
pub struct PowerTransformer<F> {
    // When true, transform() centers and scales each transformed column.
    pub(crate) standardize: bool,
    // Ties the element type `F` to the struct without storing a value of it.
    _marker: std::marker::PhantomData<F>,
}
141
142impl<F: Float + Send + Sync + 'static> PowerTransformer<F> {
143 #[must_use]
145 pub fn new() -> Self {
146 Self {
147 standardize: true,
148 _marker: std::marker::PhantomData,
149 }
150 }
151
152 #[must_use]
154 pub fn without_standardize() -> Self {
155 Self {
156 standardize: false,
157 _marker: std::marker::PhantomData,
158 }
159 }
160
161 #[must_use]
163 pub fn standardize(&self) -> bool {
164 self.standardize
165 }
166}
167
168impl<F: Float + Send + Sync + 'static> Default for PowerTransformer<F> {
169 fn default() -> Self {
170 Self::new()
171 }
172}
173
/// Fitted state produced by fitting a [`PowerTransformer`].
#[derive(Debug, Clone)]
pub struct FittedPowerTransformer<F> {
    // One Yeo-Johnson lambda per input feature.
    pub(crate) lambdas: Array1<F>,
    // Per-feature means of the transformed training data; `Some` only when
    // the transformer was configured to standardize.
    pub(crate) means: Option<Array1<F>>,
    // Per-feature standard deviations of the transformed training data;
    // `Some` only when the transformer was configured to standardize.
    pub(crate) stds: Option<Array1<F>>,
}
191
impl<F: Float + Send + Sync + 'static> FittedPowerTransformer<F> {
    /// Returns the fitted per-feature Yeo-Johnson lambdas.
    #[must_use]
    pub fn lambdas(&self) -> &Array1<F> {
        &self.lambdas
    }
}
199
200impl<F: Float + Send + Sync + 'static> Fit<Array2<F>, ()> for PowerTransformer<F> {
205 type Fitted = FittedPowerTransformer<F>;
206 type Error = FerroError;
207
208 fn fit(&self, x: &Array2<F>, _y: &()) -> Result<FittedPowerTransformer<F>, FerroError> {
219 let n_samples = x.nrows();
220 if n_samples == 0 {
221 return Err(FerroError::InsufficientSamples {
222 required: 1,
223 actual: 0,
224 context: "PowerTransformer::fit".into(),
225 });
226 }
227
228 let n_features = x.ncols();
229 let mut lambdas = Array1::zeros(n_features);
230
231 for j in 0..n_features {
232 let col_f64: Vec<f64> = x
234 .column(j)
235 .iter()
236 .copied()
237 .map(|v| v.to_f64().unwrap_or(0.0))
238 .collect();
239
240 let result = ferrolearn_numerical::optimize::brent_bounded(
242 |lambda| {
243 let lam = F::from(lambda).unwrap_or(F::one());
244 let col_f: Vec<F> = col_f64
246 .iter()
247 .map(|&v| F::from(v).unwrap_or(F::zero()))
248 .collect();
249 let ll = log_likelihood_yj(&col_f, lam);
250 -ll.to_f64().unwrap_or(f64::INFINITY)
252 },
253 -3.0,
254 3.0,
255 1e-8,
256 500,
257 );
258
259 lambdas[j] = F::from(result.x).unwrap_or(F::one());
260 }
261
262 let (means, stds) = if self.standardize {
264 let n = F::from(n_samples).unwrap_or(F::one());
265 let mut means_arr = Array1::zeros(n_features);
266 let mut stds_arr = Array1::zeros(n_features);
267 for j in 0..n_features {
268 let lambda = lambdas[j];
269 let transformed: Vec<F> = x
270 .column(j)
271 .iter()
272 .copied()
273 .map(|v| yeo_johnson(v, lambda))
274 .collect();
275 let mean = transformed
276 .iter()
277 .copied()
278 .fold(F::zero(), |acc, v| acc + v)
279 / n;
280 let variance = transformed
281 .iter()
282 .copied()
283 .map(|v| (v - mean) * (v - mean))
284 .fold(F::zero(), |acc, v| acc + v)
285 / n;
286 means_arr[j] = mean;
287 stds_arr[j] = variance.sqrt();
288 }
289 (Some(means_arr), Some(stds_arr))
290 } else {
291 (None, None)
292 };
293
294 Ok(FittedPowerTransformer {
295 lambdas,
296 means,
297 stds,
298 })
299 }
300}
301
302impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for FittedPowerTransformer<F> {
303 type Output = Array2<F>;
304 type Error = FerroError;
305
306 fn transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
313 let n_features = self.lambdas.len();
314 if x.ncols() != n_features {
315 return Err(FerroError::ShapeMismatch {
316 expected: vec![x.nrows(), n_features],
317 actual: vec![x.nrows(), x.ncols()],
318 context: "FittedPowerTransformer::transform".into(),
319 });
320 }
321
322 let mut out = x.to_owned();
323 for (j, mut col) in out.columns_mut().into_iter().enumerate() {
324 let lambda = self.lambdas[j];
325 for v in col.iter_mut() {
326 *v = yeo_johnson(*v, lambda);
327 }
328
329 if let (Some(means), Some(stds)) = (&self.means, &self.stds) {
331 let m = means[j];
332 let s = stds[j];
333 if s > F::zero() {
334 for v in col.iter_mut() {
335 *v = (*v - m) / s;
336 }
337 }
338 }
339 }
340 Ok(out)
341 }
342}
343
344impl<F: Float + Send + Sync + 'static> Transform<Array2<F>> for PowerTransformer<F> {
347 type Output = Array2<F>;
348 type Error = FerroError;
349
350 fn transform(&self, _x: &Array2<F>) -> Result<Array2<F>, FerroError> {
352 Err(FerroError::InvalidParameter {
353 name: "PowerTransformer".into(),
354 reason: "transformer must be fitted before calling transform; use fit() first".into(),
355 })
356 }
357}
358
359impl<F: Float + Send + Sync + 'static> FitTransform<Array2<F>> for PowerTransformer<F> {
360 type FitError = FerroError;
361
362 fn fit_transform(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
368 let fitted = self.fit(x, &())?;
369 fitted.transform(x)
370 }
371}
372
373impl<F: Float + Send + Sync + 'static> PipelineTransformer<F> for PowerTransformer<F> {
378 fn fit_pipeline(
384 &self,
385 x: &Array2<F>,
386 _y: &Array1<F>,
387 ) -> Result<Box<dyn FittedPipelineTransformer<F>>, FerroError> {
388 let fitted = self.fit(x, &())?;
389 Ok(Box::new(fitted))
390 }
391}
392
393impl<F: Float + Send + Sync + 'static> FittedPipelineTransformer<F> for FittedPowerTransformer<F> {
394 fn transform_pipeline(&self, x: &Array2<F>) -> Result<Array2<F>, FerroError> {
400 self.transform(x)
401 }
402}
403
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use ndarray::array;

    // lambda = 1 is the identity on non-negative inputs:
    // ((y + 1)^1 - 1) / 1 == y.
    #[test]
    fn test_yeo_johnson_identity_at_lambda_one() {
        let one = 1.0_f64;
        for v in [0.0, 0.5, 1.0, 2.0, 5.0] {
            let out = yeo_johnson(v, one);
            assert_abs_diff_eq!(out, v, epsilon = 1e-10);
        }
    }

    // lambda = 0 hits the singular branch: ln(y + 1) for y >= 0.
    #[test]
    fn test_yeo_johnson_log_at_lambda_zero() {
        let zero = 0.0_f64;
        for v in [0.0, 0.5, 1.0, 2.0] {
            let expected = (v + 1.0).ln();
            assert_abs_diff_eq!(yeo_johnson(v, zero), expected, epsilon = 1e-10);
        }
    }

    // lambda = 2 hits the other singular branch: -ln(1 - y) for y < 0.
    #[test]
    fn test_yeo_johnson_negative_at_lambda_two() {
        let two = 2.0_f64;
        for v in [-0.5, -1.0, -2.0] {
            let expected = -(1.0 - v).ln();
            assert_abs_diff_eq!(yeo_johnson(v, two), expected, epsilon = 1e-10);
        }
    }

    // The fitted lambda must lie within the Brent search bounds [-3, 3].
    #[test]
    fn test_power_transformer_fit_basic() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let lambda = fitted.lambdas()[0];
        assert!(lambda >= -3.0 && lambda <= 3.0);
    }

    // transform() preserves the input's shape.
    #[test]
    fn test_power_transformer_transform_shape() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    // With standardization enabled, transformed training data should be
    // (numerically) zero-mean per column.
    #[test]
    fn test_standardize_produces_zero_mean() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let out = fitted.transform(&x).unwrap();
        let mean: f64 = out.column(0).iter().sum::<f64>() / out.nrows() as f64;
        assert_abs_diff_eq!(mean, 0.0, epsilon = 1e-6);
    }

    // without_standardize() must skip capturing mean/std statistics.
    #[test]
    fn test_without_standardize() {
        let pt = PowerTransformer::<f64>::without_standardize();
        assert!(!pt.standardize());
        let x = array![[1.0], [2.0], [3.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        assert!(fitted.means.is_none());
        assert!(fitted.stds.is_none());
        let out = fitted.transform(&x).unwrap();
        assert_eq!(out.shape(), x.shape());
    }

    // fit_transform() must equal fit() followed by transform() on the
    // same data.
    #[test]
    fn test_fit_transform_equivalence() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let via_ft = pt.fit_transform(&x).unwrap();
        let fitted = pt.fit(&x, &()).unwrap();
        let via_sep = fitted.transform(&x).unwrap();
        for (a, b) in via_ft.iter().zip(via_sep.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-12);
        }
    }

    // Transforming data with a different column count must error.
    #[test]
    fn test_shape_mismatch_error() {
        let pt = PowerTransformer::<f64>::new();
        let x_train = array![[1.0, 2.0], [3.0, 4.0]];
        let fitted = pt.fit(&x_train, &()).unwrap();
        let x_bad = array![[1.0, 2.0, 3.0]];
        assert!(fitted.transform(&x_bad).is_err());
    }

    // Fitting on zero rows must error (InsufficientSamples).
    #[test]
    fn test_insufficient_samples_error() {
        let pt = PowerTransformer::<f64>::new();
        let x: Array2<f64> = Array2::zeros((0, 2));
        assert!(pt.fit(&x, &()).is_err());
    }

    // transform() on an unfitted transformer must error.
    #[test]
    fn test_unfitted_transform_error() {
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0]];
        assert!(pt.transform(&x).is_err());
    }

    // Yeo-Johnson (unlike Box-Cox) handles negative inputs; outputs must
    // stay finite across a sign-mixed column.
    #[test]
    fn test_negative_values_supported() {
        let pt = PowerTransformer::<f64>::without_standardize();
        let x = array![[-2.0], [-1.0], [0.0], [1.0], [2.0]];
        let fitted = pt.fit(&x, &()).unwrap();
        let out = fitted.transform(&x).unwrap();
        for v in out.iter() {
            assert!(v.is_finite(), "got non-finite value: {v}");
        }
    }

    // End-to-end through the pipeline trait-object adapters.
    #[test]
    fn test_pipeline_integration() {
        use ferrolearn_core::pipeline::PipelineTransformer;
        let pt = PowerTransformer::<f64>::new();
        let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
        let y = Array1::zeros(3);
        let fitted = pt.fit_pipeline(&x, &y).unwrap();
        let result = fitted.transform_pipeline(&x).unwrap();
        assert_eq!(result.shape(), x.shape());
    }
}