1use anofox_ml_core::{FitUnsupervised, Float, InverseTransform, Result, RustMlError, Transform};
2use ndarray::{Array1, Array2};
3
4#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
13pub struct PowerTransformer {
14 pub standardize: bool,
16}
17
18impl PowerTransformer {
19 pub fn new() -> Self {
21 Self { standardize: true }
22 }
23
24 pub fn standardize(mut self, standardize: bool) -> Self {
26 self.standardize = standardize;
27 self
28 }
29}
30
31impl Default for PowerTransformer {
32 fn default() -> Self {
33 Self::new()
34 }
35}
36
37#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
40#[serde(bound(deserialize = "F: serde::de::DeserializeOwned"))]
41pub struct FittedPowerTransformer<F: Float> {
42 lambdas: Array1<F>,
43 means: Array1<F>,
44 stds: Array1<F>,
45 standardize: bool,
46}
47
48fn yeo_johnson<F: Float>(x: F, lam: F) -> F {
50 let zero = F::zero();
51 let one = F::one();
52 let two = F::from_f64(2.0).unwrap();
53 let eps = F::from_f64(1e-10).unwrap();
54
55 if x >= zero {
56 if (lam - zero).abs() > eps {
57 ((x + one).powf(lam) - one) / lam
59 } else {
60 (x + one).ln()
62 }
63 } else {
64 if (lam - two).abs() > eps {
66 -((-x + one).powf(two - lam) - one) / (two - lam)
68 } else {
69 -(-x + one).ln()
71 }
72 }
73}
74
75fn yeo_johnson_inverse<F: Float>(y: F, lam: F) -> F {
77 let zero = F::zero();
78 let one = F::one();
79 let two = F::from_f64(2.0).unwrap();
80 let eps = F::from_f64(1e-10).unwrap();
81
82 if y >= zero {
83 if (lam - zero).abs() > eps {
84 (y * lam + one).powf(one / lam) - one
86 } else {
87 y.exp() - one
89 }
90 } else {
91 if (lam - two).abs() > eps {
93 one - (-(two - lam) * y + one).powf(one / (two - lam))
95 } else {
96 one - (-y).exp()
98 }
99 }
100}
101
102fn neg_log_likelihood<F: Float>(col: &[F], lam: F) -> f64 {
106 let n = col.len() as f64;
107 let transformed: Vec<f64> = col
109 .iter()
110 .map(|&x| yeo_johnson(x, lam).to_f64().unwrap())
111 .collect();
112
113 let mean: f64 = transformed.iter().sum::<f64>() / n;
115
116 let var: f64 = transformed
118 .iter()
119 .map(|&t| (t - mean) * (t - mean))
120 .sum::<f64>()
121 / n;
122 let var = var.max(1e-30); let lam_f64 = lam.to_f64().unwrap();
128 let jacobian_sum: f64 = col
129 .iter()
130 .map(|&x| {
131 let x_f64 = x.to_f64().unwrap();
132 let sign = if x_f64 >= 0.0 { 1.0 } else { -1.0 };
133 sign * (x_f64.abs() + 1.0).ln()
134 })
135 .sum();
136
137 n / 2.0 * var.ln() - (lam_f64 - 1.0) * jacobian_sum
138}
139
140impl<F: Float> FitUnsupervised<F> for PowerTransformer {
141 type Fitted = FittedPowerTransformer<F>;
142
143 fn fit(&self, x: &Array2<F>) -> Result<Self::Fitted> {
144 if x.is_empty() {
145 return Err(RustMlError::EmptyInput("input array is empty".into()));
146 }
147
148 let ncols = x.ncols();
149 let mut lambdas = Array1::<F>::zeros(ncols);
150
151 for j in 0..ncols {
153 let col: Vec<F> = x.column(j).to_vec();
154 let mut best_lam = F::zero();
155 let mut best_nll = f64::INFINITY;
156
157 let mut lam_val = -20i32; while lam_val <= 20 {
160 let lam = F::from_f64(lam_val as f64 / 10.0).unwrap();
161 let nll = neg_log_likelihood(&col, lam);
162 if nll < best_nll {
163 best_nll = nll;
164 best_lam = lam;
165 }
166 lam_val += 1;
167 }
168
169 lambdas[j] = best_lam;
170 }
171
172 let n = F::from_usize(x.nrows()).unwrap();
174 let mut means = Array1::<F>::zeros(ncols);
175 let mut stds = Array1::<F>::ones(ncols);
176
177 if self.standardize {
178 for j in 0..ncols {
180 let lam = lambdas[j];
181 let mut sum = F::zero();
182 for &val in x.column(j).iter() {
183 sum = sum + yeo_johnson(val, lam);
184 }
185 let mean = sum / n;
186 means[j] = mean;
187
188 let mut var_sum = F::zero();
189 for &val in x.column(j).iter() {
190 let t = yeo_johnson(val, lam) - mean;
191 var_sum = var_sum + t * t;
192 }
193 stds[j] = (var_sum / n).sqrt();
194 }
195 }
196
197 Ok(FittedPowerTransformer {
198 lambdas,
199 means,
200 stds,
201 standardize: self.standardize,
202 })
203 }
204}
205
206impl<F: Float> Transform<F> for FittedPowerTransformer<F> {
207 fn transform(&self, x: &Array2<F>) -> Result<Array2<F>> {
208 if x.ncols() != self.lambdas.len() {
209 return Err(RustMlError::ShapeMismatch(format!(
210 "expected {} features, got {}",
211 self.lambdas.len(),
212 x.ncols()
213 )));
214 }
215
216 let mut result = Array2::<F>::zeros(x.raw_dim());
217 for i in 0..x.nrows() {
218 for j in 0..x.ncols() {
219 let mut val = yeo_johnson(x[[i, j]], self.lambdas[j]);
220 if self.standardize {
221 val = val - self.means[j];
222 if self.stds[j] > F::from_f64(1e-15).unwrap() {
223 val = val / self.stds[j];
224 }
225 }
226 result[[i, j]] = val;
227 }
228 }
229 Ok(result)
230 }
231}
232
233impl<F: Float> InverseTransform<F> for FittedPowerTransformer<F> {
234 fn inverse_transform(&self, x: &Array2<F>) -> Result<Array2<F>> {
235 if x.ncols() != self.lambdas.len() {
236 return Err(RustMlError::ShapeMismatch(format!(
237 "expected {} features, got {}",
238 self.lambdas.len(),
239 x.ncols()
240 )));
241 }
242
243 let mut result = Array2::<F>::zeros(x.raw_dim());
244 for i in 0..x.nrows() {
245 for j in 0..x.ncols() {
246 let mut val = x[[i, j]];
247 if self.standardize {
249 if self.stds[j] > F::from_f64(1e-15).unwrap() {
250 val = val * self.stds[j];
251 }
252 val = val + self.means[j];
253 }
254 result[[i, j]] = yeo_johnson_inverse(val, self.lambdas[j]);
256 }
257 }
258 Ok(result)
259 }
260}
261
262impl<F: Float> FittedPowerTransformer<F> {
263 pub fn lambdas(&self) -> &Array1<F> {
265 &self.lambdas
266 }
267
268 pub fn means(&self) -> &Array1<F> {
270 &self.means
271 }
272
273 pub fn stds(&self) -> &Array1<F> {
275 &self.stds
276 }
277}
278
279#[cfg(test)]
280mod tests {
281 use super::*;
282 use approx::assert_abs_diff_eq;
283 use ndarray::array;
284
285 #[test]
286 fn test_fit_transform_basic() {
287 let x = array![
288 [1.0, -1.0],
289 [2.0, -2.0],
290 [3.0, -3.0],
291 [4.0, -4.0],
292 [5.0, -5.0],
293 ];
294 let pt = PowerTransformer::default();
295 let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
296 let transformed = fitted.transform(&x).unwrap();
297
298 let n = x.nrows() as f64;
300 for j in 0..x.ncols() {
301 let col_mean: f64 = transformed.column(j).sum() / n;
302 assert_abs_diff_eq!(col_mean, 0.0, epsilon = 1e-8);
303
304 let col_std: f64 = (transformed
305 .column(j)
306 .iter()
307 .map(|&v| (v - col_mean).powi(2))
308 .sum::<f64>()
309 / n)
310 .sqrt();
311 assert_abs_diff_eq!(col_std, 1.0, epsilon = 1e-6);
312 }
313 }
314
315 #[test]
316 fn test_inverse_transform_roundtrip() {
317 let x = array![[0.5, 1.0], [1.5, 2.0], [2.5, 3.0], [3.5, 4.0], [4.5, 5.0],];
318 let pt = PowerTransformer::default();
319 let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
320 let transformed = fitted.transform(&x).unwrap();
321 let recovered = fitted.inverse_transform(&transformed).unwrap();
322
323 for (a, b) in x.iter().zip(recovered.iter()) {
324 assert_abs_diff_eq!(a, b, epsilon = 1e-6);
325 }
326 }
327
328 #[test]
329 fn test_without_standardize() {
330 let x = array![[1.0], [2.0], [3.0], [4.0], [5.0]];
331 let pt = PowerTransformer::new().standardize(false);
332 let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
333 let transformed = fitted.transform(&x).unwrap();
334
335 let lam = fitted.lambdas()[0];
337 for i in 0..x.nrows() {
338 let expected = yeo_johnson(x[[i, 0]], lam);
339 assert_abs_diff_eq!(transformed[[i, 0]], expected, epsilon = 1e-10);
340 }
341 }
342
343 #[test]
344 fn test_yeo_johnson_identity_at_lambda_one() {
345 let one = 1.0_f64;
347 for &x in &[0.0, 1.0, 2.0, 5.0, 10.0] {
348 let result = yeo_johnson(x, one);
349 assert_abs_diff_eq!(result, x, epsilon = 1e-10);
350 }
351 }
352
353 #[test]
354 fn test_yeo_johnson_lambda_zero() {
355 let zero = 0.0_f64;
357 for &x in &[0.0, 1.0, 2.0, 5.0] {
358 let result = yeo_johnson(x, zero);
359 let expected = (x + 1.0).ln();
360 assert_abs_diff_eq!(result, expected, epsilon = 1e-10);
361 }
362 }
363
364 #[test]
365 fn test_yeo_johnson_negative_values() {
366 let lam = 0.5_f64;
368 for &x in &[-1.0, -2.0, -5.0, -0.5] {
369 let y = yeo_johnson(x, lam);
370 let x_back = yeo_johnson_inverse(y, lam);
371 assert_abs_diff_eq!(x, x_back, epsilon = 1e-8);
372 }
373 }
374
375 #[test]
376 fn test_lambda_two_negative_branch() {
377 let two = 2.0_f64;
379 for &x in &[-1.0, -2.0, -0.5] {
380 let result = yeo_johnson(x, two);
381 let expected = -(-x + 1.0).ln();
382 assert_abs_diff_eq!(result, expected, epsilon = 1e-10);
383 }
384 }
385
386 #[test]
387 fn test_empty_input() {
388 let x: Array2<f64> = Array2::zeros((0, 0));
389 let pt = PowerTransformer::default();
390 assert!(FitUnsupervised::<f64>::fit(&pt, &x).is_err());
391 }
392
393 #[test]
394 fn test_shape_mismatch() {
395 let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
396 let pt = PowerTransformer::default();
397 let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
398
399 let x_wrong = array![[1.0, 2.0, 3.0]];
400 assert!(fitted.transform(&x_wrong).is_err());
401 assert!(fitted.inverse_transform(&x_wrong).is_err());
402 }
403
404 #[test]
405 fn test_mixed_positive_negative() {
406 let x = array![
407 [-3.0, 10.0],
408 [-1.0, 20.0],
409 [0.0, 30.0],
410 [1.0, 40.0],
411 [3.0, 50.0],
412 ];
413 let pt = PowerTransformer::default();
414 let fitted = FitUnsupervised::<f64>::fit(&pt, &x).unwrap();
415 let transformed = fitted.transform(&x).unwrap();
416 let recovered = fitted.inverse_transform(&transformed).unwrap();
417
418 for (a, b) in x.iter().zip(recovered.iter()) {
419 assert_abs_diff_eq!(a, b, epsilon = 1e-5);
420 }
421 }
422}