1use argmin::core::{CostFunction, Executor};
4use argmin::solver::brent::BrentOpt;
5
6use super::{Error, Transformer};
7
8fn box_cox(x: f64, lambda: f64) -> Result<f64, Error> {
11 if x <= 0.0 {
12 return Err(Error::NonPositiveData);
13 }
14 if x.is_nan() {
15 return Ok(x);
16 }
17 if lambda == 0.0 {
18 Ok(x.ln())
19 } else {
20 Ok((x.powf(lambda) - 1.0) / lambda)
21 }
22}
23
24fn inverse_box_cox(y: f64, lambda: f64) -> Result<f64, Error> {
26 if y.is_nan() {
27 Ok(y)
28 } else if lambda == 0.0 {
29 Ok(y.exp())
30 } else {
31 let value = y * lambda + 1.0;
32 if value <= 0.0 {
33 Err(Error::InvalidDomain)
34 } else {
35 Ok(value.powf(1.0 / lambda))
36 }
37 }
38}
39
40fn box_cox_log_likelihood(data: &[f64], lambda: f64) -> Result<f64, Error> {
41 let n = data.len() as f64;
42 if n == 0.0 {
43 return Err(Error::EmptyData);
44 }
45 if data.iter().any(|&x| x <= 0.0) {
46 return Err(Error::NonPositiveData);
47 }
48 let transformed_data = data
49 .iter()
50 .map(|&x| box_cox(x, lambda))
51 .collect::<Result<Vec<_>, _>>()?;
52
53 let mean_transformed: f64 = transformed_data.iter().sum::<f64>() / n;
54 let variance: f64 = transformed_data
55 .iter()
56 .map(|&x| (x - mean_transformed).powi(2))
57 .sum::<f64>()
58 / n;
59
60 if variance <= 0.0 {
62 return Err(Error::VarianceNotPositive);
63 }
64 let log_likelihood =
65 -0.5 * n * variance.ln() + (lambda - 1.0) * data.iter().map(|&x| x.ln()).sum::<f64>();
66 Ok(log_likelihood)
67}
68
/// Cost-function wrapper that lets the argmin optimizer search for the
/// Box-Cox `lambda` maximizing the log-likelihood of `data`.
#[derive(Clone)]
struct BoxCoxProblem<'a> {
    // Borrowed observations; the optimizer only needs read access.
    data: &'a [f64],
}
73
74impl CostFunction for BoxCoxProblem<'_> {
75 type Param = f64;
76 type Output = f64;
77
78 fn cost(&self, lambda: &Self::Param) -> Result<Self::Output, argmin::core::Error> {
80 Ok(box_cox_log_likelihood(self.data, *lambda).map(|ll| -ll)?)
81 }
82}
83
/// Tunables for the Brent lambda search.
struct OptimizationParams {
    // Starting lambda handed to the executor's initial state.
    initial_param: f64,
    // Inclusive bracket for the Brent solver.
    lower_bound: f64,
    upper_bound: f64,
    // Cap on solver iterations.
    max_iterations: u64,
}
90
impl Default for OptimizationParams {
    /// Default search: lambda in [-2, 2], starting at 0, up to 1000 iterations.
    fn default() -> Self {
        Self {
            initial_param: 0.0,
            lower_bound: -2.0,
            upper_bound: 2.0,
            max_iterations: 1000,
        }
    }
}
101
102fn optimize_lambda<T: CostFunction<Param = f64, Output = f64>>(
103 cost: T,
104 params: OptimizationParams,
105) -> Result<f64, Error> {
106 let solver = BrentOpt::new(params.lower_bound, params.upper_bound);
107 let result = Executor::new(cost, solver)
108 .configure(|state| {
109 state
110 .param(params.initial_param)
111 .max_iters(params.max_iterations)
112 })
113 .run();
114
115 result
116 .map_err(Error::Optimize)
117 .and_then(|res| res.state().best_param.ok_or_else(|| Error::NoBestParameter))
118}
119
120pub(crate) fn optimize_box_cox_lambda(data: &[f64]) -> Result<f64, Error> {
122 let cost = BoxCoxProblem { data };
124 let optimization_params = OptimizationParams::default();
125 optimize_lambda(cost, optimization_params)
126}
127
128pub(crate) fn optimize_yeo_johnson_lambda(data: &[f64]) -> Result<f64, Error> {
129 let cost = YeoJohnsonProblem { data };
131 let optimization_params = OptimizationParams::default();
132 optimize_lambda(cost, optimization_params)
133}
134
/// Box-Cox power transformer.
#[derive(Clone, Debug)]
pub struct BoxCox {
    // Power parameter; NaN until `fit` runs or `with_lambda` sets it.
    lambda: f64,
    // When true, `fit` drops NaN observations before optimizing lambda.
    ignore_nans: bool,
}
157
158impl BoxCox {
159 pub fn new() -> Self {
161 Self {
162 lambda: f64::NAN,
163 ignore_nans: false,
164 }
165 }
166
167 pub fn with_lambda(mut self, lambda: f64) -> Result<Self, Error> {
173 if !lambda.is_finite() {
174 return Err(Error::InvalidLambda);
175 }
176 self.lambda = lambda;
177 Ok(self)
178 }
179
180 pub fn ignore_nans(mut self, ignore_nans: bool) -> Self {
187 self.ignore_nans = ignore_nans;
188 self
189 }
190}
191
192impl Default for BoxCox {
193 fn default() -> Self {
194 Self::new()
195 }
196}
197
198impl Transformer for BoxCox {
199 fn fit(&mut self, data: &[f64]) -> Result<(), Error> {
200 if !self.ignore_nans || !data.iter().any(|&x| x.is_nan()) {
203 self.lambda = optimize_box_cox_lambda(data)?;
204 } else {
205 let data = data
206 .iter()
207 .copied()
208 .filter(|&x| !x.is_nan())
209 .collect::<Vec<_>>();
210 if data.is_empty() {
211 return Err(Error::EmptyData);
212 }
213 self.lambda = optimize_box_cox_lambda(&data)?;
214 }
215 Ok(())
216 }
217
218 fn transform(&self, data: &mut [f64]) -> Result<(), Error> {
219 if self.lambda.is_nan() {
220 return Err(Error::NotFitted);
221 }
222 for x in data.iter_mut() {
223 *x = box_cox(*x, self.lambda)?;
224 }
225 Ok(())
226 }
227
228 fn inverse_transform(&self, data: &mut [f64]) -> Result<(), Error> {
229 for x in data.iter_mut() {
230 *x = inverse_box_cox(*x, self.lambda)?;
231 }
232 Ok(())
233 }
234}
235
236fn yeo_johnson(x: f64, lambda: f64) -> Result<f64, Error> {
238 if x.is_nan() {
239 return Ok(x);
240 }
241 if !lambda.is_finite() {
242 return Err(Error::InvalidLambda);
243 }
244
245 if x >= 0.0 {
246 if lambda == 0.0 {
247 Ok((x + 1.0).ln())
248 } else {
249 Ok(((x + 1.0).powf(lambda) - 1.0) / lambda)
250 }
251 } else if lambda == 2.0 {
252 Ok(-(-x + 1.0).ln())
253 } else {
254 Ok(-((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda))
255 }
256}
257
/// Inverts [`yeo_johnson`] for a single value.
///
/// NaN inputs propagate as NaN. Lambdas within `EPSILON` of the two
/// singular points (0 for the non-negative branch, 2 for the negative
/// branch) are routed to the closed-form limits to avoid dividing by a
/// numerically-zero exponent.
fn inverse_yeo_johnson(y: f64, lambda: f64) -> f64 {
    const EPSILON: f64 = 1e-6;

    if y >= 0.0 && lambda.abs() < EPSILON {
        // Inverse of ln(x + 1).
        (y.exp()) - 1.0
    } else if y >= 0.0 {
        // Inverse of ((x + 1)^lambda - 1) / lambda.
        (y * lambda + 1.0).powf(1.0 / lambda) - 1.0
    } else if (lambda - 2.0).abs() < EPSILON {
        // Inverse of -ln(-x + 1): x = 1 - exp(-y).
        // BUG FIX: was `-(-y.exp() - 1.0)`, which parses as
        // -(-(e^y) - 1) = e^y + 1 because the method call binds tighter
        // than unary minus — wrong sign and wrong exponent.
        1.0 - (-y).exp()
    } else {
        // Inverse of -(((-x + 1)^(2 - lambda) - 1) / (2 - lambda)).
        -((-((2.0 - lambda) * y) + 1.0).powf(1.0 / (2.0 - lambda)) - 1.0)
    }
}
276
277fn yeo_johnson_log_likelihood(data: &[f64], lambda: f64) -> Result<f64, Error> {
278 let n = data.len() as f64;
279
280 if n == 0.0 {
281 return Err(Error::EmptyData);
282 }
283
284 let transformed_data = data
285 .iter()
286 .map(|&x| yeo_johnson(x, lambda))
287 .collect::<Result<Vec<f64>, _>>()?;
288
289 let mean = transformed_data.iter().sum::<f64>() / n;
290
291 let variance = transformed_data
292 .iter()
293 .map(|&x| (x - mean).powi(2))
294 .sum::<f64>()
295 / n;
296
297 if variance <= 0.0 {
298 return Err(Error::VarianceNotPositive);
299 }
300
301 let log_sigma_squared = variance.ln();
302 let log_likelihood = -n / 2.0 * log_sigma_squared;
303
304 let additional_term: f64 = data
305 .iter()
306 .map(|&x| x.signum() * (x.abs() + 1.0).ln())
307 .sum::<f64>()
308 * (lambda - 1.0);
309
310 Ok(log_likelihood + additional_term)
311}
312
/// Cost-function wrapper that lets the argmin optimizer search for the
/// Yeo-Johnson `lambda` maximizing the log-likelihood of `data`.
#[derive(Clone)]
struct YeoJohnsonProblem<'a> {
    // Borrowed observations; the optimizer only needs read access.
    data: &'a [f64],
}
317
318impl CostFunction for YeoJohnsonProblem<'_> {
319 type Param = f64;
320 type Output = f64;
321
322 fn cost(&self, lambda: &Self::Param) -> Result<Self::Output, argmin::core::Error> {
324 Ok(yeo_johnson_log_likelihood(self.data, *lambda).map(|ll| -ll)?)
325 }
326}
327
/// Yeo-Johnson power transformer; handles non-positive data, unlike Box-Cox.
#[derive(Clone, Debug)]
pub struct YeoJohnson {
    // Power parameter; NaN until `fit` runs or `with_lambda` sets it.
    lambda: f64,
    // When true, `fit` drops NaN observations before optimizing lambda.
    ignore_nans: bool,
}
353
354impl YeoJohnson {
355 pub fn new() -> Self {
357 Self {
358 lambda: f64::NAN,
359 ignore_nans: false,
360 }
361 }
362
363 pub fn with_lambda(mut self, lambda: f64) -> Result<Self, Error> {
369 if !lambda.is_finite() {
370 return Err(Error::InvalidLambda);
371 }
372 self.lambda = lambda;
373 Ok(self)
374 }
375
376 pub fn ignore_nans(mut self, ignore_nans: bool) -> Self {
383 self.ignore_nans = ignore_nans;
384 self
385 }
386}
387
388impl Default for YeoJohnson {
389 fn default() -> Self {
390 Self::new()
391 }
392}
393
394impl Transformer for YeoJohnson {
395 fn fit(&mut self, data: &[f64]) -> Result<(), Error> {
396 if !self.ignore_nans || !data.iter().any(|&x| x.is_nan()) {
399 self.lambda = optimize_yeo_johnson_lambda(data)?;
400 } else {
401 let data = data
402 .iter()
403 .copied()
404 .filter(|&x| !x.is_nan())
405 .collect::<Vec<_>>();
406 if data.is_empty() {
407 return Err(Error::EmptyData);
408 }
409 self.lambda = optimize_yeo_johnson_lambda(&data)?;
410 }
411 Ok(())
412 }
413
414 fn transform(&self, data: &mut [f64]) -> Result<(), Error> {
415 if self.lambda.is_nan() {
416 return Err(Error::NotFitted);
417 }
418 for x in data.iter_mut() {
419 *x = yeo_johnson(*x, self.lambda)?;
420 }
421 Ok(())
422 }
423
424 fn inverse_transform(&self, data: &mut [f64]) -> Result<(), Error> {
425 for x in data.iter_mut() {
426 *x = inverse_yeo_johnson(*x, self.lambda);
427 }
428 Ok(())
429 }
430}
431
#[cfg(test)]
mod test {
    use super::*;
    use augurs_testing::{assert_all_close, assert_approx_eq};

    // --- Lambda optimization -------------------------------------------

    #[test]
    fn correct_optimal_box_cox_lambda() {
        let data = &[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7];
        let got = optimize_box_cox_lambda(data);
        assert!(got.is_ok());
        let lambda = got.unwrap();
        assert_approx_eq!(lambda, 0.7123778635679304);
    }

    #[test]
    fn optimal_box_cox_lambda_lambda_empty_data() {
        let data = &[];
        let got = optimize_box_cox_lambda(data);
        assert!(got.is_err());
    }

    #[test]
    fn optimal_box_cox_lambda_non_positive_data() {
        // Box-Cox requires strictly positive data; 0.0 must be rejected.
        let data = &[0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7];
        let got = optimize_box_cox_lambda(data);
        assert!(got.is_err());
    }

    #[test]
    fn correct_optimal_yeo_johnson_lambda() {
        // Yeo-Johnson accepts non-positive data, unlike Box-Cox.
        let data = &[0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7];
        let got = optimize_yeo_johnson_lambda(data);
        assert!(got.is_ok());
        let lambda = got.unwrap();
        assert_approx_eq!(lambda, 1.7458442076987954);
    }

    // --- Log-likelihood values -----------------------------------------

    #[test]
    fn test_box_cox_llf() {
        let data = &[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7];
        let lambda = 1.0;
        let got = box_cox_log_likelihood(data, lambda);
        assert!(got.is_ok());
        let llf = got.unwrap();
        assert_approx_eq!(llf, 11.266065387038703);
    }

    #[test]
    fn test_box_cox_llf_non_positive() {
        let data = &[0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7];
        let lambda = 0.0;
        let got = box_cox_log_likelihood(data, lambda);
        assert!(got.is_err());
    }

    #[test]
    fn test_yeo_johnson_llf() {
        let data = &[0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7];
        let lambda = 1.0;
        let got = yeo_johnson_log_likelihood(data, lambda);
        assert!(got.is_ok());
        let llf = got.unwrap();
        assert_approx_eq!(llf, 10.499377905819307);
    }

    // --- Single-value transforms ---------------------------------------

    #[test]
    fn box_cox_single() {
        assert_approx_eq!(box_cox(1.0, 0.5).unwrap(), 0.0);
        assert_approx_eq!(box_cox(2.0, 0.5).unwrap(), 0.8284271247461903);
        // NaNs pass through the transform unchanged.
        assert!(box_cox(f64::NAN, 0.5).unwrap().is_nan());
    }

    #[test]
    fn inverse_box_cox_single() {
        assert_approx_eq!(inverse_box_cox(0.0, 0.5).unwrap(), 1.0);
        assert_approx_eq!(inverse_box_cox(0.8284271247461903, 0.5).unwrap(), 2.0);
        assert!(inverse_box_cox(f64::NAN, 0.5).unwrap().is_nan());
    }

    // --- BoxCox transformer --------------------------------------------

    #[test]
    fn box_cox_transform() {
        let mut data = vec![1.0, 2.0, 3.0];
        let lambda = 0.5;
        let box_cox = BoxCox::new().with_lambda(lambda).unwrap();
        let expected = vec![0.0, 0.8284271247461903, 1.4641016151377544];
        box_cox.transform(&mut data).unwrap();
        assert_all_close(&expected, &data);
    }

    #[test]
    fn box_cox_fit_transform_nans() {
        // Without ignore_nans, NaNs in the data make fitting fail.
        let mut data = vec![1.0, 2.0, f64::NAN, 3.0];
        let mut box_cox = BoxCox::new();
        assert!(box_cox.fit_transform(&mut data).is_err());
    }

    #[test]
    fn box_cox_transform_ignore_nans() {
        // With ignore_nans, fitting skips NaNs but transform preserves them.
        let mut data = vec![1.0, 2.0, f64::NAN, 3.0];
        let mut box_cox = BoxCox::new().ignore_nans(true);
        let expected = vec![0.0, 0.8284271247461903, f64::NAN, 1.4641016151377544];
        box_cox.fit_transform(&mut data).unwrap();
        assert_all_close(&expected, &data);
    }

    #[test]
    fn inverse_box_cox_transform() {
        let mut data = vec![0.0, 0.5_f64.ln(), 1.0_f64.ln()];
        let lambda = 0.5;
        let box_cox = BoxCox::new().with_lambda(lambda).unwrap();
        let expected = vec![1.0, 0.426966072919605, 1.0];
        box_cox.inverse_transform(&mut data).unwrap();
        assert_all_close(&expected, &data);
    }

    // --- YeoJohnson transformer ----------------------------------------

    #[test]
    fn yeo_johnson_single() {
        assert_approx_eq!(yeo_johnson(1.0, 0.5).unwrap(), 0.8284271247461903);
        assert_approx_eq!(yeo_johnson(2.0, 0.5).unwrap(), 1.4641016151377544);
        assert!(yeo_johnson(f64::NAN, 0.5).unwrap().is_nan());
    }

    #[test]
    fn inverse_yeo_johnson_single() {
        assert_approx_eq!(inverse_yeo_johnson(0.8284271247461903, 0.5), 1.0);
        assert_approx_eq!(inverse_yeo_johnson(1.4641016151377544, 0.5), 2.0);
        assert!(inverse_yeo_johnson(f64::NAN, 0.5).is_nan());
    }

    #[test]
    fn yeo_johnson_transform() {
        let mut data = vec![-1.0, 0.0, 1.0];
        let lambda = 0.5;
        let yeo_johnson = YeoJohnson::new().with_lambda(lambda).unwrap();
        let expected = vec![-1.2189514164974602, 0.0, 0.8284271247461903];
        yeo_johnson.transform(&mut data).unwrap();
        assert_all_close(&expected, &data);
    }

    #[test]
    fn yeo_johnson_fit_transform_nans() {
        let mut data = vec![-1.0, 0.0, f64::NAN, 1.0];
        let mut yeo_johnson = YeoJohnson::new();
        assert!(yeo_johnson.fit_transform(&mut data).is_err());
    }

    #[test]
    fn yeo_johnson_fit_transform_ignore_nans() {
        let mut data = vec![-1.0, 0.0, f64::NAN, 1.0];
        let mut yeo_johnson = YeoJohnson::new().ignore_nans(true);
        let expected = vec![-1.0000010312156777, 0.0, f64::NAN, 0.9999989687856643];
        yeo_johnson.fit_transform(&mut data).unwrap();
        assert_all_close(&expected, &data);
    }

    #[test]
    fn inverse_yeo_johnson_transform() {
        let mut data = vec![-1.2189514164974602, 0.0, 0.8284271247461903];
        let lambda = 0.5;
        let yeo_johnson = YeoJohnson::new().with_lambda(lambda).unwrap();
        let expected = vec![-1.0, 0.0, 1.0];
        yeo_johnson.inverse_transform(&mut data).unwrap();
        assert_all_close(&expected, &data);
    }
}