use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
use scirs2_core::numeric::{Float, NumCast};
use scirs2_core::parallel_ops::*;

use crate::error::{Result, TransformError};

/// Tolerance used for floating-point comparisons throughout this module.
const EPSILON: f64 = 1e-10;

/// Generates polynomial and interaction features from a 2-D input array.
pub struct PolynomialFeatures {
    /// The maximum degree of the generated polynomial features.
    degree: usize,
    /// If true, only interaction terms (products of distinct features) are produced.
    interaction_only: bool,
    /// If true, a bias column of ones is included in the output.
    includebias: bool,
}

impl PolynomialFeatures {
    /// Creates a new `PolynomialFeatures` transformer with the given degree,
    /// interaction-only flag, and bias flag.
    pub fn new(degree: usize, interaction_only: bool, includebias: bool) -> Self {
        PolynomialFeatures {
            degree,
            interaction_only,
            includebias,
        }
    }

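    /// Computes the number of output features that `transform` produces for
    /// `nfeatures` input features.
    ///
    /// A minimal usage sketch (marked `ignore` since the enclosing crate path
    /// is not assumed here):
    ///
    /// ```ignore
    /// let poly = PolynomialFeatures::new(2, false, true);
    /// // Degree 2 over 2 features with bias: 1 + 2 + 3 = 6 columns
    /// // (1, x1, x2, x1^2, x1*x2, x2^2).
    /// assert_eq!(poly.n_output_features(2), 6);
    /// ```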
    pub fn n_output_features(&self, nfeatures: usize) -> usize {
        if nfeatures == 0 {
            return 0;
        }

        if self.interaction_only {
            let mut n = if self.includebias { 1 } else { 0 };
            for d in 1..=self.degree.min(nfeatures) {
                // Number of interaction terms of degree d: C(nfeatures, d).
                let mut b = 1;
                for i in 0..d {
                    b = b * (nfeatures - i) / (i + 1);
                }
                n += b;
            }
            n
        } else {
            let n = if self.includebias { 1 } else { 0 };
            // Number of monomials of exact degree d: C(nfeatures + d - 1, d).
            // Summing from d = 1 (rather than d = 0) keeps the count correct
            // when no bias column is requested.
            n + (1..=self.degree)
                .map(|d| {
                    let mut b = 1;
                    for i in 0..d {
                        b = b * (nfeatures + d - 1 - i) / (i + 1);
                    }
                    b
                })
                .sum::<usize>()
        }
    }

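    /// Expands `array` into polynomial features, returning an
    /// `(n_samples, n_output_features)` matrix.
    ///
    /// A minimal usage sketch (marked `ignore` since the enclosing crate path
    /// is not assumed here):
    ///
    /// ```ignore
    /// use scirs2_core::ndarray::array;
    ///
    /// let x = array![[1.0, 2.0], [3.0, 4.0]];
    /// let poly = PolynomialFeatures::new(2, false, true);
    /// let out = poly.transform(&x).unwrap();
    /// assert_eq!(out.shape(), &[2, 6]);
    /// ```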
    pub fn transform<S>(&self, array: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        if array.is_empty() {
            return Err(TransformError::InvalidInput(
                "Input array cannot be empty".to_string(),
            ));
        }

        let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

        for &val in array_f64.iter() {
            if !val.is_finite() {
                return Err(TransformError::DataValidationError(
                    "Input array contains non-finite values (NaN or infinity)".to_string(),
                ));
            }
        }

        if !array_f64.is_standard_layout() {
            return Err(TransformError::InvalidInput(
                "Input array must be in standard memory layout".to_string(),
            ));
        }

        let shape = array_f64.shape();
        let n_samples = shape[0];
        let nfeatures = shape[1];

        if self.degree == 0 {
            return Err(TransformError::InvalidInput(
                "Degree must be at least 1".to_string(),
            ));
        }

        if self.degree > 10 {
            return Err(TransformError::InvalidInput(
                "Degree > 10 may cause numerical overflow. Please use a smaller degree."
                    .to_string(),
            ));
        }

        let n_output_features = self.n_output_features(nfeatures);

        if n_output_features > 100_000 {
            return Err(TransformError::MemoryError(format!(
                "Output would have {n_output_features} features, which exceeds the limit of 100,000. Consider reducing degree or using interaction_only=true"
            )));
        }

        let max_abs_value = array_f64.iter().map(|&x| x.abs()).fold(0.0, f64::max);
        if max_abs_value > 100.0 && self.degree > 3 {
            return Err(TransformError::DataValidationError(format!(
                "Large input values (max: {max_abs_value:.2}) with high degree ({}) may cause numerical overflow. Consider scaling input data first.",
                self.degree
            )));
        }

        let mut result = Array2::zeros((n_samples, n_output_features));

        let mut powers = vec![0; nfeatures];
        let mut col_idx = 0;

        // Bias column of ones.
        if self.includebias {
            for i in 0..n_samples {
                result[[i, col_idx]] = 1.0;
            }
            col_idx += 1;
        }

        // Degree-1 features are copied through unchanged.
        for i in 0..n_samples {
            for j in 0..nfeatures {
                result[[i, col_idx + j]] = array_f64[[i, j]];
            }
        }
        col_idx += nfeatures;

        if self.degree >= 2 {
            // Recursively distributes `degree` power units over the features
            // starting at index `start`; a column is written once all units
            // are consumed and the total degree lies in [2, max_degree].
            #[allow(clippy::too_many_arguments)]
            fn generate_combinations(
                powers: &mut Vec<usize>,
                start: usize,
                degree: usize,
                max_degree: usize,
                interaction_only: bool,
                array: &Array2<f64>,
                result: &mut Array2<f64>,
                col_idx: &mut usize,
            ) -> Result<()> {
                if degree == 0 {
                    let sum: usize = powers.iter().sum();
                    if sum >= 2 && sum <= max_degree {
                        for i in 0..array.shape()[0] {
                            let mut val = 1.0;
                            for (j, &p) in powers.iter().enumerate() {
                                if p > 0 {
                                    let base = array[[i, j]];
                                    let powered = base.powi(p as i32);

                                    if !powered.is_finite() {
                                        return Err(TransformError::ComputationError(format!(
                                            "Numerical overflow detected when computing {base}^{p} at sample {i}, feature {j}"
                                        )));
                                    }

                                    val *= powered;

                                    if !val.is_finite() {
                                        return Err(TransformError::ComputationError(format!(
                                            "Numerical overflow detected during polynomial feature computation at sample {i}"
                                        )));
                                    }
                                }
                            }
                            result[[i, *col_idx]] = val;
                        }
                        *col_idx += 1;
                    }
                    return Ok(());
                }

                for j in start..powers.len() {
                    let max_power = if interaction_only { 1 } else { degree };
                    for p in 1..=max_power {
                        powers[j] += p;
                        generate_combinations(
                            powers,
                            j + 1,
                            degree - p,
                            max_degree,
                            interaction_only,
                            array,
                            result,
                            col_idx,
                        )?;
                        powers[j] -= p;
                    }
                }
                Ok(())
            }

            // The recursion emits only combinations whose total degree is
            // exactly its `degree` argument, so invoke it once per target
            // degree from 2 up to `self.degree`.
            let mut current_col_idx = col_idx;
            for d in 2..=self.degree {
                generate_combinations(
                    &mut powers,
                    0,
                    d,
                    self.degree,
                    self.interaction_only,
                    &array_f64,
                    &mut result,
                    &mut current_col_idx,
                )?;
            }
        }

267
268 for &val in result.iter() {
270 if !val.is_finite() {
271 return Err(TransformError::ComputationError(
272 "Output contains non-finite values. This may be due to numerical overflow."
273 .to_string(),
274 ));
275 }
276 }
277
278 let max_output_value = result.iter().map(|&x| x.abs()).fold(0.0, f64::max);
280 if max_output_value > 1e15 {
281 return Err(TransformError::DataValidationError(format!(
282 "Output contains extremely large values (max: {max_output_value:.2e}). Consider scaling input data or reducing polynomial degree."
283 )));
284 }
285
286 Ok(result)
287 }
288}
289
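/// Binarizes an array against `threshold`: values strictly greater than the
/// threshold map to 1.0, all others to 0.0.
///
/// A minimal usage sketch (marked `ignore` since the enclosing crate path is
/// not assumed here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[0.5, -1.0], [2.0, 0.0]];
/// let b = binarize(&x, 0.0).unwrap();
/// assert_eq!(b, array![[1.0, 0.0], [1.0, 0.0]]);
/// ```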
#[allow(dead_code)]
pub fn binarize<S>(array: &ArrayBase<S, Ix2>, threshold: f64) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    if array.is_empty() {
        return Err(TransformError::InvalidInput(
            "Input array cannot be empty".to_string(),
        ));
    }

    if !threshold.is_finite() {
        return Err(TransformError::InvalidInput(
            "Threshold must be a finite number".to_string(),
        ));
    }

    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    for &val in array_f64.iter() {
        if !val.is_finite() {
            return Err(TransformError::DataValidationError(
                "Input array contains non-finite values (NaN or infinity)".to_string(),
            ));
        }
    }

    if !array_f64.is_standard_layout() {
        return Err(TransformError::InvalidInput(
            "Input array must be in standard memory layout".to_string(),
        ));
    }

    let shape = array_f64.shape();
    let mut binarized = Array2::zeros((shape[0], shape[1]));

    for i in 0..shape[0] {
        for j in 0..shape[1] {
            binarized[[i, j]] = if array_f64[[i, j]] > threshold {
                1.0
            } else {
                0.0
            };
        }
    }

    Ok(binarized)
}

/// Computes `n_quantiles` evenly spaced quantiles for each column (axis 0)
/// or row (axis 1) of the input array.
#[allow(dead_code)]
fn compute_quantiles<S>(
    array: &ArrayBase<S, Ix2>,
    n_quantiles: usize,
    axis: usize,
) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if n_quantiles < 2 {
        return Err(TransformError::InvalidInput(
            "n_quantiles must be at least 2".to_string(),
        ));
    }

    if axis >= 2 {
        return Err(TransformError::InvalidInput(
            "axis must be 0 or 1".to_string(),
        ));
    }

    let shape = array_f64.shape();
    let nfeatures = if axis == 0 { shape[1] } else { shape[0] };

    let mut quantiles = Array2::zeros((nfeatures, n_quantiles));

    for i in 0..nfeatures {
        let data: Vec<f64> = if axis == 0 {
            array_f64.column(i).to_vec()
        } else {
            array_f64.row(i).to_vec()
        };

        let mut sorted_data = data.clone();
        sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));

        // Guard against an empty axis, which would underflow `len() - 1`.
        if sorted_data.is_empty() {
            return Err(TransformError::InvalidInput(
                "Cannot compute quantiles from empty data".to_string(),
            ));
        }

        for j in 0..n_quantiles {
            let q = j as f64 / (n_quantiles - 1) as f64;
            let idx = (q * (sorted_data.len() - 1) as f64).round() as usize;
            quantiles[[i, j]] = sorted_data[idx];
        }
    }

    Ok(quantiles)
}

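/// Discretizes each feature (axis 0) or sample (axis 1) into `n_bins`
/// equal-width bins, encoded either as ordinal bin indices or one-hot columns.
///
/// A minimal usage sketch (marked `ignore` since the enclosing crate path is
/// not assumed here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[0.0, 0.0], [3.0, 5.0], [6.0, 10.0]];
/// // Two equal-width bins per feature, ordinal encoding.
/// let d = discretize_equal_width(&x, 2, "ordinal", 0).unwrap();
/// assert_eq!(d, array![[0.0, 0.0], [0.0, 0.0], [1.0, 1.0]]);
/// ```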
#[allow(dead_code)]
pub fn discretize_equal_width<S>(
    array: &ArrayBase<S, Ix2>,
    n_bins: usize,
    encode: &str,
    axis: usize,
) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if !array_f64.is_standard_layout() {
        return Err(TransformError::InvalidInput(
            "Input array must be in standard memory layout".to_string(),
        ));
    }

    if n_bins < 2 {
        return Err(TransformError::InvalidInput(
            "n_bins must be at least 2".to_string(),
        ));
    }

    if encode != "onehot" && encode != "ordinal" {
        return Err(TransformError::InvalidInput(
            "encode must be 'onehot' or 'ordinal'".to_string(),
        ));
    }

    if axis >= 2 {
        return Err(TransformError::InvalidInput(
            "axis must be 0 or 1".to_string(),
        ));
    }

    let shape = array_f64.shape();
    let n_samples = shape[0];
    let nfeatures = shape[1];

    let n_output_features = if encode == "onehot" {
        if axis == 0 {
            nfeatures * n_bins
        } else {
            n_samples * n_bins
        }
    } else if axis == 0 {
        nfeatures
    } else {
        n_samples
    };

    let mut min_values = Array1::zeros(if axis == 0 { nfeatures } else { n_samples });
    let mut max_values = Array1::zeros(if axis == 0 { nfeatures } else { n_samples });

    for i in 0..(if axis == 0 { nfeatures } else { n_samples }) {
        let data = if axis == 0 {
            array_f64.column(i)
        } else {
            array_f64.row(i)
        };

        min_values[i] = data.fold(f64::INFINITY, |acc, &x| acc.min(x));
        max_values[i] = data.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
    }

    let mut bin_edges = Array2::zeros((if axis == 0 { nfeatures } else { n_samples }, n_bins + 1));
    for i in 0..(if axis == 0 { nfeatures } else { n_samples }) {
        let min_val = min_values[i];
        let max_val = max_values[i];
        // Degenerate (constant) features get a unit-width bin.
        let bin_width = if (max_val - min_val).abs() < EPSILON {
            1.0
        } else {
            (max_val - min_val) / n_bins as f64
        };

        for j in 0..=n_bins {
            bin_edges[[i, j]] = min_val + bin_width * j as f64;
        }

        // Ensure the last edge strictly exceeds the maximum value so that
        // the maximum falls into the top bin.
        bin_edges[[i, n_bins]] = bin_edges[[i, n_bins]].max(max_val + EPSILON);
    }

    let mut discretized = Array2::zeros((n_samples, n_output_features));

    if encode == "ordinal" {
        for i in 0..n_samples {
            for j in 0..nfeatures {
                let value = array_f64[[i, j]];
                let feature_idx = if axis == 0 { j } else { i };

                let mut bin_idx = 0;
                while bin_idx < n_bins && value > bin_edges[[feature_idx, bin_idx + 1]] {
                    bin_idx += 1;
                }

                let output_idx = if axis == 0 { j } else { i };
                discretized[[i, output_idx]] = bin_idx as f64;
            }
        }
    } else {
        for i in 0..n_samples {
            for j in 0..nfeatures {
                let value = array_f64[[i, j]];
                let feature_idx = if axis == 0 { j } else { i };

                let mut bin_idx = 0;
                while bin_idx < n_bins && value > bin_edges[[feature_idx, bin_idx + 1]] {
                    bin_idx += 1;
                }

                let output_idx = if axis == 0 {
                    j * n_bins + bin_idx
                } else {
                    i * n_bins + bin_idx
                };

                discretized[[i, output_idx]] = 1.0;
            }
        }
    }

    Ok(discretized)
}

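/// Discretizes values into `n_bins` approximately equal-frequency (quantile)
/// bins, encoded as ordinal bin indices or one-hot columns.
///
/// A minimal usage sketch (marked `ignore` since the enclosing crate path is
/// not assumed here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[1.0], [2.0], [3.0], [4.0]];
/// let d = discretize_equal_frequency(&x, 2, "ordinal", 0).unwrap();
/// // Roughly half of the values fall in each bin.
/// assert_eq!(d.shape(), &[4, 1]);
/// ```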
#[allow(dead_code)]
pub fn discretize_equal_frequency<S>(
    array: &ArrayBase<S, Ix2>,
    n_bins: usize,
    encode: &str,
    axis: usize,
) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if !array_f64.is_standard_layout() {
        return Err(TransformError::InvalidInput(
            "Input array must be in standard memory layout".to_string(),
        ));
    }

    if n_bins < 2 {
        return Err(TransformError::InvalidInput(
            "n_bins must be at least 2".to_string(),
        ));
    }

    if encode != "onehot" && encode != "ordinal" {
        return Err(TransformError::InvalidInput(
            "encode must be 'onehot' or 'ordinal'".to_string(),
        ));
    }

    if axis >= 2 {
        return Err(TransformError::InvalidInput(
            "axis must be 0 or 1".to_string(),
        ));
    }

    let shape = array_f64.shape();
    let n_samples = shape[0];
    let nfeatures = shape[1];

    let n_output_features = if encode == "onehot" {
        if axis == 0 {
            nfeatures * n_bins
        } else {
            n_samples * n_bins
        }
    } else if axis == 0 {
        nfeatures
    } else {
        n_samples
    };

    let quantiles = compute_quantiles(&array_f64, n_bins + 1, axis)?;

    let mut discretized = Array2::zeros((n_samples, n_output_features));

    if encode == "ordinal" {
        for i in 0..n_samples {
            for j in 0..nfeatures {
                let value = array_f64[[i, j]];
                let feature_idx = if axis == 0 { j } else { i };

                let mut bin_idx = 0;
                while bin_idx < n_bins && value > quantiles[[feature_idx, bin_idx + 1]] {
                    bin_idx += 1;
                }

                let output_idx = if axis == 0 { j } else { i };
                discretized[[i, output_idx]] = bin_idx as f64;
            }
        }
    } else {
        for i in 0..n_samples {
            for j in 0..nfeatures {
                let value = array_f64[[i, j]];
                let feature_idx = if axis == 0 { j } else { i };

                let mut bin_idx = 0;
                while bin_idx < n_bins && value > quantiles[[feature_idx, bin_idx + 1]] {
                    bin_idx += 1;
                }

                let output_idx = if axis == 0 {
                    j * n_bins + bin_idx
                } else {
                    i * n_bins + bin_idx
                };

                discretized[[i, output_idx]] = 1.0;
            }
        }
    }

    Ok(discretized)
}

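/// Applies a power transform ("box-cox" or "yeo-johnson") feature-wise, with
/// each feature's lambda chosen by maximum likelihood, optionally
/// standardizing the result to zero mean and unit variance.
///
/// A minimal usage sketch (marked `ignore` since the enclosing crate path is
/// not assumed here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
/// // Yeo-Johnson handles zero and negative values; Box-Cox requires x > 0.
/// let t = power_transform(&x, "yeo-johnson", true).unwrap();
/// assert_eq!(t.shape(), x.shape());
/// ```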
#[allow(dead_code)]
pub fn power_transform<S>(
    array: &ArrayBase<S, Ix2>,
    method: &str,
    standardize: bool,
) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if !array_f64.is_standard_layout() {
        return Err(TransformError::InvalidInput(
            "Input array must be in standard memory layout".to_string(),
        ));
    }

    if method != "box-cox" && method != "yeo-johnson" {
        return Err(TransformError::InvalidInput(format!(
            "Unknown method: {method}. Supported methods are 'box-cox' and 'yeo-johnson'"
        )));
    }

    let shape = array_f64.shape();
    let n_samples = shape[0];
    let nfeatures = shape[1];

    let mut transformed = Array2::zeros((n_samples, nfeatures));

    for j in 0..nfeatures {
        let feature = array_f64.column(j).to_vec();

        let lambda = estimate_optimal_lambda(&feature, method)?;

        for i in 0..n_samples {
            let x = array_f64[[i, j]];

            let transformed_value = match method {
                "box-cox" => {
                    // lambda near zero degenerates to the log transform.
                    if lambda.abs() < 1e-10 {
                        x.ln()
                    } else {
                        (x.powf(lambda) - 1.0) / lambda
                    }
                }
                "yeo-johnson" => {
                    if x >= 0.0 {
                        if lambda.abs() < 1e-10 {
                            x.ln_1p()
                        } else {
                            ((1.0 + x).powf(lambda) - 1.0) / lambda
                        }
                    } else if (2.0 - lambda).abs() < 1e-10 {
                        -((-x + 1.0).ln())
                    } else {
                        -((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda)
                    }
                }
                _ => unreachable!(),
            };

            transformed[[i, j]] = transformed_value;
        }
    }

    if standardize {
        for j in 0..nfeatures {
            let column_data: Vec<f64> = transformed.column(j).to_vec();
            let mean = column_data.iter().sum::<f64>() / column_data.len() as f64;
            let variance = column_data.iter().map(|x| (x - mean).powi(2)).sum::<f64>()
                / column_data.len() as f64;
            let std_dev = variance.sqrt();

            if std_dev > 1e-10 {
                for i in 0..n_samples {
                    transformed[[i, j]] = (transformed[[i, j]] - mean) / std_dev;
                }
            }
        }
    }

    Ok(transformed)
}

/// Estimates the optimal power-transform lambda for `data` by maximizing the
/// log-likelihood over a coarse grid, then refining with golden-section search.
#[allow(dead_code)]
fn estimate_optimal_lambda(data: &[f64], method: &str) -> Result<f64> {
    if data.is_empty() {
        return Err(TransformError::InvalidInput(
            "Empty data for lambda estimation".to_string(),
        ));
    }

    if method == "box-cox" && data.iter().any(|&x| x <= 0.0) {
        return Err(TransformError::InvalidInput(
            "Box-Cox transformation requires all positive values".to_string(),
        ));
    }

    // Coarse grid search over a typical lambda range.
    let lambda_range = [-2.0, -1.5, -1.0, -0.5, 0.0, 0.5, 1.0, 1.5, 2.0];

    let mut best_lambda = 0.0;
    let mut best_log_likelihood = f64::NEG_INFINITY;

    for &lambda in &lambda_range {
        let log_likelihood = compute_log_likelihood(data, lambda, method)?;

        if log_likelihood > best_log_likelihood {
            best_log_likelihood = log_likelihood;
            best_lambda = lambda;
        }
    }

    // Refine around the best grid point.
    let tolerance = 1e-6;
    let refined_lambda = golden_section_search(
        data,
        method,
        best_lambda - 0.5,
        best_lambda + 0.5,
        tolerance,
    )?;

    Ok(refined_lambda)
}

/// Computes the Gaussian profile log-likelihood of `data` under the given
/// power transform with parameter `lambda`:
/// `-n/2 * ln(2*pi) - n/2 * ln(variance) - n/2 + (lambda - 1) * sum(log-Jacobian)`.
#[allow(dead_code)]
fn compute_log_likelihood(data: &[f64], lambda: f64, method: &str) -> Result<f64> {
    let n = data.len() as f64;
    let mut transformed_data = Vec::with_capacity(data.len());
    let mut jacobian_sum = 0.0;

    for &x in data {
        let (y, jacobian) = match method {
            "box-cox" => {
                if x <= 0.0 {
                    return Ok(f64::NEG_INFINITY);
                }
                let y = if lambda.abs() < 1e-10 {
                    x.ln()
                } else {
                    (x.powf(lambda) - 1.0) / lambda
                };
                let jacobian = x.ln();
                (y, jacobian)
            }
            "yeo-johnson" => {
                let (y, jacobian) = if x >= 0.0 {
                    if lambda.abs() < 1e-10 {
                        (x.ln_1p(), (1.0 + x).ln())
                    } else {
                        let y = ((1.0 + x).powf(lambda) - 1.0) / lambda;
                        let jacobian = (1.0 + x).ln();
                        (y, jacobian)
                    }
                } else if (2.0 - lambda).abs() < 1e-10 {
                    (-((-x + 1.0).ln()), -(1.0 - x).ln())
                } else {
                    let y = -((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda);
                    let jacobian = -(1.0 - x).ln();
                    (y, jacobian)
                };
                (y, jacobian)
            }
            _ => {
                return Err(TransformError::InvalidInput(format!(
                    "Unknown method: {method}"
                )))
            }
        };

        if !y.is_finite() {
            return Ok(f64::NEG_INFINITY);
        }

        transformed_data.push(y);
        jacobian_sum += jacobian;
    }

    let mean = transformed_data.iter().sum::<f64>() / n;
    let variance = transformed_data
        .iter()
        .map(|y| (y - mean).powi(2))
        .sum::<f64>()
        / n;

    if variance <= 0.0 {
        return Ok(f64::NEG_INFINITY);
    }

    let log_likelihood =
        -0.5 * n * (2.0 * std::f64::consts::PI).ln() - 0.5 * n * variance.ln() - 0.5 * n
            + (lambda - 1.0) * jacobian_sum;

    Ok(log_likelihood)
}

/// Maximizes the log-likelihood over `[a, b]` using golden-section search.
#[allow(dead_code)]
fn golden_section_search(
    data: &[f64],
    method: &str,
    mut a: f64,
    mut b: f64,
    tolerance: f64,
) -> Result<f64> {
    let phi = (1.0 + 5.0_f64.sqrt()) / 2.0;
    let resphi = 2.0 - phi;

    let mut x1 = a + resphi * (b - a);
    let mut x2 = a + (1.0 - resphi) * (b - a);

    let mut f1 = compute_log_likelihood(data, x1, method)?;
    let mut f2 = compute_log_likelihood(data, x2, method)?;

    for _ in 0..100 {
        if (b - a).abs() < tolerance {
            break;
        }

        if f1 > f2 {
            b = x2;
            x2 = x1;
            f2 = f1;
            x1 = a + resphi * (b - a);
            f1 = compute_log_likelihood(data, x1, method)?;
        } else {
            a = x1;
            x1 = x2;
            f1 = f2;
            x2 = a + (1.0 - resphi) * (b - a);
            f2 = compute_log_likelihood(data, x2, method)?;
        }
    }

    Ok((a + b) / 2.0)
}

/// Applies the Yeo-Johnson transform to a single value.
#[allow(dead_code)]
fn yeo_johnson_transform(x: f64, lambda: f64) -> f64 {
    if x >= 0.0 {
        if (lambda - 0.0).abs() < EPSILON {
            (x + 1.0).ln()
        } else {
            ((x + 1.0).powf(lambda) - 1.0) / lambda
        }
    } else if (lambda - 2.0).abs() < EPSILON {
        -((-x + 1.0).ln())
    } else {
        -(((-x + 1.0).powf(2.0 - lambda) - 1.0) / (2.0 - lambda))
    }
}

/// Applies the Box-Cox transform to a single (strictly positive) value.
#[allow(dead_code)]
fn box_cox_transform(x: f64, lambda: f64) -> f64 {
    if (lambda - 0.0).abs() < EPSILON {
        x.ln()
    } else {
        (x.powf(lambda) - 1.0) / lambda
    }
}

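/// A fit/transform power transformer supporting the Yeo-Johnson and Box-Cox
/// methods, with per-feature lambdas estimated by maximum likelihood.
///
/// A minimal usage sketch (marked `ignore` since the enclosing crate path is
/// not assumed here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]];
/// let mut pt = PowerTransformer::yeo_johnson(true);
/// let t = pt.fit_transform(&x).unwrap();
/// let x_back = pt.inverse_transform(&t).unwrap();
/// assert_eq!(x_back.shape(), x.shape());
/// ```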
#[derive(Debug, Clone)]
pub struct PowerTransformer {
    /// The transform method: "yeo-johnson" or "box-cox".
    method: String,
    /// Whether to standardize the output to zero mean and unit variance.
    standardize: bool,
    /// Per-feature lambda parameters estimated during `fit`.
    lambdas_: Option<Array1<f64>>,
    /// Per-feature means of the transformed data (when standardizing).
    means_: Option<Array1<f64>>,
    /// Per-feature standard deviations of the transformed data (when standardizing).
    stds_: Option<Array1<f64>>,
    /// Whether `fit` has been called.
    is_fitted: bool,
}

impl PowerTransformer {
    /// Creates a new `PowerTransformer` with the given method
    /// ("yeo-johnson" or "box-cox") and standardization flag.
    pub fn new(method: &str, standardize: bool) -> Result<Self> {
        if method != "yeo-johnson" && method != "box-cox" {
            return Err(TransformError::InvalidInput(
                "method must be 'yeo-johnson' or 'box-cox'".to_string(),
            ));
        }

        Ok(PowerTransformer {
            method: method.to_string(),
            standardize,
            lambdas_: None,
            means_: None,
            stds_: None,
            is_fitted: false,
        })
    }

    /// Convenience constructor for the Yeo-Johnson method.
    #[allow(dead_code)]
    pub fn yeo_johnson(standardize: bool) -> Self {
        PowerTransformer {
            method: "yeo-johnson".to_string(),
            standardize,
            lambdas_: None,
            means_: None,
            stds_: None,
            is_fitted: false,
        }
    }

    /// Convenience constructor for the Box-Cox method.
    #[allow(dead_code)]
    pub fn box_cox(standardize: bool) -> Self {
        PowerTransformer {
            method: "box-cox".to_string(),
            standardize,
            lambdas_: None,
            means_: None,
            stds_: None,
            is_fitted: false,
        }
    }

    /// Estimates per-feature lambdas (and, if standardizing, means and
    /// standard deviations) from the training data.
    pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));

        if !x_f64.is_standard_layout() {
            return Err(TransformError::InvalidInput(
                "Input array must be in standard memory layout".to_string(),
            ));
        }

        let shape = x_f64.shape();
        let n_samples = shape[0];
        let nfeatures = shape[1];

        if n_samples == 0 || nfeatures == 0 {
            return Err(TransformError::InvalidInput("Empty input data".to_string()));
        }

        if self.method == "box-cox" && x_f64.iter().any(|&x| x <= 0.0) {
            return Err(TransformError::InvalidInput(
                "Box-Cox transformation requires strictly positive data".to_string(),
            ));
        }

        // Estimate the optimal lambda for each feature in parallel.
        let lambdas: Vec<f64> = (0..nfeatures)
            .into_par_iter()
            .map(|j| {
                let feature = x_f64.column(j).to_vec();
                self.optimize_lambda(&feature)
            })
            .collect();

        self.lambdas_ = Some(Array1::from_vec(lambdas));

        if self.standardize {
            let mut means = Array1::zeros(nfeatures);
            let mut stds = Array1::zeros(nfeatures);

            for j in 0..nfeatures {
                let lambda = self.lambdas_.as_ref().unwrap()[j];
                let mut transformed_feature = Array1::zeros(n_samples);

                for i in 0..n_samples {
                    let x = x_f64[[i, j]];
                    transformed_feature[i] = if self.method == "yeo-johnson" {
                        yeo_johnson_transform(x, lambda)
                    } else {
                        box_cox_transform(x, lambda)
                    };
                }

                let mean = transformed_feature.sum() / n_samples as f64;
                let variance = transformed_feature
                    .iter()
                    .map(|&x| (x - mean).powi(2))
                    .sum::<f64>()
                    / n_samples as f64;
                let std_dev = variance.sqrt();

                means[j] = mean;
                stds[j] = if std_dev > EPSILON { std_dev } else { 1.0 };
            }

            self.means_ = Some(means);
            self.stds_ = Some(stds);
        }

        self.is_fitted = true;
        Ok(())
    }

    /// Applies the fitted power transform to `x`.
    pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        if !self.is_fitted {
            return Err(TransformError::InvalidInput(
                "PowerTransformer must be fitted before transform".to_string(),
            ));
        }

        let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));

        if !x_f64.is_standard_layout() {
            return Err(TransformError::InvalidInput(
                "Input array must be in standard memory layout".to_string(),
            ));
        }

        let shape = x_f64.shape();
        let n_samples = shape[0];
        let nfeatures = shape[1];

        let lambdas = self.lambdas_.as_ref().unwrap();

        if nfeatures != lambdas.len() {
            return Err(TransformError::InvalidInput(
                "Number of features in transform data does not match fitted data".to_string(),
            ));
        }

        let mut transformed = Array2::zeros((n_samples, nfeatures));

        // Transform each feature in parallel with its fitted lambda.
        let transformed_data: Vec<Vec<f64>> = (0..nfeatures)
            .into_par_iter()
            .map(|j| {
                let lambda = lambdas[j];
                (0..n_samples)
                    .map(|i| {
                        let x = x_f64[[i, j]];
                        if self.method == "yeo-johnson" {
                            yeo_johnson_transform(x, lambda)
                        } else {
                            box_cox_transform(x, lambda)
                        }
                    })
                    .collect()
            })
            .collect();

        for j in 0..nfeatures {
            for i in 0..n_samples {
                transformed[[i, j]] = transformed_data[j][i];
            }
        }

        if self.standardize {
            let means = self.means_.as_ref().unwrap();
            let stds = self.stds_.as_ref().unwrap();

            for j in 0..nfeatures {
                let mean = means[j];
                let std = stds[j];

                for i in 0..n_samples {
                    transformed[[i, j]] = (transformed[[i, j]] - mean) / std;
                }
            }
        }

        Ok(transformed)
    }

    /// Fits the transformer to `x` and returns the transformed data.
    pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        self.fit(x)?;
        self.transform(x)
    }

    /// Maps transformed data back to the original space.
    pub fn inverse_transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        if !self.is_fitted {
            return Err(TransformError::InvalidInput(
                "PowerTransformer must be fitted before inverse_transform".to_string(),
            ));
        }

        let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));

        if !x_f64.is_standard_layout() {
            return Err(TransformError::InvalidInput(
                "Input array must be in standard memory layout".to_string(),
            ));
        }

        let shape = x_f64.shape();
        let n_samples = shape[0];
        let nfeatures = shape[1];

        let lambdas = self.lambdas_.as_ref().unwrap();

        if nfeatures != lambdas.len() {
            return Err(TransformError::InvalidInput(
                "Number of features in inverse transform data does not match fitted data"
                    .to_string(),
            ));
        }

        let mut x_normalized = x_f64.clone();

        // Undo the standardization first, then invert the power transform.
        if self.standardize {
            let means = self.means_.as_ref().unwrap();
            let stds = self.stds_.as_ref().unwrap();

            for j in 0..nfeatures {
                let mean = means[j];
                let std = stds[j];

                for i in 0..n_samples {
                    x_normalized[[i, j]] = x_normalized[[i, j]] * std + mean;
                }
            }
        }

        let mut original = Array2::zeros((n_samples, nfeatures));

        let original_data: Vec<Vec<f64>> = (0..nfeatures)
            .into_par_iter()
            .map(|j| {
                let lambda = lambdas[j];
                (0..n_samples)
                    .map(|i| {
                        let y = x_normalized[[i, j]];
                        if self.method == "yeo-johnson" {
                            yeo_johnson_inverse_transform(y, lambda)
                        } else {
                            box_cox_inverse_transform(y, lambda)
                        }
                    })
                    .collect()
            })
            .collect();

        for j in 0..nfeatures {
            for i in 0..n_samples {
                original[[i, j]] = original_data[j][i];
            }
        }

        Ok(original)
    }

    /// Finds the lambda in [-2, 2] that maximizes the log-likelihood, using
    /// golden-section search.
    fn optimize_lambda(&self, data: &[f64]) -> f64 {
        let mut a = -2.0;
        let mut b = 2.0;
        let tolerance = 1e-6;
        let golden_ratio = (5.0_f64.sqrt() - 1.0) / 2.0;

        let mut c = b - golden_ratio * (b - a);
        let mut d = a + golden_ratio * (b - a);

        while (b - a).abs() > tolerance {
            // Minimize the negative log-likelihood.
            let fc = -self.log_likelihood(data, c);
            let fd = -self.log_likelihood(data, d);

            if fc < fd {
                b = d;
                d = c;
                c = b - golden_ratio * (b - a);
            } else {
                a = c;
                c = d;
                d = a + golden_ratio * (b - a);
            }
        }

        (a + b) / 2.0
    }

    /// Computes the Gaussian log-likelihood of `data` transformed with the
    /// given `lambda`, including the log-Jacobian of the transform.
    fn log_likelihood(&self, data: &[f64], lambda: f64) -> f64 {
        let n = data.len() as f64;
        let mut log_likelihood = 0.0;

        let transformed: Vec<f64> = data
            .iter()
            .map(|&x| {
                if self.method == "yeo-johnson" {
                    yeo_johnson_transform(x, lambda)
                } else {
                    box_cox_transform(x, lambda)
                }
            })
            .collect();

        let mean = transformed.iter().sum::<f64>() / n;
        let variance = transformed.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;

        if variance <= 0.0 {
            return f64::NEG_INFINITY;
        }

        log_likelihood -= 0.5 * n * (2.0 * std::f64::consts::PI * variance).ln();
        log_likelihood -=
            0.5 * transformed.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / variance;

        // Add the log-Jacobian contribution of the transform.
        for &x in data {
            if self.method == "yeo-johnson" {
                log_likelihood += yeo_johnson_log_jacobian(x, lambda);
            } else {
                log_likelihood += box_cox_log_jacobian(x, lambda);
            }
        }

        log_likelihood
    }

    /// Returns the fitted per-feature lambdas, if available.
    #[allow(dead_code)]
    pub fn lambdas(&self) -> Option<&Array1<f64>> {
        self.lambdas_.as_ref()
    }

    /// Returns whether the transformer has been fitted.
    #[allow(dead_code)]
    pub fn is_fitted(&self) -> bool {
        self.is_fitted
    }
}

impl Default for PowerTransformer {
    fn default() -> Self {
        PowerTransformer {
            method: "yeo-johnson".to_string(),
            standardize: false,
            lambdas_: None,
            means_: None,
            stds_: None,
            is_fitted: false,
        }
    }
}

/// Inverts the Yeo-Johnson transform for a single value.
#[allow(dead_code)]
fn yeo_johnson_inverse_transform(y: f64, lambda: f64) -> f64 {
    if y >= 0.0 {
        if (lambda - 0.0).abs() < EPSILON {
            y.exp() - 1.0
        } else {
            (lambda * y + 1.0).powf(1.0 / lambda) - 1.0
        }
    } else if (lambda - 2.0).abs() < EPSILON {
        1.0 - (-y).exp()
    } else {
        1.0 - (-(2.0 - lambda) * y + 1.0).powf(1.0 / (2.0 - lambda))
    }
}

/// Inverts the Box-Cox transform for a single value.
#[allow(dead_code)]
fn box_cox_inverse_transform(y: f64, lambda: f64) -> f64 {
    if (lambda - 0.0).abs() < EPSILON {
        y.exp()
    } else {
        (lambda * y + 1.0).powf(1.0 / lambda)
    }
}

/// Log-Jacobian of the Yeo-Johnson transform at `x`.
#[allow(dead_code)]
fn yeo_johnson_log_jacobian(x: f64, lambda: f64) -> f64 {
    if x >= 0.0 {
        (lambda - 1.0) * (x + 1.0).ln()
    } else {
        (1.0 - lambda) * (-x + 1.0).ln()
    }
}

/// Log-Jacobian of the Box-Cox transform at `x`.
#[allow(dead_code)]
fn box_cox_log_jacobian(x: f64, lambda: f64) -> f64 {
    (lambda - 1.0) * x.ln()
}

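/// Applies an element-wise shifted log transform: `ln(x + epsilon)`.
///
/// A minimal usage sketch (marked `ignore` since the enclosing crate path is
/// not assumed here):
///
/// ```ignore
/// use scirs2_core::ndarray::array;
///
/// let x = array![[0.0, 1.0], [2.0, 3.0]];
/// let t = log_transform(&x, 1.0).unwrap();
/// assert!(t[[0, 0]].abs() < 1e-12); // ln(0 + 1) = 0
/// ```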
#[allow(dead_code)]
pub fn log_transform<S>(array: &ArrayBase<S, Ix2>, epsilon: f64) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if !array_f64.is_standard_layout() {
        return Err(TransformError::InvalidInput(
            "Input array must be in standard memory layout".to_string(),
        ));
    }

    if epsilon <= 0.0 {
        return Err(TransformError::InvalidInput(
            "epsilon must be a positive value".to_string(),
        ));
    }

    let mut transformed = Array2::zeros(array_f64.raw_dim());

    for i in 0..array_f64.shape()[0] {
        for j in 0..array_f64.shape()[1] {
            transformed[[i, j]] = (array_f64[[i, j]] + epsilon).ln();
        }
    }

    Ok(transformed)
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::Array;

    #[test]
    fn test_polynomial_features() {
        let data = Array::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0]).unwrap();

        let poly = PolynomialFeatures::new(2, false, true);
        let transformed = poly.transform(&data).unwrap();

        assert_eq!(transformed.shape(), &[2, 6]);

        // Expected values for the first row [1, 2]: 1, x1, x2, x1^2, x1*x2, x2^2.
        let expected_values = [1.0, 1.0, 2.0, 1.0, 2.0, 4.0];
        let mut found_values = [false; 6];

        for i in 0..6 {
            for j in 0..6 {
                if (transformed[[0, i]] - expected_values[j]).abs() < 1e-10 {
                    found_values[j] = true;
                }
            }
        }

        assert!(
            found_values.iter().all(|&x| x),
            "Not all expected values found in the first row"
        );

        // Expected values for the second row [3, 4].
        let expected_values = [1.0, 3.0, 4.0, 9.0, 12.0, 16.0];
        let mut found_values = [false; 6];

        for i in 0..6 {
            for j in 0..6 {
                if (transformed[[1, i]] - expected_values[j]).abs() < 1e-10 {
                    found_values[j] = true;
                }
            }
        }

        assert!(
            found_values.iter().all(|&x| x),
            "Not all expected values found in the second row"
        );
    }

    #[test]
    fn test_binarize() {
        let data = Array::from_shape_vec((2, 3), vec![1.0, -1.0, 2.0, 2.0, 0.0, -3.0]).unwrap();

        let binarized = binarize(&data, 0.0).unwrap();

        assert_eq!(binarized.shape(), &[2, 3]);

        assert_abs_diff_eq!(binarized[[0, 0]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(binarized[[0, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(binarized[[0, 2]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(binarized[[1, 0]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(binarized[[1, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(binarized[[1, 2]], 0.0, epsilon = 1e-10);
    }

    #[test]
    fn test_discretize_equal_width() {
        let data = Array::from_shape_vec((3, 2), vec![0.0, 0.0, 3.0, 5.0, 6.0, 10.0]).unwrap();

        let discretized = discretize_equal_width(&data, 2, "ordinal", 0).unwrap();

        assert_eq!(discretized.shape(), &[3, 2]);

        assert_abs_diff_eq!(discretized[[0, 0]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[0, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[1, 0]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[1, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[2, 0]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[2, 1]], 1.0, epsilon = 1e-10);

        let discretized = discretize_equal_width(&data, 2, "onehot", 0).unwrap();

        assert_eq!(discretized.shape(), &[3, 4]);

        assert_abs_diff_eq!(discretized[[0, 0]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[0, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[0, 2]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[0, 3]], 0.0, epsilon = 1e-10);

        assert_abs_diff_eq!(discretized[[1, 0]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[1, 1]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[1, 2]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[1, 3]], 0.0, epsilon = 1e-10);

        assert_abs_diff_eq!(discretized[[2, 0]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[2, 1]], 1.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[2, 2]], 0.0, epsilon = 1e-10);
        assert_abs_diff_eq!(discretized[[2, 3]], 1.0, epsilon = 1e-10);
    }

    #[test]
    fn test_log_transform() {
        let data = Array::from_shape_vec((2, 2), vec![0.0, 1.0, 2.0, 3.0]).unwrap();

        let transformed = log_transform(&data, 1.0).unwrap();

        assert_abs_diff_eq!(transformed[[0, 0]], (0.0 + 1.0).ln(), epsilon = 1e-10);
        assert_abs_diff_eq!(transformed[[0, 1]], (1.0 + 1.0).ln(), epsilon = 1e-10);
        assert_abs_diff_eq!(transformed[[1, 0]], (2.0 + 1.0).ln(), epsilon = 1e-10);
        assert_abs_diff_eq!(transformed[[1, 1]], (3.0 + 1.0).ln(), epsilon = 1e-10);
    }

    #[test]
    fn test_power_transformer_yeo_johnson() {
        let data =
            Array::from_shape_vec((4, 2), vec![1.0, -1.0, 2.0, 0.5, 3.0, -2.0, 0.1, 1.5]).unwrap();

        let mut transformer = PowerTransformer::yeo_johnson(false);

        assert!(!transformer.is_fitted());

        transformer.fit(&data).unwrap();
        assert!(transformer.is_fitted());

        let lambdas = transformer.lambdas().unwrap();
        assert_eq!(lambdas.len(), 2);

        let transformed = transformer.transform(&data).unwrap();
        assert_eq!(transformed.shape(), data.shape());

        let inverse = transformer.inverse_transform(&transformed).unwrap();
        assert_eq!(inverse.shape(), data.shape());

        for i in 0..data.shape()[0] {
            for j in 0..data.shape()[1] {
                assert_abs_diff_eq!(inverse[[i, j]], data[[i, j]], epsilon = 1e-6);
            }
        }
    }

    #[test]
    fn test_power_transformer_box_cox() {
        let data =
            Array::from_shape_vec((4, 2), vec![1.0, 2.0, 3.0, 4.0, 0.5, 1.5, 2.5, 3.5]).unwrap();

        let mut transformer = PowerTransformer::box_cox(false);

        transformer.fit(&data).unwrap();

        let transformed = transformer.transform(&data).unwrap();
        assert_eq!(transformed.shape(), data.shape());

        let inverse = transformer.inverse_transform(&transformed).unwrap();

        for i in 0..data.shape()[0] {
            for j in 0..data.shape()[1] {
                assert_abs_diff_eq!(inverse[[i, j]], data[[i, j]], epsilon = 1e-6);
            }
        }
    }

    #[test]
    fn test_power_transformer_standardized() {
        let data = Array::from_shape_vec(
            (5, 2),
            vec![1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0, 5.0, 6.0],
        )
        .unwrap();

        let mut transformer = PowerTransformer::yeo_johnson(true);

        let transformed = transformer.fit_transform(&data).unwrap();

        for j in 0..transformed.shape()[1] {
            let column = transformed.column(j);
            let mean = column.sum() / column.len() as f64;
            let variance =
                column.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / column.len() as f64;

            assert_abs_diff_eq!(mean, 0.0, epsilon = 1e-10);
            assert_abs_diff_eq!(variance, 1.0, epsilon = 1e-10);
        }
    }

    #[test]
    fn test_power_transformer_box_cox_negative_data() {
        let data = Array::from_shape_vec((2, 2), vec![1.0, -1.0, 2.0, 3.0]).unwrap();

        let mut transformer = PowerTransformer::box_cox(false);

        assert!(transformer.fit(&data).is_err());
    }

    #[test]
    fn test_power_transformer_fit_transform() {
        let data = Array::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();

        let mut transformer = PowerTransformer::yeo_johnson(false);

        let transformed1 = transformer.fit_transform(&data).unwrap();

        let mut transformer2 = PowerTransformer::yeo_johnson(false);
        transformer2.fit(&data).unwrap();
        let transformed2 = transformer2.transform(&data).unwrap();

        for i in 0..transformed1.shape()[0] {
            for j in 0..transformed1.shape()[1] {
                assert_abs_diff_eq!(transformed1[[i, j]], transformed2[[i, j]], epsilon = 1e-12);
            }
        }
    }

    #[test]
    fn test_power_transformer_different_data_sizes() {
        let train_data = Array::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        let test_data = Array::from_shape_vec((2, 2), vec![2.5, 3.5, 4.5, 5.5]).unwrap();

        let mut transformer = PowerTransformer::yeo_johnson(false);

        transformer.fit(&train_data).unwrap();

        let transformed = transformer.transform(&test_data).unwrap();
        assert_eq!(transformed.shape(), test_data.shape());

        let inverse = transformer.inverse_transform(&transformed).unwrap();

        for i in 0..test_data.shape()[0] {
            for j in 0..test_data.shape()[1] {
                assert_abs_diff_eq!(inverse[[i, j]], test_data[[i, j]], epsilon = 1e-6);
            }
        }
    }

    #[test]
    fn test_power_transformer_mismatched_features() {
        let train_data = Array::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let test_data = Array::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();

        let mut transformer = PowerTransformer::yeo_johnson(false);
        transformer.fit(&train_data).unwrap();

        assert!(transformer.transform(&test_data).is_err());
    }

    #[test]
    fn test_power_transformer_not_fitted() {
        let data = Array::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let transformer = PowerTransformer::yeo_johnson(false);

        assert!(transformer.transform(&data).is_err());
        assert!(transformer.inverse_transform(&data).is_err());
    }

    #[test]
    fn test_power_transformer_creation_methods() {
        let transformer1 = PowerTransformer::new("yeo-johnson", true).unwrap();
        let transformer2 = PowerTransformer::yeo_johnson(true);

        assert_eq!(transformer1.method, transformer2.method);
        assert_eq!(transformer1.standardize, transformer2.standardize);

        let transformer3 = PowerTransformer::new("box-cox", false).unwrap();
        let transformer4 = PowerTransformer::box_cox(false);

        assert_eq!(transformer3.method, transformer4.method);
        assert_eq!(transformer3.standardize, transformer4.standardize);

        assert!(PowerTransformer::new("invalid", false).is_err());
    }

    #[test]
    fn test_power_transformer_empty_data() {
        let empty_data = Array2::<f64>::zeros((0, 2));
        let mut transformer = PowerTransformer::yeo_johnson(false);

        assert!(transformer.fit(&empty_data).is_err());
    }

    #[test]
    fn test_yeo_johnson_inverse_functions() {
        let test_values = vec![-2.0, -0.5, 0.0, 0.5, 1.0, 2.0];
        let lambdas = vec![0.0, 0.5, 1.0, 1.5, 2.0];

        for &lambda in &lambdas {
            for &x in &test_values {
                let y = yeo_johnson_transform(x, lambda);
                let x_recovered = yeo_johnson_inverse_transform(y, lambda);
                assert_abs_diff_eq!(x_recovered, x, epsilon = 1e-10);
            }
        }
    }

    #[test]
    fn test_box_cox_inverse_functions() {
        let test_values = vec![0.1, 0.5, 1.0, 2.0, 5.0];
        let lambdas = vec![0.0, 0.5, 1.0, 1.5, 2.0];

        for &lambda in &lambdas {
            for &x in &test_values {
                let y = box_cox_transform(x, lambda);
                let x_recovered = box_cox_inverse_transform(y, lambda);
                assert_abs_diff_eq!(x_recovered, x, epsilon = 1e-10);
            }
        }
    }
}