1use scirs2_core::ndarray::{Array1, Array2, Axis};
20use sklears_core::{
21 error::{Result, SklearsError},
22 traits::{Fit, Trained, Transform, Untrained},
23 types::Float,
24};
25use std::marker::PhantomData;
26
27#[derive(Debug, Clone, Copy)]
29pub enum OutlierTransformationMethod {
30 Log,
32 Log1p,
34 Sqrt,
36 BoxCox,
38 BoxCoxFixed(Float),
40 QuantileUniform,
42 QuantileNormal,
44 RobustScale,
46 Interpolate,
48 Smooth,
50 Trim,
52}
53
54#[derive(Debug, Clone)]
56pub struct OutlierTransformationConfig {
57 pub method: OutlierTransformationMethod,
59 pub outlier_threshold: Float,
61 pub detection_method: String,
63 pub lower_percentile: Float,
65 pub upper_percentile: Float,
67 pub smoothing_window: usize,
69 pub n_quantiles: usize,
71 pub handle_negatives: bool,
73 pub log_epsilon: Float,
75 pub feature_wise: bool,
77}
78
79impl Default for OutlierTransformationConfig {
80 fn default() -> Self {
81 Self {
82 method: OutlierTransformationMethod::Log1p,
83 outlier_threshold: 3.0,
84 detection_method: "z-score".to_string(),
85 lower_percentile: 1.0,
86 upper_percentile: 99.0,
87 smoothing_window: 5,
88 n_quantiles: 1000,
89 handle_negatives: true,
90 log_epsilon: 1e-8,
91 feature_wise: true,
92 }
93 }
94}
95
96#[derive(Debug, Clone)]
98pub struct OutlierTransformer<State = Untrained> {
99 config: OutlierTransformationConfig,
100 state: PhantomData<State>,
101 transformation_params_: Option<TransformationParameters>,
103 n_features_in_: Option<usize>,
104}
105
106#[derive(Debug, Clone)]
108pub struct TransformationParameters {
109 pub feature_params: Vec<FeatureTransformationParams>,
111 pub global_params: Option<GlobalTransformationParams>,
113}
114
115#[derive(Debug, Clone)]
117pub struct FeatureTransformationParams {
118 pub lambda: Option<Float>,
120 pub shift: Float,
122 pub quantiles: Option<Array1<Float>>,
124 pub references: Option<Array1<Float>>,
126 pub median: Option<Float>,
128 pub iqr: Option<Float>,
129 pub lower_bound: Option<Float>,
131 pub upper_bound: Option<Float>,
132 pub mean: Option<Float>,
134 pub std: Option<Float>,
135}
136
137#[derive(Debug, Clone)]
139pub struct GlobalTransformationParams {
140 pub global_shift: Float,
142 pub global_lambda: Option<Float>,
144}
145
146impl OutlierTransformer<Untrained> {
147 pub fn new() -> Self {
149 Self {
150 config: OutlierTransformationConfig::default(),
151 state: PhantomData,
152 transformation_params_: None,
153 n_features_in_: None,
154 }
155 }
156
157 pub fn log() -> Self {
159 Self::new().method(OutlierTransformationMethod::Log)
160 }
161
162 pub fn log1p() -> Self {
164 Self::new().method(OutlierTransformationMethod::Log1p)
165 }
166
167 pub fn sqrt() -> Self {
169 Self::new().method(OutlierTransformationMethod::Sqrt)
170 }
171
172 pub fn box_cox() -> Self {
174 Self::new().method(OutlierTransformationMethod::BoxCox)
175 }
176
177 pub fn box_cox_fixed(lambda: Float) -> Self {
179 Self::new().method(OutlierTransformationMethod::BoxCoxFixed(lambda))
180 }
181
182 pub fn quantile_uniform(n_quantiles: usize) -> Self {
184 Self::new()
185 .method(OutlierTransformationMethod::QuantileUniform)
186 .n_quantiles(n_quantiles)
187 }
188
189 pub fn quantile_normal(n_quantiles: usize) -> Self {
191 Self::new()
192 .method(OutlierTransformationMethod::QuantileNormal)
193 .n_quantiles(n_quantiles)
194 }
195
196 pub fn robust_scale() -> Self {
198 Self::new().method(OutlierTransformationMethod::RobustScale)
199 }
200
201 pub fn interpolate(threshold: Float, detection_method: &str) -> Self {
203 Self::new()
204 .method(OutlierTransformationMethod::Interpolate)
205 .outlier_threshold(threshold)
206 .detection_method(detection_method.to_string())
207 }
208
209 pub fn smooth(window_size: usize, threshold: Float) -> Self {
211 Self::new()
212 .method(OutlierTransformationMethod::Smooth)
213 .smoothing_window(window_size)
214 .outlier_threshold(threshold)
215 }
216
217 pub fn trim(lower_percentile: Float, upper_percentile: Float) -> Self {
219 Self::new()
220 .method(OutlierTransformationMethod::Trim)
221 .lower_percentile(lower_percentile)
222 .upper_percentile(upper_percentile)
223 }
224
225 pub fn method(mut self, method: OutlierTransformationMethod) -> Self {
227 self.config.method = method;
228 self
229 }
230
231 pub fn outlier_threshold(mut self, threshold: Float) -> Self {
233 self.config.outlier_threshold = threshold;
234 self
235 }
236
237 pub fn detection_method(mut self, method: String) -> Self {
239 self.config.detection_method = method;
240 self
241 }
242
243 pub fn lower_percentile(mut self, percentile: Float) -> Self {
245 self.config.lower_percentile = percentile;
246 self
247 }
248
249 pub fn upper_percentile(mut self, percentile: Float) -> Self {
251 self.config.upper_percentile = percentile;
252 self
253 }
254
255 pub fn smoothing_window(mut self, window: usize) -> Self {
257 self.config.smoothing_window = window;
258 self
259 }
260
261 pub fn n_quantiles(mut self, n_quantiles: usize) -> Self {
263 self.config.n_quantiles = n_quantiles;
264 self
265 }
266
267 pub fn handle_negatives(mut self, handle: bool) -> Self {
269 self.config.handle_negatives = handle;
270 self
271 }
272
273 pub fn log_epsilon(mut self, epsilon: Float) -> Self {
275 self.config.log_epsilon = epsilon;
276 self
277 }
278
279 pub fn feature_wise(mut self, feature_wise: bool) -> Self {
281 self.config.feature_wise = feature_wise;
282 self
283 }
284}
285
286impl Fit<Array2<Float>, ()> for OutlierTransformer<Untrained> {
287 type Fitted = OutlierTransformer<Trained>;
288
289 fn fit(mut self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
290 let (n_samples, n_features) = x.dim();
291
292 if n_samples == 0 || n_features == 0 {
293 return Err(SklearsError::InvalidInput(
294 "Input array is empty".to_string(),
295 ));
296 }
297
298 self.n_features_in_ = Some(n_features);
299
300 let feature_params = if self.config.feature_wise {
302 (0..n_features)
303 .map(|j| {
304 self.fit_feature_params(
305 x.column(j)
306 .to_owned()
307 .as_slice()
308 .expect("matrix indexing should be valid"),
309 )
310 })
311 .collect::<Result<Vec<_>>>()?
312 } else {
313 vec![self.fit_feature_params(x.as_slice().expect("slice operation should succeed"))?]
315 };
316
317 self.transformation_params_ = Some(TransformationParameters {
318 feature_params,
319 global_params: None, });
321
322 Ok(OutlierTransformer {
323 config: self.config,
324 state: PhantomData,
325 transformation_params_: self.transformation_params_,
326 n_features_in_: self.n_features_in_,
327 })
328 }
329}
330
331impl OutlierTransformer<Untrained> {
332 fn fit_feature_params(&self, data: &[Float]) -> Result<FeatureTransformationParams> {
334 let mut params = FeatureTransformationParams {
335 lambda: None,
336 shift: 0.0,
337 quantiles: None,
338 references: None,
339 median: None,
340 iqr: None,
341 lower_bound: None,
342 upper_bound: None,
343 mean: None,
344 std: None,
345 };
346
347 let valid_data: Vec<Float> = data.iter().filter(|x| x.is_finite()).copied().collect();
349
350 if valid_data.is_empty() {
351 return Ok(params);
352 }
353
354 let mean = valid_data.iter().sum::<Float>() / valid_data.len() as Float;
355 let variance = valid_data.iter().map(|x| (x - mean).powi(2)).sum::<Float>()
356 / valid_data.len() as Float;
357 let std = variance.sqrt();
358
359 params.mean = Some(mean);
360 params.std = Some(std);
361
362 let mut sorted_data = valid_data.clone();
364 sorted_data.sort_by(|a, b| a.partial_cmp(b).expect("operation should succeed"));
365
366 let median = if sorted_data.len() % 2 == 0 {
367 let mid = sorted_data.len() / 2;
368 (sorted_data[mid - 1] + sorted_data[mid]) / 2.0
369 } else {
370 sorted_data[sorted_data.len() / 2]
371 };
372
373 let q1_idx = sorted_data.len() / 4;
374 let q3_idx = 3 * sorted_data.len() / 4;
375 let q1 = sorted_data[q1_idx];
376 let q3 = sorted_data[q3_idx];
377 let iqr = q3 - q1;
378
379 params.median = Some(median);
380 params.iqr = Some(iqr);
381
382 match self.config.detection_method.as_str() {
384 "z-score" => {
385 params.lower_bound = Some(mean - self.config.outlier_threshold * std);
386 params.upper_bound = Some(mean + self.config.outlier_threshold * std);
387 }
388 "iqr" => {
389 params.lower_bound = Some(q1 - self.config.outlier_threshold * iqr);
390 params.upper_bound = Some(q3 + self.config.outlier_threshold * iqr);
391 }
392 "percentile" => {
393 let lower_idx =
394 ((self.config.lower_percentile / 100.0) * sorted_data.len() as Float) as usize;
395 let upper_idx =
396 ((self.config.upper_percentile / 100.0) * sorted_data.len() as Float) as usize;
397 params.lower_bound = Some(sorted_data[lower_idx.min(sorted_data.len() - 1)]);
398 params.upper_bound = Some(sorted_data[upper_idx.min(sorted_data.len() - 1)]);
399 }
400 _ => {
401 return Err(SklearsError::InvalidInput(format!(
402 "Unknown detection method: {}",
403 self.config.detection_method
404 )));
405 }
406 }
407
408 if self.config.handle_negatives {
410 match self.config.method {
411 OutlierTransformationMethod::Log | OutlierTransformationMethod::Sqrt => {
412 let min_val = sorted_data[0];
413 if min_val <= 0.0 {
414 params.shift = -min_val + self.config.log_epsilon;
415 }
416 }
417 OutlierTransformationMethod::BoxCox
418 | OutlierTransformationMethod::BoxCoxFixed(_) => {
419 let min_val = sorted_data[0];
420 if min_val <= 0.0 {
421 params.shift = -min_val + self.config.log_epsilon;
422 }
423 }
424 _ => {}
425 }
426 }
427
428 match self.config.method {
430 OutlierTransformationMethod::BoxCox => {
431 params.lambda = Some(self.estimate_box_cox_lambda(&valid_data, params.shift)?);
432 }
433 OutlierTransformationMethod::BoxCoxFixed(lambda) => {
434 params.lambda = Some(lambda);
435 }
436 OutlierTransformationMethod::QuantileUniform
437 | OutlierTransformationMethod::QuantileNormal => {
438 params.quantiles = Some(self.compute_quantiles(&sorted_data)?);
439 params.references = Some(self.compute_references()?);
440 }
441 _ => {}
442 }
443
444 Ok(params)
445 }
446
447 fn estimate_box_cox_lambda(&self, data: &[Float], shift: Float) -> Result<Float> {
449 let shifted_data: Vec<Float> = data.iter().map(|x| x + shift).collect();
450
451 let lambda_range: Vec<Float> = (-20..=20).map(|i| i as Float * 0.1).collect();
453
454 let mut best_lambda = 0.0;
455 let mut best_llf = Float::NEG_INFINITY;
456
457 for &lambda in &lambda_range {
458 if let Ok(llf) = self.box_cox_log_likelihood(&shifted_data, lambda) {
459 if llf > best_llf {
460 best_llf = llf;
461 best_lambda = lambda;
462 }
463 }
464 }
465
466 Ok(best_lambda)
467 }
468
469 fn box_cox_log_likelihood(&self, data: &[Float], lambda: Float) -> Result<Float> {
471 let n = data.len() as Float;
472
473 let transformed: Vec<Float> = data
475 .iter()
476 .map(|&x| {
477 if x <= 0.0 {
478 return Float::NAN;
479 }
480 if lambda.abs() < 1e-10 {
481 x.ln()
482 } else {
483 (x.powf(lambda) - 1.0) / lambda
484 }
485 })
486 .collect();
487
488 if transformed.iter().any(|x| !x.is_finite()) {
490 return Err(SklearsError::InvalidInput(
491 "Invalid Box-Cox transformation".to_string(),
492 ));
493 }
494
495 let mean = transformed.iter().sum::<Float>() / n;
497 let variance = transformed
498 .iter()
499 .map(|x| (x - mean).powi(2))
500 .sum::<Float>()
501 / n;
502
503 let log_jacobian = (lambda - 1.0) * data.iter().map(|x| x.ln()).sum::<Float>();
504 let llf = -0.5 * n * (2.0 * std::f64::consts::PI as Float).ln()
505 - 0.5 * n * variance.ln()
506 - 0.5 * n
507 + log_jacobian;
508
509 Ok(llf)
510 }
511
512 fn compute_quantiles(&self, sorted_data: &[Float]) -> Result<Array1<Float>> {
514 let n_quantiles = self.config.n_quantiles.min(sorted_data.len());
515 let mut quantiles = Array1::zeros(n_quantiles);
516
517 for i in 0..n_quantiles {
518 let q = i as Float / (n_quantiles - 1) as Float;
519 let idx = (q * (sorted_data.len() - 1) as Float) as usize;
520 quantiles[i] = sorted_data[idx.min(sorted_data.len() - 1)];
521 }
522
523 Ok(quantiles)
524 }
525
526 fn compute_references(&self) -> Result<Array1<Float>> {
528 let n_quantiles = self.config.n_quantiles;
529 let mut references = Array1::zeros(n_quantiles);
530
531 match self.config.method {
532 OutlierTransformationMethod::QuantileUniform => {
533 for i in 0..n_quantiles {
534 references[i] = i as Float / (n_quantiles - 1) as Float;
535 }
536 }
537 OutlierTransformationMethod::QuantileNormal => {
538 for i in 0..n_quantiles {
540 let p = i as Float / (n_quantiles - 1) as Float;
541 references[i] = self.inverse_normal_cdf(p);
542 }
543 }
544 _ => {
545 return Err(SklearsError::InvalidInput(
546 "Invalid quantile method".to_string(),
547 ));
548 }
549 }
550
551 Ok(references)
552 }
553
554 fn inverse_normal_cdf(&self, p: Float) -> Float {
556 if p <= 0.0 {
557 return Float::NEG_INFINITY;
558 }
559 if p >= 1.0 {
560 return Float::INFINITY;
561 }
562 if p == 0.5 {
563 return 0.0;
564 }
565
566 let a = [
569 -3.969683028665376e+01,
570 2.209460984245205e+02,
571 -2.759285104469687e+02,
572 1.383577518672690e+02,
573 -3.066479806614716e+01,
574 2.506628277459239e+00,
575 ];
576 let b = [
577 -5.447609879822406e+01,
578 1.615858368580409e+02,
579 -1.556989798598866e+02,
580 6.680131188771972e+01,
581 -1.328068155288572e+01,
582 ];
583
584 let q = if p > 0.5 { 1.0 - p } else { p };
585 let t = (-2.0 * q.ln()).sqrt();
586
587 let mut num = a[5];
588 for i in (0..5).rev() {
589 num = num * t + a[i];
590 }
591
592 let mut den = 1.0;
593 for i in (0..5).rev() {
594 den = den * t + b[i];
595 }
596
597 let x = t - num / den;
598 if p > 0.5 {
599 x
600 } else {
601 -x
602 }
603 }
604}
605
606impl Transform<Array2<Float>, Array2<Float>> for OutlierTransformer<Trained> {
607 fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
608 let (_n_samples, n_features) = x.dim();
609
610 if n_features != self.n_features_in().expect("operation should succeed") {
611 return Err(SklearsError::FeatureMismatch {
612 expected: self.n_features_in().expect("operation should succeed"),
613 actual: n_features,
614 });
615 }
616
617 let params = self
618 .transformation_params_
619 .as_ref()
620 .expect("operation should succeed");
621 let mut result = x.clone();
622
623 if self.config.feature_wise {
624 for j in 0..n_features {
625 let feature_params = ¶ms.feature_params[j];
626 let mut column = result.column_mut(j);
627 self.transform_feature_inplace(&mut column, feature_params)?;
628 }
629 } else {
630 let feature_params = ¶ms.feature_params[0];
632 for mut row in result.axis_iter_mut(Axis(0)) {
633 for elem in row.iter_mut() {
634 *elem = self.transform_value(*elem, feature_params)?;
635 }
636 }
637 }
638
639 Ok(result)
640 }
641}
642
643impl OutlierTransformer<Trained> {
644 pub fn n_features_in(&self) -> Option<usize> {
646 self.n_features_in_
647 }
648
649 fn transform_feature_inplace(
651 &self,
652 column: &mut scirs2_core::ndarray::ArrayViewMut1<Float>,
653 params: &FeatureTransformationParams,
654 ) -> Result<()> {
655 for elem in column.iter_mut() {
656 *elem = self.transform_value(*elem, params)?;
657 }
658 Ok(())
659 }
660
661 fn transform_value(&self, value: Float, params: &FeatureTransformationParams) -> Result<Float> {
663 if !value.is_finite() {
664 return Ok(value);
665 }
666
667 match self.config.method {
668 OutlierTransformationMethod::Log => {
669 let shifted = value + params.shift;
670 if shifted <= 0.0 {
671 Ok(Float::NAN)
672 } else {
673 Ok(shifted.ln())
674 }
675 }
676 OutlierTransformationMethod::Log1p => Ok((value + params.shift).ln_1p()),
677 OutlierTransformationMethod::Sqrt => {
678 let shifted = value + params.shift;
679 if shifted < 0.0 {
680 Ok(Float::NAN)
681 } else {
682 Ok(shifted.sqrt())
683 }
684 }
685 OutlierTransformationMethod::BoxCox | OutlierTransformationMethod::BoxCoxFixed(_) => {
686 let lambda = params.lambda.unwrap_or(0.0);
687 let shifted = value + params.shift;
688 if shifted <= 0.0 {
689 return Ok(Float::NAN);
690 }
691 if lambda.abs() < 1e-10 {
692 Ok(shifted.ln())
693 } else {
694 Ok((shifted.powf(lambda) - 1.0) / lambda)
695 }
696 }
697 OutlierTransformationMethod::QuantileUniform
698 | OutlierTransformationMethod::QuantileNormal => {
699 self.quantile_transform_value(value, params)
700 }
701 OutlierTransformationMethod::RobustScale => {
702 let median = params.median.unwrap_or(0.0);
703 let iqr = params.iqr.unwrap_or(1.0);
704 if iqr > 0.0 {
705 Ok((value - median) / iqr)
706 } else {
707 Ok(0.0)
708 }
709 }
710 OutlierTransformationMethod::Interpolate => self.interpolate_value(value, params),
711 OutlierTransformationMethod::Smooth => {
712 Ok(value)
714 }
715 OutlierTransformationMethod::Trim => {
716 let lower = params.lower_bound.unwrap_or(Float::NEG_INFINITY);
717 let upper = params.upper_bound.unwrap_or(Float::INFINITY);
718 Ok(value.max(lower).min(upper))
719 }
720 }
721 }
722
723 fn quantile_transform_value(
725 &self,
726 value: Float,
727 params: &FeatureTransformationParams,
728 ) -> Result<Float> {
729 let quantiles = params.quantiles.as_ref().expect("operation should succeed");
730 let references = params
731 .references
732 .as_ref()
733 .expect("operation should succeed");
734
735 let mut pos = 0;
737 for (i, &q) in quantiles.iter().enumerate() {
738 if value <= q {
739 pos = i;
740 break;
741 }
742 pos = i + 1;
743 }
744
745 pos = pos.min(references.len() - 1);
746 Ok(references[pos])
747 }
748
749 fn interpolate_value(
751 &self,
752 value: Float,
753 params: &FeatureTransformationParams,
754 ) -> Result<Float> {
755 let lower = params.lower_bound.unwrap_or(Float::NEG_INFINITY);
756 let upper = params.upper_bound.unwrap_or(Float::INFINITY);
757
758 if value < lower {
759 Ok(lower)
760 } else if value > upper {
761 Ok(upper)
762 } else {
763 Ok(value)
764 }
765 }
766
767 pub fn transformation_params(&self) -> Option<&TransformationParameters> {
769 self.transformation_params_.as_ref()
770 }
771
772 pub fn feature_stats(&self, feature_idx: usize) -> Option<&FeatureTransformationParams> {
774 self.transformation_params_
775 .as_ref()?
776 .feature_params
777 .get(feature_idx)
778 }
779}
780
781impl Default for OutlierTransformer<Untrained> {
782 fn default() -> Self {
783 Self::new()
784 }
785}
786
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::Array2;

    /// Fits `transformer` on `data`, transforms the same data and returns both the
    /// fitted transformer and the transformed array.
    fn fit_and_transform(
        transformer: OutlierTransformer<Untrained>,
        data: &Array2<f64>,
    ) -> (OutlierTransformer<Trained>, Array2<f64>) {
        let fitted = transformer
            .fit(data, &())
            .expect("model fitting should succeed");
        let result = fitted
            .transform(data)
            .expect("transformation should succeed");
        (fitted, result)
    }

    #[test]
    fn test_log_transformation() {
        let values = vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 100.0, 1000.0, 4.0, 40.0];
        let data = Array2::from_shape_vec((5, 2), values).expect("operation should succeed");

        let (_fitted, result) = fit_and_transform(OutlierTransformer::log(), &data);

        assert_eq!(result.dim(), data.dim());
        // Regular value and the extreme value are both plain logarithms.
        assert_relative_eq!(result[[0, 0]], 1.0_f64.ln(), epsilon = 1e-10);
        assert_relative_eq!(result[[3, 0]], 100.0_f64.ln(), epsilon = 1e-10);
    }

    #[test]
    fn test_log1p_transformation() {
        let data = Array2::from_shape_vec((4, 1), vec![0.0, 1.0, 10.0, 100.0])
            .expect("shape and data length should match");

        let (_fitted, result) = fit_and_transform(OutlierTransformer::log1p(), &data);

        assert_eq!(result.dim(), data.dim());
        // ln(1 + 0) = 0 and ln(1 + 1) = ln(2).
        assert_relative_eq!(result[[0, 0]], 0.0, epsilon = 1e-10);
        assert_relative_eq!(result[[1, 0]], (2.0_f64).ln(), epsilon = 1e-10);
    }

    #[test]
    fn test_sqrt_transformation() {
        let data = Array2::from_shape_vec((4, 1), vec![1.0, 4.0, 9.0, 100.0])
            .expect("shape and data length should match");

        let (_fitted, result) = fit_and_transform(OutlierTransformer::sqrt(), &data);

        assert_eq!(result.dim(), data.dim());
        assert_relative_eq!(result[[0, 0]], 1.0, epsilon = 1e-10);
        assert_relative_eq!(result[[3, 0]], 10.0, epsilon = 1e-10);
    }

    #[test]
    fn test_robust_scale_transformation() {
        let values = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 100.0];
        let data = Array2::from_shape_vec((7, 1), values).expect("operation should succeed");

        let (fitted, result) = fit_and_transform(OutlierTransformer::robust_scale(), &data);

        assert_eq!(result.dim(), data.dim());

        // Robust statistics must have been recorded for the single feature.
        let params = fitted.feature_stats(0).expect("operation should succeed");
        assert!(params.median.is_some());
        assert!(params.iqr.is_some());
    }

    #[test]
    fn test_interpolate_transformation() {
        let data = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 100.0])
            .expect("shape and data length should match");

        let (fitted, result) =
            fit_and_transform(OutlierTransformer::interpolate(2.0, "z-score"), &data);

        assert_eq!(result.dim(), data.dim());

        // In-range values are left untouched.
        assert_relative_eq!(result[[0, 0]], 1.0, epsilon = 1e-10);
        assert_relative_eq!(result[[1, 0]], 2.0, epsilon = 1e-10);

        let params = fitted.feature_stats(0).expect("operation should succeed");
        assert!(params.upper_bound.is_some());
    }

    #[test]
    fn test_trim_transformation() {
        let values = (1..=10)
            .map(|x| x as f64)
            .chain(std::iter::once(1000.0))
            .collect();
        let data = Array2::from_shape_vec((11, 1), values).expect("operation should succeed");

        let (fitted, result) = fit_and_transform(OutlierTransformer::trim(10.0, 90.0), &data);

        assert_eq!(result.dim(), data.dim());

        let params = fitted.feature_stats(0).expect("operation should succeed");
        assert!(params.lower_bound.is_some());
        assert!(params.upper_bound.is_some());
    }

    #[test]
    fn test_box_cox_transformation() {
        let data = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 100.0])
            .expect("shape and data length should match");

        let (fitted, result) = fit_and_transform(OutlierTransformer::box_cox_fixed(0.5), &data);

        assert_eq!(result.dim(), data.dim());

        // The fixed lambda must be stored verbatim.
        let params = fitted.feature_stats(0).expect("operation should succeed");
        assert!(params.lambda.is_some());
        assert_relative_eq!(
            params.lambda.expect("operation should succeed"),
            0.5,
            epsilon = 1e-10
        );
    }

    #[test]
    fn test_handle_negative_values() {
        let data = Array2::from_shape_vec((4, 1), vec![-2.0, -1.0, 1.0, 100.0])
            .expect("shape and data length should match");

        let (fitted, result) =
            fit_and_transform(OutlierTransformer::log().handle_negatives(true), &data);

        assert_eq!(result.dim(), data.dim());

        // Negative inputs require a strictly positive shift.
        let params = fitted.feature_stats(0).expect("operation should succeed");
        assert!(params.shift > 0.0);
    }

    #[test]
    fn test_feature_wise_vs_global() {
        let values = vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 100.0, 1000.0];
        let data = Array2::from_shape_vec((4, 2), values).expect("operation should succeed");

        let (_fitted_fw, result_fw) =
            fit_and_transform(OutlierTransformer::log().feature_wise(true), &data);
        let (_fitted_global, result_global) =
            fit_and_transform(OutlierTransformer::log().feature_wise(false), &data);

        assert_eq!(result_fw.dim(), data.dim());
        assert_eq!(result_global.dim(), data.dim());
    }

    #[test]
    fn test_transformation_error_handling() {
        let data = Array2::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0])
            .expect("shape and data length should match");
        let fitted = OutlierTransformer::log()
            .fit(&data, &())
            .expect("model fitting should succeed");

        // A different number of columns than seen during fitting must be rejected.
        let wrong_data = Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
            .expect("shape and data length should match");
        assert!(fitted.transform(&wrong_data).is_err());
    }

    #[test]
    fn test_detection_methods() {
        let data = Array2::from_shape_vec((7, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 100.0])
            .expect("shape and data length should match");

        for method in ["z-score", "iqr", "percentile"] {
            let (fitted, result) =
                fit_and_transform(OutlierTransformer::interpolate(2.0, method), &data);

            assert_eq!(result.dim(), data.dim());

            let params = fitted.feature_stats(0).expect("operation should succeed");
            assert!(params.lower_bound.is_some());
            assert!(params.upper_bound.is_some());
        }
    }
}