1use scirs2_core::ndarray::{Array1, Array2, Axis};
20use sklears_core::{
21 error::{Result, SklearsError},
22 traits::{Fit, Trained, Transform, Untrained},
23 types::Float,
24};
25use std::marker::PhantomData;
26
/// Strategy applied to each finite value by a fitted [`OutlierTransformer`].
#[derive(Debug, Clone, Copy)]
pub enum OutlierTransformationMethod {
    /// Natural logarithm of the shifted value, `ln(value + shift)`.
    Log,
    /// `ln(1 + value + shift)`, evaluated with `ln_1p`.
    Log1p,
    /// Square root of the shifted value.
    Sqrt,
    /// Box-Cox transform whose lambda is estimated at fit time by a grid search.
    BoxCox,
    /// Box-Cox transform using the lambda supplied by the caller.
    BoxCoxFixed(Float),
    /// Quantile mapping onto reference values spread over `[0, 1]`.
    QuantileUniform,
    /// Quantile mapping onto reference values taken from the inverse normal CDF.
    QuantileNormal,
    /// Subtracts the fitted median and divides by the fitted inter-quartile range.
    RobustScale,
    /// Replaces values outside the fitted bounds with the nearest bound.
    Interpolate,
    /// Currently a pass-through at transform time: values are returned unchanged.
    Smooth,
    /// Clamps values into the fitted `[lower_bound, upper_bound]` interval.
    Trim,
}
53
/// User-facing settings that control how an [`OutlierTransformer`] is fitted
/// and applied.
#[derive(Debug, Clone)]
pub struct OutlierTransformationConfig {
    /// Transformation applied to every finite value.
    pub method: OutlierTransformationMethod,
    /// Multiplier used when deriving outlier bounds with the `"z-score"`
    /// (times the standard deviation) and `"iqr"` (times the IQR) detectors.
    pub outlier_threshold: Float,
    /// Name of the bound detector: `"z-score"`, `"iqr"` or `"percentile"`.
    /// Any other value makes fitting fail with an invalid-input error.
    pub detection_method: String,
    /// Lower percentile (expressed in percent, e.g. `1.0`) used by the
    /// `"percentile"` detector.
    pub lower_percentile: Float,
    /// Upper percentile (expressed in percent, e.g. `99.0`) used by the
    /// `"percentile"` detector.
    pub upper_percentile: Float,
    /// Window size requested for the `Smooth` method.
    /// NOTE(review): no code visible in this file reads this value — confirm.
    pub smoothing_window: usize,
    /// Number of quantiles requested for the quantile-based methods.
    pub n_quantiles: usize,
    /// When `true`, fitting may store a positive per-feature shift so that the
    /// smallest finite training value falls inside the domain of the
    /// log / square-root / Box-Cox transforms.
    pub handle_negatives: bool,
    /// Small positive margin added on top of the shift computed for
    /// `handle_negatives`.
    pub log_epsilon: Float,
    /// `true` fits one parameter set per column; `false` fits a single
    /// parameter set over all values and applies it to every element.
    pub feature_wise: bool,
}
78
79impl Default for OutlierTransformationConfig {
80 fn default() -> Self {
81 Self {
82 method: OutlierTransformationMethod::Log1p,
83 outlier_threshold: 3.0,
84 detection_method: "z-score".to_string(),
85 lower_percentile: 1.0,
86 upper_percentile: 99.0,
87 smoothing_window: 5,
88 n_quantiles: 1000,
89 handle_negatives: true,
90 log_epsilon: 1e-8,
91 feature_wise: true,
92 }
93 }
94}
95
/// Transformer that reshapes or bounds values to reduce the impact of
/// outliers. The `State` type parameter (`Untrained` by default, `Trained`
/// after fitting) selects which methods are available.
#[derive(Debug, Clone)]
pub struct OutlierTransformer<State = Untrained> {
    /// Settings chosen through the builder methods.
    config: OutlierTransformationConfig,
    /// Zero-sized marker carrying the `State` type parameter.
    state: PhantomData<State>,
    /// Parameters learned by `fit`; `None` until the transformer is fitted.
    transformation_params_: Option<TransformationParameters>,
    /// Number of columns seen by `fit`; `None` until the transformer is fitted.
    n_features_in_: Option<usize>,
}
105
/// Everything learned by `fit`.
#[derive(Debug, Clone)]
pub struct TransformationParameters {
    /// One entry per column when fitting feature-wise, otherwise a single
    /// entry fitted over all values.
    pub feature_params: Vec<FeatureTransformationParams>,
    /// Parameters shared by all features. The `fit` implementation in this
    /// file always stores `None` here.
    pub global_params: Option<GlobalTransformationParams>,
}
114
/// Statistics and transformation parameters learned from the finite values of
/// one feature. If the feature had no finite values, `shift` is `0.0` and every
/// optional field is `None`.
#[derive(Debug, Clone)]
pub struct FeatureTransformationParams {
    /// Box-Cox lambda (estimated or fixed); `None` for other methods.
    pub lambda: Option<Float>,
    /// Offset added to each value before the log, log1p, square-root and
    /// Box-Cox transforms; `0.0` when no shift is needed.
    pub shift: Float,
    /// Values sampled from the sorted training data; only set for the
    /// quantile methods.
    pub quantiles: Option<Array1<Float>>,
    /// Target values looked up by index at transform time; only set for the
    /// quantile methods.
    pub references: Option<Array1<Float>>,
    /// Median of the finite training values.
    pub median: Option<Float>,
    /// Inter-quartile range, computed as `sorted[3n/4] - sorted[n/4]`.
    pub iqr: Option<Float>,
    /// Lower outlier bound derived with the configured detection method.
    pub lower_bound: Option<Float>,
    /// Upper outlier bound derived with the configured detection method.
    pub upper_bound: Option<Float>,
    /// Arithmetic mean of the finite training values.
    pub mean: Option<Float>,
    /// Population standard deviation (variance divides by `n`).
    pub std: Option<Float>,
}
136
/// Parameters intended to be shared across all features.
/// NOTE(review): nothing visible in this file constructs or reads this type —
/// confirm whether it is used elsewhere.
#[derive(Debug, Clone)]
pub struct GlobalTransformationParams {
    /// Shift applied to every feature.
    pub global_shift: Float,
    /// Box-Cox lambda shared by every feature, if any.
    pub global_lambda: Option<Float>,
}
145
146impl OutlierTransformer<Untrained> {
147 pub fn new() -> Self {
149 Self {
150 config: OutlierTransformationConfig::default(),
151 state: PhantomData,
152 transformation_params_: None,
153 n_features_in_: None,
154 }
155 }
156
157 pub fn log() -> Self {
159 Self::new().method(OutlierTransformationMethod::Log)
160 }
161
162 pub fn log1p() -> Self {
164 Self::new().method(OutlierTransformationMethod::Log1p)
165 }
166
167 pub fn sqrt() -> Self {
169 Self::new().method(OutlierTransformationMethod::Sqrt)
170 }
171
172 pub fn box_cox() -> Self {
174 Self::new().method(OutlierTransformationMethod::BoxCox)
175 }
176
177 pub fn box_cox_fixed(lambda: Float) -> Self {
179 Self::new().method(OutlierTransformationMethod::BoxCoxFixed(lambda))
180 }
181
182 pub fn quantile_uniform(n_quantiles: usize) -> Self {
184 Self::new()
185 .method(OutlierTransformationMethod::QuantileUniform)
186 .n_quantiles(n_quantiles)
187 }
188
189 pub fn quantile_normal(n_quantiles: usize) -> Self {
191 Self::new()
192 .method(OutlierTransformationMethod::QuantileNormal)
193 .n_quantiles(n_quantiles)
194 }
195
196 pub fn robust_scale() -> Self {
198 Self::new().method(OutlierTransformationMethod::RobustScale)
199 }
200
201 pub fn interpolate(threshold: Float, detection_method: &str) -> Self {
203 Self::new()
204 .method(OutlierTransformationMethod::Interpolate)
205 .outlier_threshold(threshold)
206 .detection_method(detection_method.to_string())
207 }
208
209 pub fn smooth(window_size: usize, threshold: Float) -> Self {
211 Self::new()
212 .method(OutlierTransformationMethod::Smooth)
213 .smoothing_window(window_size)
214 .outlier_threshold(threshold)
215 }
216
217 pub fn trim(lower_percentile: Float, upper_percentile: Float) -> Self {
219 Self::new()
220 .method(OutlierTransformationMethod::Trim)
221 .lower_percentile(lower_percentile)
222 .upper_percentile(upper_percentile)
223 }
224
225 pub fn method(mut self, method: OutlierTransformationMethod) -> Self {
227 self.config.method = method;
228 self
229 }
230
231 pub fn outlier_threshold(mut self, threshold: Float) -> Self {
233 self.config.outlier_threshold = threshold;
234 self
235 }
236
237 pub fn detection_method(mut self, method: String) -> Self {
239 self.config.detection_method = method;
240 self
241 }
242
243 pub fn lower_percentile(mut self, percentile: Float) -> Self {
245 self.config.lower_percentile = percentile;
246 self
247 }
248
249 pub fn upper_percentile(mut self, percentile: Float) -> Self {
251 self.config.upper_percentile = percentile;
252 self
253 }
254
255 pub fn smoothing_window(mut self, window: usize) -> Self {
257 self.config.smoothing_window = window;
258 self
259 }
260
261 pub fn n_quantiles(mut self, n_quantiles: usize) -> Self {
263 self.config.n_quantiles = n_quantiles;
264 self
265 }
266
267 pub fn handle_negatives(mut self, handle: bool) -> Self {
269 self.config.handle_negatives = handle;
270 self
271 }
272
273 pub fn log_epsilon(mut self, epsilon: Float) -> Self {
275 self.config.log_epsilon = epsilon;
276 self
277 }
278
279 pub fn feature_wise(mut self, feature_wise: bool) -> Self {
281 self.config.feature_wise = feature_wise;
282 self
283 }
284}
285
286impl Fit<Array2<Float>, ()> for OutlierTransformer<Untrained> {
287 type Fitted = OutlierTransformer<Trained>;
288
289 fn fit(mut self, x: &Array2<Float>, _y: &()) -> Result<Self::Fitted> {
290 let (n_samples, n_features) = x.dim();
291
292 if n_samples == 0 || n_features == 0 {
293 return Err(SklearsError::InvalidInput(
294 "Input array is empty".to_string(),
295 ));
296 }
297
298 self.n_features_in_ = Some(n_features);
299
300 let feature_params = if self.config.feature_wise {
302 (0..n_features)
303 .map(|j| self.fit_feature_params(x.column(j).to_owned().as_slice().unwrap()))
304 .collect::<Result<Vec<_>>>()?
305 } else {
306 vec![self.fit_feature_params(x.as_slice().unwrap())?]
308 };
309
310 self.transformation_params_ = Some(TransformationParameters {
311 feature_params,
312 global_params: None, });
314
315 Ok(OutlierTransformer {
316 config: self.config,
317 state: PhantomData,
318 transformation_params_: self.transformation_params_,
319 n_features_in_: self.n_features_in_,
320 })
321 }
322}
323
324impl OutlierTransformer<Untrained> {
325 fn fit_feature_params(&self, data: &[Float]) -> Result<FeatureTransformationParams> {
327 let mut params = FeatureTransformationParams {
328 lambda: None,
329 shift: 0.0,
330 quantiles: None,
331 references: None,
332 median: None,
333 iqr: None,
334 lower_bound: None,
335 upper_bound: None,
336 mean: None,
337 std: None,
338 };
339
340 let valid_data: Vec<Float> = data.iter().filter(|x| x.is_finite()).copied().collect();
342
343 if valid_data.is_empty() {
344 return Ok(params);
345 }
346
347 let mean = valid_data.iter().sum::<Float>() / valid_data.len() as Float;
348 let variance = valid_data.iter().map(|x| (x - mean).powi(2)).sum::<Float>()
349 / valid_data.len() as Float;
350 let std = variance.sqrt();
351
352 params.mean = Some(mean);
353 params.std = Some(std);
354
355 let mut sorted_data = valid_data.clone();
357 sorted_data.sort_by(|a, b| a.partial_cmp(b).unwrap());
358
359 let median = if sorted_data.len() % 2 == 0 {
360 let mid = sorted_data.len() / 2;
361 (sorted_data[mid - 1] + sorted_data[mid]) / 2.0
362 } else {
363 sorted_data[sorted_data.len() / 2]
364 };
365
366 let q1_idx = sorted_data.len() / 4;
367 let q3_idx = 3 * sorted_data.len() / 4;
368 let q1 = sorted_data[q1_idx];
369 let q3 = sorted_data[q3_idx];
370 let iqr = q3 - q1;
371
372 params.median = Some(median);
373 params.iqr = Some(iqr);
374
375 match self.config.detection_method.as_str() {
377 "z-score" => {
378 params.lower_bound = Some(mean - self.config.outlier_threshold * std);
379 params.upper_bound = Some(mean + self.config.outlier_threshold * std);
380 }
381 "iqr" => {
382 params.lower_bound = Some(q1 - self.config.outlier_threshold * iqr);
383 params.upper_bound = Some(q3 + self.config.outlier_threshold * iqr);
384 }
385 "percentile" => {
386 let lower_idx =
387 ((self.config.lower_percentile / 100.0) * sorted_data.len() as Float) as usize;
388 let upper_idx =
389 ((self.config.upper_percentile / 100.0) * sorted_data.len() as Float) as usize;
390 params.lower_bound = Some(sorted_data[lower_idx.min(sorted_data.len() - 1)]);
391 params.upper_bound = Some(sorted_data[upper_idx.min(sorted_data.len() - 1)]);
392 }
393 _ => {
394 return Err(SklearsError::InvalidInput(format!(
395 "Unknown detection method: {}",
396 self.config.detection_method
397 )));
398 }
399 }
400
401 if self.config.handle_negatives {
403 match self.config.method {
404 OutlierTransformationMethod::Log | OutlierTransformationMethod::Sqrt => {
405 let min_val = sorted_data[0];
406 if min_val <= 0.0 {
407 params.shift = -min_val + self.config.log_epsilon;
408 }
409 }
410 OutlierTransformationMethod::BoxCox
411 | OutlierTransformationMethod::BoxCoxFixed(_) => {
412 let min_val = sorted_data[0];
413 if min_val <= 0.0 {
414 params.shift = -min_val + self.config.log_epsilon;
415 }
416 }
417 _ => {}
418 }
419 }
420
421 match self.config.method {
423 OutlierTransformationMethod::BoxCox => {
424 params.lambda = Some(self.estimate_box_cox_lambda(&valid_data, params.shift)?);
425 }
426 OutlierTransformationMethod::BoxCoxFixed(lambda) => {
427 params.lambda = Some(lambda);
428 }
429 OutlierTransformationMethod::QuantileUniform
430 | OutlierTransformationMethod::QuantileNormal => {
431 params.quantiles = Some(self.compute_quantiles(&sorted_data)?);
432 params.references = Some(self.compute_references()?);
433 }
434 _ => {}
435 }
436
437 Ok(params)
438 }
439
440 fn estimate_box_cox_lambda(&self, data: &[Float], shift: Float) -> Result<Float> {
442 let shifted_data: Vec<Float> = data.iter().map(|x| x + shift).collect();
443
444 let lambda_range: Vec<Float> = (-20..=20).map(|i| i as Float * 0.1).collect();
446
447 let mut best_lambda = 0.0;
448 let mut best_llf = Float::NEG_INFINITY;
449
450 for &lambda in &lambda_range {
451 if let Ok(llf) = self.box_cox_log_likelihood(&shifted_data, lambda) {
452 if llf > best_llf {
453 best_llf = llf;
454 best_lambda = lambda;
455 }
456 }
457 }
458
459 Ok(best_lambda)
460 }
461
462 fn box_cox_log_likelihood(&self, data: &[Float], lambda: Float) -> Result<Float> {
464 let n = data.len() as Float;
465
466 let transformed: Vec<Float> = data
468 .iter()
469 .map(|&x| {
470 if x <= 0.0 {
471 return Float::NAN;
472 }
473 if lambda.abs() < 1e-10 {
474 x.ln()
475 } else {
476 (x.powf(lambda) - 1.0) / lambda
477 }
478 })
479 .collect();
480
481 if transformed.iter().any(|x| !x.is_finite()) {
483 return Err(SklearsError::InvalidInput(
484 "Invalid Box-Cox transformation".to_string(),
485 ));
486 }
487
488 let mean = transformed.iter().sum::<Float>() / n;
490 let variance = transformed
491 .iter()
492 .map(|x| (x - mean).powi(2))
493 .sum::<Float>()
494 / n;
495
496 let log_jacobian = (lambda - 1.0) * data.iter().map(|x| x.ln()).sum::<Float>();
497 let llf = -0.5 * n * (2.0 * std::f64::consts::PI as Float).ln()
498 - 0.5 * n * variance.ln()
499 - 0.5 * n
500 + log_jacobian;
501
502 Ok(llf)
503 }
504
505 fn compute_quantiles(&self, sorted_data: &[Float]) -> Result<Array1<Float>> {
507 let n_quantiles = self.config.n_quantiles.min(sorted_data.len());
508 let mut quantiles = Array1::zeros(n_quantiles);
509
510 for i in 0..n_quantiles {
511 let q = i as Float / (n_quantiles - 1) as Float;
512 let idx = (q * (sorted_data.len() - 1) as Float) as usize;
513 quantiles[i] = sorted_data[idx.min(sorted_data.len() - 1)];
514 }
515
516 Ok(quantiles)
517 }
518
519 fn compute_references(&self) -> Result<Array1<Float>> {
521 let n_quantiles = self.config.n_quantiles;
522 let mut references = Array1::zeros(n_quantiles);
523
524 match self.config.method {
525 OutlierTransformationMethod::QuantileUniform => {
526 for i in 0..n_quantiles {
527 references[i] = i as Float / (n_quantiles - 1) as Float;
528 }
529 }
530 OutlierTransformationMethod::QuantileNormal => {
531 for i in 0..n_quantiles {
533 let p = i as Float / (n_quantiles - 1) as Float;
534 references[i] = self.inverse_normal_cdf(p);
535 }
536 }
537 _ => {
538 return Err(SklearsError::InvalidInput(
539 "Invalid quantile method".to_string(),
540 ));
541 }
542 }
543
544 Ok(references)
545 }
546
547 fn inverse_normal_cdf(&self, p: Float) -> Float {
549 if p <= 0.0 {
550 return Float::NEG_INFINITY;
551 }
552 if p >= 1.0 {
553 return Float::INFINITY;
554 }
555 if p == 0.5 {
556 return 0.0;
557 }
558
559 let a = [
562 -3.969683028665376e+01,
563 2.209460984245205e+02,
564 -2.759285104469687e+02,
565 1.383577518672690e+02,
566 -3.066479806614716e+01,
567 2.506628277459239e+00,
568 ];
569 let b = [
570 -5.447609879822406e+01,
571 1.615858368580409e+02,
572 -1.556989798598866e+02,
573 6.680131188771972e+01,
574 -1.328068155288572e+01,
575 ];
576
577 let q = if p > 0.5 { 1.0 - p } else { p };
578 let t = (-2.0 * q.ln()).sqrt();
579
580 let mut num = a[5];
581 for i in (0..5).rev() {
582 num = num * t + a[i];
583 }
584
585 let mut den = 1.0;
586 for i in (0..5).rev() {
587 den = den * t + b[i];
588 }
589
590 let x = t - num / den;
591 if p > 0.5 {
592 x
593 } else {
594 -x
595 }
596 }
597}
598
599impl Transform<Array2<Float>, Array2<Float>> for OutlierTransformer<Trained> {
600 fn transform(&self, x: &Array2<Float>) -> Result<Array2<Float>> {
601 let (_n_samples, n_features) = x.dim();
602
603 if n_features != self.n_features_in().unwrap() {
604 return Err(SklearsError::FeatureMismatch {
605 expected: self.n_features_in().unwrap(),
606 actual: n_features,
607 });
608 }
609
610 let params = self.transformation_params_.as_ref().unwrap();
611 let mut result = x.clone();
612
613 if self.config.feature_wise {
614 for j in 0..n_features {
615 let feature_params = ¶ms.feature_params[j];
616 let mut column = result.column_mut(j);
617 self.transform_feature_inplace(&mut column, feature_params)?;
618 }
619 } else {
620 let feature_params = ¶ms.feature_params[0];
622 for mut row in result.axis_iter_mut(Axis(0)) {
623 for elem in row.iter_mut() {
624 *elem = self.transform_value(*elem, feature_params)?;
625 }
626 }
627 }
628
629 Ok(result)
630 }
631}
632
633impl OutlierTransformer<Trained> {
634 pub fn n_features_in(&self) -> Option<usize> {
636 self.n_features_in_
637 }
638
639 fn transform_feature_inplace(
641 &self,
642 column: &mut scirs2_core::ndarray::ArrayViewMut1<Float>,
643 params: &FeatureTransformationParams,
644 ) -> Result<()> {
645 for elem in column.iter_mut() {
646 *elem = self.transform_value(*elem, params)?;
647 }
648 Ok(())
649 }
650
651 fn transform_value(&self, value: Float, params: &FeatureTransformationParams) -> Result<Float> {
653 if !value.is_finite() {
654 return Ok(value);
655 }
656
657 match self.config.method {
658 OutlierTransformationMethod::Log => {
659 let shifted = value + params.shift;
660 if shifted <= 0.0 {
661 Ok(Float::NAN)
662 } else {
663 Ok(shifted.ln())
664 }
665 }
666 OutlierTransformationMethod::Log1p => Ok((value + params.shift).ln_1p()),
667 OutlierTransformationMethod::Sqrt => {
668 let shifted = value + params.shift;
669 if shifted < 0.0 {
670 Ok(Float::NAN)
671 } else {
672 Ok(shifted.sqrt())
673 }
674 }
675 OutlierTransformationMethod::BoxCox | OutlierTransformationMethod::BoxCoxFixed(_) => {
676 let lambda = params.lambda.unwrap_or(0.0);
677 let shifted = value + params.shift;
678 if shifted <= 0.0 {
679 return Ok(Float::NAN);
680 }
681 if lambda.abs() < 1e-10 {
682 Ok(shifted.ln())
683 } else {
684 Ok((shifted.powf(lambda) - 1.0) / lambda)
685 }
686 }
687 OutlierTransformationMethod::QuantileUniform
688 | OutlierTransformationMethod::QuantileNormal => {
689 self.quantile_transform_value(value, params)
690 }
691 OutlierTransformationMethod::RobustScale => {
692 let median = params.median.unwrap_or(0.0);
693 let iqr = params.iqr.unwrap_or(1.0);
694 if iqr > 0.0 {
695 Ok((value - median) / iqr)
696 } else {
697 Ok(0.0)
698 }
699 }
700 OutlierTransformationMethod::Interpolate => self.interpolate_value(value, params),
701 OutlierTransformationMethod::Smooth => {
702 Ok(value)
704 }
705 OutlierTransformationMethod::Trim => {
706 let lower = params.lower_bound.unwrap_or(Float::NEG_INFINITY);
707 let upper = params.upper_bound.unwrap_or(Float::INFINITY);
708 Ok(value.max(lower).min(upper))
709 }
710 }
711 }
712
713 fn quantile_transform_value(
715 &self,
716 value: Float,
717 params: &FeatureTransformationParams,
718 ) -> Result<Float> {
719 let quantiles = params.quantiles.as_ref().unwrap();
720 let references = params.references.as_ref().unwrap();
721
722 let mut pos = 0;
724 for (i, &q) in quantiles.iter().enumerate() {
725 if value <= q {
726 pos = i;
727 break;
728 }
729 pos = i + 1;
730 }
731
732 pos = pos.min(references.len() - 1);
733 Ok(references[pos])
734 }
735
736 fn interpolate_value(
738 &self,
739 value: Float,
740 params: &FeatureTransformationParams,
741 ) -> Result<Float> {
742 let lower = params.lower_bound.unwrap_or(Float::NEG_INFINITY);
743 let upper = params.upper_bound.unwrap_or(Float::INFINITY);
744
745 if value < lower {
746 Ok(lower)
747 } else if value > upper {
748 Ok(upper)
749 } else {
750 Ok(value)
751 }
752 }
753
754 pub fn transformation_params(&self) -> Option<&TransformationParameters> {
756 self.transformation_params_.as_ref()
757 }
758
759 pub fn feature_stats(&self, feature_idx: usize) -> Option<&FeatureTransformationParams> {
761 self.transformation_params_
762 .as_ref()?
763 .feature_params
764 .get(feature_idx)
765 }
766}
767
768impl Default for OutlierTransformer<Untrained> {
769 fn default() -> Self {
770 Self::new()
771 }
772}
773
// Unit tests covering each transformation method and basic error handling.
#[allow(non_snake_case)]
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;
    use scirs2_core::ndarray::Array2;

    /// Log transform on strictly positive data: no shift is applied, so the
    /// output equals the plain natural logarithm.
    #[test]
    fn test_log_transformation() {
        let data = Array2::from_shape_vec(
            (5, 2),
            vec![
                1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 100.0, 1000.0, 4.0, 40.0,
            ],
        )
        .unwrap();

        let transformer = OutlierTransformer::log();
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        assert_relative_eq!(result[[0, 0]], 1.0_f64.ln(), epsilon = 1e-10);

        assert_relative_eq!(result[[3, 0]], 100.0_f64.ln(), epsilon = 1e-10);
    }

    /// `log1p` maps 0 to 0 and 1 to ln(2).
    #[test]
    fn test_log1p_transformation() {
        let data = Array2::from_shape_vec((4, 1), vec![0.0, 1.0, 10.0, 100.0]).unwrap();

        let transformer = OutlierTransformer::log1p();
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        assert_relative_eq!(result[[0, 0]], 0.0, epsilon = 1e-10);

        assert_relative_eq!(result[[1, 0]], (2.0_f64).ln(), epsilon = 1e-10);
    }

    /// Square-root transform on positive data returns exact roots.
    #[test]
    fn test_sqrt_transformation() {
        let data = Array2::from_shape_vec((4, 1), vec![1.0, 4.0, 9.0, 100.0]).unwrap();

        let transformer = OutlierTransformer::sqrt();
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        assert_relative_eq!(result[[0, 0]], 1.0, epsilon = 1e-10);

        assert_relative_eq!(result[[3, 0]], 10.0, epsilon = 1e-10);
    }

    /// Robust scaling stores the median and IQR used for the transform.
    #[test]
    fn test_robust_scale_transformation() {
        let data = Array2::from_shape_vec(
            (7, 1),
            vec![
                1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 100.0, ],
        )
        .unwrap();

        let transformer = OutlierTransformer::robust_scale();
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        let params = fitted.feature_stats(0).unwrap();
        assert!(params.median.is_some());
        assert!(params.iqr.is_some());
    }

    /// In-range values are left untouched by the interpolate method.
    #[test]
    fn test_interpolate_transformation() {
        let data = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 100.0]).unwrap();

        let transformer = OutlierTransformer::interpolate(2.0, "z-score");
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        assert_relative_eq!(result[[0, 0]], 1.0, epsilon = 1e-10);
        assert_relative_eq!(result[[1, 0]], 2.0, epsilon = 1e-10);

        let params = fitted.feature_stats(0).unwrap();
        assert!(params.upper_bound.is_some());
    }

    /// Trim fits both a lower and an upper bound.
    #[test]
    fn test_trim_transformation() {
        let data = Array2::from_shape_vec(
            (11, 1),
            (1..=10)
                .map(|x| x as f64)
                .chain(std::iter::once(1000.0))
                .collect(),
        )
        .unwrap();

        let transformer = OutlierTransformer::trim(10.0, 90.0);
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        let params = fitted.feature_stats(0).unwrap();
        assert!(params.lower_bound.is_some());
        assert!(params.upper_bound.is_some());
    }

    /// A fixed Box-Cox lambda is stored verbatim in the fitted parameters.
    #[test]
    fn test_box_cox_transformation() {
        let data = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 100.0]).unwrap();

        let transformer = OutlierTransformer::box_cox_fixed(0.5);
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        let params = fitted.feature_stats(0).unwrap();
        assert!(params.lambda.is_some());
        assert_relative_eq!(params.lambda.unwrap(), 0.5, epsilon = 1e-10);
    }

    /// With negative inputs and `handle_negatives` on, a positive shift is
    /// fitted for the log method.
    #[test]
    fn test_handle_negative_values() {
        let data = Array2::from_shape_vec((4, 1), vec![-2.0, -1.0, 1.0, 100.0]).unwrap();

        let transformer = OutlierTransformer::log().handle_negatives(true);
        let fitted = transformer.fit(&data, &()).unwrap();
        let result = fitted.transform(&data).unwrap();

        assert_eq!(result.dim(), data.dim());

        let params = fitted.feature_stats(0).unwrap();
        assert!(params.shift > 0.0);
    }

    /// Both feature-wise and global fitting produce output of the input shape.
    #[test]
    fn test_feature_wise_vs_global() {
        let data =
            Array2::from_shape_vec((4, 2), vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 100.0, 1000.0])
                .unwrap();

        let transformer_fw = OutlierTransformer::log().feature_wise(true);
        let fitted_fw = transformer_fw.fit(&data, &()).unwrap();
        let result_fw = fitted_fw.transform(&data).unwrap();

        let transformer_global = OutlierTransformer::log().feature_wise(false);
        let fitted_global = transformer_global.fit(&data, &()).unwrap();
        let result_global = fitted_global.transform(&data).unwrap();

        assert_eq!(result_fw.dim(), data.dim());
        assert_eq!(result_global.dim(), data.dim());

    }

    /// Transforming data with a different column count is rejected.
    #[test]
    fn test_transformation_error_handling() {
        let data = Array2::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
        let transformer = OutlierTransformer::log();
        let fitted = transformer.fit(&data, &()).unwrap();

        let wrong_data =
            Array2::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
        assert!(fitted.transform(&wrong_data).is_err());
    }

    /// Every supported detection method fits both bounds.
    #[test]
    fn test_detection_methods() {
        let data =
            Array2::from_shape_vec((7, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 100.0]).unwrap();

        let methods = vec!["z-score", "iqr", "percentile"];

        for method in methods {
            let transformer = OutlierTransformer::interpolate(2.0, method);
            let fitted = transformer.fit(&data, &()).unwrap();
            let result = fitted.transform(&data).unwrap();

            assert_eq!(result.dim(), data.dim());

            let params = fitted.feature_stats(0).unwrap();
            assert!(params.lower_bound.is_some());
            assert!(params.upper_bound.is_some());
        }
    }
}