1use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Data, Ix2};
7use scirs2_core::numeric::{Float, NumCast};
8
9use crate::error::{Result, TransformError};
10
11pub const EPSILON: f64 = 1e-10;
13
14pub struct QuantileTransformer {
19 n_quantiles: usize,
21 output_distribution: String,
23 clip: bool,
25 quantiles: Option<Array2<f64>>,
27 references: Option<Array1<f64>>,
29}
30
31impl QuantileTransformer {
32 pub fn new(n_quantiles: usize, outputdistribution: &str, clip: bool) -> Result<Self> {
42 if n_quantiles < 2 {
43 return Err(TransformError::InvalidInput(
44 "n_quantiles must be at least 2".to_string(),
45 ));
46 }
47
48 if outputdistribution != "uniform" && outputdistribution != "normal" {
49 return Err(TransformError::InvalidInput(
50 "output_distribution must be 'uniform' or 'normal'".to_string(),
51 ));
52 }
53
54 Ok(QuantileTransformer {
55 n_quantiles,
56 output_distribution: outputdistribution.to_string(),
57 clip,
58 quantiles: None,
59 references: None,
60 })
61 }
62
63 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
71 where
72 S: Data,
73 S::Elem: Float + NumCast,
74 {
75 let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));
76
77 let n_samples = x_f64.shape()[0];
78 let n_features = x_f64.shape()[1];
79
80 if n_samples == 0 || n_features == 0 {
81 return Err(TransformError::InvalidInput("Empty input data".to_string()));
82 }
83
84 if self.n_quantiles > n_samples {
85 return Err(TransformError::InvalidInput(format!(
86 "n_quantiles ({}) cannot be greater than n_samples ({})",
87 self.n_quantiles, n_samples
88 )));
89 }
90
91 let mut quantiles = Array2::zeros((n_features, self.n_quantiles));
93
94 for j in 0..n_features {
95 let mut feature_data: Vec<f64> = x_f64.column(j).to_vec();
97 feature_data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
98
99 for i in 0..self.n_quantiles {
101 let q = i as f64 / (self.n_quantiles - 1) as f64;
102 let idx = (q * (feature_data.len() - 1) as f64).round() as usize;
103 quantiles[[j, i]] = feature_data[idx];
104 }
105 }
106
107 let references = if self.output_distribution == "uniform" {
109 Array1::from_shape_fn(self.n_quantiles, |i| {
111 i as f64 / (self.n_quantiles - 1) as f64
112 })
113 } else {
114 Array1::from_shape_fn(self.n_quantiles, |i| {
116 let u = i as f64 / (self.n_quantiles - 1) as f64;
117 let u_clamped = u.clamp(1e-7, 1.0 - 1e-7);
119 inverse_normal_cdf(u_clamped)
120 })
121 };
122
123 self.quantiles = Some(quantiles);
124 self.references = Some(references);
125
126 Ok(())
127 }
128
129 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
137 where
138 S: Data,
139 S::Elem: Float + NumCast,
140 {
141 let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));
142
143 let n_samples = x_f64.shape()[0];
144 let n_features = x_f64.shape()[1];
145
146 if self.quantiles.is_none() || self.references.is_none() {
147 return Err(TransformError::TransformationError(
148 "QuantileTransformer has not been fitted".to_string(),
149 ));
150 }
151
152 let quantiles = self.quantiles.as_ref().unwrap();
153 let references = self.references.as_ref().unwrap();
154
155 if n_features != quantiles.shape()[0] {
156 return Err(TransformError::InvalidInput(format!(
157 "x has {} features, but QuantileTransformer was fitted with {} features",
158 n_features,
159 quantiles.shape()[0]
160 )));
161 }
162
163 let mut transformed = Array2::zeros((n_samples, n_features));
164
165 for i in 0..n_samples {
166 for j in 0..n_features {
167 let value = x_f64[[i, j]];
168
169 let feature_quantiles = quantiles.row(j);
171
172 let mut lower_idx = 0;
174 let mut upper_idx = self.n_quantiles - 1;
175
176 if value <= feature_quantiles[0] {
178 transformed[[i, j]] = references[0];
179 continue;
180 }
181 if value >= feature_quantiles[self.n_quantiles - 1] {
182 transformed[[i, j]] = references[self.n_quantiles - 1];
183 continue;
184 }
185
186 while upper_idx - lower_idx > 1 {
188 let mid = (lower_idx + upper_idx) / 2;
189 if value <= feature_quantiles[mid] {
190 upper_idx = mid;
191 } else {
192 lower_idx = mid;
193 }
194 }
195
196 let lower_quantile = feature_quantiles[lower_idx];
198 let upper_quantile = feature_quantiles[upper_idx];
199 let lower_ref = references[lower_idx];
200 let upper_ref = references[upper_idx];
201
202 if (upper_quantile - lower_quantile).abs() < EPSILON {
203 transformed[[i, j]] = lower_ref;
204 } else {
205 let ratio = (value - lower_quantile) / (upper_quantile - lower_quantile);
206 transformed[[i, j]] = lower_ref + ratio * (upper_ref - lower_ref);
207 }
208 }
209 }
210
211 if self.clip && self.output_distribution == "uniform" {
213 for i in 0..n_samples {
214 for j in 0..n_features {
215 transformed[[i, j]] = transformed[[i, j]].clamp(0.0, 1.0);
216 }
217 }
218 }
219
220 Ok(transformed)
221 }
222
223 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
231 where
232 S: Data,
233 S::Elem: Float + NumCast,
234 {
235 self.fit(x)?;
236 self.transform(x)
237 }
238
239 pub fn quantiles(&self) -> Option<&Array2<f64>> {
244 self.quantiles.as_ref()
245 }
246}
247
248#[allow(dead_code)]
252fn inverse_normal_cdf(u: f64) -> f64 {
253 const A0: f64 = 2.50662823884;
255 const A1: f64 = -18.61500062529;
256 const A2: f64 = 41.39119773534;
257 const A3: f64 = -25.44106049637;
258 const B1: f64 = -8.47351093090;
259 const B2: f64 = 23.08336743743;
260 const B3: f64 = -21.06224101826;
261 const B4: f64 = 3.13082909833;
262 const C0: f64 = 0.3374754822726147;
263 const C1: f64 = 0.9761690190917186;
264 const C2: f64 = 0.1607979714918209;
265 const C3: f64 = 0.0276438810333863;
266 const C4: f64 = 0.0038405729373609;
267 const C5: f64 = 0.0003951896511919;
268 const C6: f64 = 0.0000321767881768;
269 const C7: f64 = 0.0000002888167364;
270 const C8: f64 = 0.0000003960315187;
271
272 let y = u - 0.5;
273
274 if y.abs() < 0.42 {
275 let r = y * y;
277 y * (((A3 * r + A2) * r + A1) * r + A0) / ((((B4 * r + B3) * r + B2) * r + B1) * r + 1.0)
278 } else {
279 let r = if y > 0.0 { 1.0 - u } else { u };
281 let r = (-r.ln()).ln();
282
283 let result = C0
284 + r * (C1 + r * (C2 + r * (C3 + r * (C4 + r * (C5 + r * (C6 + r * (C7 + r * C8)))))));
285
286 if y < 0.0 {
287 -result
288 } else {
289 result
290 }
291 }
292}
293
294pub struct MaxAbsScaler {
300 max_abs_: Option<Array1<f64>>,
302 scale_: Option<Array1<f64>>,
304}
305
306impl MaxAbsScaler {
307 pub fn new() -> Self {
319 MaxAbsScaler {
320 max_abs_: None,
321 scale_: None,
322 }
323 }
324
325 #[allow(dead_code)]
327 pub fn with_defaults() -> Self {
328 Self::new()
329 }
330
331 pub fn fit<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<()>
339 where
340 S: Data,
341 S::Elem: Float + NumCast,
342 {
343 let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));
344
345 let n_samples = x_f64.shape()[0];
346 let n_features = x_f64.shape()[1];
347
348 if n_samples == 0 || n_features == 0 {
349 return Err(TransformError::InvalidInput("Empty input data".to_string()));
350 }
351
352 let mut max_abs = Array1::zeros(n_features);
354
355 for j in 0..n_features {
356 let feature_data = x_f64.column(j);
357 let max_abs_value = feature_data
358 .iter()
359 .map(|&x| x.abs())
360 .fold(0.0, |acc, x| acc.max(x));
361
362 max_abs[j] = max_abs_value;
363 }
364
365 let scale = max_abs.mapv(|max_abs_val| {
367 if max_abs_val > EPSILON {
368 1.0 / max_abs_val
369 } else {
370 1.0 }
372 });
373
374 self.max_abs_ = Some(max_abs);
375 self.scale_ = Some(scale);
376
377 Ok(())
378 }
379
380 pub fn transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
388 where
389 S: Data,
390 S::Elem: Float + NumCast,
391 {
392 let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));
393
394 let n_samples = x_f64.shape()[0];
395 let n_features = x_f64.shape()[1];
396
397 if self.scale_.is_none() {
398 return Err(TransformError::TransformationError(
399 "MaxAbsScaler has not been fitted".to_string(),
400 ));
401 }
402
403 let scale = self.scale_.as_ref().unwrap();
404
405 if n_features != scale.len() {
406 return Err(TransformError::InvalidInput(format!(
407 "x has {} features, but MaxAbsScaler was fitted with {} features",
408 n_features,
409 scale.len()
410 )));
411 }
412
413 let mut transformed = Array2::zeros((n_samples, n_features));
414
415 for i in 0..n_samples {
417 for j in 0..n_features {
418 transformed[[i, j]] = x_f64[[i, j]] * scale[j];
419 }
420 }
421
422 Ok(transformed)
423 }
424
425 pub fn fit_transform<S>(&mut self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
433 where
434 S: Data,
435 S::Elem: Float + NumCast,
436 {
437 self.fit(x)?;
438 self.transform(x)
439 }
440
441 pub fn inverse_transform<S>(&self, x: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
449 where
450 S: Data,
451 S::Elem: Float + NumCast,
452 {
453 let x_f64 = x.mapv(|x| NumCast::from(x).unwrap_or(0.0));
454
455 let n_samples = x_f64.shape()[0];
456 let n_features = x_f64.shape()[1];
457
458 if self.max_abs_.is_none() {
459 return Err(TransformError::TransformationError(
460 "MaxAbsScaler has not been fitted".to_string(),
461 ));
462 }
463
464 let max_abs = self.max_abs_.as_ref().unwrap();
465
466 if n_features != max_abs.len() {
467 return Err(TransformError::InvalidInput(format!(
468 "x has {} features, but MaxAbsScaler was fitted with {} features",
469 n_features,
470 max_abs.len()
471 )));
472 }
473
474 let mut transformed = Array2::zeros((n_samples, n_features));
475
476 for i in 0..n_samples {
478 for j in 0..n_features {
479 transformed[[i, j]] = x_f64[[i, j]] * max_abs[j];
480 }
481 }
482
483 Ok(transformed)
484 }
485
486 pub fn max_abs(&self) -> Option<&Array1<f64>> {
491 self.max_abs_.as_ref()
492 }
493
494 pub fn scale(&self) -> Option<&Array1<f64>> {
499 self.scale_.as_ref()
500 }
501}
502
503impl Default for MaxAbsScaler {
504 fn default() -> Self {
505 Self::new()
506 }
507}
508
509#[cfg(test)]
510mod tests {
511 use super::*;
512 use approx::assert_abs_diff_eq;
513 use scirs2_core::ndarray::Array;
514
515 #[test]
516 fn test_quantile_transformer_uniform() {
517 let data = Array::from_shape_vec(
519 (6, 2),
520 vec![
521 1.0, 10.0, 2.0, 20.0, 3.0, 30.0, 4.0, 40.0, 5.0, 50.0, 100.0, 1000.0,
522 ], )
524 .unwrap();
525
526 let mut transformer = QuantileTransformer::new(5, "uniform", true).unwrap();
527 let transformed = transformer.fit_transform(&data).unwrap();
528
529 assert_eq!(transformed.shape(), &[6, 2]);
531
532 for i in 0..6 {
534 for j in 0..2 {
535 assert!(
536 transformed[[i, j]] >= 0.0 && transformed[[i, j]] <= 1.0,
537 "Value at [{}, {}] = {} is not in [0, 1]",
538 i,
539 j,
540 transformed[[i, j]]
541 );
542 }
543 }
544
545 assert_abs_diff_eq!(transformed[[0, 0]], 0.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[5, 0]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[0, 1]], 0.0, epsilon = 1e-10); assert_abs_diff_eq!(transformed[[5, 1]], 1.0, epsilon = 1e-10); }
551
552 #[test]
553 fn test_quantile_transformer_normal() {
554 let data = Array::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap();
556
557 let mut transformer = QuantileTransformer::new(5, "normal", false).unwrap();
558 let transformed = transformer.fit_transform(&data).unwrap();
559
560 assert_eq!(transformed.shape(), &[5, 1]);
562
563 assert_abs_diff_eq!(transformed[[2, 0]], 0.0, epsilon = 1e-10);
565 }
566
567 #[test]
568 fn test_quantile_transformer_errors() {
569 assert!(QuantileTransformer::new(1, "uniform", true).is_err());
571
572 assert!(QuantileTransformer::new(100, "invalid", true).is_err());
574
575 let small_data = Array::from_shape_vec((2, 1), vec![1.0, 2.0]).unwrap();
577 let mut transformer = QuantileTransformer::new(10, "uniform", true).unwrap();
578 assert!(transformer.fit(&small_data).is_err());
579 }
580
581 #[test]
582 fn test_inverse_normal_cdf() {
583 assert_abs_diff_eq!(inverse_normal_cdf(0.5), 0.0, epsilon = 1e-6);
585 assert!(inverse_normal_cdf(0.1) < 0.0); assert!(inverse_normal_cdf(0.9) > 0.0); }
588
589 #[test]
590 fn test_max_abs_scaler_basic() {
591 let data = Array::from_shape_vec(
595 (5, 2),
596 vec![-4.0, -10.0, -2.0, -5.0, 0.0, 0.0, 2.0, 5.0, 4.0, 10.0],
597 )
598 .unwrap();
599
600 let mut scaler = MaxAbsScaler::new();
601 let scaled = scaler.fit_transform(&data).unwrap();
602
603 assert_eq!(scaled.shape(), &[5, 2]);
605
606 let max_abs = scaler.max_abs().unwrap();
608 assert_abs_diff_eq!(max_abs[0], 4.0, epsilon = 1e-10);
609 assert_abs_diff_eq!(max_abs[1], 10.0, epsilon = 1e-10);
610
611 let scale = scaler.scale().unwrap();
613 assert_abs_diff_eq!(scale[0], 0.25, epsilon = 1e-10); assert_abs_diff_eq!(scale[1], 0.1, epsilon = 1e-10); for j in 0..2 {
618 let feature_max = scaled
619 .column(j)
620 .iter()
621 .map(|&x| x.abs())
622 .fold(0.0, f64::max);
623 assert_abs_diff_eq!(feature_max, 1.0, epsilon = 1e-10);
624 }
625
626 assert_abs_diff_eq!(scaled[[0, 0]], -1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[0, 1]], -1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 0]], 0.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 1]], 0.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[4, 0]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[4, 1]], 1.0, epsilon = 1e-10); }
634
635 #[test]
636 fn test_max_abs_scaler_positive_only() {
637 let data = Array::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 6.0, 5.0, 10.0]).unwrap();
639
640 let mut scaler = MaxAbsScaler::new();
641 let scaled = scaler.fit_transform(&data).unwrap();
642
643 let max_abs = scaler.max_abs().unwrap();
645 assert_abs_diff_eq!(max_abs[0], 5.0, epsilon = 1e-10);
646 assert_abs_diff_eq!(max_abs[1], 10.0, epsilon = 1e-10);
647
648 assert_abs_diff_eq!(scaled[[0, 0]], 0.2, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[0, 1]], 0.2, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 0]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 1]], 1.0, epsilon = 1e-10); }
654
655 #[test]
656 fn test_max_abs_scaler_inverse_transform() {
657 let data = Array::from_shape_vec((3, 2), vec![-6.0, 8.0, 0.0, -4.0, 3.0, 12.0]).unwrap();
658
659 let mut scaler = MaxAbsScaler::new();
660 let scaled = scaler.fit_transform(&data).unwrap();
661 let inverse = scaler.inverse_transform(&scaled).unwrap();
662
663 assert_eq!(inverse.shape(), data.shape());
665 for i in 0..3 {
666 for j in 0..2 {
667 assert_abs_diff_eq!(inverse[[i, j]], data[[i, j]], epsilon = 1e-10);
668 }
669 }
670 }
671
672 #[test]
673 fn test_max_abs_scaler_constant_feature() {
674 let data = Array::from_shape_vec((3, 2), vec![0.0, 5.0, 0.0, 10.0, 0.0, 15.0]).unwrap();
676
677 let mut scaler = MaxAbsScaler::new();
678 let scaled = scaler.fit_transform(&data).unwrap();
679
680 for i in 0..3 {
682 assert_abs_diff_eq!(scaled[[i, 0]], 0.0, epsilon = 1e-10);
683 }
684
685 assert_abs_diff_eq!(scaled[[0, 1]], 1.0 / 3.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 1]], 1.0, epsilon = 1e-10); }
689
690 #[test]
691 fn test_max_abs_scaler_errors() {
692 let empty_data = Array2::<f64>::zeros((0, 2));
694 let mut scaler = MaxAbsScaler::new();
695 assert!(scaler.fit(&empty_data).is_err());
696
697 let data = Array::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
699 let unfitted_scaler = MaxAbsScaler::new();
700 assert!(unfitted_scaler.transform(&data).is_err());
701 assert!(unfitted_scaler.inverse_transform(&data).is_err());
702
703 let train_data = Array::from_shape_vec((2, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap();
705 let test_data = Array::from_shape_vec((2, 2), vec![1.0, 2.0, 3.0, 4.0]).unwrap();
706
707 let mut scaler = MaxAbsScaler::new();
708 scaler.fit(&train_data).unwrap();
709 assert!(scaler.transform(&test_data).is_err());
710 assert!(scaler.inverse_transform(&test_data).is_err());
711 }
712
713 #[test]
714 fn test_max_abs_scaler_single_feature() {
715 let data = Array::from_shape_vec((4, 1), vec![-8.0, -2.0, 4.0, 6.0]).unwrap();
717
718 let mut scaler = MaxAbsScaler::new();
719 let scaled = scaler.fit_transform(&data).unwrap();
720
721 let max_abs = scaler.max_abs().unwrap();
723 assert_abs_diff_eq!(max_abs[0], 8.0, epsilon = 1e-10);
724
725 assert_abs_diff_eq!(scaled[[0, 0]], -1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[1, 0]], -0.25, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 0]], 0.5, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[3, 0]], 0.75, epsilon = 1e-10); }
731
732 #[test]
733 fn test_max_abs_scaler_sparse_preservation() {
734 let data = Array::from_shape_vec(
736 (4, 3),
737 vec![
738 0.0, 5.0, 0.0, 10.0, 0.0, -8.0, 0.0, 0.0, 4.0, -5.0, 10.0, 0.0, ],
743 )
744 .unwrap();
745
746 let mut scaler = MaxAbsScaler::new();
747 let scaled = scaler.fit_transform(&data).unwrap();
748
749 assert_abs_diff_eq!(scaled[[0, 0]], 0.0, epsilon = 1e-10);
751 assert_abs_diff_eq!(scaled[[0, 2]], 0.0, epsilon = 1e-10);
752 assert_abs_diff_eq!(scaled[[1, 1]], 0.0, epsilon = 1e-10);
753 assert_abs_diff_eq!(scaled[[2, 0]], 0.0, epsilon = 1e-10);
754 assert_abs_diff_eq!(scaled[[2, 1]], 0.0, epsilon = 1e-10);
755 assert_abs_diff_eq!(scaled[[3, 2]], 0.0, epsilon = 1e-10);
756
757 assert_abs_diff_eq!(scaled[[0, 1]], 0.5, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[1, 0]], 1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[1, 2]], -1.0, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[2, 2]], 0.5, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[3, 0]], -0.5, epsilon = 1e-10); assert_abs_diff_eq!(scaled[[3, 1]], 1.0, epsilon = 1e-10); }
766}