use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2};
use scirs2_core::numeric::{Float, NumCast};

use crate::error::{Result, TransformError};

/// Tolerance used to guard against division by (near-)zero ranges and norms.
pub const EPSILON: f64 = 1e-10;

/// Normalization methods supported by this module.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NormalizationMethod {
    /// Scale values to the range [0, 1].
    MinMax,
    /// Scale values to a custom range [min, max].
    MinMaxCustom(f64, f64),
    /// Standardize to zero mean and unit variance.
    ZScore,
    /// Divide by the maximum absolute value, mapping into [-1, 1].
    MaxAbs,
    /// Scale so that the absolute values sum to 1 (L1 norm).
    L1,
    /// Scale to unit Euclidean length (L2 norm).
    L2,
    /// Center by the median and scale by the interquartile range (IQR).
    Robust,
}

/// Normalizes a 2-D array using the given method.
///
/// Statistics are computed along `axis`: with `axis = 0` each column is
/// normalized using per-column statistics, and with `axis = 1` each row is
/// normalized using per-row statistics.
#[allow(dead_code)]
pub fn normalize_array<S>(
    array: &ArrayBase<S, Ix2>,
    method: NormalizationMethod,
    axis: usize,
) -> Result<Array2<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if !array_f64.is_standard_layout() {
        return Err(TransformError::InvalidInput(
            "Input array must be in standard memory layout".to_string(),
        ));
    }

    if array_f64.ndim() != 2 {
        return Err(TransformError::InvalidInput(
            "Only 2D arrays are supported".to_string(),
        ));
    }

    if axis >= array_f64.ndim() {
        return Err(TransformError::InvalidInput(format!(
            "Invalid axis {} for array with {} dimensions",
            axis,
            array_f64.ndim()
        )));
    }

    let shape = array_f64.shape();
    let mut normalized = Array2::zeros((shape[0], shape[1]));

    match method {
        NormalizationMethod::MinMax => {
            let min = array_f64.map_axis(Axis(axis), |view| {
                view.fold(f64::INFINITY, |acc, &x| acc.min(x))
            });

            let max = array_f64.map_axis(Axis(axis), |view| {
                view.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
            });

            let range = &max - &min;

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    // Statistics are indexed by column when axis == 0, by row otherwise.
                    let idx = if axis == 0 { j } else { i };

                    if range[idx].abs() > EPSILON {
                        normalized[[i, j]] = (value - min[idx]) / range[idx];
                    } else {
                        // Constant slice: map to the midpoint of [0, 1].
                        normalized[[i, j]] = 0.5;
                    }
                }
            }
        }
        NormalizationMethod::MinMaxCustom(new_min, new_max) => {
            let min = array_f64.map_axis(Axis(axis), |view| {
                view.fold(f64::INFINITY, |acc, &x| acc.min(x))
            });

            let max = array_f64.map_axis(Axis(axis), |view| {
                view.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
            });

            let range = &max - &min;
            let new_range = new_max - new_min;

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    let idx = if axis == 0 { j } else { i };

                    if range[idx].abs() > EPSILON {
                        normalized[[i, j]] =
                            (value - min[idx]) / range[idx] * new_range + new_min;
                    } else {
                        normalized[[i, j]] = (new_min + new_max) / 2.0;
                    }
                }
            }
        }
        NormalizationMethod::ZScore => {
            let mean = array_f64.map_axis(Axis(axis), |view| {
                view.iter().sum::<f64>() / view.len() as f64
            });

            let std_dev = array_f64.map_axis(Axis(axis), |view| {
                let m = view.iter().sum::<f64>() / view.len() as f64;
                let variance =
                    view.iter().map(|&x| (x - m).powi(2)).sum::<f64>() / view.len() as f64;
                variance.sqrt()
            });

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    let idx = if axis == 0 { j } else { i };

                    if std_dev[idx] > EPSILON {
                        normalized[[i, j]] = (value - mean[idx]) / std_dev[idx];
                    } else {
                        normalized[[i, j]] = 0.0;
                    }
                }
            }
        }
        NormalizationMethod::MaxAbs => {
            let max_abs = array_f64.map_axis(Axis(axis), |view| {
                view.fold(0.0, |acc, &x| acc.max(x.abs()))
            });

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    let idx = if axis == 0 { j } else { i };

                    if max_abs[idx] > EPSILON {
                        normalized[[i, j]] = value / max_abs[idx];
                    } else {
                        normalized[[i, j]] = 0.0;
                    }
                }
            }
        }
        NormalizationMethod::L1 => {
            let l1_norm =
                array_f64.map_axis(Axis(axis), |view| view.fold(0.0, |acc, &x| acc + x.abs()));

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    let idx = if axis == 0 { j } else { i };

                    if l1_norm[idx] > EPSILON {
                        normalized[[i, j]] = value / l1_norm[idx];
                    } else {
                        normalized[[i, j]] = 0.0;
                    }
                }
            }
        }
        NormalizationMethod::L2 => {
            let l2_norm = array_f64.map_axis(Axis(axis), |view| {
                let sum_squares = view.iter().fold(0.0, |acc, &x| acc + x * x);
                sum_squares.sqrt()
            });

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    let idx = if axis == 0 { j } else { i };

                    if l2_norm[idx] > EPSILON {
                        normalized[[i, j]] = value / l2_norm[idx];
                    } else {
                        normalized[[i, j]] = 0.0;
                    }
                }
            }
        }
        NormalizationMethod::Robust => {
            let median = array_f64.map_axis(Axis(axis), |view| {
                let mut data = view.to_vec();
                data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
                let n = data.len();
                if n % 2 == 0 {
                    (data[n / 2 - 1] + data[n / 2]) / 2.0
                } else {
                    data[n / 2]
                }
            });

            let iqr = array_f64.map_axis(Axis(axis), |view| {
                let mut data = view.to_vec();
                data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
                let n = data.len();

                // First quartile via linear interpolation between neighbouring order statistics.
                let q1_pos = 0.25 * (n - 1) as f64;
                let q1_idx_low = q1_pos.floor() as usize;
                let q1_idx_high = q1_pos.ceil() as usize;
                let q1 = if q1_idx_low == q1_idx_high {
                    data[q1_idx_low]
                } else {
                    let weight = q1_pos - q1_idx_low as f64;
                    data[q1_idx_low] * (1.0 - weight) + data[q1_idx_high] * weight
                };

                // Third quartile, interpolated the same way.
                let q3_pos = 0.75 * (n - 1) as f64;
                let q3_idx_low = q3_pos.floor() as usize;
                let q3_idx_high = q3_pos.ceil() as usize;
                let q3 = if q3_idx_low == q3_idx_high {
                    data[q3_idx_low]
                } else {
                    let weight = q3_pos - q3_idx_low as f64;
                    data[q3_idx_low] * (1.0 - weight) + data[q3_idx_high] * weight
                };

                q3 - q1
            });

            for i in 0..shape[0] {
                for j in 0..shape[1] {
                    let value = array_f64[[i, j]];
                    let idx = if axis == 0 { j } else { i };

                    if iqr[idx] > EPSILON {
                        normalized[[i, j]] = (value - median[idx]) / iqr[idx];
                    } else {
                        normalized[[i, j]] = 0.0;
                    }
                }
            }
        }
    }

    Ok(normalized)
}

/// Normalizes a 1-D array using the given method.
///
/// Returns an error if the input is empty.
#[allow(dead_code)]
pub fn normalize_vector<S>(
    array: &ArrayBase<S, Ix1>,
    method: NormalizationMethod,
) -> Result<Array1<f64>>
where
    S: Data,
    S::Elem: Float + NumCast,
{
    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

    if array_f64.is_empty() {
        return Err(TransformError::InvalidInput(
            "Input array is empty".to_string(),
        ));
    }

    let mut normalized = Array1::zeros(array_f64.len());

    match method {
        NormalizationMethod::MinMax => {
            let min = array_f64.fold(f64::INFINITY, |acc, &x| acc.min(x));
            let max = array_f64.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
            let range = max - min;

            if range.abs() > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = (value - min) / range;
                }
            } else {
                normalized.fill(0.5);
            }
        }
        NormalizationMethod::MinMaxCustom(new_min, new_max) => {
            let min = array_f64.fold(f64::INFINITY, |acc, &x| acc.min(x));
            let max = array_f64.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
            let range = max - min;
            let new_range = new_max - new_min;

            if range.abs() > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = (value - min) / range * new_range + new_min;
                }
            } else {
                normalized.fill((new_min + new_max) / 2.0);
            }
        }
        NormalizationMethod::ZScore => {
            let mean = array_f64.iter().sum::<f64>() / array_f64.len() as f64;
            let variance = array_f64.iter().map(|&x| (x - mean).powi(2)).sum::<f64>()
                / array_f64.len() as f64;
            let std_dev = variance.sqrt();

            if std_dev > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = (value - mean) / std_dev;
                }
            } else {
                normalized.fill(0.0);
            }
        }
        NormalizationMethod::MaxAbs => {
            let max_abs = array_f64.fold(0.0, |acc, &x| acc.max(x.abs()));

            if max_abs > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = value / max_abs;
                }
            } else {
                normalized.fill(0.0);
            }
        }
        NormalizationMethod::L1 => {
            let l1_norm = array_f64.fold(0.0, |acc, &x| acc + x.abs());

            if l1_norm > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = value / l1_norm;
                }
            } else {
                normalized.fill(0.0);
            }
        }
        NormalizationMethod::L2 => {
            let sum_squares = array_f64.iter().fold(0.0, |acc, &x| acc + x * x);
            let l2_norm = sum_squares.sqrt();

            if l2_norm > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = value / l2_norm;
                }
            } else {
                normalized.fill(0.0);
            }
        }
        NormalizationMethod::Robust => {
            let mut data = array_f64.to_vec();
            data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
            let n = data.len();

            let median = if n % 2 == 0 {
                (data[n / 2 - 1] + data[n / 2]) / 2.0
            } else {
                data[n / 2]
            };

            // Quartiles via linear interpolation between neighbouring order statistics.
            let q1_pos = 0.25 * (n - 1) as f64;
            let q1_idx_low = q1_pos.floor() as usize;
            let q1_idx_high = q1_pos.ceil() as usize;
            let q1 = if q1_idx_low == q1_idx_high {
                data[q1_idx_low]
            } else {
                let weight = q1_pos - q1_idx_low as f64;
                data[q1_idx_low] * (1.0 - weight) + data[q1_idx_high] * weight
            };

            let q3_pos = 0.75 * (n - 1) as f64;
            let q3_idx_low = q3_pos.floor() as usize;
            let q3_idx_high = q3_pos.ceil() as usize;
            let q3 = if q3_idx_low == q3_idx_high {
                data[q3_idx_low]
            } else {
                let weight = q3_pos - q3_idx_low as f64;
                data[q3_idx_low] * (1.0 - weight) + data[q3_idx_high] * weight
            };

            let iqr = q3 - q1;

            if iqr > EPSILON {
                for (i, &value) in array_f64.iter().enumerate() {
                    normalized[i] = (value - median) / iqr;
                }
            } else {
                normalized.fill(0.0);
            }
        }
    }

    Ok(normalized)
}

/// Normalizer that learns per-feature statistics with `fit` and applies them
/// with `transform`, so that new data can be scaled consistently.
#[derive(Clone)]
pub struct Normalizer {
    /// Axis along which statistics are computed (0 = per column, 1 = per row).
    axis: usize,
    /// Fitted parameters for the chosen normalization method.
    params: NormalizerParams,
}

/// Fitted parameters for each normalization method.
#[derive(Clone)]
enum NormalizerParams {
    MinMax {
        min: Array1<f64>,
        max: Array1<f64>,
        new_min: f64,
        new_max: f64,
    },
    ZScore {
        mean: Array1<f64>,
        std_dev: Array1<f64>,
    },
    MaxAbs { max_abs: Array1<f64> },
    L1 { l1_norm: Array1<f64> },
    L2 { l2_norm: Array1<f64> },
    Robust {
        median: Array1<f64>,
        iqr: Array1<f64>,
    },
}

impl Normalizer {
    /// Creates a new `Normalizer` with the given method and axis.
    ///
    /// The statistics are left empty until `fit` is called.
    pub fn new(method: NormalizationMethod, axis: usize) -> Self {
        let params = match method {
            NormalizationMethod::MinMax => NormalizerParams::MinMax {
                min: Array1::zeros(0),
                max: Array1::zeros(0),
                new_min: 0.0,
                new_max: 1.0,
            },
            NormalizationMethod::MinMaxCustom(min, max) => NormalizerParams::MinMax {
                min: Array1::zeros(0),
                max: Array1::zeros(0),
                new_min: min,
                new_max: max,
            },
            NormalizationMethod::ZScore => NormalizerParams::ZScore {
                mean: Array1::zeros(0),
                std_dev: Array1::zeros(0),
            },
            NormalizationMethod::MaxAbs => NormalizerParams::MaxAbs {
                max_abs: Array1::zeros(0),
            },
            NormalizationMethod::L1 => NormalizerParams::L1 {
                l1_norm: Array1::zeros(0),
            },
            NormalizationMethod::L2 => NormalizerParams::L2 {
                l2_norm: Array1::zeros(0),
            },
            NormalizationMethod::Robust => NormalizerParams::Robust {
                median: Array1::zeros(0),
                iqr: Array1::zeros(0),
            },
        };

        Normalizer { axis, params }
    }

    /// Computes the normalization statistics from `array` along the configured axis.
    pub fn fit<S>(&mut self, array: &ArrayBase<S, Ix2>) -> Result<()>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

        if !array_f64.is_standard_layout() {
            return Err(TransformError::InvalidInput(
                "Input array must be in standard memory layout".to_string(),
            ));
        }

        if array_f64.ndim() != 2 {
            return Err(TransformError::InvalidInput(
                "Only 2D arrays are supported".to_string(),
            ));
        }

        if self.axis >= array_f64.ndim() {
            return Err(TransformError::InvalidInput(format!(
                "Invalid axis {} for array with {} dimensions",
                self.axis,
                array_f64.ndim()
            )));
        }

        match &mut self.params {
            NormalizerParams::MinMax {
                min,
                max,
                new_min: _,
                new_max: _,
            } => {
                *min = array_f64.map_axis(Axis(self.axis), |view| {
                    view.fold(f64::INFINITY, |acc, &x| acc.min(x))
                });

                *max = array_f64.map_axis(Axis(self.axis), |view| {
                    view.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
                });
            }
            NormalizerParams::ZScore { mean, std_dev } => {
                *mean = array_f64.map_axis(Axis(self.axis), |view| {
                    view.iter().sum::<f64>() / view.len() as f64
                });

                *std_dev = array_f64.map_axis(Axis(self.axis), |view| {
                    let m = view.iter().sum::<f64>() / view.len() as f64;
                    let variance =
                        view.iter().map(|&x| (x - m).powi(2)).sum::<f64>() / view.len() as f64;
                    variance.sqrt()
                });
            }
            NormalizerParams::MaxAbs { max_abs } => {
                *max_abs = array_f64.map_axis(Axis(self.axis), |view| {
                    view.fold(0.0, |acc, &x| acc.max(x.abs()))
                });
            }
            NormalizerParams::L1 { l1_norm } => {
                *l1_norm = array_f64.map_axis(Axis(self.axis), |view| {
                    view.fold(0.0, |acc, &x| acc + x.abs())
                });
            }
            NormalizerParams::L2 { l2_norm } => {
                *l2_norm = array_f64.map_axis(Axis(self.axis), |view| {
                    let sum_squares = view.iter().fold(0.0, |acc, &x| acc + x * x);
                    sum_squares.sqrt()
                });
            }
            NormalizerParams::Robust { median, iqr } => {
                *median = array_f64.map_axis(Axis(self.axis), |view| {
                    let mut data = view.to_vec();
                    data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
                    let n = data.len();
                    if n % 2 == 0 {
                        (data[n / 2 - 1] + data[n / 2]) / 2.0
                    } else {
                        data[n / 2]
                    }
                });

                *iqr = array_f64.map_axis(Axis(self.axis), |view| {
                    let mut data = view.to_vec();
                    data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
                    let n = data.len();

                    let q1_pos = 0.25 * (n - 1) as f64;
                    let q1_idx_low = q1_pos.floor() as usize;
                    let q1_idx_high = q1_pos.ceil() as usize;
                    let q1 = if q1_idx_low == q1_idx_high {
                        data[q1_idx_low]
                    } else {
                        let weight = q1_pos - q1_idx_low as f64;
                        data[q1_idx_low] * (1.0 - weight) + data[q1_idx_high] * weight
                    };

                    let q3_pos = 0.75 * (n - 1) as f64;
                    let q3_idx_low = q3_pos.floor() as usize;
                    let q3_idx_high = q3_pos.ceil() as usize;
                    let q3 = if q3_idx_low == q3_idx_high {
                        data[q3_idx_low]
                    } else {
                        let weight = q3_pos - q3_idx_low as f64;
                        data[q3_idx_low] * (1.0 - weight) + data[q3_idx_high] * weight
                    };

                    q3 - q1
                });
            }
        }

        Ok(())
    }

    /// Applies the fitted statistics to `array`, returning the normalized data.
    ///
    /// Returns an error if the normalizer was fitted for a different number of features.
    pub fn transform<S>(&self, array: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));

        if !array_f64.is_standard_layout() {
            return Err(TransformError::InvalidInput(
                "Input array must be in standard memory layout".to_string(),
            ));
        }

        if array_f64.ndim() != 2 {
            return Err(TransformError::InvalidInput(
                "Only 2D arrays are supported".to_string(),
            ));
        }

        let expected_size = match &self.params {
            NormalizerParams::MinMax { min, .. } => min.len(),
            NormalizerParams::ZScore { mean, .. } => mean.len(),
            NormalizerParams::MaxAbs { max_abs } => max_abs.len(),
            NormalizerParams::L1 { l1_norm } => l1_norm.len(),
            NormalizerParams::L2 { l2_norm } => l2_norm.len(),
            NormalizerParams::Robust { median, .. } => median.len(),
        };

        let actual_size = if self.axis == 0 {
            array_f64.shape()[1]
        } else {
            array_f64.shape()[0]
        };

        if expected_size != actual_size {
            return Err(TransformError::InvalidInput(format!(
                "Expected {expected_size} features, got {actual_size}"
            )));
        }

        let shape = array_f64.shape();
        let mut transformed = Array2::zeros((shape[0], shape[1]));

        match &self.params {
            NormalizerParams::MinMax {
                min,
                max,
                new_min,
                new_max,
            } => {
                let range = max - min;
                let new_range = new_max - new_min;

                for i in 0..shape[0] {
                    for j in 0..shape[1] {
                        let value = array_f64[[i, j]];
                        let idx = if self.axis == 0 { j } else { i };

                        if range[idx].abs() > EPSILON {
                            transformed[[i, j]] =
                                (value - min[idx]) / range[idx] * new_range + new_min;
                        } else {
                            transformed[[i, j]] = (new_min + new_max) / 2.0;
                        }
                    }
                }
            }
            NormalizerParams::ZScore { mean, std_dev } => {
                for i in 0..shape[0] {
                    for j in 0..shape[1] {
                        let value = array_f64[[i, j]];
                        let idx = if self.axis == 0 { j } else { i };

                        if std_dev[idx] > EPSILON {
                            transformed[[i, j]] = (value - mean[idx]) / std_dev[idx];
                        } else {
                            transformed[[i, j]] = 0.0;
                        }
                    }
                }
            }
            NormalizerParams::MaxAbs { max_abs } => {
                for i in 0..shape[0] {
                    for j in 0..shape[1] {
                        let value = array_f64[[i, j]];
                        let idx = if self.axis == 0 { j } else { i };

                        if max_abs[idx] > EPSILON {
                            transformed[[i, j]] = value / max_abs[idx];
                        } else {
                            transformed[[i, j]] = 0.0;
                        }
                    }
                }
            }
            NormalizerParams::L1 { l1_norm } => {
                for i in 0..shape[0] {
                    for j in 0..shape[1] {
                        let value = array_f64[[i, j]];
                        let idx = if self.axis == 0 { j } else { i };

                        if l1_norm[idx] > EPSILON {
                            transformed[[i, j]] = value / l1_norm[idx];
                        } else {
                            transformed[[i, j]] = 0.0;
                        }
                    }
                }
            }
            NormalizerParams::L2 { l2_norm } => {
                for i in 0..shape[0] {
                    for j in 0..shape[1] {
                        let value = array_f64[[i, j]];
                        let idx = if self.axis == 0 { j } else { i };

                        if l2_norm[idx] > EPSILON {
                            transformed[[i, j]] = value / l2_norm[idx];
                        } else {
                            transformed[[i, j]] = 0.0;
                        }
                    }
                }
            }
            NormalizerParams::Robust { median, iqr } => {
                for i in 0..shape[0] {
                    for j in 0..shape[1] {
                        let value = array_f64[[i, j]];
                        let idx = if self.axis == 0 { j } else { i };

                        if iqr[idx] > EPSILON {
                            transformed[[i, j]] = (value - median[idx]) / iqr[idx];
                        } else {
                            transformed[[i, j]] = 0.0;
                        }
                    }
                }
            }
        }

        Ok(transformed)
    }

    /// Convenience method that fits the normalizer and transforms `array` in one call.
    pub fn fit_transform<S>(&mut self, array: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
    where
        S: Data,
        S::Elem: Float + NumCast,
    {
        self.fit(array)?;
        self.transform(array)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::Array;

    #[test]
    fn test_normalize_vector_minmax() {
        let data = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let normalized = normalize_vector(&data, NormalizationMethod::MinMax).unwrap();

        let expected = Array::from_vec(vec![0.0, 0.25, 0.5, 0.75, 1.0]);

        for (a, b) in normalized.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-10);
        }
    }

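    // Additional check (not part of the original suite): MinMaxCustom should map the
    // same input onto a caller-chosen range; expected values computed by hand.
    #[test]
    fn test_normalize_vector_minmax_custom() {
        let data = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let normalized =
            normalize_vector(&data, NormalizationMethod::MinMaxCustom(-1.0, 1.0)).unwrap();

        let expected = Array::from_vec(vec![-1.0, -0.5, 0.0, 0.5, 1.0]);

        for (a, b) in normalized.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-10);
        }
    }
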
    #[test]
    fn test_normalize_vector_zscore() {
        let data = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let normalized = normalize_vector(&data, NormalizationMethod::ZScore).unwrap();

        let mean = 3.0;
        let std_dev = (10.0 / 5.0_f64).sqrt();
        let expected = data.mapv(|x| (x - mean) / std_dev);

        for (a, b) in normalized.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-10);
        }
    }

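    // Additional check (not part of the original suite): L2 normalization should yield a
    // unit-length vector; [3, 4] has norm 5, so the expected output is [0.6, 0.8].
    #[test]
    fn test_normalize_vector_l2() {
        let data = Array::from_vec(vec![3.0, 4.0]);
        let normalized = normalize_vector(&data, NormalizationMethod::L2).unwrap();

        assert_abs_diff_eq!(normalized[0], 0.6, epsilon = 1e-10);
        assert_abs_diff_eq!(normalized[1], 0.8, epsilon = 1e-10);
    }
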
    #[test]
    fn test_normalize_array_minmax() {
        let data = Array::from_shape_vec((3, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();

        // Normalize each column (axis = 0).
        let normalized = normalize_array(&data, NormalizationMethod::MinMax, 0).unwrap();

        let expected =
            Array::from_shape_vec((3, 3), vec![0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0])
                .unwrap();

        for i in 0..3 {
            for j in 0..3 {
                assert_abs_diff_eq!(normalized[[i, j]], expected[[i, j]], epsilon = 1e-10);
            }
        }

        // Normalize each row (axis = 1).
        let normalized = normalize_array(&data, NormalizationMethod::MinMax, 1).unwrap();

        let expected =
            Array::from_shape_vec((3, 3), vec![0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.5, 1.0])
                .unwrap();

        for i in 0..3 {
            for j in 0..3 {
                assert_abs_diff_eq!(normalized[[i, j]], expected[[i, j]], epsilon = 1e-10);
            }
        }
    }

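    // Additional check (not part of the original suite): L2 normalization along axis = 1
    // should scale each row to unit Euclidean length; expected values computed by hand.
    #[test]
    fn test_normalize_array_l2() {
        let data = Array::from_shape_vec((2, 2), vec![3.0, 4.0, 6.0, 8.0]).unwrap();

        let normalized = normalize_array(&data, NormalizationMethod::L2, 1).unwrap();

        // Row norms are 5 and 10, so both rows normalize to [0.6, 0.8].
        let expected = Array::from_shape_vec((2, 2), vec![0.6, 0.8, 0.6, 0.8]).unwrap();

        for i in 0..2 {
            for j in 0..2 {
                assert_abs_diff_eq!(normalized[[i, j]], expected[[i, j]], epsilon = 1e-10);
            }
        }
    }
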
    #[test]
    fn test_normalizer_fit_transform() {
        let data = Array::from_shape_vec((3, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
            .unwrap();

        let mut normalizer = Normalizer::new(NormalizationMethod::MinMax, 0);
        let transformed = normalizer.fit_transform(&data).unwrap();

        let expected =
            Array::from_shape_vec((3, 3), vec![0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0])
                .unwrap();

        for i in 0..3 {
            for j in 0..3 {
                assert_abs_diff_eq!(transformed[[i, j]], expected[[i, j]], epsilon = 1e-10);
            }
        }

        // Apply the statistics fitted above to new data.
        let data2 = Array::from_shape_vec((2, 3), vec![2.0, 3.0, 4.0, 5.0, 6.0, 7.0]).unwrap();

        let transformed2 = normalizer.transform(&data2).unwrap();

        let expected2 = Array::from_shape_vec(
            (2, 3),
            vec![
                1.0 / 6.0,
                1.0 / 6.0,
                1.0 / 6.0,
                2.0 / 3.0,
                2.0 / 3.0,
                2.0 / 3.0,
            ],
        )
        .unwrap();

        for i in 0..2 {
            for j in 0..3 {
                assert_abs_diff_eq!(transformed2[[i, j]], expected2[[i, j]], epsilon = 1e-10);
            }
        }
    }

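    // Additional check (not part of the original suite): MaxAbs normalization divides by
    // the largest absolute value, here 4, giving [-0.5, 0.25, 1.0].
    #[test]
    fn test_normalize_vector_maxabs() {
        let data = Array::from_vec(vec![-2.0, 1.0, 4.0]);
        let normalized = normalize_vector(&data, NormalizationMethod::MaxAbs).unwrap();

        let expected = Array::from_vec(vec![-0.5, 0.25, 1.0]);

        for (a, b) in normalized.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-10);
        }
    }
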
    #[test]
    fn test_normalize_vector_robust() {
        // The last value is an outlier; robust scaling uses the median and IQR.
        let data = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 100.0]);
        let normalized = normalize_vector(&data, NormalizationMethod::Robust).unwrap();

        // The median is 3.0 and the IQR (Q3 - Q1 = 4.0 - 2.0) is 2.0.
        let expected = Array::from_vec(vec![
            (1.0 - 3.0) / 2.0,
            (2.0 - 3.0) / 2.0,
            (3.0 - 3.0) / 2.0,
            (4.0 - 3.0) / 2.0,
            (100.0 - 3.0) / 2.0,
        ]);

        for (a, b) in normalized.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-10);
        }
    }

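    // Additional check (not part of the original suite): L1 normalization scales so that
    // absolute values sum to 1; the L1 norm of [1, -2, 3, 4] is 10.
    #[test]
    fn test_normalize_vector_l1() {
        let data = Array::from_vec(vec![1.0, -2.0, 3.0, 4.0]);
        let normalized = normalize_vector(&data, NormalizationMethod::L1).unwrap();

        let expected = Array::from_vec(vec![0.1, -0.2, 0.3, 0.4]);

        for (a, b) in normalized.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(a, b, epsilon = 1e-10);
        }
    }
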
    #[test]
    fn test_normalize_array_robust() {
        let data = Array::from_shape_vec((3, 2), vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0]).unwrap();

        let normalized = normalize_array(&data, NormalizationMethod::Robust, 0).unwrap();

        // Column medians are 2.0 and 20.0; column IQRs are 1.0 and 10.0.
        let expected = Array::from_shape_vec(
            (3, 2),
            vec![
                (1.0 - 2.0) / 1.0,
                (10.0 - 20.0) / 10.0,
                (2.0 - 2.0) / 1.0,
                (20.0 - 20.0) / 10.0,
                (3.0 - 2.0) / 1.0,
                (30.0 - 20.0) / 10.0,
            ],
        )
        .unwrap();

        for i in 0..3 {
            for j in 0..2 {
                assert_abs_diff_eq!(normalized[[i, j]], expected[[i, j]], epsilon = 1e-10);
            }
        }
    }

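    // Additional check (not part of the original suite): constant inputs have zero range
    // and zero standard deviation, so the fallback branches should be taken.
    #[test]
    fn test_normalize_vector_constant() {
        let data = Array::from_vec(vec![5.0, 5.0, 5.0]);

        // MinMax maps a constant vector to the midpoint of [0, 1].
        let minmax = normalize_vector(&data, NormalizationMethod::MinMax).unwrap();
        for &v in minmax.iter() {
            assert_abs_diff_eq!(v, 0.5, epsilon = 1e-10);
        }

        // ZScore maps a constant vector to all zeros.
        let zscore = normalize_vector(&data, NormalizationMethod::ZScore).unwrap();
        for &v in zscore.iter() {
            assert_abs_diff_eq!(v, 0.0, epsilon = 1e-10);
        }
    }
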
    #[test]
    fn test_robust_normalizer() {
        let data =
            Array::from_shape_vec((4, 2), vec![1.0, 100.0, 2.0, 200.0, 3.0, 300.0, 4.0, 400.0])
                .unwrap();

        let mut normalizer = Normalizer::new(NormalizationMethod::Robust, 0);
        let transformed = normalizer.fit_transform(&data).unwrap();

        // Column medians are 2.5 and 250.0; column IQRs are 1.5 and 150.0.
        let expected = Array::from_shape_vec(
            (4, 2),
            vec![
                (1.0 - 2.5) / 1.5,
                (100.0 - 250.0) / 150.0,
                (2.0 - 2.5) / 1.5,
                (200.0 - 250.0) / 150.0,
                (3.0 - 2.5) / 1.5,
                (300.0 - 250.0) / 150.0,
                (4.0 - 2.5) / 1.5,
                (400.0 - 250.0) / 150.0,
            ],
        )
        .unwrap();

        for i in 0..4 {
            for j in 0..2 {
                assert_abs_diff_eq!(transformed[[i, j]], expected[[i, j]], epsilon = 1e-10);
            }
        }
    }
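
    // Additional check (not part of the original suite): normalizing an empty vector
    // should be rejected with an error rather than panicking.
    #[test]
    fn test_normalize_vector_empty() {
        let data = Array::from_vec(Vec::<f64>::new());
        let result = normalize_vector(&data, NormalizationMethod::MinMax);
        assert!(result.is_err());
    }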
}