scirs2_transform/
normalize.rs

1//! Data normalization and standardization utilities
2//!
3//! This module provides functions for normalizing and standardizing data,
4//! which is often a preprocessing step for machine learning algorithms.
5
6use scirs2_core::ndarray::{Array1, Array2, ArrayBase, Axis, Data, Ix1, Ix2};
7use scirs2_core::numeric::{Float, NumCast};
8
9use crate::error::{Result, TransformError};
10
/// Small value to use for comparison with zero and numerical stability
///
/// The normalization routines in this module only divide by a scale statistic (range,
/// standard deviation, maximum absolute value, norm, or IQR) when it is strictly greater
/// than this threshold; otherwise they write a fixed default value instead.
pub const EPSILON: f64 = 1e-10;
13
/// Method of normalization to apply
///
/// Each method divides by a scale statistic computed from the data being normalized.
/// When that statistic does not exceed `EPSILON` (for example, for constant data), the
/// routines in this module write a fixed default value instead of dividing; the default
/// is noted on each variant.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum NormalizationMethod {
    /// Min-max normalization (scales values to [0, 1] range)
    ///
    /// Computed as `(x - min) / (max - min)`; degenerate data maps to `0.5`.
    MinMax,
    /// Min-max normalization to custom range
    ///
    /// The fields are the target minimum and maximum, in that order. Degenerate data
    /// maps to the midpoint of the target range.
    MinMaxCustom(f64, f64),
    /// Z-score standardization (zero mean, unit variance)
    ///
    /// Uses the population standard deviation (the variance is divided by the number of
    /// values, not by one less); degenerate data maps to `0.0`.
    ZScore,
    /// Max absolute scaling (scales by maximum absolute value)
    ///
    /// Degenerate data maps to `0.0`.
    MaxAbs,
    /// L1 normalization (divide by sum of absolute values)
    ///
    /// Degenerate data maps to `0.0`.
    L1,
    /// L2 normalization (divide by Euclidean norm)
    ///
    /// Degenerate data maps to `0.0`.
    L2,
    /// Robust scaling using median and IQR (robust to outliers)
    ///
    /// Computed as `(x - median) / (Q3 - Q1)`, where the quartiles are linearly
    /// interpolated between neighbouring sorted values; degenerate data maps to `0.0`.
    Robust,
}
32
33/// Normalizes a 2D array along a specified axis
34///
35/// # Arguments
36/// * `array` - The input 2D array to normalize
37/// * `method` - The normalization method to apply
38/// * `axis` - The axis along which to normalize (0 for columns, 1 for rows)
39///
40/// # Returns
41/// * `Result<Array2<f64>>` - The normalized array
42///
43/// # Examples
44/// ```
45/// use scirs2_core::ndarray::array;
46/// use scirs2_transform::normalize::{normalize_array, NormalizationMethod};
47///
48/// let data = array![[1.0, 2.0, 3.0],
49///                   [4.0, 5.0, 6.0],
50///                   [7.0, 8.0, 9.0]];
51///                   
52/// // Normalize columns (axis 0) using min-max normalization
53/// let normalized = normalize_array(&data, NormalizationMethod::MinMax, 0).unwrap();
54/// ```
55#[allow(dead_code)]
56pub fn normalize_array<S>(
57    array: &ArrayBase<S, Ix2>,
58    method: NormalizationMethod,
59    axis: usize,
60) -> Result<Array2<f64>>
61where
62    S: Data,
63    S::Elem: Float + NumCast,
64{
65    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));
66
67    if !array_f64.is_standard_layout() {
68        return Err(TransformError::InvalidInput(
69            "Input array must be in standard memory layout".to_string(),
70        ));
71    }
72
73    if array_f64.ndim() != 2 {
74        return Err(TransformError::InvalidInput(
75            "Only 2D arrays are supported".to_string(),
76        ));
77    }
78
79    if axis >= array_f64.ndim() {
80        return Err(TransformError::InvalidInput(format!(
81            "Invalid axis {} for array with {} dimensions",
82            axis,
83            array_f64.ndim()
84        )));
85    }
86
87    let shape = array_f64.shape();
88    let mut normalized = Array2::zeros((shape[0], shape[1]));
89
90    match method {
91        NormalizationMethod::MinMax => {
92            let min = array_f64.map_axis(Axis(axis), |view| {
93                view.fold(f64::INFINITY, |acc, &x| acc.min(x))
94            });
95
96            let max = array_f64.map_axis(Axis(axis), |view| {
97                view.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
98            });
99
100            let range = &max - &min;
101
102            for i in 0..shape[0] {
103                for j in 0..shape[1] {
104                    let value = array_f64[[i, j]];
105                    let idx = if axis == 0 { j } else { i };
106
107                    if range[idx].abs() > EPSILON {
108                        normalized[[i, j]] = (value - min[idx]) / range[idx];
109                    } else {
110                        normalized[[i, j]] = 0.5; // Default for constant features
111                    }
112                }
113            }
114        }
115        NormalizationMethod::MinMaxCustom(new_min, new_max) => {
116            let min = array_f64.map_axis(Axis(axis), |view| {
117                view.fold(f64::INFINITY, |acc, &x| acc.min(x))
118            });
119
120            let max = array_f64.map_axis(Axis(axis), |view| {
121                view.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
122            });
123
124            let range = &max - &min;
125            let new_range = new_max - new_min;
126
127            for i in 0..shape[0] {
128                for j in 0..shape[1] {
129                    let value = array_f64[[i, j]];
130                    let idx = if axis == 0 { j } else { i };
131
132                    if range[idx].abs() > EPSILON {
133                        normalized[[i, j]] = (value - min[idx]) / range[idx] * new_range + new_min;
134                    } else {
135                        normalized[[i, j]] = (new_min + new_max) / 2.0; // Default for constant features
136                    }
137                }
138            }
139        }
140        NormalizationMethod::ZScore => {
141            let mean = array_f64.map_axis(Axis(axis), |view| {
142                view.iter().sum::<f64>() / view.len() as f64
143            });
144
145            let std_dev = array_f64.map_axis(Axis(axis), |view| {
146                let m = view.iter().sum::<f64>() / view.len() as f64;
147                let variance =
148                    view.iter().map(|&x| (x - m).powi(2)).sum::<f64>() / view.len() as f64;
149                variance.sqrt()
150            });
151
152            for i in 0..shape[0] {
153                for j in 0..shape[1] {
154                    let value = array_f64[[i, j]];
155                    let idx = if axis == 0 { j } else { i };
156
157                    if std_dev[idx] > EPSILON {
158                        normalized[[i, j]] = (value - mean[idx]) / std_dev[idx];
159                    } else {
160                        normalized[[i, j]] = 0.0; // Default for constant features
161                    }
162                }
163            }
164        }
165        NormalizationMethod::MaxAbs => {
166            let max_abs = array_f64.map_axis(Axis(axis), |view| {
167                view.fold(0.0, |acc, &x| acc.max(x.abs()))
168            });
169
170            for i in 0..shape[0] {
171                for j in 0..shape[1] {
172                    let value = array_f64[[i, j]];
173                    let idx = if axis == 0 { j } else { i };
174
175                    if max_abs[idx] > EPSILON {
176                        normalized[[i, j]] = value / max_abs[idx];
177                    } else {
178                        normalized[[i, j]] = 0.0; // Default for constant features
179                    }
180                }
181            }
182        }
183        NormalizationMethod::L1 => {
184            let l1_norm =
185                array_f64.map_axis(Axis(axis), |view| view.fold(0.0, |acc, &x| acc + x.abs()));
186
187            for i in 0..shape[0] {
188                for j in 0..shape[1] {
189                    let value = array_f64[[i, j]];
190                    let idx = if axis == 0 { j } else { i };
191
192                    if l1_norm[idx] > EPSILON {
193                        normalized[[i, j]] = value / l1_norm[idx];
194                    } else {
195                        normalized[[i, j]] = 0.0; // Default for constant features
196                    }
197                }
198            }
199        }
200        NormalizationMethod::L2 => {
201            let l2_norm = array_f64.map_axis(Axis(axis), |view| {
202                let sum_squares = view.iter().fold(0.0, |acc, &x| acc + x * x);
203                sum_squares.sqrt()
204            });
205
206            for i in 0..shape[0] {
207                for j in 0..shape[1] {
208                    let value = array_f64[[i, j]];
209                    let idx = if axis == 0 { j } else { i };
210
211                    if l2_norm[idx] > EPSILON {
212                        normalized[[i, j]] = value / l2_norm[idx];
213                    } else {
214                        normalized[[i, j]] = 0.0; // Default for constant features
215                    }
216                }
217            }
218        }
219        NormalizationMethod::Robust => {
220            let median = array_f64.map_axis(Axis(axis), |view| {
221                let mut data = view.to_vec();
222                data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
223                let n = data.len();
224                if n % 2 == 0 {
225                    (data[n / 2 - 1] + data[n / 2]) / 2.0
226                } else {
227                    data[n / 2]
228                }
229            });
230
231            let iqr = array_f64.map_axis(Axis(axis), |view| {
232                let mut data = view.to_vec();
233                data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
234                let n = data.len();
235
236                // Calculate Q1 (25th percentile)
237                let q1_pos = 0.25 * (n - 1) as f64;
238                let q1_idx_low = q1_pos.floor() as usize;
239                let q1_idx_high = q1_pos.ceil() as usize;
240                let q1 = if q1_idx_low == q1_idx_high {
241                    data[q1_idx_low]
242                } else {
243                    let weight = q1_pos - q1_idx_low as f64;
244                    data[q1_idx_low] * (1.0 - weight) + data[q1_idx_high] * weight
245                };
246
247                // Calculate Q3 (75th percentile)
248                let q3_pos = 0.75 * (n - 1) as f64;
249                let q3_idx_low = q3_pos.floor() as usize;
250                let q3_idx_high = q3_pos.ceil() as usize;
251                let q3 = if q3_idx_low == q3_idx_high {
252                    data[q3_idx_low]
253                } else {
254                    let weight = q3_pos - q3_idx_low as f64;
255                    data[q3_idx_low] * (1.0 - weight) + data[q3_idx_high] * weight
256                };
257
258                q3 - q1
259            });
260
261            for i in 0..shape[0] {
262                for j in 0..shape[1] {
263                    let value = array_f64[[i, j]];
264                    let idx = if axis == 0 { j } else { i };
265
266                    if iqr[idx] > EPSILON {
267                        normalized[[i, j]] = (value - median[idx]) / iqr[idx];
268                    } else {
269                        normalized[[i, j]] = 0.0; // Default for constant features
270                    }
271                }
272            }
273        }
274    }
275
276    Ok(normalized)
277}
278
279/// Normalizes a 1D array
280///
281/// # Arguments
282/// * `array` - The input 1D array to normalize
283/// * `method` - The normalization method to apply
284///
285/// # Returns
286/// * `Result<Array1<f64>>` - The normalized array
287///
288/// # Examples
289/// ```
290/// use scirs2_core::ndarray::array;
291/// use scirs2_transform::normalize::{normalize_vector, NormalizationMethod};
292///
293/// let data = array![1.0, 2.0, 3.0, 4.0, 5.0];
294///                   
295/// // Normalize vector using min-max normalization
296/// let normalized = normalize_vector(&data, NormalizationMethod::MinMax).unwrap();
297/// ```
298#[allow(dead_code)]
299pub fn normalize_vector<S>(
300    array: &ArrayBase<S, Ix1>,
301    method: NormalizationMethod,
302) -> Result<Array1<f64>>
303where
304    S: Data,
305    S::Elem: Float + NumCast,
306{
307    let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));
308
309    if array_f64.is_empty() {
310        return Err(TransformError::InvalidInput(
311            "Input array is empty".to_string(),
312        ));
313    }
314
315    let mut normalized = Array1::zeros(array_f64.len());
316
317    match method {
318        NormalizationMethod::MinMax => {
319            let min = array_f64.fold(f64::INFINITY, |acc, &x| acc.min(x));
320            let max = array_f64.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
321            let range = max - min;
322
323            if range.abs() > EPSILON {
324                for (i, &value) in array_f64.iter().enumerate() {
325                    normalized[i] = (value - min) / range;
326                }
327            } else {
328                normalized.fill(0.5); // Default for constant features
329            }
330        }
331        NormalizationMethod::MinMaxCustom(new_min, new_max) => {
332            let min = array_f64.fold(f64::INFINITY, |acc, &x| acc.min(x));
333            let max = array_f64.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x));
334            let range = max - min;
335            let new_range = new_max - new_min;
336
337            if range.abs() > EPSILON {
338                for (i, &value) in array_f64.iter().enumerate() {
339                    normalized[i] = (value - min) / range * new_range + new_min;
340                }
341            } else {
342                normalized.fill((new_min + new_max) / 2.0); // Default for constant features
343            }
344        }
345        NormalizationMethod::ZScore => {
346            let mean = array_f64.iter().sum::<f64>() / array_f64.len() as f64;
347            let variance =
348                array_f64.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / array_f64.len() as f64;
349            let std_dev = variance.sqrt();
350
351            if std_dev > EPSILON {
352                for (i, &value) in array_f64.iter().enumerate() {
353                    normalized[i] = (value - mean) / std_dev;
354                }
355            } else {
356                normalized.fill(0.0); // Default for constant features
357            }
358        }
359        NormalizationMethod::MaxAbs => {
360            let max_abs = array_f64.fold(0.0, |acc, &x| acc.max(x.abs()));
361
362            if max_abs > EPSILON {
363                for (i, &value) in array_f64.iter().enumerate() {
364                    normalized[i] = value / max_abs;
365                }
366            } else {
367                normalized.fill(0.0); // Default for constant features
368            }
369        }
370        NormalizationMethod::L1 => {
371            let l1_norm = array_f64.fold(0.0, |acc, &x| acc + x.abs());
372
373            if l1_norm > EPSILON {
374                for (i, &value) in array_f64.iter().enumerate() {
375                    normalized[i] = value / l1_norm;
376                }
377            } else {
378                normalized.fill(0.0); // Default for constant features
379            }
380        }
381        NormalizationMethod::L2 => {
382            let sum_squares = array_f64.iter().fold(0.0, |acc, &x| acc + x * x);
383            let l2_norm = sum_squares.sqrt();
384
385            if l2_norm > EPSILON {
386                for (i, &value) in array_f64.iter().enumerate() {
387                    normalized[i] = value / l2_norm;
388                }
389            } else {
390                normalized.fill(0.0); // Default for constant features
391            }
392        }
393        NormalizationMethod::Robust => {
394            let mut data = array_f64.to_vec();
395            data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
396            let n = data.len();
397
398            // Calculate median
399            let median = if n.is_multiple_of(2) {
400                (data[n / 2 - 1] + data[n / 2]) / 2.0
401            } else {
402                data[n / 2]
403            };
404
405            // Calculate IQR (Interquartile Range)
406            // Calculate Q1 (25th percentile)
407            let q1_pos = 0.25 * (n - 1) as f64;
408            let q1_idx_low = q1_pos.floor() as usize;
409            let q1_idx_high = q1_pos.ceil() as usize;
410            let q1 = if q1_idx_low == q1_idx_high {
411                data[q1_idx_low]
412            } else {
413                let weight = q1_pos - q1_idx_low as f64;
414                data[q1_idx_low] * (1.0 - weight) + data[q1_idx_high] * weight
415            };
416
417            // Calculate Q3 (75th percentile)
418            let q3_pos = 0.75 * (n - 1) as f64;
419            let q3_idx_low = q3_pos.floor() as usize;
420            let q3_idx_high = q3_pos.ceil() as usize;
421            let q3 = if q3_idx_low == q3_idx_high {
422                data[q3_idx_low]
423            } else {
424                let weight = q3_pos - q3_idx_low as f64;
425                data[q3_idx_low] * (1.0 - weight) + data[q3_idx_high] * weight
426            };
427
428            let iqr = q3 - q1;
429
430            if iqr > EPSILON {
431                for (i, &value) in array_f64.iter().enumerate() {
432                    normalized[i] = (value - median) / iqr;
433                }
434            } else {
435                normalized.fill(0.0); // Default for constant features
436            }
437        }
438    }
439
440    Ok(normalized)
441}
442
/// Represents a fitted normalization model that can transform new data
///
/// A value created by `new` is not yet fitted: its per-lane statistics are empty arrays
/// until `fit` (or `fit_transform`) has been called. `transform` compares the number of
/// stored statistics with the input's size and returns an error when they differ, which
/// is also what happens when an unfitted normalizer is given non-empty input.
#[derive(Clone)]
pub struct Normalizer {
    /// The axis along which to normalize (0 for columns, 1 for rows)
    axis: usize,
    /// Parameters from the fit (depends on method)
    params: NormalizerParams,
}
451
/// Parameters for different normalization methods
///
/// Each array holds one statistic per lane (one per column when normalizing along axis 0,
/// one per row otherwise). The arrays are created with length zero and are overwritten
/// when the normalizer is fitted.
#[derive(Clone)]
enum NormalizerParams {
    /// Min and max values for MinMax normalization
    ///
    /// Used for both the default `[0, 1]` range and custom ranges; `new_min` and `new_max`
    /// are the bounds of the target range.
    MinMax {
        min: Array1<f64>,
        max: Array1<f64>,
        new_min: f64,
        new_max: f64,
    },
    /// Mean and standard deviation for ZScore normalization
    ZScore {
        mean: Array1<f64>,
        std_dev: Array1<f64>,
    },
    /// Maximum absolute values for MaxAbs normalization
    MaxAbs { max_abs: Array1<f64> },
    /// L1 norms for L1 normalization
    L1 { l1_norm: Array1<f64> },
    /// L2 norms for L2 normalization
    L2 { l2_norm: Array1<f64> },
    /// Median and IQR for Robust normalization
    Robust {
        median: Array1<f64>,
        iqr: Array1<f64>,
    },
}
479
480impl Normalizer {
481    /// Creates a new Normalizer with the specified method and axis
482    ///
483    /// # Arguments
484    /// * `method` - The normalization method to apply
485    /// * `axis` - The axis along which to normalize (0 for columns, 1 for rows)
486    ///
487    /// # Returns
488    /// * A new Normalizer instance
489    pub fn new(method: NormalizationMethod, axis: usize) -> Self {
490        let params = match method {
491            NormalizationMethod::MinMax => NormalizerParams::MinMax {
492                min: Array1::zeros(0),
493                max: Array1::zeros(0),
494                new_min: 0.0,
495                new_max: 1.0,
496            },
497            NormalizationMethod::MinMaxCustom(min, max) => NormalizerParams::MinMax {
498                min: Array1::zeros(0),
499                max: Array1::zeros(0),
500                new_min: min,
501                new_max: max,
502            },
503            NormalizationMethod::ZScore => NormalizerParams::ZScore {
504                mean: Array1::zeros(0),
505                std_dev: Array1::zeros(0),
506            },
507            NormalizationMethod::MaxAbs => NormalizerParams::MaxAbs {
508                max_abs: Array1::zeros(0),
509            },
510            NormalizationMethod::L1 => NormalizerParams::L1 {
511                l1_norm: Array1::zeros(0),
512            },
513            NormalizationMethod::L2 => NormalizerParams::L2 {
514                l2_norm: Array1::zeros(0),
515            },
516            NormalizationMethod::Robust => NormalizerParams::Robust {
517                median: Array1::zeros(0),
518                iqr: Array1::zeros(0),
519            },
520        };
521
522        Normalizer { axis, params }
523    }
524
525    /// Fits the normalizer to the input data
526    ///
527    /// # Arguments
528    /// * `array` - The input 2D array to fit the normalizer to
529    ///
530    /// # Returns
531    /// * `Result<()>` - Ok if successful, Err otherwise
532    pub fn fit<S>(&mut self, array: &ArrayBase<S, Ix2>) -> Result<()>
533    where
534        S: Data,
535        S::Elem: Float + NumCast,
536    {
537        let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));
538
539        if !array_f64.is_standard_layout() {
540            return Err(TransformError::InvalidInput(
541                "Input array must be in standard memory layout".to_string(),
542            ));
543        }
544
545        if array_f64.ndim() != 2 {
546            return Err(TransformError::InvalidInput(
547                "Only 2D arrays are supported".to_string(),
548            ));
549        }
550
551        if self.axis >= array_f64.ndim() {
552            return Err(TransformError::InvalidInput(format!(
553                "Invalid axis {} for array with {} dimensions",
554                self.axis,
555                array_f64.ndim()
556            )));
557        }
558
559        match &mut self.params {
560            NormalizerParams::MinMax {
561                min,
562                max,
563                new_min: _,
564                new_max: _,
565            } => {
566                *min = array_f64.map_axis(Axis(self.axis), |view| {
567                    view.fold(f64::INFINITY, |acc, &x| acc.min(x))
568                });
569
570                *max = array_f64.map_axis(Axis(self.axis), |view| {
571                    view.fold(f64::NEG_INFINITY, |acc, &x| acc.max(x))
572                });
573            }
574            NormalizerParams::ZScore { mean, std_dev } => {
575                *mean = array_f64.map_axis(Axis(self.axis), |view| {
576                    view.iter().sum::<f64>() / view.len() as f64
577                });
578
579                *std_dev = array_f64.map_axis(Axis(self.axis), |view| {
580                    let m = view.iter().sum::<f64>() / view.len() as f64;
581                    let variance =
582                        view.iter().map(|&x| (x - m).powi(2)).sum::<f64>() / view.len() as f64;
583                    variance.sqrt()
584                });
585            }
586            NormalizerParams::MaxAbs { max_abs } => {
587                *max_abs = array_f64.map_axis(Axis(self.axis), |view| {
588                    view.fold(0.0, |acc, &x| acc.max(x.abs()))
589                });
590            }
591            NormalizerParams::L1 { l1_norm } => {
592                *l1_norm = array_f64.map_axis(Axis(self.axis), |view| {
593                    view.fold(0.0, |acc, &x| acc + x.abs())
594                });
595            }
596            NormalizerParams::L2 { l2_norm } => {
597                *l2_norm = array_f64.map_axis(Axis(self.axis), |view| {
598                    let sum_squares = view.iter().fold(0.0, |acc, &x| acc + x * x);
599                    sum_squares.sqrt()
600                });
601            }
602            NormalizerParams::Robust { median, iqr } => {
603                *median = array_f64.map_axis(Axis(self.axis), |view| {
604                    let mut data = view.to_vec();
605                    data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
606                    let n = data.len();
607                    if n % 2 == 0 {
608                        (data[n / 2 - 1] + data[n / 2]) / 2.0
609                    } else {
610                        data[n / 2]
611                    }
612                });
613
614                *iqr = array_f64.map_axis(Axis(self.axis), |view| {
615                    let mut data = view.to_vec();
616                    data.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
617                    let n = data.len();
618
619                    // Calculate Q1 (25th percentile)
620                    let q1_pos = 0.25 * (n - 1) as f64;
621                    let q1_idx_low = q1_pos.floor() as usize;
622                    let q1_idx_high = q1_pos.ceil() as usize;
623                    let q1 = if q1_idx_low == q1_idx_high {
624                        data[q1_idx_low]
625                    } else {
626                        let weight = q1_pos - q1_idx_low as f64;
627                        data[q1_idx_low] * (1.0 - weight) + data[q1_idx_high] * weight
628                    };
629
630                    // Calculate Q3 (75th percentile)
631                    let q3_pos = 0.75 * (n - 1) as f64;
632                    let q3_idx_low = q3_pos.floor() as usize;
633                    let q3_idx_high = q3_pos.ceil() as usize;
634                    let q3 = if q3_idx_low == q3_idx_high {
635                        data[q3_idx_low]
636                    } else {
637                        let weight = q3_pos - q3_idx_low as f64;
638                        data[q3_idx_low] * (1.0 - weight) + data[q3_idx_high] * weight
639                    };
640
641                    q3 - q1
642                });
643            }
644        }
645
646        Ok(())
647    }
648
649    /// Transforms the input data using the fitted normalizer
650    ///
651    /// # Arguments
652    /// * `array` - The input 2D array to transform
653    ///
654    /// # Returns
655    /// * `Result<Array2<f64>>` - The transformed array
656    pub fn transform<S>(&self, array: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
657    where
658        S: Data,
659        S::Elem: Float + NumCast,
660    {
661        let array_f64 = array.mapv(|x| NumCast::from(x).unwrap_or(0.0));
662
663        if !array_f64.is_standard_layout() {
664            return Err(TransformError::InvalidInput(
665                "Input array must be in standard memory layout".to_string(),
666            ));
667        }
668
669        if array_f64.ndim() != 2 {
670            return Err(TransformError::InvalidInput(
671                "Only 2D arrays are supported".to_string(),
672            ));
673        }
674
675        // Check the dimension along the normalization axis
676        let expected_size = match &self.params {
677            NormalizerParams::MinMax { min, .. } => min.len(),
678            NormalizerParams::ZScore { mean, .. } => mean.len(),
679            NormalizerParams::MaxAbs { max_abs } => max_abs.len(),
680            NormalizerParams::L1 { l1_norm } => l1_norm.len(),
681            NormalizerParams::L2 { l2_norm } => l2_norm.len(),
682            NormalizerParams::Robust { median, .. } => median.len(),
683        };
684
685        let actual_size = if self.axis == 0 {
686            array_f64.shape()[1]
687        } else {
688            array_f64.shape()[0]
689        };
690
691        if expected_size != actual_size {
692            return Err(TransformError::InvalidInput(format!(
693                "Expected {expected_size} features, got {actual_size}"
694            )));
695        }
696
697        let shape = array_f64.shape();
698        let mut transformed = Array2::zeros((shape[0], shape[1]));
699
700        match &self.params {
701            NormalizerParams::MinMax {
702                min,
703                max,
704                new_min,
705                new_max,
706            } => {
707                let range = max - min;
708                let new_range = new_max - new_min;
709
710                for i in 0..shape[0] {
711                    for j in 0..shape[1] {
712                        let value = array_f64[[i, j]];
713                        let idx = if self.axis == 0 { j } else { i };
714
715                        if range[idx].abs() > EPSILON {
716                            transformed[[i, j]] =
717                                (value - min[idx]) / range[idx] * new_range + new_min;
718                        } else {
719                            transformed[[i, j]] = (new_min + new_max) / 2.0; // Default for constant features
720                        }
721                    }
722                }
723            }
724            NormalizerParams::ZScore { mean, std_dev } => {
725                for i in 0..shape[0] {
726                    for j in 0..shape[1] {
727                        let value = array_f64[[i, j]];
728                        let idx = if self.axis == 0 { j } else { i };
729
730                        if std_dev[idx] > EPSILON {
731                            transformed[[i, j]] = (value - mean[idx]) / std_dev[idx];
732                        } else {
733                            transformed[[i, j]] = 0.0; // Default for constant features
734                        }
735                    }
736                }
737            }
738            NormalizerParams::MaxAbs { max_abs } => {
739                for i in 0..shape[0] {
740                    for j in 0..shape[1] {
741                        let value = array_f64[[i, j]];
742                        let idx = if self.axis == 0 { j } else { i };
743
744                        if max_abs[idx] > EPSILON {
745                            transformed[[i, j]] = value / max_abs[idx];
746                        } else {
747                            transformed[[i, j]] = 0.0; // Default for constant features
748                        }
749                    }
750                }
751            }
752            NormalizerParams::L1 { l1_norm } => {
753                for i in 0..shape[0] {
754                    for j in 0..shape[1] {
755                        let value = array_f64[[i, j]];
756                        let idx = if self.axis == 0 { j } else { i };
757
758                        if l1_norm[idx] > EPSILON {
759                            transformed[[i, j]] = value / l1_norm[idx];
760                        } else {
761                            transformed[[i, j]] = 0.0; // Default for constant features
762                        }
763                    }
764                }
765            }
766            NormalizerParams::L2 { l2_norm } => {
767                for i in 0..shape[0] {
768                    for j in 0..shape[1] {
769                        let value = array_f64[[i, j]];
770                        let idx = if self.axis == 0 { j } else { i };
771
772                        if l2_norm[idx] > EPSILON {
773                            transformed[[i, j]] = value / l2_norm[idx];
774                        } else {
775                            transformed[[i, j]] = 0.0; // Default for constant features
776                        }
777                    }
778                }
779            }
780            NormalizerParams::Robust { median, iqr } => {
781                for i in 0..shape[0] {
782                    for j in 0..shape[1] {
783                        let value = array_f64[[i, j]];
784                        let idx = if self.axis == 0 { j } else { i };
785
786                        if iqr[idx] > EPSILON {
787                            transformed[[i, j]] = (value - median[idx]) / iqr[idx];
788                        } else {
789                            transformed[[i, j]] = 0.0; // Default for constant features
790                        }
791                    }
792                }
793            }
794        }
795
796        Ok(transformed)
797    }
798
799    /// Fits the normalizer to the input data and transforms it
800    ///
801    /// # Arguments
802    /// * `array` - The input 2D array to fit and transform
803    ///
804    /// # Returns
805    /// * `Result<Array2<f64>>` - The transformed array
806    pub fn fit_transform<S>(&mut self, array: &ArrayBase<S, Ix2>) -> Result<Array2<f64>>
807    where
808        S: Data,
809        S::Elem: Float + NumCast,
810    {
811        self.fit(array)?;
812        self.transform(array)
813    }
814}
815
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_abs_diff_eq;
    use scirs2_core::ndarray::Array;

    /// Absolute tolerance applied to every element-wise comparison in this module
    const TOLERANCE: f64 = 1e-10;

    /// Asserts that two 1D arrays agree pairwise within `TOLERANCE`
    fn assert_vector_close(actual: &Array1<f64>, expected: &Array1<f64>) {
        for (got, want) in actual.iter().zip(expected.iter()) {
            assert_abs_diff_eq!(got, want, epsilon = TOLERANCE);
        }
    }

    /// Asserts that `actual` matches `expected` at every index of `expected` within `TOLERANCE`
    fn assert_matrix_close(actual: &Array2<f64>, expected: &Array2<f64>) {
        for ((row, col), &want) in expected.indexed_iter() {
            assert_abs_diff_eq!(actual[[row, col]], want, epsilon = TOLERANCE);
        }
    }

    /// Min-max scaling of an evenly spaced vector yields evenly spaced values in [0, 1]
    #[test]
    fn test_normalize_vector_minmax() {
        let input = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let expected = Array::from_vec(vec![0.0, 0.25, 0.5, 0.75, 1.0]);

        let normalized = normalize_vector(&input, NormalizationMethod::MinMax).unwrap();

        assert_vector_close(&normalized, &expected);
    }

    /// Z-score output is compared against a hand-computed mean and standard deviation
    #[test]
    fn test_normalize_vector_zscore() {
        let input = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 5.0]);
        let normalized = normalize_vector(&input, NormalizationMethod::ZScore).unwrap();

        // Mean of 1..=5 is 3; the squared deviations sum to 10 over 5 samples.
        let mean = 3.0;
        let std_dev = (10.0 / 5.0_f64).sqrt();
        let expected = input.mapv(|x| (x - mean) / std_dev);

        assert_vector_close(&normalized, &expected);
    }

    /// Min-max scaling of a 3x3 matrix along each of the two axes
    #[test]
    fn test_normalize_array_minmax() {
        let input =
            Array::from_shape_vec((3, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
                .unwrap();

        // Axis 0: every column is scaled independently.
        {
            let normalized = normalize_array(&input, NormalizationMethod::MinMax, 0).unwrap();
            let expected =
                Array::from_shape_vec((3, 3), vec![0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0])
                    .unwrap();
            assert_matrix_close(&normalized, &expected);
        }

        // Axis 1: every row is scaled independently.
        {
            let normalized = normalize_array(&input, NormalizationMethod::MinMax, 1).unwrap();
            let expected =
                Array::from_shape_vec((3, 3), vec![0.0, 0.5, 1.0, 0.0, 0.5, 1.0, 0.0, 0.5, 1.0])
                    .unwrap();
            assert_matrix_close(&normalized, &expected);
        }
    }

    /// A fitted `Normalizer` transforms its training data and, afterwards, unseen data
    #[test]
    fn test_normalizer_fit_transform() {
        let training =
            Array::from_shape_vec((3, 3), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0])
                .unwrap();

        // Column-wise min-max, fitted and applied in a single call.
        let mut normalizer = Normalizer::new(NormalizationMethod::MinMax, 0);
        let transformed = normalizer.fit_transform(&training).unwrap();

        let expected =
            Array::from_shape_vec((3, 3), vec![0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 1.0, 1.0, 1.0])
                .unwrap();
        assert_matrix_close(&transformed, &expected);

        // The already-fitted normalizer is reused on new data without refitting, so the
        // expected values use the training columns' minima (1, 2, 3) and range (6).
        let unseen = Array::from_shape_vec((2, 3), vec![2.0, 3.0, 4.0, 5.0, 6.0, 7.0]).unwrap();
        let transformed_unseen = normalizer.transform(&unseen).unwrap();

        let one_sixth = 1.0 / 6.0;
        let two_thirds = 2.0 / 3.0;
        let expected_unseen = Array::from_shape_vec(
            (2, 3),
            vec![
                one_sixth, one_sixth, one_sixth, two_thirds, two_thirds, two_thirds,
            ],
        )
        .unwrap();
        assert_matrix_close(&transformed_unseen, &expected_unseen);
    }

    /// Robust scaling of a vector whose last element is an outlier
    #[test]
    fn test_normalize_vector_robust() {
        // 100 is the outlier.
        let input = Array::from_vec(vec![1.0, 2.0, 3.0, 4.0, 100.0]);
        let normalized = normalize_vector(&input, NormalizationMethod::Robust).unwrap();

        // Sorted data: [1, 2, 3, 4, 100]
        //   median = 3.0 (middle value)
        //   Q1 = 2.0 (25th percentile), Q3 = 4.0 (75th percentile), IQR = 2.0
        // so every element maps to (x - 3) / 2.
        let expected = Array::from_vec(vec![
            (1.0 - 3.0) / 2.0,   // -1.0
            (2.0 - 3.0) / 2.0,   // -0.5
            (3.0 - 3.0) / 2.0,   // 0
            (4.0 - 3.0) / 2.0,   // 0.5
            (100.0 - 3.0) / 2.0, // 48.5
        ]);

        assert_vector_close(&normalized, &expected);
    }

    /// Robust scaling applied column-wise to a 3x2 matrix
    #[test]
    fn test_normalize_array_robust() {
        let input = Array::from_shape_vec((3, 2), vec![1.0, 10.0, 2.0, 20.0, 3.0, 30.0]).unwrap();

        // Axis 0: each column gets its own median and IQR.
        let normalized = normalize_array(&input, NormalizationMethod::Robust, 0).unwrap();

        // Column 0: [1, 2, 3]    -> median=2,  Q1=1.5, Q3=2.5, IQR=1.0
        // Column 1: [10, 20, 30] -> median=20, Q1=15,  Q3=25,  IQR=10
        let expected = Array::from_shape_vec(
            (3, 2),
            vec![
                (1.0 - 2.0) / 1.0,    // -1.0
                (10.0 - 20.0) / 10.0, // -1.0
                (2.0 - 2.0) / 1.0,    // 0.0
                (20.0 - 20.0) / 10.0, // 0.0
                (3.0 - 2.0) / 1.0,    // 1.0
                (30.0 - 20.0) / 10.0, // 1.0
            ],
        )
        .unwrap();

        assert_matrix_close(&normalized, &expected);
    }

    /// Robust scaling through the stateful `Normalizer` on a 4x2 matrix
    #[test]
    fn test_robust_normalizer() {
        let input =
            Array::from_shape_vec((4, 2), vec![1.0, 100.0, 2.0, 200.0, 3.0, 300.0, 4.0, 400.0])
                .unwrap();

        let mut normalizer = Normalizer::new(NormalizationMethod::Robust, 0);
        let transformed = normalizer.fit_transform(&input).unwrap();

        // Column 0: [1, 2, 3, 4]         -> median=2.5, Q1=1.75, Q3=3.25, IQR=1.5
        // Column 1: [100, 200, 300, 400] -> median=250, Q1=175,  Q3=325,  IQR=150
        let expected = Array::from_shape_vec(
            (4, 2),
            vec![
                (1.0 - 2.5) / 1.5,       // -1.0
                (100.0 - 250.0) / 150.0, // -1.0
                (2.0 - 2.5) / 1.5,       // -0.333...
                (200.0 - 250.0) / 150.0, // -0.333...
                (3.0 - 2.5) / 1.5,       // 0.333...
                (300.0 - 250.0) / 150.0, // 0.333...
                (4.0 - 2.5) / 1.5,       // 1.0
                (400.0 - 250.0) / 150.0, // 1.0
            ],
        )
        .unwrap();

        assert_matrix_close(&transformed, &expected);
    }
}