nt_features/
normalization.rs

1// Feature normalization for neural network inputs
2//
3// Supports: Min-Max, Z-Score, Robust scaling
4
5use crate::{FeatureError, Result};
6
/// Scaling strategy applied by a feature normalizer.
///
/// The variants carry no data, so the type is cheap to copy, compare, and use
/// as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NormalizationMethod {
    /// Min-max scaling: `(x - min) / (max - min)`.
    MinMax,
    /// Z-score standardization: `(x - mean) / std_dev`.
    ZScore,
    /// Robust scaling: `(x - median) / (q75 - q25)`.
    Robust,
}
13
14pub struct FeatureNormalizer {
15    method: NormalizationMethod,
16    min: Option<f64>,
17    max: Option<f64>,
18    mean: Option<f64>,
19    std_dev: Option<f64>,
20    median: Option<f64>,
21    q25: Option<f64>,
22    q75: Option<f64>,
23}
24
25impl FeatureNormalizer {
26    pub fn new(method: NormalizationMethod) -> Self {
27        Self {
28            method,
29            min: None,
30            max: None,
31            mean: None,
32            std_dev: None,
33            median: None,
34            q25: None,
35            q75: None,
36        }
37    }
38
39    /// Fit normalizer to data
40    pub fn fit(&mut self, data: &[f64]) -> Result<()> {
41        if data.is_empty() {
42            return Err(FeatureError::InsufficientData(1));
43        }
44
45        match self.method {
46            NormalizationMethod::MinMax => {
47                self.min = Some(data.iter().copied().fold(f64::INFINITY, f64::min));
48                self.max = Some(data.iter().copied().fold(f64::NEG_INFINITY, f64::max));
49            }
50            NormalizationMethod::ZScore => {
51                let mean = data.iter().sum::<f64>() / data.len() as f64;
52                let variance =
53                    data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / data.len() as f64;
54                let std_dev = variance.sqrt();
55
56                self.mean = Some(mean);
57                self.std_dev = Some(std_dev);
58            }
59            NormalizationMethod::Robust => {
60                let mut sorted = data.to_vec();
61                sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
62
63                let q25_idx = sorted.len() / 4;
64                let median_idx = sorted.len() / 2;
65                let q75_idx = (sorted.len() * 3) / 4;
66
67                self.q25 = Some(sorted[q25_idx]);
68                self.median = Some(sorted[median_idx]);
69                self.q75 = Some(sorted[q75_idx]);
70            }
71        }
72
73        Ok(())
74    }
75
76    /// Transform single value
77    pub fn transform(&self, value: f64) -> Result<f64> {
78        match self.method {
79            NormalizationMethod::MinMax => {
80                let min = self.min.ok_or_else(|| {
81                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
82                })?;
83                let max = self.max.ok_or_else(|| {
84                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
85                })?;
86
87                if (max - min).abs() < 1e-10 {
88                    return Ok(0.0);
89                }
90
91                Ok((value - min) / (max - min))
92            }
93            NormalizationMethod::ZScore => {
94                let mean = self.mean.ok_or_else(|| {
95                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
96                })?;
97                let std_dev = self.std_dev.ok_or_else(|| {
98                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
99                })?;
100
101                if std_dev.abs() < 1e-10 {
102                    return Ok(0.0);
103                }
104
105                Ok((value - mean) / std_dev)
106            }
107            NormalizationMethod::Robust => {
108                let median = self.median.ok_or_else(|| {
109                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
110                })?;
111                let q25 = self.q25.ok_or_else(|| {
112                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
113                })?;
114                let q75 = self.q75.ok_or_else(|| {
115                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
116                })?;
117
118                let iqr = q75 - q25;
119                if iqr.abs() < 1e-10 {
120                    return Ok(0.0);
121                }
122
123                Ok((value - median) / iqr)
124            }
125        }
126    }
127
128    /// Transform batch of values
129    pub fn transform_batch(&self, values: &[f64]) -> Result<Vec<f64>> {
130        values.iter().map(|&v| self.transform(v)).collect()
131    }
132
133    /// Inverse transform (denormalize)
134    pub fn inverse_transform(&self, normalized: f64) -> Result<f64> {
135        match self.method {
136            NormalizationMethod::MinMax => {
137                let min = self.min.ok_or_else(|| {
138                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
139                })?;
140                let max = self.max.ok_or_else(|| {
141                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
142                })?;
143
144                Ok(normalized * (max - min) + min)
145            }
146            NormalizationMethod::ZScore => {
147                let mean = self.mean.ok_or_else(|| {
148                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
149                })?;
150                let std_dev = self.std_dev.ok_or_else(|| {
151                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
152                })?;
153
154                Ok(normalized * std_dev + mean)
155            }
156            NormalizationMethod::Robust => {
157                let median = self.median.ok_or_else(|| {
158                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
159                })?;
160                let q25 = self.q25.ok_or_else(|| {
161                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
162                })?;
163                let q75 = self.q75.ok_or_else(|| {
164                    FeatureError::InvalidParameter("Normalizer not fitted".to_string())
165                })?;
166
167                let iqr = q75 - q25;
168                Ok(normalized * iqr + median)
169            }
170        }
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    /// Every supported method, for tests that must hold regardless of method.
    const ALL_METHODS: [NormalizationMethod; 3] = [
        NormalizationMethod::MinMax,
        NormalizationMethod::ZScore,
        NormalizationMethod::Robust,
    ];

    /// Builds a normalizer for `method` that is already fitted to `data`.
    fn fitted(method: NormalizationMethod, data: &[f64]) -> FeatureNormalizer {
        let mut normalizer = FeatureNormalizer::new(method);
        normalizer.fit(data).unwrap();
        normalizer
    }

    #[test]
    fn test_minmax_normalization() {
        let normalizer = fitted(NormalizationMethod::MinMax, &[1.0, 2.0, 3.0, 4.0, 5.0]);

        assert_relative_eq!(normalizer.transform(1.0).unwrap(), 0.0, epsilon = 1e-6);
        assert_relative_eq!(normalizer.transform(5.0).unwrap(), 1.0, epsilon = 1e-6);
        assert_relative_eq!(normalizer.transform(3.0).unwrap(), 0.5, epsilon = 1e-6);
    }

    #[test]
    fn test_zscore_normalization() {
        let normalizer = fitted(NormalizationMethod::ZScore, &[1.0, 2.0, 3.0, 4.0, 5.0]);

        // The mean of the fitted data must map to zero.
        let mean = normalizer.transform(3.0).unwrap();
        assert_relative_eq!(mean, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_robust_normalization() {
        // With outlier
        let normalizer = fitted(NormalizationMethod::Robust, &[1.0, 2.0, 3.0, 4.0, 100.0]);

        // Median should be normalized to 0
        let median_normalized = normalizer.transform(3.0).unwrap();
        assert_relative_eq!(median_normalized, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_inverse_transform() {
        let normalizer = fitted(NormalizationMethod::MinMax, &[10.0, 20.0, 30.0, 40.0, 50.0]);

        let normalized = normalizer.transform(30.0).unwrap();
        let original = normalizer.inverse_transform(normalized).unwrap();

        assert_relative_eq!(original, 30.0, epsilon = 1e-6);
    }

    #[test]
    fn test_inverse_transform_round_trip_all_methods() {
        for method in ALL_METHODS {
            let normalizer = fitted(method, &[10.0, 20.0, 30.0, 40.0, 50.0]);

            let normalized = normalizer.transform(40.0).unwrap();
            let original = normalizer.inverse_transform(normalized).unwrap();

            assert_relative_eq!(original, 40.0, epsilon = 1e-6);
        }
    }

    #[test]
    fn test_batch_transform() {
        let normalizer = fitted(NormalizationMethod::MinMax, &[1.0, 2.0, 3.0, 4.0, 5.0]);

        let normalized = normalizer.transform_batch(&[1.0, 3.0, 5.0]).unwrap();

        assert_eq!(normalized.len(), 3);
        assert_relative_eq!(normalized[0], 0.0, epsilon = 1e-6);
        assert_relative_eq!(normalized[1], 0.5, epsilon = 1e-6);
        assert_relative_eq!(normalized[2], 1.0, epsilon = 1e-6);
    }

    #[test]
    fn test_constant_data_normalizes_to_zero() {
        // A zero-width scale must not divide by zero; transform falls back to 0.
        for method in ALL_METHODS {
            let normalizer = fitted(method, &[7.0, 7.0, 7.0]);

            assert_relative_eq!(normalizer.transform(7.0).unwrap(), 0.0, epsilon = 1e-6);
        }
    }

    #[test]
    fn test_fit_empty_data() {
        for method in ALL_METHODS {
            let mut normalizer = FeatureNormalizer::new(method);
            assert!(normalizer.fit(&[]).is_err());
        }
    }

    #[test]
    fn test_unfitted_normalizer() {
        let normalizer = FeatureNormalizer::new(NormalizationMethod::MinMax);
        let result = normalizer.transform(5.0);
        assert!(result.is_err());
    }

    #[test]
    fn test_unfitted_inverse_transform() {
        for method in ALL_METHODS {
            let normalizer = FeatureNormalizer::new(method);
            assert!(normalizer.inverse_transform(0.5).is_err());
        }
    }
}