nt_features/
normalization.rs1use crate::{FeatureError, Result};
6
/// Strategy used by `FeatureNormalizer` to rescale feature values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NormalizationMethod {
    /// Rescale relative to the fitted range: `(x - min) / (max - min)`.
    MinMax,
    /// Standardize around the fitted mean: `(x - mean) / std_dev`.
    ZScore,
    /// Center on the fitted median and scale by the interquartile range:
    /// `(x - median) / (q75 - q25)`.
    Robust,
}
13
14pub struct FeatureNormalizer {
15 method: NormalizationMethod,
16 min: Option<f64>,
17 max: Option<f64>,
18 mean: Option<f64>,
19 std_dev: Option<f64>,
20 median: Option<f64>,
21 q25: Option<f64>,
22 q75: Option<f64>,
23}
24
25impl FeatureNormalizer {
26 pub fn new(method: NormalizationMethod) -> Self {
27 Self {
28 method,
29 min: None,
30 max: None,
31 mean: None,
32 std_dev: None,
33 median: None,
34 q25: None,
35 q75: None,
36 }
37 }
38
39 pub fn fit(&mut self, data: &[f64]) -> Result<()> {
41 if data.is_empty() {
42 return Err(FeatureError::InsufficientData(1));
43 }
44
45 match self.method {
46 NormalizationMethod::MinMax => {
47 self.min = Some(data.iter().copied().fold(f64::INFINITY, f64::min));
48 self.max = Some(data.iter().copied().fold(f64::NEG_INFINITY, f64::max));
49 }
50 NormalizationMethod::ZScore => {
51 let mean = data.iter().sum::<f64>() / data.len() as f64;
52 let variance =
53 data.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / data.len() as f64;
54 let std_dev = variance.sqrt();
55
56 self.mean = Some(mean);
57 self.std_dev = Some(std_dev);
58 }
59 NormalizationMethod::Robust => {
60 let mut sorted = data.to_vec();
61 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
62
63 let q25_idx = sorted.len() / 4;
64 let median_idx = sorted.len() / 2;
65 let q75_idx = (sorted.len() * 3) / 4;
66
67 self.q25 = Some(sorted[q25_idx]);
68 self.median = Some(sorted[median_idx]);
69 self.q75 = Some(sorted[q75_idx]);
70 }
71 }
72
73 Ok(())
74 }
75
76 pub fn transform(&self, value: f64) -> Result<f64> {
78 match self.method {
79 NormalizationMethod::MinMax => {
80 let min = self.min.ok_or_else(|| {
81 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
82 })?;
83 let max = self.max.ok_or_else(|| {
84 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
85 })?;
86
87 if (max - min).abs() < 1e-10 {
88 return Ok(0.0);
89 }
90
91 Ok((value - min) / (max - min))
92 }
93 NormalizationMethod::ZScore => {
94 let mean = self.mean.ok_or_else(|| {
95 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
96 })?;
97 let std_dev = self.std_dev.ok_or_else(|| {
98 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
99 })?;
100
101 if std_dev.abs() < 1e-10 {
102 return Ok(0.0);
103 }
104
105 Ok((value - mean) / std_dev)
106 }
107 NormalizationMethod::Robust => {
108 let median = self.median.ok_or_else(|| {
109 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
110 })?;
111 let q25 = self.q25.ok_or_else(|| {
112 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
113 })?;
114 let q75 = self.q75.ok_or_else(|| {
115 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
116 })?;
117
118 let iqr = q75 - q25;
119 if iqr.abs() < 1e-10 {
120 return Ok(0.0);
121 }
122
123 Ok((value - median) / iqr)
124 }
125 }
126 }
127
128 pub fn transform_batch(&self, values: &[f64]) -> Result<Vec<f64>> {
130 values.iter().map(|&v| self.transform(v)).collect()
131 }
132
133 pub fn inverse_transform(&self, normalized: f64) -> Result<f64> {
135 match self.method {
136 NormalizationMethod::MinMax => {
137 let min = self.min.ok_or_else(|| {
138 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
139 })?;
140 let max = self.max.ok_or_else(|| {
141 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
142 })?;
143
144 Ok(normalized * (max - min) + min)
145 }
146 NormalizationMethod::ZScore => {
147 let mean = self.mean.ok_or_else(|| {
148 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
149 })?;
150 let std_dev = self.std_dev.ok_or_else(|| {
151 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
152 })?;
153
154 Ok(normalized * std_dev + mean)
155 }
156 NormalizationMethod::Robust => {
157 let median = self.median.ok_or_else(|| {
158 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
159 })?;
160 let q25 = self.q25.ok_or_else(|| {
161 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
162 })?;
163 let q75 = self.q75.ok_or_else(|| {
164 FeatureError::InvalidParameter("Normalizer not fitted".to_string())
165 })?;
166
167 let iqr = q75 - q25;
168 Ok(normalized * iqr + median)
169 }
170 }
171 }
172}
173
#[cfg(test)]
mod tests {
    use super::*;
    use approx::assert_relative_eq;

    /// Builds a normalizer of the requested kind, already fitted on `samples`.
    fn fitted(method: NormalizationMethod, samples: &[f64]) -> FeatureNormalizer {
        let mut normalizer = FeatureNormalizer::new(method);
        normalizer.fit(samples).unwrap();
        normalizer
    }

    #[test]
    fn test_minmax_normalization() {
        let scaler = fitted(NormalizationMethod::MinMax, &[1.0, 2.0, 3.0, 4.0, 5.0]);

        // (input, expected): both ends of the fitted range and its midpoint.
        for (input, expected) in [(1.0, 0.0), (5.0, 1.0), (3.0, 0.5)] {
            assert_relative_eq!(scaler.transform(input).unwrap(), expected, epsilon = 1e-6);
        }
    }

    #[test]
    fn test_zscore_normalization() {
        let scaler = fitted(NormalizationMethod::ZScore, &[1.0, 2.0, 3.0, 4.0, 5.0]);

        // 3.0 is the mean of the fitted data, so it must standardize to zero.
        let at_mean = scaler.transform(3.0).unwrap();
        assert_relative_eq!(at_mean, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_robust_normalization() {
        // 100.0 is an outlier; the median of the sample is still 3.0.
        let scaler = fitted(NormalizationMethod::Robust, &[1.0, 2.0, 3.0, 4.0, 100.0]);

        let at_median = scaler.transform(3.0).unwrap();
        assert_relative_eq!(at_median, 0.0, epsilon = 1e-6);
    }

    #[test]
    fn test_inverse_transform() {
        let scaler = fitted(NormalizationMethod::MinMax, &[10.0, 20.0, 30.0, 40.0, 50.0]);

        // A transform followed by its inverse must return the input value.
        let round_trip = scaler
            .transform(30.0)
            .and_then(|scaled| scaler.inverse_transform(scaled))
            .unwrap();

        assert_relative_eq!(round_trip, 30.0, epsilon = 1e-6);
    }

    #[test]
    fn test_batch_transform() {
        let scaler = fitted(NormalizationMethod::MinMax, &[1.0, 2.0, 3.0, 4.0, 5.0]);

        let scaled = scaler.transform_batch(&[1.0, 3.0, 5.0]).unwrap();

        assert_relative_eq!(scaled[0], 0.0, epsilon = 1e-6);
        assert_relative_eq!(scaled[1], 0.5, epsilon = 1e-6);
        assert_relative_eq!(scaled[2], 1.0, epsilon = 1e-6);
    }

    #[test]
    fn test_unfitted_normalizer() {
        // Transforming before `fit` has been called must be reported as an error.
        let unfitted = FeatureNormalizer::new(NormalizationMethod::MinMax);
        assert!(unfitted.transform(5.0).is_err());
    }
}