Skip to main content

linreg_core/loess/
normalize.rs

1//! Predictor normalization utilities
2
3use crate::linalg::Matrix;
4
/// Per-column scaling parameters produced by min-max normalization.
///
/// For every predictor column this keeps the offset (`min`) and the divisor
/// (`range`) that map the column onto the [0, 1] interval. The same pair is
/// what `denormalize` needs to map a value back to the original scale.
#[derive(Debug, Clone)]
pub struct NormalizationInfo {
    /// Smallest observed value of each predictor, one entry per column
    pub min: Vec<f64>,
    /// Spread (`max - min`) of each predictor, one entry per column.
    /// Stored as `1.0` for a constant column so it is always a safe divisor.
    pub range: Vec<f64>,
}
16
17/// Normalize predictor matrix to [0, 1] range
18///
19/// For each column, applies min-max normalization: `(x - min) / (max - min)`.
20/// This ensures all predictors are on the same scale for distance calculations.
21///
22/// # Arguments
23///
24/// * `x` - Predictor matrix (n obs × p predictors)
25///
26/// # Returns
27///
28/// A tuple of:
29/// - Normalized matrix with values in [0, 1]
30/// - NormalizationInfo containing min and range for each predictor
31///
32/// # Panics
33///
34/// Panics if the matrix has zero columns.
35///
36/// # Example
37///
38/// ```
39/// use linreg_core::loess::normalize::normalize_predictors;
40/// use linreg_core::linalg::Matrix;
41///
42/// // Simple test: normalize [0, 10] to [0, 1]
43/// let data = vec![0.0, 2.5, 5.0, 7.5, 10.0];
44/// let x = Matrix::new(5, 1, data);
45///
46/// let (normalized, info) = normalize_predictors(&x);
47///
48/// // Check normalization info
49/// assert_eq!(info.min[0], 0.0);
50/// assert_eq!(info.range[0], 10.0);
51///
52/// // Check endpoints are normalized to 0 and 1
53/// assert_eq!(normalized.get(0, 0), 0.0);
54/// assert_eq!(normalized.get(4, 0), 1.0);
55/// ```
56pub fn normalize_predictors(x: &Matrix) -> (Matrix, NormalizationInfo) {
57    let n = x.rows;
58    let p = x.cols;
59
60    assert!(p > 0, "Predictor matrix must have at least one column");
61
62    let mut min = Vec::with_capacity(p);
63    let mut range = Vec::with_capacity(p);
64
65    // First pass: compute min and range for each column
66    for j in 0..p {
67        let mut col_min = f64::INFINITY;
68        let mut col_max = f64::NEG_INFINITY;
69
70        for i in 0..n {
71            let val = x.get(i, j);
72            if val < col_min {
73                col_min = val;
74            }
75            if val > col_max {
76                col_max = val;
77            }
78        }
79
80        let col_range = col_max - col_min;
81
82        if col_range <= f64::EPSILON {
83            // Constant column - avoid division by zero
84            min.push(col_min);
85            range.push(1.0);
86        } else {
87            min.push(col_min);
88            range.push(col_range);
89        }
90    }
91
92    // Second pass: fill normalized data in row-major order
93    let mut normalized_data = Vec::with_capacity(n * p);
94    for i in 0..n {
95        for j in 0..p {
96            let val = x.get(i, j);
97            let col_min = min[j];
98            let col_range = range[j];
99
100            // Check if this is a constant column (we set range to 1.0 for those)
101            if col_range == 1.0 && (val - col_min).abs() < f64::EPSILON {
102                // Constant column - use 0.5 as normalized value
103                normalized_data.push(0.5);
104            } else {
105                normalized_data.push((val - col_min) / col_range);
106            }
107        }
108    }
109
110    let normalized = Matrix::new(n, p, normalized_data);
111    let info = NormalizationInfo { min, range };
112
113    (normalized, info)
114}
115
/// Map a normalized value back onto the original data scale.
///
/// This is the inverse of min-max scaling: the value is stretched by `range`
/// and then shifted by `min`. Inputs outside \[0, 1\] are not rejected; they
/// are transformed by the same linear formula.
///
/// # Arguments
///
/// * `value` - Normalized value, nominally in \[0, 1\]
/// * `min` - Minimum of the original data
/// * `range` - Range (max - min) of the original data
///
/// # Returns
///
/// `value * range + min`, i.e. the value expressed on the original scale
///
/// # Example
///
/// ```
/// use linreg_core::loess::normalize::denormalize;
///
/// let min = 10.0;
/// let range = 50.0;
///
/// // 0.0 -> min
/// assert_eq!(denormalize(0.0, min, range), min);
/// // 1.0 -> min + range = max
/// assert_eq!(denormalize(1.0, min, range), min + range);
/// // 0.5 -> midpoint
/// assert_eq!(denormalize(0.5, min, range), min + range / 2.0);
/// ```
#[inline]
pub fn denormalize(value: f64, min: f64, range: f64) -> f64 {
    // Stretch first, then shift: same operation order as the forward
    // transform run in reverse.
    let stretched = value * range;
    stretched + min
}
147
#[cfg(test)]
mod tests {
    use super::*;

    /// A single column spanning [0, 10] maps its endpoints to 0 and 1.
    #[test]
    fn test_normalize_simple() {
        let x = Matrix::new(5, 1, vec![0.0, 2.5, 5.0, 7.5, 10.0]);

        let (normalized, info) = normalize_predictors(&x);

        assert_eq!(info.min[0], 0.0);
        assert_eq!(info.range[0], 10.0);

        // Endpoints
        assert_eq!(normalized.get(0, 0), 0.0);
        assert_eq!(normalized.get(4, 0), 1.0);

        // Midpoint
        assert!((normalized.get(2, 0) - 0.5).abs() < 1e-10);
    }

    /// Each column is scaled independently using its own min and range.
    #[test]
    fn test_normalize_multiple_columns() {
        // Data in row-major order:
        // Row 0: col0=0.0, col1=100.0
        // Row 1: col0=5.0, col1=150.0
        // Row 2: col0=10.0, col1=200.0
        let data = vec![
            0.0, 100.0, // Row 0
            5.0, 150.0, // Row 1
            10.0, 200.0, // Row 2
        ];
        let x = Matrix::new(3, 2, data);

        let (normalized, info) = normalize_predictors(&x);

        // Column 0: values [0, 5, 10]
        assert_eq!(info.min[0], 0.0);
        assert_eq!(info.range[0], 10.0);
        // Column 1: values [100, 150, 200]
        assert_eq!(info.min[1], 100.0);
        assert_eq!(info.range[1], 100.0);

        // Every normalized value lies in [0, 1]
        for i in 0..3 {
            for j in 0..2 {
                let val = normalized.get(i, j);
                assert!((0.0..=1.0).contains(&val));
            }
        }

        // Specific values
        assert_eq!(normalized.get(0, 0), 0.0); // (0-0)/10 = 0
        assert_eq!(normalized.get(2, 0), 1.0); // (10-0)/10 = 1
        assert_eq!(normalized.get(0, 1), 0.0); // (100-100)/100 = 0
        assert_eq!(normalized.get(2, 1), 1.0); // (200-100)/100 = 1
    }

    /// `denormalize` maps 0, 1 and 0.5 to the min, max and midpoint.
    #[test]
    fn test_denormalize() {
        let min = 10.0;
        let range = 50.0;

        // 0.0 -> min
        assert_eq!(denormalize(0.0, min, range), min);
        // 1.0 -> min + range = max
        assert_eq!(denormalize(1.0, min, range), min + range);
        // 0.5 -> midpoint
        assert_eq!(denormalize(0.5, min, range), min + range / 2.0);
    }

    /// Normalizing and then denormalizing recovers the original values.
    #[test]
    fn test_normalize_roundtrip() {
        let data = vec![1.0, 3.0, 5.0, 7.0, 9.0];
        // The clone is needed: `data` is compared against below.
        let x = Matrix::new(5, 1, data.clone());

        let (normalized, info) = normalize_predictors(&x);

        for (i, &expected) in data.iter().enumerate() {
            let denorm = denormalize(normalized.get(i, 0), info.min[0], info.range[0]);
            assert!((denorm - expected).abs() < 1e-10);
        }
    }

    /// A constant column maps to 0.5 and reports a unit range.
    #[test]
    fn test_normalize_constant_column() {
        let x = Matrix::new(5, 1, vec![5.0, 5.0, 5.0, 5.0, 5.0]);

        let (normalized, info) = normalize_predictors(&x);

        // All normalized values should be 0.5 (midpoint)
        for i in 0..5 {
            assert!((normalized.get(i, 0) - 0.5).abs() < 1e-10);
        }
        assert_eq!(info.min[0], 5.0);
        assert_eq!(info.range[0], 1.0); // Set to 1.0 to avoid division by zero
    }

    /// Negative inputs are handled like any other values.
    #[test]
    fn test_normalize_negative_values() {
        let x = Matrix::new(5, 1, vec![-10.0, -5.0, 0.0, 5.0, 10.0]);

        let (normalized, info) = normalize_predictors(&x);

        assert_eq!(info.min[0], -10.0);
        assert_eq!(info.range[0], 20.0);

        // Endpoints and midpoint
        assert_eq!(normalized.get(0, 0), 0.0); // -10 -> 0
        assert_eq!(normalized.get(4, 0), 1.0); // 10 -> 1
        assert!((normalized.get(2, 0) - 0.5).abs() < 1e-10); // 0 -> 0.5
    }
}