use crate::linalg::Matrix;
/// Per-column statistics recorded while normalizing a predictor matrix,
/// sufficient to map normalized values back to the original scale with
/// `denormalize`.
#[derive(Debug, Clone)]
pub struct NormalizationInfo {
    /// Minimum observed value of each predictor column.
    pub min: Vec<f64>,
    /// Observed `max - min` of each column; `normalize_predictors`
    /// substitutes 1.0 for degenerate (constant) columns.
    pub range: Vec<f64>,
}
/// Min-max normalizes each column of `x` independently into `[0, 1]`.
///
/// Constant (zero-range) columns cannot be scaled, so every entry of such a
/// column is mapped to 0.5 and its range is recorded as 1.0 to keep
/// `denormalize` free of division-by-zero concerns.
///
/// Returns the normalized matrix together with the per-column `min`/`range`
/// needed to invert the transform.
///
/// # Panics
/// Panics if `x` has zero columns.
pub fn normalize_predictors(x: &Matrix) -> (Matrix, NormalizationInfo) {
    let n = x.rows;
    let p = x.cols;
    assert!(p > 0, "Predictor matrix must have at least one column");

    let mut min = Vec::with_capacity(p);
    let mut range = Vec::with_capacity(p);
    // Track degenerate columns explicitly. The previous approach inferred
    // "constant column" from `range == 1.0 && val ≈ min` during the second
    // pass, which misclassified genuine unit-range columns (e.g. values in
    // [0.0, 1.0]) and mapped their minimum to 0.5 instead of 0.0.
    let mut constant = vec![false; p];

    for j in 0..p {
        let mut col_min = f64::INFINITY;
        let mut col_max = f64::NEG_INFINITY;
        for i in 0..n {
            let val = x.get(i, j);
            if val < col_min {
                col_min = val;
            }
            if val > col_max {
                col_max = val;
            }
        }
        let col_range = col_max - col_min;
        min.push(col_min);
        if col_range <= f64::EPSILON {
            // Sentinel range of 1.0 so denormalize(0.5, min, 1.0) stays finite;
            // the actual output value for this column is fixed at 0.5 below.
            constant[j] = true;
            range.push(1.0);
        } else {
            range.push(col_range);
        }
    }

    let mut normalized_data = Vec::with_capacity(n * p);
    for i in 0..n {
        for j in 0..p {
            if constant[j] {
                normalized_data.push(0.5);
            } else {
                normalized_data.push((x.get(i, j) - min[j]) / range[j]);
            }
        }
    }

    let normalized = Matrix::new(n, p, normalized_data);
    let info = NormalizationInfo { min, range };
    (normalized, info)
}
/// Inverse of min-max normalization: maps `value` from the unit interval
/// back onto the original scale described by `min` and `range`.
///
/// `denormalize(0.0, min, range) == min` and
/// `denormalize(1.0, min, range) == min + range`.
#[inline]
pub fn denormalize(value: f64, min: f64, range: f64) -> f64 {
    min + range * value
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A single column spanning [0, 10] maps linearly onto [0, 1].
    #[test]
    fn test_normalize_simple() {
        let data = vec![0.0, 2.5, 5.0, 7.5, 10.0];
        let x = Matrix::new(5, 1, data.clone());
        let (normalized, info) = normalize_predictors(&x);

        assert_eq!(info.min[0], 0.0);
        assert_eq!(info.range[0], 10.0);
        assert_eq!(normalized.get(0, 0), 0.0);
        assert_eq!(normalized.get(4, 0), 1.0);
        assert!((normalized.get(2, 0) - 0.5).abs() < 1e-10);
    }

    /// Columns with different scales are normalized independently.
    #[test]
    fn test_normalize_multiple_columns() {
        let data = vec![0.0, 100.0, 5.0, 150.0, 10.0, 200.0];
        let x = Matrix::new(3, 2, data);
        let (normalized, info) = normalize_predictors(&x);

        assert_eq!(info.min[0], 0.0);
        assert_eq!(info.range[0], 10.0);
        assert_eq!(info.min[1], 100.0);
        assert_eq!(info.range[1], 100.0);

        // Every normalized entry lies within the unit interval.
        for i in 0..3 {
            for j in 0..2 {
                assert!((0.0..=1.0).contains(&normalized.get(i, j)));
            }
        }

        // Column extremes hit the interval endpoints exactly.
        assert_eq!(normalized.get(0, 0), 0.0);
        assert_eq!(normalized.get(2, 0), 1.0);
        assert_eq!(normalized.get(0, 1), 0.0);
        assert_eq!(normalized.get(2, 1), 1.0);
    }

    /// `denormalize` inverts the endpoints and midpoint of the unit interval.
    #[test]
    fn test_denormalize() {
        let (min, range) = (10.0, 50.0);
        assert_eq!(denormalize(0.0, min, range), min);
        assert_eq!(denormalize(1.0, min, range), min + range);
        assert_eq!(denormalize(0.5, min, range), min + range / 2.0);
    }

    /// Normalize followed by denormalize recovers the original values.
    #[test]
    fn test_normalize_roundtrip() {
        let data = vec![1.0, 3.0, 5.0, 7.0, 9.0];
        let x = Matrix::new(5, 1, data.clone());
        let (normalized, info) = normalize_predictors(&x);
        for (i, &expected) in data.iter().enumerate() {
            let recovered = denormalize(normalized.get(i, 0), info.min[0], info.range[0]);
            assert!((recovered - expected).abs() < 1e-10);
        }
    }

    /// A constant column maps every entry to 0.5 with a sentinel range of 1.
    #[test]
    fn test_normalize_constant_column() {
        let x = Matrix::new(5, 1, vec![5.0; 5]);
        let (normalized, info) = normalize_predictors(&x);
        for i in 0..5 {
            assert!((normalized.get(i, 0) - 0.5).abs() < 1e-10);
        }
        assert_eq!(info.min[0], 5.0);
        assert_eq!(info.range[0], 1.0);
    }

    /// Negative inputs are shifted so the column minimum maps to zero.
    #[test]
    fn test_normalize_negative_values() {
        let data = vec![-10.0, -5.0, 0.0, 5.0, 10.0];
        let x = Matrix::new(5, 1, data);
        let (normalized, info) = normalize_predictors(&x);

        assert_eq!(info.min[0], -10.0);
        assert_eq!(info.range[0], 20.0);
        assert_eq!(normalized.get(0, 0), 0.0);
        assert_eq!(normalized.get(4, 0), 1.0);
        assert!((normalized.get(2, 0) - 0.5).abs() < 1e-10);
    }
}