use aprender::preprocessing::{MinMaxScaler, RobustScaler, StandardScaler};
use aprender::primitives::Matrix;
use aprender::traits::Transformer;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(256))]
/// Property: after fitting `StandardScaler` on a generated `n x d` matrix and
/// transforming that same matrix, the mean of every output column is within
/// `1e-4` of zero.
#[test]
fn prop_standard_scaler_zero_mean(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    // The pool holds 80 values and `n * d` is at most 19 * 3 = 57, so `cycle`
    // is only a safety net here and never actually wraps around.
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

    let mut standardizer = StandardScaler::new();
    standardizer.fit(&input).expect("fit succeeds");
    let scaled = standardizer.transform(&input).expect("transform succeeds");

    let (n_rows, n_cols) = scaled.shape();
    let row_count = n_rows as f32;
    for col in 0..n_cols {
        let column_total = (0..n_rows).map(|row| scaled.get(row, col)).sum::<f32>();
        let mean = column_total / row_count;
        prop_assert!(
            mean.abs() < 1e-4,
            "column {} mean = {}, expected ~0.0",
            col, mean
        );
    }
}
/// Property: every column produced by `StandardScaler` has a standard deviation
/// (computed here with a divisor of `rows`, i.e. the population form) within
/// `1e-3` of one.
///
/// Cases where any input column is constant are discarded with `prop_assume!`,
/// since such a column has no spread to normalise.
#[test]
fn prop_standard_scaler_unit_variance(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells.clone()).expect("valid matrix dimensions");

    // Column `j` is read as cells j, j + d, j + 2d, ... of the flat buffer.
    // NOTE(review): this assumes `Matrix::from_vec` is row-major; confirm.
    for j in 0..d {
        let first = cells[j];
        let all_same = cells
            .iter()
            .skip(j)
            .step_by(d)
            .all(|&cell| (cell - first).abs() < 1e-10);
        prop_assume!(!all_same);
    }

    let mut standardizer = StandardScaler::new();
    standardizer.fit(&input).expect("fit succeeds");
    let scaled = standardizer.transform(&input).expect("transform succeeds");

    let (n_rows, n_cols) = scaled.shape();
    let row_count = n_rows as f32;
    for col in 0..n_cols {
        // Materialise the column once; both passes below walk it top to bottom.
        let column: Vec<f32> = (0..n_rows).map(|row| scaled.get(row, col)).collect();
        let mean = column.iter().copied().sum::<f32>() / row_count;
        let variance = column
            .iter()
            .copied()
            .map(|value| {
                let deviation = value - mean;
                deviation * deviation
            })
            .sum::<f32>()
            / row_count;
        let spread = variance.sqrt();
        prop_assert!(
            (spread - 1.0).abs() < 1e-3,
            "column {} std = {}, expected ~1.0",
            col, spread
        );
    }
}
/// Property: every value produced by `MinMaxScaler` (fitted and applied on the
/// same generated matrix) lies in `[0, 1]`, allowing a slack of `1e-6` on
/// either side for floating-point rounding.
#[test]
fn prop_minmax_scaler_bounded(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

    let mut rescaler = MinMaxScaler::new();
    rescaler.fit(&input).expect("fit succeeds");
    let scaled = rescaler.transform(&input).expect("transform succeeds");

    // Closed interval [0 - tolerance, 1 + tolerance]; a NaN falls outside it.
    let tolerance: f32 = 1e-6;
    let allowed = (0.0 - tolerance)..=(1.0 + tolerance);

    let (n_rows, n_cols) = scaled.shape();
    for row in 0..n_rows {
        for col in 0..n_cols {
            let value = scaled.get(row, col);
            prop_assert!(
                allowed.contains(&value),
                "transformed[{}, {}] = {}, expected in [0, 1]",
                row, col, value
            );
        }
    }
}
/// Property: after `MinMaxScaler`, each column's smallest value is within
/// `1e-5` of 0 and its largest value is within `1e-5` of 1.
///
/// Cases where any input column has (numerically) zero range are discarded
/// with `prop_assume!`.
#[test]
fn prop_minmax_scaler_extremes(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells.clone()).expect("valid matrix dimensions");

    // Column `j` is read as cells j, j + d, j + 2d, ... of the flat buffer.
    // NOTE(review): this assumes `Matrix::from_vec` is row-major; confirm.
    for j in 0..d {
        let column = || cells.iter().copied().skip(j).step_by(d);
        let col_min = column().fold(f32::INFINITY, f32::min);
        let col_max = column().fold(f32::NEG_INFINITY, f32::max);
        prop_assume!((col_max - col_min).abs() > 1e-10);
    }

    let mut rescaler = MinMaxScaler::new();
    rescaler.fit(&input).expect("fit succeeds");
    let scaled = rescaler.transform(&input).expect("transform succeeds");

    let (n_rows, n_cols) = scaled.shape();
    for col in 0..n_cols {
        // Track the running minimum and maximum of the column in one pass.
        let (smallest, largest) = (0..n_rows)
            .map(|row| scaled.get(row, col))
            .fold(
                (f32::INFINITY, f32::NEG_INFINITY),
                |(lo, hi), value| (lo.min(value), hi.max(value)),
            );
        prop_assert!(
            smallest.abs() < 1e-5,
            "column {} min = {}, expected 0.0",
            col, smallest
        );
        prop_assert!(
            (largest - 1.0).abs() < 1e-5,
            "column {} max = {}, expected 1.0",
            col, largest
        );
    }
}
/// Property: `StandardScaler::inverse_transform` undoes `transform`; every
/// recovered cell is within `1e-3` of the corresponding original cell.
#[test]
fn prop_standard_scaler_inverse_roundtrip(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

    let mut standardizer = StandardScaler::new();
    standardizer.fit(&input).expect("fit succeeds");
    let scaled = standardizer.transform(&input).expect("transform succeeds");
    let restored = standardizer
        .inverse_transform(&scaled)
        .expect("inverse_transform succeeds");

    // Compare cell by cell against the untouched input.
    let (n_rows, n_cols) = input.shape();
    for row in 0..n_rows {
        for col in 0..n_cols {
            let expected = input.get(row, col);
            let actual = restored.get(row, col);
            let error = (expected - actual).abs();
            prop_assert!(
                error < 1e-3,
                "roundtrip error at [{}, {}]: orig={}, recovered={}, diff={}",
                row, col, expected, actual, error
            );
        }
    }
}
/// Property: `RobustScaler` output contains only finite values, and the median
/// of every output column is within `1e-3` of zero.
#[test]
fn prop_robust_scaler_finite_centered(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells).expect("valid matrix dimensions");

    let mut robust = RobustScaler::new();
    robust.fit(&input).expect("fit succeeds");
    let scaled = robust.transform(&input).expect("transform succeeds");

    // First pass: no NaN or infinity anywhere in the output.
    let (n_rows, n_cols) = scaled.shape();
    for row in 0..n_rows {
        for col in 0..n_cols {
            let value = scaled.get(row, col);
            prop_assert!(
                value.is_finite(),
                "RobustScaler output[{}, {}] = {}, expected finite",
                row, col, value
            );
        }
    }

    // Second pass: recompute each column's median independently of the scaler.
    for col in 0..n_cols {
        let mut sorted: Vec<f32> = (0..n_rows).map(|row| scaled.get(row, col)).collect();
        sorted.sort_by(|lhs, rhs| lhs.partial_cmp(rhs).unwrap_or(std::cmp::Ordering::Equal));

        // Even length: average the two middle elements; odd length: take the middle one.
        let mid = sorted.len() / 2;
        let has_even_len = sorted.len().is_multiple_of(2);
        let median = if has_even_len {
            (sorted[mid - 1] + sorted[mid]) / 2.0
        } else {
            sorted[mid]
        };
        prop_assert!(
            median.abs() < 1e-3,
            "column {} median = {}, expected ~0.0 after robust centering",
            col, median
        );
    }
}
/// Property: `MinMaxScaler::inverse_transform` undoes `transform`; every
/// recovered cell is within `1e-3` of the corresponding original cell.
///
/// Cases where any input column has (numerically) zero range are discarded
/// with `prop_assume!`.
#[test]
fn prop_minmax_scaler_inverse_roundtrip(
    n in 5usize..20,
    d in 2usize..4,
    data in proptest::collection::vec(-100.0f32..100.0, 80usize)
) {
    let cells: Vec<f32> = data.iter().copied().cycle().take(n * d).collect();
    let input = Matrix::from_vec(n, d, cells.clone()).expect("valid matrix dimensions");

    // Column `j` is read as cells j, j + d, j + 2d, ... of the flat buffer.
    // NOTE(review): this assumes `Matrix::from_vec` is row-major; confirm.
    for j in 0..d {
        let column = || cells.iter().copied().skip(j).step_by(d);
        let col_min = column().fold(f32::INFINITY, f32::min);
        let col_max = column().fold(f32::NEG_INFINITY, f32::max);
        prop_assume!((col_max - col_min).abs() > 1e-10);
    }

    let mut rescaler = MinMaxScaler::new();
    rescaler.fit(&input).expect("fit succeeds");
    let scaled = rescaler.transform(&input).expect("transform succeeds");
    let restored = rescaler
        .inverse_transform(&scaled)
        .expect("inverse_transform succeeds");

    // Compare cell by cell against the untouched input.
    let (n_rows, n_cols) = input.shape();
    for row in 0..n_rows {
        for col in 0..n_cols {
            let expected = input.get(row, col);
            let actual = restored.get(row, col);
            let error = (expected - actual).abs();
            prop_assert!(
                error < 1e-3,
                "roundtrip error at [{}, {}]: orig={}, recovered={}, diff={}",
                row, col, expected, actual, error
            );
        }
    }
}
}