#![allow(clippy::needless_range_loop)]
use scry_learn::dataset::Dataset;
use scry_learn::linear::{LassoRegression, LinearRegression, LogisticRegression};
use scry_learn::metrics::{f1_score, Average};
use scry_learn::preprocess::{StandardScaler, Transformer};
use scry_learn::tree::{DecisionTreeClassifier, DecisionTreeRegressor, RandomForestClassifier};
/// Wraps raw feature columns and a target vector in a [`Dataset`].
///
/// Each inner vector of `cols` is treated as one feature and receives the
/// generated name `f0`, `f1`, … in order; the target column is named
/// `"target"`.
fn make_dataset(cols: Vec<Vec<f64>>, target: Vec<f64>) -> Dataset {
    let mut names: Vec<String> = Vec::with_capacity(cols.len());
    for idx in 0..cols.len() {
        names.push(format!("f{idx}"));
    }
    Dataset::new(cols, target, names, "target")
}
/// Linear regression on two almost identical features must stay finite.
///
/// The second feature is the first plus a random perturbation scaled by
/// `1e-8`; the target is `2 * base + 1`. The fit itself is allowed to fail.
/// When it succeeds, every coefficient, the intercept and a sample prediction
/// must be finite.
#[test]
fn near_singular_linear_regression() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 200;

    // Draw the whole base column before the noise so the RNG sequence is
    // consumed column by column.
    let mut base: Vec<f64> = Vec::new();
    for _ in 0..n {
        base.push(rng.f64() * 10.0);
    }
    let mut noisy: Vec<f64> = Vec::new();
    for &v in &base {
        noisy.push(v + rng.f64() * 1e-8);
    }
    let mut target: Vec<f64> = Vec::new();
    for &v in &base {
        target.push(2.0 * v + 1.0);
    }

    let ds = make_dataset(vec![base, noisy], target);
    let mut model = LinearRegression::new();
    let result = model.fit(&ds);

    if result.is_ok() {
        for (i, &c) in model.coefficients().iter().enumerate() {
            assert!(c.is_finite(), "coefficient {i} is not finite: {c}");
        }
        let intercept = model.intercept();
        assert!(intercept.is_finite(), "intercept is not finite");

        let preds = model.predict(&[vec![5.0, 5.0]]).unwrap();
        let first = preds[0];
        assert!(first.is_finite(), "prediction is not finite: {}", first);
    }

    println!("near_singular_linear_regression: {:?}", result.is_ok());
}
/// Features whose magnitudes differ by about twenty orders must not break fitting.
///
/// Part one fits `LinearRegression` on a `1e-10`-scaled and a `1e10`-scaled
/// column; the fit may fail, but a successful fit must predict a finite value.
/// Part two fits a `DecisionTreeClassifier` on a two-class dataset whose second
/// feature is offset by `1e10` for the second half of the samples, and expects
/// a finite prediction.
#[test]
fn extreme_scale_disparity() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 200;

    // --- Regression on wildly different scales -------------------------
    let mut tiny: Vec<f64> = Vec::new();
    for _ in 0..n {
        tiny.push(rng.f64() * 1e-10);
    }
    let mut huge: Vec<f64> = Vec::new();
    for _ in 0..n {
        huge.push(rng.f64() * 1e10);
    }
    let mut target: Vec<f64> = Vec::new();
    for (&t, &h) in tiny.iter().zip(huge.iter()) {
        target.push(t * 1e10 + h * 1e-10);
    }

    let ds = make_dataset(vec![tiny, huge], target);
    let mut lr = LinearRegression::new();
    if lr.fit(&ds).is_ok() {
        let preds = lr.predict(&[vec![5e-11, 5e9]]).unwrap();
        assert!(
            preds[0].is_finite(),
            "LR prediction not finite: {}",
            preds[0]
        );
    }

    // --- Classification on wildly different scales ---------------------
    let half = n / 2;
    let cls_target: Vec<f64> = (0..n)
        .map(|i| if i >= half { 1.0 } else { 0.0 })
        .collect();

    // The small column is generated in full before the offset column so the
    // RNG draws happen in the same order as the columns appear.
    let small_col: Vec<f64> = (0..n).map(|_| rng.f64() * 1e-10).collect();
    let offset_col: Vec<f64> = (0..n)
        .map(|i| {
            let draw = rng.f64();
            if i < half {
                draw
            } else {
                draw + 1e10
            }
        })
        .collect();
    let ds_cls = make_dataset(vec![small_col, offset_col], cls_target);

    let mut dt = DecisionTreeClassifier::new();
    dt.fit(&ds_cls).unwrap();
    let pred = dt.predict(&[vec![5e-11, 5e9]]).unwrap();
    assert!(pred[0].is_finite());

    println!("extreme_scale_disparity: passed");
}
/// Tree-based classifiers must get a non-zero macro F1 on a 99:1 class split.
///
/// 1000 samples with 5 random features are generated; the last 10 samples are
/// labelled `1.0` and have every feature shifted by `+20.0`. Both a
/// `DecisionTreeClassifier` and a `RandomForestClassifier` (50 estimators,
/// max depth 5) are fitted and scored on the training rows.
#[test]
fn severe_class_imbalance() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 1000;
    let n_minority = 10;
    let n_features = 5;
    let minority_start = n - n_minority;

    // Fill feature by feature so the RNG is consumed in column order.
    let mut cols: Vec<Vec<f64>> = vec![vec![0.0; n]; n_features];
    for col in cols.iter_mut() {
        for cell in col.iter_mut() {
            *cell = rng.f64() * 10.0;
        }
    }

    // Mark the tail of the data as the minority class and shift its features.
    let mut target: Vec<f64> = vec![0.0; n];
    target[minority_start..].fill(1.0);
    for col in cols.iter_mut() {
        for cell in col[minority_start..].iter_mut() {
            *cell += 20.0;
        }
    }

    let ds = make_dataset(cols, target.clone());

    // Assemble row-major samples from the dataset's per-feature storage.
    let mut rows: Vec<Vec<f64>> = Vec::with_capacity(n);
    for i in 0..n {
        let mut row: Vec<f64> = Vec::with_capacity(n_features);
        for j in 0..n_features {
            row.push(ds.features[j][i]);
        }
        rows.push(row);
    }

    let mut dt = DecisionTreeClassifier::new();
    dt.fit(&ds).unwrap();
    let preds = dt.predict(&rows).unwrap();
    let f1 = f1_score(&target, &preds, Average::Macro);
    println!("severe_class_imbalance DT F1-macro: {f1:.4}");
    assert!(
        f1 > 0.0,
        "F1-macro should be > 0 on separable imbalanced data"
    );

    let mut rf = RandomForestClassifier::new().n_estimators(50).max_depth(5);
    rf.fit(&ds).unwrap();
    let preds_rf = rf.predict(&rows).unwrap();
    let f1_rf = f1_score(&target, &preds_rf, Average::Macro);
    println!("severe_class_imbalance RF F1-macro: {f1_rf:.4}");
    assert!(f1_rf > 0.0);
}
/// Behaviour when every training label belongs to the same class.
///
/// `DecisionTreeClassifier::fit` may succeed or fail; if it succeeds, the
/// prediction for a sample point must be finite. `LogisticRegression::fit`
/// is required to return `Err`.
#[test]
fn single_class_input() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 100;

    let col: Vec<f64> = std::iter::repeat_with(|| rng.f64()).take(n).collect();
    let ds = make_dataset(vec![col], vec![1.0; n]);

    // Decision tree: either outcome of `fit` is tolerated.
    let mut dt = DecisionTreeClassifier::new();
    let dt_fitted = dt.fit(&ds).is_ok();
    println!("single_class DT fit: {:?}", dt_fitted);
    if dt_fitted {
        let preds = dt.predict(&[vec![0.5]]).unwrap();
        assert!(preds[0].is_finite(), "prediction should be finite");
        println!("single_class DT prediction: {}", preds[0]);
    }

    // Logistic regression: a single class must be rejected.
    let mut lr = LogisticRegression::new().max_iter(50);
    let lr_rejected = lr.fit(&ds).is_err();
    assert!(lr_rejected, "LogReg on single-class should return Err");
    println!("single_class LogReg fit: Err — correct behavior");
}
/// A constant feature column must not yield non-finite values.
///
/// The dataset pairs a column that is always `42.0` with a column that
/// separates the two classes (second half offset by `5.0`). After
/// `StandardScaler` is fitted and applied to a copy, every value of the
/// constant column must be finite. A `DecisionTreeClassifier` trained on the
/// unscaled data must also give a finite prediction.
#[test]
fn zero_variance_columns() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 200;
    let half = n / 2;

    let constant_col = vec![42.0; n];
    let mut normal_col: Vec<f64> = Vec::with_capacity(n);
    let mut target: Vec<f64> = Vec::with_capacity(n);
    for i in 0..n {
        let draw = rng.f64();
        if i < half {
            normal_col.push(draw);
            target.push(0.0);
        } else {
            normal_col.push(draw + 5.0);
            target.push(1.0);
        }
    }
    let ds = make_dataset(vec![constant_col, normal_col], target);

    // Scale a copy so the tree below still sees the raw data.
    let mut scaler = StandardScaler::new();
    scaler.fit(&ds).unwrap();
    let mut ds_scaled = ds.clone();
    scaler.transform(&mut ds_scaled).unwrap();
    for v in &ds_scaled.features[0] {
        assert!(v.is_finite(), "scaled zero-variance value not finite: {v}");
    }

    let mut dt = DecisionTreeClassifier::new();
    dt.fit(&ds).unwrap();
    let preds = dt.predict(&[vec![42.0, 3.0]]).unwrap();
    assert!(preds[0].is_finite());

    println!("zero_variance_columns: passed");
}
/// Scaling a column with a vanishingly small spread must stay finite.
///
/// Every value is `1.0` plus a random term scaled by `1e-15`. After
/// `StandardScaler` is fitted and applied to a copy of the dataset, each
/// scaled value must be finite.
#[test]
fn near_zero_variance() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 200;

    let mut col: Vec<f64> = Vec::new();
    let mut target: Vec<f64> = Vec::new();
    for i in 0..n {
        col.push(1.0 + rng.f64() * 1e-15);
        // Alternating 0/1 labels.
        target.push((i % 2) as f64);
    }
    let ds = make_dataset(vec![col], target);

    let mut scaler = StandardScaler::new();
    scaler.fit(&ds).unwrap();
    let mut ds_scaled = ds.clone();
    scaler.transform(&mut ds_scaled).unwrap();

    for v in &ds_scaled.features[0] {
        assert!(
            v.is_finite(),
            "near-zero-variance scaled value not finite: {v}"
        );
    }

    println!("near_zero_variance: passed");
}
/// Regularised linear models with far more features (1000) than samples (50).
///
/// Both fits are allowed to fail. If `LinearRegression` with `alpha = 1.0`
/// succeeds, the number of finite coefficients must equal `p`. If
/// `LassoRegression` (`alpha = 1.0`, 100 iterations) succeeds, every
/// coefficient must be finite; the count of near-zero coefficients is only
/// printed.
#[test]
fn high_dimensional_p_gt_n() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 50;
    let p = 1000;

    // Columns are generated one after another, then the target, so the RNG
    // sequence is consumed in that order.
    let mut cols: Vec<Vec<f64>> = Vec::new();
    for _ in 0..p {
        let mut col: Vec<f64> = Vec::new();
        for _ in 0..n {
            col.push(rng.f64());
        }
        cols.push(col);
    }
    let mut target: Vec<f64> = Vec::new();
    for _ in 0..n {
        target.push(rng.f64() * 10.0);
    }
    let ds = make_dataset(cols, target);

    // Ridge-style linear regression.
    let mut lr_ridge = LinearRegression::new().alpha(1.0);
    let ridge_ok = lr_ridge.fit(&ds).is_ok();
    println!("p>>n LinearRegression(alpha=1.0) fit: {:?}", ridge_ok);
    if ridge_ok {
        let finite_count = lr_ridge
            .coefficients()
            .iter()
            .filter(|c| c.is_finite())
            .count();
        assert_eq!(
            finite_count, p,
            "all coefficients should be finite with regularization"
        );
    }

    // Lasso.
    let mut lasso = LassoRegression::new().alpha(1.0).max_iter(100);
    let lasso_ok = lasso.fit(&ds).is_ok();
    println!("p>>n Lasso fit: {:?}", lasso_ok);
    if lasso_ok {
        let coeffs = lasso.coefficients();
        let zero_count = coeffs.iter().filter(|&&c| c.abs() < 1e-10).count();
        println!("  Lasso sparsity: {zero_count}/{p} coefficients are zero");
        for c in coeffs.iter() {
            assert!(c.is_finite(), "Lasso coefficient not finite: {c}");
        }
    }
}
/// Regressors trained on a constant target must reproduce that constant.
///
/// The target is `7.0` for every sample. `LinearRegression` must give an
/// intercept within `0.1` of `7.0` and a first coefficient within `0.1` of
/// zero. `DecisionTreeRegressor` must predict within `0.5` of `7.0`.
#[test]
fn constant_target_regression() {
    let mut rng = fastrand::Rng::with_seed(42);
    let n = 200;

    let col: Vec<f64> = std::iter::repeat_with(|| rng.f64() * 10.0)
        .take(n)
        .collect();
    let ds = make_dataset(vec![col], vec![7.0; n]);

    // Linear model: flat line at the constant.
    let mut lr = LinearRegression::new();
    lr.fit(&ds).unwrap();
    let intercept_error = (lr.intercept() - 7.0).abs();
    assert!(
        intercept_error < 0.1,
        "intercept should be ~7.0, got {}",
        lr.intercept()
    );
    let slope_magnitude = lr.coefficients()[0].abs();
    assert!(
        slope_magnitude < 0.1,
        "coefficient should be ~0, got {}",
        lr.coefficients()[0]
    );

    // Tree regressor: prediction should equal the constant.
    let mut dt = DecisionTreeRegressor::new();
    dt.fit(&ds).unwrap();
    let preds = dt.predict(&[vec![5.0]]).unwrap();
    let tree_error = (preds[0] - 7.0).abs();
    assert!(
        tree_error < 0.5,
        "DTRegressor should predict ~7.0, got {}",
        preds[0]
    );

    println!("constant_target_regression: passed");
}