use ferrolearn_core::{Fit, Transform};
use ferrolearn_preprocess::{
feature_selection::{ScoreFunc, SelectFromModel, SelectKBest},
knn_imputer::{KNNImputer, KNNWeights},
spline_transformer::{KnotStrategy, SplineTransformer},
FunctionTransformer, GaussianRandomProjection, LabelBinarizer, MultiLabelBinarizer,
OrdinalEncoder, SelectPercentile, SparseRandomProjection, VarianceThreshold,
};
use ferrolearn_test_oracle::{json_to_array1, json_to_array2, load_fixture};
#[test]
fn conformance_ordinal_encoder() {
let fx = load_fixture("ordinal_encoder");
let rows: Vec<Vec<String>> = fx.input["X"]
.as_array()
.unwrap()
.iter()
.map(|row| {
row.as_array()
.unwrap()
.iter()
.map(|v| v.as_str().unwrap().to_string())
.collect()
})
.collect();
let n_rows = rows.len();
let n_cols = rows[0].len();
let flat: Vec<String> = rows.into_iter().flatten().collect();
let x = ndarray::Array2::from_shape_vec((n_rows, n_cols), flat).unwrap();
let model = OrdinalEncoder::new();
let fitted = model.fit(&x, &()).expect("OrdinalEncoder fit");
let xt = fitted.transform(&x).expect("OrdinalEncoder transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(xt.shape(), expected.shape(), "OrdinalEncoder shape");
for (i, (&a, &e)) in xt.iter().zip(expected.iter()).enumerate() {
let e_u = e as usize;
assert_eq!(a, e_u, "OrdinalEncoder[{i}] actual={a} expected={e_u}");
}
}
#[test]
fn conformance_label_binarizer() {
let fx = load_fixture("label_binarizer");
let y_vec: Vec<usize> = fx.input["y"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() as usize)
.collect();
let y = ndarray::Array1::from_vec(y_vec);
let model = LabelBinarizer::new();
let fitted = model.fit(&y, &()).expect("LabelBinarizer fit");
let yt = fitted.transform(&y).expect("LabelBinarizer transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(yt.shape(), expected.shape(), "LabelBinarizer shape");
for (i, (&a, &e)) in yt.iter().zip(expected.iter()).enumerate() {
assert!(
(a - e).abs() < 1e-12,
"LabelBinarizer[{i}] actual={a} expected={e}"
);
}
}
#[test]
fn conformance_multilabel_binarizer() {
let fx = load_fixture("multilabel_binarizer");
let label_lists: Vec<Vec<usize>> = fx.input["y"]
.as_array()
.unwrap()
.iter()
.map(|row| {
row.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() as usize)
.collect()
})
.collect();
let model = MultiLabelBinarizer::new();
let fitted = model.fit(&label_lists, &()).expect("MLB fit");
let yt = fitted.transform(&label_lists).expect("MLB transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(yt.shape(), expected.shape(), "MLB shape");
for (i, (&a, &e)) in yt.iter().zip(expected.iter()).enumerate() {
assert!(
(a - e).abs() < 1e-12,
"MLB[{i}] actual={a} expected={e}"
);
}
}
#[test]
fn conformance_variance_threshold() {
let fx = load_fixture("variance_threshold");
let x = json_to_array2(&fx.input["X"]);
let threshold = fx.params["threshold"].as_f64().unwrap_or(0.0);
let model = VarianceThreshold::<f64>::new(threshold);
let fitted = model.fit(&x, &()).expect("VT fit");
let xt = fitted.transform(&x).expect("VT transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(xt.shape(), expected.shape(), "VT shape");
for (i, (&a, &e)) in xt.iter().zip(expected.iter()).enumerate() {
assert!(
(a - e).abs() < 1e-12,
"VT[{i}] actual={a} expected={e}"
);
}
}
#[test]
fn conformance_select_k_best() {
let fx = load_fixture("select_k_best");
let x = json_to_array2(&fx.input["X"]);
let y_vec: Vec<usize> = fx.input["y"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() as usize)
.collect();
let y = ndarray::Array1::from_vec(y_vec);
let k = fx.params["k"].as_u64().unwrap_or(4) as usize;
let model = SelectKBest::<f64>::new(k, ScoreFunc::FClassif);
let fitted = model.fit(&x, &y).expect("SelectKBest fit");
let xt = fitted.transform(&x).expect("SelectKBest transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(xt.shape(), expected.shape(), "SelectKBest shape");
}
#[test]
fn conformance_select_percentile() {
let fx = load_fixture("select_percentile");
let x = json_to_array2(&fx.input["X"]);
let y_vec: Vec<usize> = fx.input["y"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() as usize)
.collect();
let y = ndarray::Array1::from_vec(y_vec);
let percentile = fx.params["percentile"].as_u64().unwrap_or(40) as usize;
let model = SelectPercentile::<f64>::new(percentile, ScoreFunc::FClassif);
let fitted = model.fit(&x, &y).expect("SelectPercentile fit");
let xt = fitted.transform(&x).expect("SelectPercentile transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(xt.shape(), expected.shape(), "SelectPercentile shape");
}
#[test]
fn conformance_select_from_model() {
let fx = load_fixture("select_from_model");
let x = ferrolearn_test_oracle::json_to_array2(&fx.input["X"]);
let y_vec: Vec<usize> = fx.input["y"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() as usize)
.collect();
let y = ndarray::Array1::from_vec(y_vec);
let c = fx.params["C"].as_f64().unwrap_or(1.0);
let max_iter = fx.params["max_iter"].as_u64().unwrap_or(500) as usize;
use ferrolearn_core::introspection::HasCoefficients;
use ferrolearn_core::Fit;
let lr = ferrolearn_linear::LogisticRegression::<f64>::new()
.with_c(c)
.with_max_iter(max_iter);
let fitted = lr.fit(&x, &y).expect("LogisticRegression fit");
let importances: ndarray::Array1<f64> = fitted.coefficients().mapv(|v| v.abs());
let sfm = SelectFromModel::<f64>::new_from_importances(&importances, None)
.expect("SelectFromModel from importances");
let expected_support: Vec<bool> = fx.expected["support"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() != 0)
.collect();
let n_features = expected_support.len();
let mut actual_support = vec![false; n_features];
for &i in sfm.selected_indices() {
actual_support[i] = true;
}
let matches = actual_support
.iter()
.zip(expected_support.iter())
.filter(|(a, e)| a == e)
.count();
let frac = matches as f64 / n_features as f64;
assert!(
frac >= 0.80,
"SelectFromModel support agreement {frac:.4} below 0.80 floor"
);
}
#[test]
fn conformance_rfe() {
let fx = load_fixture("rfe");
let x = ferrolearn_test_oracle::json_to_array2(&fx.input["X"]);
let y_vec: Vec<usize> = fx.input["y"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() as usize)
.collect();
let y = ndarray::Array1::from_vec(y_vec);
let n_keep = fx.params["n_features_to_select"].as_u64().unwrap_or(4) as usize;
use ferrolearn_core::introspection::HasCoefficients;
use ferrolearn_core::Fit;
let lr = ferrolearn_linear::LogisticRegression::<f64>::new();
let fitted = lr.fit(&x, &y).expect("LogisticRegression fit");
let importances: ndarray::Array1<f64> = fitted.coefficients().mapv(|v| v.abs());
let rfe = ferrolearn_preprocess::rfe::RFE::<f64>::new(&importances, n_keep, 1)
.expect("RFE from importances");
let actual_support = rfe.support();
let expected_support: Vec<bool> = fx.expected["support"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_u64().unwrap() != 0)
.collect();
let matches = actual_support
.iter()
.zip(expected_support.iter())
.filter(|(a, e)| a == e)
.count();
let frac = matches as f64 / expected_support.len() as f64;
assert!(
frac >= 0.80,
"RFE support agreement {frac:.4} below 0.80 floor"
);
}
#[test]
fn conformance_knn_imputer() {
let fx = load_fixture("knn_imputer");
let rows: Vec<Vec<f64>> = fx.input["X"]
.as_array()
.unwrap()
.iter()
.map(|row| {
row.as_array()
.unwrap()
.iter()
.map(|v| match v {
serde_json::Value::String(s) if s == "NaN" => f64::NAN,
serde_json::Value::Number(n) => n.as_f64().unwrap(),
_ => panic!("unexpected value {v}"),
})
.collect()
})
.collect();
let n_rows = rows.len();
let n_cols = rows[0].len();
let flat: Vec<f64> = rows.into_iter().flatten().collect();
let x = ndarray::Array2::from_shape_vec((n_rows, n_cols), flat).unwrap();
let n_neighbors = fx.params["n_neighbors"].as_u64().unwrap_or(5) as usize;
let model = KNNImputer::<f64>::new(n_neighbors, KNNWeights::Uniform);
let fitted = model.fit(&x, &()).expect("KNNImputer fit");
let xt = fitted.transform(&x).expect("KNNImputer transform");
let expected = json_to_array2(&fx.expected["transformed"]);
assert_eq!(xt.shape(), expected.shape(), "KNNImputer shape");
for (i, (&a, &e)) in xt.iter().zip(expected.iter()).enumerate() {
let threshold = 0.5f64.max(0.2 * e.abs());
assert!(
(a - e).abs() <= threshold,
"KNNImputer[{i}] actual={a:.4} expected={e:.4} diff > {threshold:.4}"
);
}
}
#[test]
fn conformance_spline_transformer() {
let fx = load_fixture("spline_transformer");
let x = json_to_array2(&fx.input["X"]);
let n_knots = fx.params["n_knots"].as_u64().unwrap_or(5) as usize;
let degree = fx.params["degree"].as_u64().unwrap_or(3) as usize;
let model = SplineTransformer::<f64>::new(n_knots, degree, KnotStrategy::Uniform);
let fitted = model.fit(&x, &()).expect("SplineTransformer fit");
let xt = fitted.transform(&x).expect("SplineTransformer transform");
assert_eq!(xt.nrows(), x.nrows(), "SplineTransformer rows");
for v in xt.iter() {
assert!(v.is_finite(), "SplineTransformer non-finite");
}
}
#[test]
fn conformance_gaussian_random_projection() {
let fx = load_fixture("gaussian_random_projection");
let x = json_to_array2(&fx.input["X"]);
let n_components = fx.params["n_components"].as_u64().unwrap_or(8) as usize;
let model = GaussianRandomProjection::<f64>::new(n_components);
let fitted = model.fit(&x, &()).expect("GaussianRP fit");
let xt = fitted.transform(&x).expect("GaussianRP transform");
let expected_shape: Vec<i64> = fx.expected["transformed_shape"]
.as_array()
.unwrap()
.iter()
.map(|v| v.as_i64().unwrap())
.collect();
assert_eq!(
xt.shape(),
&[expected_shape[0] as usize, expected_shape[1] as usize],
"GaussianRP shape"
);
}
#[test]
fn conformance_sparse_random_projection() {
let fx = load_fixture("sparse_random_projection");
let x = json_to_array2(&fx.input["X"]);
let n_components = fx.params["n_components"].as_u64().unwrap_or(8) as usize;
let model = SparseRandomProjection::<f64>::new(n_components);
let fitted = model.fit(&x, &()).expect("SparseRP fit");
let xt = fitted.transform(&x).expect("SparseRP transform");
assert_eq!(xt.nrows(), x.nrows(), "SparseRP rows");
assert_eq!(xt.ncols(), n_components, "SparseRP cols");
}
#[test]
fn conformance_function_transformer() {
let fx = load_fixture("function_transformer");
let x = json_to_array2(&fx.input["X"]);
let f = |v: f64| v.ln_1p();
let model = FunctionTransformer::<f64>::new(f);
let xt = model.transform(&x).expect("FunctionTransformer transform");
let expected = json_to_array2(&fx.expected["transformed"]);
for (i, (&a, &e)) in xt.iter().zip(expected.iter()).enumerate() {
assert!(
(a - e).abs() < 1e-12,
"FT[{i}] actual={a} expected={e}"
);
}
let _ = json_to_array1;
}