use super::*;
use super::*;
use approx::assert_abs_diff_eq;
use scirs2_core::ndarray::Array;
/// Fits a default `OneHotEncoder` on a 4x2 matrix and checks that the output
/// has shape (4, 6) and that the first two encoded rows hold the expected
/// one-hot values.
#[test]
fn test_one_hot_encoder_basic() {
    let data = Array::from_shape_vec((4, 2), vec![0.0, 1.0, 1.0, 2.0, 2.0, 3.0, 0.0, 1.0])
        .expect("Test data construction failed");

    let mut encoder = OneHotEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("OneHotEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), (4, 6));

    let encoded_dense = encoded.to_dense();
    // Expected dense values for rows 0 and 1 (inputs [0.0, 1.0] and [1.0, 2.0]).
    let expected_rows = [
        [1.0, 0.0, 0.0, 1.0, 0.0, 0.0],
        [0.0, 1.0, 0.0, 0.0, 1.0, 0.0],
    ];
    for (row, expected) in expected_rows.iter().enumerate() {
        for (col, &want) in expected.iter().enumerate() {
            assert_abs_diff_eq!(encoded_dense[[row, col]], want, epsilon = 1e-10);
        }
    }
}
/// Builds a `OneHotEncoder` with the `"first"` drop option and checks that a
/// 3x2 input is encoded into a (3, 3) output with the expected first two rows.
#[test]
fn test_one_hot_encoder_drop_first() {
    let data = Array::from_shape_vec((3, 2), vec![0.0, 1.0, 1.0, 2.0, 2.0, 1.0])
        .expect("Test data construction failed");

    let mut encoder = OneHotEncoder::new(Some("first".to_string()), "error", false)
        .expect("OneHotEncoder::new rejected a valid drop-first configuration");
    let encoded = encoder
        .fit_transform(&data)
        .expect("OneHotEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), (3, 3));

    let encoded_dense = encoded.to_dense();
    // Row 0 is expected to be all zeros; row 1 has ones in columns 0 and 2.
    let expected_rows = [[0.0, 0.0, 0.0], [1.0, 0.0, 1.0]];
    for (row, expected) in expected_rows.iter().enumerate() {
        for (col, &want) in expected.iter().enumerate() {
            assert_abs_diff_eq!(encoded_dense[[row, col]], want, epsilon = 1e-10);
        }
    }
}
/// Fits a default `OrdinalEncoder` on a 4x2 matrix and checks every encoded
/// cell against the expected ordinal code.
#[test]
fn test_ordinal_encoder() {
    let data = Array::from_shape_vec((4, 2), vec![2.0, 10.0, 1.0, 20.0, 3.0, 10.0, 2.0, 30.0])
        .expect("Test data construction failed");

    let mut encoder = OrdinalEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("OrdinalEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[4, 2]);

    // Expected codes, one inner array per input row.
    let expected_rows = [[1.0, 0.0], [0.0, 1.0], [2.0, 0.0], [1.0, 2.0]];
    for (row, expected) in expected_rows.iter().enumerate() {
        for (col, &want) in expected.iter().enumerate() {
            assert_abs_diff_eq!(encoded[[row, col]], want, epsilon = 1e-10);
        }
    }
}
/// Checks how `OneHotEncoder` treats a value that was not present when fitting:
/// the default encoder must return an error, while an encoder built with
/// `"ignore"` must produce an all-zero (1, 2) row.
#[test]
fn test_unknown_category_handling() {
    let train_data =
        Array::from_shape_vec((2, 1), vec![1.0, 2.0]).expect("Test data construction failed");
    let test_data =
        Array::from_shape_vec((1, 1), vec![3.0]).expect("Test data construction failed");

    // Default configuration: transforming the unseen value 3.0 must fail.
    let mut encoder = OneHotEncoder::with_defaults();
    encoder
        .fit(&train_data)
        .expect("OneHotEncoder::fit failed on valid training data");
    assert!(encoder.transform(&test_data).is_err());

    // "ignore" configuration: the unseen value is encoded as zeros.
    let mut encoder = OneHotEncoder::new(None, "ignore", false)
        .expect("OneHotEncoder::new rejected the \"ignore\" configuration");
    encoder
        .fit(&train_data)
        .expect("OneHotEncoder::fit failed on valid training data");
    let encoded = encoder
        .transform(&test_data)
        .expect("OneHotEncoder::transform should not fail in \"ignore\" mode");
    assert_eq!(encoded.shape(), (1, 2));

    let encoded_dense = encoded.to_dense();
    assert_abs_diff_eq!(encoded_dense[[0, 0]], 0.0, epsilon = 1e-10);
    assert_abs_diff_eq!(encoded_dense[[0, 1]], 0.0, epsilon = 1e-10);
}
/// Checks that an `OrdinalEncoder` built with `"use_encoded_value"` and
/// `Some(-1.0)` encodes a value absent from the training data as -1.0.
#[test]
fn test_ordinal_encoder_unknown_value() {
    let train_data =
        Array::from_shape_vec((2, 1), vec![1.0, 2.0]).expect("Test data construction failed");
    let test_data =
        Array::from_shape_vec((1, 1), vec![3.0]).expect("Test data construction failed");

    let mut encoder = OrdinalEncoder::new("use_encoded_value", Some(-1.0))
        .expect("OrdinalEncoder::new rejected a valid use_encoded_value configuration");
    encoder
        .fit(&train_data)
        .expect("OrdinalEncoder::fit failed on valid training data");

    let encoded = encoder
        .transform(&test_data)
        .expect("OrdinalEncoder::transform should map unknown values instead of failing");
    assert_eq!(encoded.shape(), &[1, 1]);
    assert_abs_diff_eq!(encoded[[0, 0]], -1.0, epsilon = 1e-10);
}
/// Checks `OneHotEncoder::get_feature_names` on a fitted 2x2 encoder: four
/// names are returned without input names, and supplied input names are used
/// as the `<name>_cat_` prefix of the generated names.
#[test]
fn test_get_feature_names() {
    let data = Array::from_shape_vec((2, 2), vec![1.0, 10.0, 2.0, 20.0])
        .expect("Test data construction failed");

    let mut encoder = OneHotEncoder::with_defaults();
    encoder
        .fit(&data)
        .expect("OneHotEncoder::fit failed on valid data");

    // Without explicit input names.
    let feature_names = encoder
        .get_feature_names(None)
        .expect("get_feature_names(None) failed on a fitted encoder");
    assert_eq!(feature_names.len(), 4);

    // With caller-provided input names.
    let custom_names = vec!["feat_a".to_string(), "feat_b".to_string()];
    let feature_names = encoder
        .get_feature_names(Some(&custom_names))
        .expect("get_feature_names with custom names failed on a fitted encoder");
    assert!(feature_names[0].starts_with("feat_a_cat_"));
    assert!(feature_names[2].starts_with("feat_b_cat_"));
}
/// Checks the `"mean"` strategy of `TargetEncoder`: each category is replaced
/// by the mean of its targets, and the global mean is 2.25.
#[test]
fn test_target_encoder_mean_strategy() {
    let x = Array::from_shape_vec((6, 1), vec![0.0, 1.0, 2.0, 0.0, 1.0, 2.0])
        .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0, 1.5, 2.5, 3.5];

    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    let encoded = encoder
        .fit_transform(&x, &y)
        .expect("TargetEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[6, 1]);

    // Per-row expectation: category 0 -> 1.25, category 1 -> 2.25, category 2 -> 3.25.
    let expected = [1.25, 2.25, 3.25, 1.25, 2.25, 3.25];
    for (row, &want) in expected.iter().enumerate() {
        assert_abs_diff_eq!(encoded[[row, 0]], want, epsilon = 1e-10);
    }
    assert_abs_diff_eq!(encoder.global_mean(), 2.25, epsilon = 1e-10);
}
/// Checks the `"median"` strategy of `TargetEncoder` on two categories with
/// two targets each (expected 2.0 for category 0 and 3.0 for category 1).
#[test]
fn test_target_encoder_median_strategy() {
    let x = Array::from_shape_vec((4, 1), vec![0.0, 1.0, 0.0, 1.0])
        .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0, 4.0];

    let mut encoder = TargetEncoder::new("median", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"median\" configuration");
    let encoded = encoder
        .fit_transform(&x, &y)
        .expect("TargetEncoder::fit_transform failed on valid data");

    let expected = [2.0, 3.0, 2.0, 3.0];
    for (row, &want) in expected.iter().enumerate() {
        assert_abs_diff_eq!(encoded[[row, 0]], want, epsilon = 1e-10);
    }
}
/// Checks the `"count"` strategy of `TargetEncoder`: each row is encoded as the
/// number of occurrences of its category in the fitted data.
#[test]
fn test_target_encoder_count_strategy() {
    let x = Array::from_shape_vec((5, 1), vec![0.0, 1.0, 0.0, 2.0, 1.0])
        .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0, 4.0, 5.0];

    let mut encoder = TargetEncoder::new("count", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"count\" configuration");
    let encoded = encoder
        .fit_transform(&x, &y)
        .expect("TargetEncoder::fit_transform failed on valid data");

    // Categories 0 and 1 appear twice, category 2 once.
    let expected = [2.0, 2.0, 2.0, 1.0, 2.0];
    for (row, &want) in expected.iter().enumerate() {
        assert_abs_diff_eq!(encoded[[row, 0]], want, epsilon = 1e-10);
    }
}
/// Checks the `"sum"` strategy of `TargetEncoder`: each row is encoded as the
/// sum of the targets of its category (4.0 for category 0, 6.0 for category 1).
#[test]
fn test_target_encoder_sum_strategy() {
    let x = Array::from_shape_vec((4, 1), vec![0.0, 1.0, 0.0, 1.0])
        .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0, 4.0];

    let mut encoder = TargetEncoder::new("sum", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"sum\" configuration");
    let encoded = encoder
        .fit_transform(&x, &y)
        .expect("TargetEncoder::fit_transform failed on valid data");

    let expected = [4.0, 6.0, 4.0, 6.0];
    for (row, &want) in expected.iter().enumerate() {
        assert_abs_diff_eq!(encoded[[row, 0]], want, epsilon = 1e-10);
    }
}
/// Checks that a `"mean"` `TargetEncoder` built with a second argument of 1.0
/// pulls single-sample category encodings towards the overall target mean:
/// targets 1.0, 2.0, 3.0 are expected to encode as 1.5, 2.0, 2.5.
#[test]
fn test_target_encoder_smoothing() {
    let x =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0];

    let mut encoder = TargetEncoder::new("mean", 1.0, 0.0)
        .expect("TargetEncoder::new rejected a valid smoothed \"mean\" configuration");
    let encoded = encoder
        .fit_transform(&x, &y)
        .expect("TargetEncoder::fit_transform failed on valid data");

    let expected = [1.5, 2.0, 2.5];
    for (row, &want) in expected.iter().enumerate() {
        assert_abs_diff_eq!(encoded[[row, 0]], want, epsilon = 1e-10);
    }
}
/// Checks that a `TargetEncoder` built with a third argument of -1.0 encodes
/// categories that were not seen during `fit` as -1.0.
#[test]
fn test_target_encoder_unknown_categories() {
    let train_x =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");
    let train_y = vec![1.0, 2.0, 3.0];
    let test_x =
        Array::from_shape_vec((2, 1), vec![3.0, 4.0]).expect("Test data construction failed");

    let mut encoder = TargetEncoder::new("mean", 0.0, -1.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    encoder
        .fit(&train_x, &train_y)
        .expect("TargetEncoder::fit failed on valid training data");

    let encoded = encoder
        .transform(&test_x)
        .expect("TargetEncoder::transform failed on unseen categories");
    assert_abs_diff_eq!(encoded[[0, 0]], -1.0, epsilon = 1e-10);
    assert_abs_diff_eq!(encoded[[1, 0]], -1.0, epsilon = 1e-10);
}
/// Checks that a `TargetEncoder` built with a third argument of 0.0 encodes an
/// unseen category as 2.0, which is the mean of the training targets
/// 1.0, 2.0 and 3.0.
#[test]
fn test_target_encoder_unknown_categories_global_mean() {
    let train_x =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");
    let train_y = vec![1.0, 2.0, 3.0];
    let test_x = Array::from_shape_vec((1, 1), vec![3.0]).expect("Test data construction failed");

    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    encoder
        .fit(&train_x, &train_y)
        .expect("TargetEncoder::fit failed on valid training data");

    let encoded = encoder
        .transform(&test_x)
        .expect("TargetEncoder::transform failed on an unseen category");
    assert_abs_diff_eq!(encoded[[0, 0]], 2.0, epsilon = 1e-10);
}
/// Checks that `TargetEncoder` encodes each column of a 4x2 input on its own:
/// the output keeps shape [4, 2] and the first two rows hold the per-column
/// category means.
#[test]
fn test_target_encoder_multi_feature() {
    let x = Array::from_shape_vec((4, 2), vec![0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0])
        .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0, 4.0];

    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    let encoded = encoder
        .fit_transform(&x, &y)
        .expect("TargetEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[4, 2]);

    // Expected values for rows 0 and 1.
    let expected_rows = [[2.0, 2.5], [3.0, 2.5]];
    for (row, expected) in expected_rows.iter().enumerate() {
        for (col, &want) in expected.iter().enumerate() {
            assert_abs_diff_eq!(encoded[[row, col]], want, epsilon = 1e-10);
        }
    }
}
/// Runs `TargetEncoder::fit_transform_cv` with 5 as its third argument on ten
/// samples and checks the output shape and that category 0 rows encode lower
/// than the neighbouring category 1 rows.
#[test]
fn test_target_encoder_cross_validation() {
    let x = Array::from_shape_vec(
        (10, 1),
        vec![0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0],
    )
    .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 1.5, 2.5, 1.2, 2.2, 1.3, 2.3, 1.1, 2.1];

    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    let encoded = encoder
        .fit_transform_cv(&x, &y, 5)
        .expect("TargetEncoder::fit_transform_cv failed on valid data");
    assert_eq!(encoded.shape(), &[10, 1]);

    // Targets of category 0 are all smaller than those of category 1, so the
    // encodings are expected to keep that ordering.
    assert!(encoded[[0, 0]] < encoded[[1, 0]]);
    assert!(encoded[[2, 0]] < encoded[[3, 0]]);
}
/// Checks that the `with_mean` / `with_median` convenience constructors set
/// the `strategy` and `smoothing` fields as expected.
#[test]
fn test_target_encoder_convenience_methods() {
    let mean_encoder = TargetEncoder::with_mean(1.0);
    assert_eq!(mean_encoder.strategy, "mean");
    assert_abs_diff_eq!(mean_encoder.smoothing, 1.0, epsilon = 1e-10);

    let median_encoder = TargetEncoder::with_median(0.5);
    assert_eq!(median_encoder.strategy, "median");
    assert_abs_diff_eq!(median_encoder.smoothing, 0.5, epsilon = 1e-10);
}
/// Checks the error paths of `TargetEncoder`: invalid constructor arguments,
/// mismatched `x`/`y` lengths, transforming before fitting, a feature-count
/// mismatch between fit and transform, and `fit_transform_cv` called with 1
/// as its third argument.
#[test]
fn test_target_encoder_validation_errors() {
    // Constructor validation: unknown strategy name and negative second argument.
    assert!(TargetEncoder::new("invalid", 0.0, 0.0).is_err());
    assert!(TargetEncoder::new("mean", -1.0, 0.0).is_err());

    // Three samples but only two targets.
    let x =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");
    let y = vec![1.0, 2.0];
    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    assert!(encoder.fit(&x, &y).is_err());

    // Transform on an encoder that was never fitted.
    let encoder2 = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    assert!(encoder2.transform(&x).is_err());

    // Fitted on one feature, transformed with two.
    let train_x =
        Array::from_shape_vec((2, 1), vec![0.0, 1.0]).expect("Test data construction failed");
    let test_x = Array::from_shape_vec((2, 2), vec![0.0, 1.0, 1.0, 0.0])
        .expect("Test data construction failed");
    let train_y = vec![1.0, 2.0];
    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    encoder
        .fit(&train_x, &train_y)
        .expect("TargetEncoder::fit failed on valid training data");
    assert!(encoder.transform(&test_x).is_err());

    // Cross-validated fit with 1 as the third argument is rejected.
    let x = Array::from_shape_vec((4, 1), vec![0.0, 1.0, 0.0, 1.0])
        .expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0, 4.0];
    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    assert!(encoder.fit_transform_cv(&x, &y, 1).is_err());
}
/// Checks the `is_fitted`, `encodings` and `global_mean` accessors of
/// `TargetEncoder` before and after `fit`.
#[test]
fn test_target_encoder_accessors() {
    let x =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");
    let y = vec![1.0, 2.0, 3.0];
    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");

    // Before fitting nothing is available.
    assert!(!encoder.is_fitted());
    assert!(encoder.encodings().is_none());

    encoder
        .fit(&x, &y)
        .expect("TargetEncoder::fit failed on valid training data");

    // After fitting the state is populated.
    assert!(encoder.is_fitted());
    assert!(encoder.encodings().is_some());
    assert_abs_diff_eq!(encoder.global_mean(), 2.0, epsilon = 1e-10);

    let encodings = encoder
        .encodings()
        .expect("encodings() returned None on a fitted encoder");
    // One entry for the single feature, holding three categories.
    assert_eq!(encodings.len(), 1);
    assert_eq!(encodings[0].len(), 3);
}
/// Checks that fitting a `TargetEncoder` on a zero-row input returns an error.
#[test]
fn test_target_encoder_empty_data() {
    let empty_x = Array2::<f64>::zeros((0, 1));
    let empty_y = vec![];

    let mut encoder = TargetEncoder::new("mean", 0.0, 0.0)
        .expect("TargetEncoder::new rejected a valid \"mean\" configuration");
    assert!(encoder.fit(&empty_x, &empty_y).is_err());
}
/// Fits a default `BinaryEncoder` on four distinct values and checks that the
/// [4, 2] output contains the two-bit patterns 00, 01, 10, 11 in row order.
#[test]
fn test_binary_encoder_basic() {
    let data = Array::from_shape_vec((4, 1), vec![0.0, 1.0, 2.0, 3.0])
        .expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[4, 2]);

    let expected_rows = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]];
    for (row, expected) in expected_rows.iter().enumerate() {
        for (col, &want) in expected.iter().enumerate() {
            assert_abs_diff_eq!(encoded[[row, col]], want, epsilon = 1e-10);
        }
    }
}
/// Fits a default `BinaryEncoder` on eight distinct values and checks that the
/// output has three columns, with the first row all zeros and the last row
/// all ones.
#[test]
fn test_binary_encoder_power_of_two() {
    let data = Array::from_shape_vec((8, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
        .expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[8, 3]);

    for col in 0..3 {
        assert_abs_diff_eq!(encoded[[0, col]], 0.0, epsilon = 1e-10);
    }
    for col in 0..3 {
        assert_abs_diff_eq!(encoded[[7, col]], 1.0, epsilon = 1e-10);
    }
}
/// Fits a default `BinaryEncoder` on five distinct values and checks that
/// three output columns are produced and reported by `n_output_features`.
#[test]
fn test_binary_encoder_non_power_of_two() {
    let data = Array::from_shape_vec((5, 1), vec![0.0, 1.0, 2.0, 3.0, 4.0])
        .expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[5, 3]);

    let n_output = encoder
        .n_output_features()
        .expect("Failed to get output features count");
    assert_eq!(n_output, 3);
}
/// Fits a default `BinaryEncoder` on a column holding a single repeated value
/// and checks that one all-zero output column is produced.
#[test]
fn test_binary_encoder_single_category() {
    let data =
        Array::from_shape_vec((3, 1), vec![5.0, 5.0, 5.0]).expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[3, 1]);

    let n_output = encoder
        .n_output_features()
        .expect("Failed to get output features count");
    assert_eq!(n_output, 1);

    for row in 0..3 {
        assert_abs_diff_eq!(encoded[[row, 0]], 0.0, epsilon = 1e-10);
    }
}
/// Fits a default `BinaryEncoder` on a 4x2 input (three distinct values in the
/// first column, two in the second) and checks the total output width (3) and
/// the per-feature widths ([2, 1]).
#[test]
fn test_binary_encoder_multi_feature() {
    let data = Array::from_shape_vec((4, 2), vec![0.0, 10.0, 1.0, 11.0, 2.0, 10.0, 0.0, 11.0])
        .expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    let encoded = encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");
    assert_eq!(encoded.shape(), &[4, 3]);

    let n_output = encoder
        .n_output_features()
        .expect("Failed to get output features count");
    assert_eq!(n_output, 3);

    let n_binary_features = encoder
        .n_binary_features()
        .expect("n_binary_features() returned None on a fitted encoder");
    assert_eq!(n_binary_features, &[2, 1]);
}
/// Checks that `BinaryEncoder::fit` followed by `transform` on different data
/// encodes a value the same way as it is encoded in the training data.
#[test]
fn test_binary_encoder_separate_fit_transform() {
    let train_data =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");
    let test_data =
        Array::from_shape_vec((2, 1), vec![1.0, 0.0]).expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    encoder
        .fit(&train_data)
        .expect("BinaryEncoder::fit failed on valid training data");
    assert!(encoder.is_fitted());

    let encoded = encoder
        .transform(&test_data)
        .expect("BinaryEncoder::transform failed on known categories");
    assert_eq!(encoded.shape(), &[2, 2]);

    let train_encoded = encoder
        .transform(&train_data)
        .expect("BinaryEncoder::transform failed on the training data");
    // Test row 0 and training row 1 both hold the value 1.0.
    for col in 0..2 {
        assert_abs_diff_eq!(encoded[[0, col]], train_encoded[[1, col]], epsilon = 1e-10);
    }
}
/// Checks that a `BinaryEncoder` built with `"error"` fails to transform a
/// value that was absent from the training data.
#[test]
fn test_binary_encoder_unknown_categories_error() {
    let train_data =
        Array::from_shape_vec((2, 1), vec![0.0, 1.0]).expect("Test data construction failed");
    let test_data =
        Array::from_shape_vec((1, 1), vec![2.0]).expect("Test data construction failed");

    let mut encoder = BinaryEncoder::new("error")
        .expect("BinaryEncoder::new rejected the \"error\" configuration");
    encoder
        .fit(&train_data)
        .expect("BinaryEncoder::fit failed on valid training data");

    assert!(encoder.transform(&test_data).is_err());
}
/// Checks that a `BinaryEncoder` built with `"ignore"` encodes a value that
/// was absent from the training data as 0.0 in a [1, 1] output.
#[test]
fn test_binary_encoder_unknown_categories_ignore() {
    let train_data =
        Array::from_shape_vec((2, 1), vec![0.0, 1.0]).expect("Test data construction failed");
    let test_data =
        Array::from_shape_vec((1, 1), vec![2.0]).expect("Test data construction failed");

    let mut encoder = BinaryEncoder::new("ignore")
        .expect("BinaryEncoder::new rejected the \"ignore\" configuration");
    encoder
        .fit(&train_data)
        .expect("BinaryEncoder::fit failed on valid training data");

    let encoded = encoder
        .transform(&test_data)
        .expect("BinaryEncoder::transform should not fail in \"ignore\" mode");
    assert_eq!(encoded.shape(), &[1, 1]);
    assert_abs_diff_eq!(encoded[[0, 0]], 0.0, epsilon = 1e-10);
}
/// Checks the `is_fitted`, `categories`, `n_binary_features` and
/// `n_output_features` accessors of `BinaryEncoder` before and after `fit`,
/// and that the fitted category map is keyed by 10, 20 and 30.
#[test]
fn test_binary_encoder_categories_accessor() {
    let data = Array::from_shape_vec((3, 1), vec![10.0, 20.0, 30.0])
        .expect("Test data construction failed");
    let mut encoder = BinaryEncoder::with_defaults();

    // Nothing is available before fitting.
    assert!(!encoder.is_fitted());
    assert!(encoder.categories().is_none());
    assert!(encoder.n_binary_features().is_none());
    assert!(encoder.n_output_features().is_none());

    encoder
        .fit(&data)
        .expect("BinaryEncoder::fit failed on valid data");

    // Everything is populated after fitting.
    assert!(encoder.is_fitted());
    assert!(encoder.categories().is_some());
    assert!(encoder.n_binary_features().is_some());
    assert!(encoder.n_output_features().is_some());

    let categories = encoder
        .categories()
        .expect("categories() returned None on a fitted encoder");
    // One map for the single feature, holding three categories.
    assert_eq!(categories.len(), 1);
    assert_eq!(categories[0].len(), 3);

    let category_map = &categories[0];
    assert!(category_map.contains_key(&10));
    assert!(category_map.contains_key(&20));
    assert!(category_map.contains_key(&30));
}
/// Table-driven check of `BinaryEncoder::int_to_binary`: each case is
/// `(value, width, expected bits)`.
#[test]
fn test_binary_encoder_int_to_binary() {
    let cases = [
        (0, 3, vec![0, 0, 0]),
        (1, 3, vec![0, 0, 1]),
        (2, 3, vec![0, 1, 0]),
        (3, 3, vec![0, 1, 1]),
        (7, 3, vec![1, 1, 1]),
        (5, 4, vec![0, 1, 0, 1]),
        (1, 1, vec![1]),
    ];

    for (value, width, expected_bits) in cases.iter() {
        assert_eq!(BinaryEncoder::int_to_binary(*value, *width), *expected_bits);
    }
}
/// Checks the error paths of `BinaryEncoder`: an invalid constructor argument,
/// fitting on a zero-row input, transforming before fitting, and a
/// feature-count mismatch between fit and transform.
#[test]
fn test_binary_encoder_validation_errors() {
    // Unknown handling mode.
    assert!(BinaryEncoder::new("invalid").is_err());

    // Fit on empty data.
    let empty_data = Array2::<f64>::zeros((0, 1));
    let mut encoder = BinaryEncoder::with_defaults();
    assert!(encoder.fit(&empty_data).is_err());

    // Transform on an encoder that was never fitted.
    let data =
        Array::from_shape_vec((2, 1), vec![0.0, 1.0]).expect("Test data construction failed");
    let encoder = BinaryEncoder::with_defaults();
    assert!(encoder.transform(&data).is_err());

    // Fitted on one feature, transformed with two.
    let train_data =
        Array::from_shape_vec((2, 1), vec![0.0, 1.0]).expect("Test data construction failed");
    let test_data = Array::from_shape_vec((2, 2), vec![0.0, 1.0, 1.0, 0.0])
        .expect("Test data construction failed");
    let mut encoder = BinaryEncoder::with_defaults();
    encoder
        .fit(&train_data)
        .expect("BinaryEncoder::fit failed on valid training data");
    assert!(encoder.transform(&test_data).is_err());
}
/// Checks that `fit_transform` and a subsequent `transform` on the same data
/// agree element-wise, and that two rows holding the same input value receive
/// the same encoding.
#[test]
fn test_binary_encoder_consistency() {
    let data = Array::from_shape_vec((4, 1), vec![3.0, 1.0, 4.0, 1.0])
        .expect("Test data construction failed");

    let mut encoder = BinaryEncoder::with_defaults();
    let encoded1 = encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");
    let encoded2 = encoder
        .transform(&data)
        .expect("BinaryEncoder::transform failed on the fitted data");

    // Compare shapes first so the element-wise zip cannot silently truncate.
    assert_eq!(encoded1.shape(), encoded2.shape());
    for (first, second) in encoded1.iter().zip(encoded2.iter()) {
        assert_abs_diff_eq!(*first, *second, epsilon = 1e-10);
    }

    // Rows 1 and 3 both hold the input value 1.0.
    for col in 0..2 {
        assert_abs_diff_eq!(encoded1[[1, col]], encoded1[[3, col]], epsilon = 1e-10);
    }
}
/// Compares output widths for ten distinct categories: `BinaryEncoder` is
/// expected to produce 4 columns versus 10 for `OneHotEncoder`.
#[test]
fn test_binary_encoder_memory_efficiency() {
    let data = Array::from_shape_vec(
        (10, 1),
        vec![0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
    )
    .expect("Test data construction failed");

    let mut binary_encoder = BinaryEncoder::with_defaults();
    let binary_encoded = binary_encoder
        .fit_transform(&data)
        .expect("BinaryEncoder::fit_transform failed on valid data");

    let mut onehot_encoder = OneHotEncoder::with_defaults();
    let onehot_encoded = onehot_encoder
        .fit_transform(&data)
        .expect("OneHotEncoder::fit_transform failed on valid data");

    let binary_width = binary_encoded.shape()[1];
    let onehot_width = onehot_encoded.shape().1;
    assert_eq!(binary_width, 4);
    assert_eq!(onehot_width, 10);
    assert!(binary_width < onehot_width);
}
/// Pushes three unit entries into a 3x4 `SparseMatrix` and checks its shape,
/// non-zero count and dense conversion.
#[test]
fn test_sparse_matrix_basic() {
    // (row, column) positions that receive the value 1.0.
    let positions = [(0, 1), (1, 2), (2, 0)];

    let mut matrix = SparseMatrix::new((3, 4));
    for &(row, col) in positions.iter() {
        matrix.push(row, col, 1.0);
    }

    assert_eq!(matrix.shape, (3, 4));
    assert_eq!(matrix.nnz(), 3);

    let as_dense = matrix.to_dense();
    assert_eq!(as_dense.shape(), &[3, 4]);
    for &(row, col) in positions.iter() {
        assert_eq!(as_dense[[row, col]], 1.0);
    }
    // A position that was never pushed stays zero.
    assert_eq!(as_dense[[0, 0]], 0.0);
}
/// Checks that a `OneHotEncoder` built with `true` as its third argument
/// returns `EncodedOutput::Sparse`, that one built with `false` returns
/// `EncodedOutput::Dense`, and that both outputs hold the same values.
#[test]
fn test_onehot_sparse_output() {
    let data = Array::from_shape_vec((4, 2), vec![0.0, 1.0, 1.0, 2.0, 2.0, 0.0, 0.0, 1.0])
        .expect("Test data construction failed");

    let mut encoder_sparse = OneHotEncoder::new(None, "error", true)
        .expect("OneHotEncoder::new rejected a valid sparse configuration");
    let result_sparse = encoder_sparse
        .fit_transform(&data)
        .expect("OneHotEncoder::fit_transform failed on valid data (sparse)");

    match &result_sparse {
        EncodedOutput::Sparse(sparse) => {
            assert_eq!(sparse.shape, (4, 6));
            // One non-zero per feature per row: 4 rows x 2 features.
            assert_eq!(sparse.nnz(), 8);

            let dense = sparse.to_dense();
            assert_eq!(dense[[0, 0]], 1.0);
            assert_eq!(dense[[0, 4]], 1.0);
            assert_eq!(dense[[0, 1]], 0.0);
        }
        EncodedOutput::Dense(_) => panic!("Expected sparse output, got dense"),
    }

    let mut encoder_dense = OneHotEncoder::new(None, "error", false)
        .expect("OneHotEncoder::new rejected a valid dense configuration");
    let result_dense = encoder_dense
        .fit_transform(&data)
        .expect("OneHotEncoder::fit_transform failed on valid data (dense)");

    match result_dense {
        EncodedOutput::Dense(dense) => {
            assert_eq!(dense.shape(), &[4, 6]);

            let sparse_as_dense = result_sparse.to_dense();
            for i in 0..4 {
                for j in 0..6 {
                    assert_abs_diff_eq!(dense[[i, j]], sparse_as_dense[[i, j]], epsilon = 1e-10);
                }
            }
        }
        EncodedOutput::Sparse(_) => panic!("Expected dense output, got sparse"),
    }
}
/// Checks sparse one-hot output combined with the `"first"` drop option: three
/// categories yield a (3, 2) sparse matrix with two non-zero entries.
#[test]
fn test_onehot_sparse_with_drop() {
    let data =
        Array::from_shape_vec((3, 1), vec![0.0, 1.0, 2.0]).expect("Test data construction failed");

    let mut encoder = OneHotEncoder::new(Some("first".to_string()), "error", true)
        .expect("OneHotEncoder::new rejected a valid sparse drop-first configuration");
    let result = encoder
        .fit_transform(&data)
        .expect("OneHotEncoder::fit_transform failed on valid data");

    match result {
        EncodedOutput::Sparse(sparse) => {
            assert_eq!(sparse.shape, (3, 2));
            assert_eq!(sparse.nnz(), 2);

            let dense = sparse.to_dense();
            // Row 0 is expected to be all zeros.
            assert_eq!(dense[[0, 0]], 0.0);
            assert_eq!(dense[[0, 1]], 0.0);
            // Rows 1 and 2 each set one of the two remaining columns.
            assert_eq!(dense[[1, 0]], 1.0);
            assert_eq!(dense[[2, 1]], 1.0);
        }
        EncodedOutput::Dense(_) => panic!("Expected sparse output, got dense"),
    }
}
/// Checks that `transform_dense` and `fit_transform_dense` return matching
/// dense [2, 2] arrays even when the encoder was built with `true` as its
/// third (sparse) argument.
#[test]
fn test_onehot_sparse_backward_compatibility() {
    let data =
        Array::from_shape_vec((2, 1), vec![0.0, 1.0]).expect("Test data construction failed");

    let mut encoder = OneHotEncoder::new(None, "error", true)
        .expect("OneHotEncoder::new rejected a valid sparse configuration");
    encoder
        .fit(&data)
        .expect("OneHotEncoder::fit failed on valid data");
    let dense_result = encoder
        .transform_dense(&data)
        .expect("OneHotEncoder::transform_dense failed on a fitted encoder");
    assert_eq!(dense_result.shape(), &[2, 2]);
    assert_eq!(dense_result[[0, 0]], 1.0);
    assert_eq!(dense_result[[1, 1]], 1.0);

    let mut encoder2 = OneHotEncoder::new(None, "error", true)
        .expect("OneHotEncoder::new rejected a valid sparse configuration");
    let dense_result2 = encoder2
        .fit_transform_dense(&data)
        .expect("OneHotEncoder::fit_transform_dense failed on valid data");
    assert_eq!(dense_result2.shape(), &[2, 2]);

    // Both shapes were asserted to be [2, 2], so the zip covers every element.
    for (first, second) in dense_result.iter().zip(dense_result2.iter()) {
        assert_abs_diff_eq!(*first, *second, epsilon = 1e-10);
    }
}
/// Checks that `EncodedOutput::shape` and `EncodedOutput::to_dense` behave the
/// same for a `Dense` variant and a `Sparse` variant holding the same values.
#[test]
fn test_encoded_output_methods() {
    // Dense variant wrapping a 2x3 array with ones at (0, 0) and (1, 1).
    let wrapped_dense = EncodedOutput::Dense(
        Array::from_shape_vec((2, 3), vec![1.0, 0.0, 0.0, 0.0, 1.0, 0.0])
            .expect("Test data construction failed"),
    );

    // Sparse variant with the same two non-zero entries.
    let mut triplets = SparseMatrix::new((2, 3));
    triplets.push(0, 0, 1.0);
    triplets.push(1, 1, 1.0);
    let wrapped_sparse = EncodedOutput::Sparse(triplets);

    assert_eq!(wrapped_dense.shape(), (2, 3));
    assert_eq!(wrapped_sparse.shape(), (2, 3));

    let from_dense = wrapped_dense.to_dense();
    let from_sparse = wrapped_sparse.to_dense();
    assert_eq!(from_dense.shape(), &[2, 3]);
    assert_eq!(from_sparse.shape(), &[2, 3]);

    for &(row, col) in [(0, 0), (1, 1)].iter() {
        assert_eq!(from_dense[[row, col]], 1.0);
        assert_eq!(from_sparse[[row, col]], 1.0);
    }
}