pub mod advanced_analytics;
pub mod balancing;
pub mod dataset;
pub mod enhanced_analytics;
pub mod extensions;
pub mod feature_engineering;
pub mod sampling;
pub mod scaling;
pub mod serialization;
pub mod splitting;
pub use dataset::Dataset;
pub use serialization::*;
pub use splitting::{
k_fold_split, stratified_k_fold_split, time_series_split, train_test_split,
CrossValidationFolds,
};
pub use sampling::{
bootstrap_sample, importance_sample, multiple_bootstrap_samples, random_sample,
stratified_sample,
};
pub use balancing::{
create_balanced_dataset, generate_synthetic_samples, random_oversample, random_undersample,
BalancingStrategy,
};
pub use scaling::{min_max_scale, normalize, robust_scale, StatsExt};
pub use feature_engineering::{
create_binned_features, polynomial_features, statistical_features, BinningStrategy,
};
pub use advanced_analytics::{
analyze_dataset_advanced, quick_quality_assessment, AdvancedDatasetAnalyzer,
AdvancedQualityMetrics, CorrelationInsights, NormalityAssessment,
};
/// 1-D array alias re-exporting `scirs2_core::ndarray::Array1` so downstream
/// code can use this crate's dataset APIs without depending on `scirs2_core`
/// paths directly.
pub type Array1<T> = scirs2_core::ndarray::Array1<T>;
/// 2-D array alias re-exporting `scirs2_core::ndarray::Array2` (same rationale
/// as [`Array1`]).
pub type Array2<T> = scirs2_core::ndarray::Array2<T>;
#[cfg(test)]
mod tests {
    use super::*;
    use scirs2_core::ndarray::Array2;

    /// End-to-end smoke test: dataset construction, splitting, sampling,
    /// balancing, scaling, and feature engineering all interoperate.
    #[test]
    fn test_module_integration() {
        // 6 samples x 2 features holding the values 1.0 ..= 12.0.
        let raw: Vec<f64> = (1..=12).map(f64::from).collect();
        let features = Array2::from_shape_vec((6, 2), raw).expect("Test: operation failed");
        let labels = scirs2_core::ndarray::Array1::from(vec![0.0, 0.0, 1.0, 1.0, 1.0, 1.0]);

        let ds = Dataset::new(features.clone(), Some(labels.clone()));
        assert_eq!(ds.n_samples(), 6);
        assert_eq!(ds.n_features(), 2);

        // A split must keep every sample in exactly one partition.
        let (train, test) = train_test_split(&ds, 0.3, Some(42)).expect("Operation failed");
        assert_eq!(train.n_samples() + test.n_samples(), 6);

        // Sampling without replacement yields exactly the requested count.
        let picked = random_sample(6, 3, false, Some(42)).expect("Operation failed");
        assert_eq!(picked.len(), 3);

        // Oversampling the minority class must grow the row count.
        let (grown, _grown_labels) =
            random_oversample(&features, &labels, Some(42)).expect("Operation failed");
        assert!(grown.nrows() > features.nrows());

        // Min-max scaling maps every value into the requested [0, 1] range.
        let mut rescaled = features.clone();
        min_max_scale(&mut rescaled, (0.0, 1.0));
        assert!(rescaled.iter().all(|&v| (0.0..=1.0).contains(&v)));

        // Degree-2 polynomial expansion adds feature columns.
        let expanded = polynomial_features(&features, 2, true).expect("Operation failed");
        assert!(expanded.ncols() > features.ncols());
    }

    /// Verifies the flat `crate::utils` re-export surface still resolves and
    /// behaves the same as the module-path imports.
    #[test]
    fn test_backward_compatibility() {
        use crate::utils::*;

        // 4 samples x 2 features holding the values 1.0 ..= 8.0.
        let features = Array2::from_shape_vec((4, 2), (1..=8).map(f64::from).collect::<Vec<_>>())
            .expect("Operation failed");
        let labels = scirs2_core::ndarray::Array1::from(vec![0.0, 0.0, 1.0, 1.0]);

        let ds = Dataset::new(features.clone(), Some(labels.clone()));
        assert_eq!(ds.n_samples(), 4);

        let folds = k_fold_split(4, 2, false, Some(42)).expect("Operation failed");
        assert_eq!(folds.len(), 2);

        let chosen = stratified_sample(&labels, 2, Some(42)).expect("Operation failed");
        assert_eq!(chosen.len(), 2);

        // Oversampling can only keep or grow the row count.
        let (rebalanced, _rebalanced_labels) = create_balanced_dataset(
            &features,
            &labels,
            BalancingStrategy::RandomOversample,
            Some(42),
        )
        .expect("Test: operation failed");
        assert!(rebalanced.nrows() >= features.nrows());
    }

    /// Covers the three cross-validation splitters: plain k-fold, stratified
    /// k-fold, and time-series splitting.
    #[test]
    fn test_cross_validation_compatibility() {
        // 10 samples x 3 features holding 0.0 ..= 29.0; labels cycle 0,1,2.
        let features = Array2::from_shape_vec((10, 3), (0..30).map(f64::from).collect())
            .expect("Operation failed");
        let labels = scirs2_core::ndarray::Array1::from(
            (0..10).map(|v| (v % 3) as f64).collect::<Vec<_>>(),
        );
        let ds = Dataset::new(features, Some(labels.clone()));

        let plain = k_fold_split(ds.n_samples(), 5, true, Some(42)).expect("Operation failed");
        assert_eq!(plain.len(), 5);

        let stratified =
            stratified_k_fold_split(&labels, 3, true, Some(42)).expect("Operation failed");
        assert_eq!(stratified.len(), 3);

        let temporal = time_series_split(ds.n_samples(), 3, 2, 1).expect("Operation failed");
        assert_eq!(temporal.len(), 3);
    }
}