use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use scirs2_core::ndarray::RandomExt;
use scirs2_core::ndarray::{Array2, ArrayBase, Axis, Data, Ix2};
use scirs2_core::random::prelude::*;
use scirs2_core::random::OptimizedArrayRandom;
use scirs2_transform::*;
use std::hint::black_box;
/// Values used as the first array dimension (`n_samples`) by the benchmarks
/// in this file that sweep the shared size grid.
const SAMPLE_SIZES: &[usize] = &[100, 1000, 10_000];
/// Values used as the second array dimension (`n_features`) by the benchmarks
/// in this file that sweep the shared size grid.
const FEATURE_SIZES: &[usize] = &[10, 50, 100];
/// Benchmarks `normalize_array` for the `MinMax`, `ZScore`, `L2` and `Robust`
/// methods over every `SAMPLE_SIZES` x `FEATURE_SIZES` combination.
///
/// `MinMax`, `ZScore` and `Robust` are called with axis argument `0`, `L2`
/// with axis argument `1`. Input data is freshly sampled from
/// `Uniform::new(-100.0, 100.0)` for each shape, and throughput is reported
/// in elements (`n_samples * n_features`).
///
/// Each timed closure *returns* the normalization output so that Criterion
/// consumes it; binding it to an unused local would allow the optimizer to
/// discard the computation being measured.
///
/// NOTE(review): the returned value is never inspected; if `normalize_array`
/// returns a `Result`, a failing call would be timed silently — confirm.
fn bench_normalization(c: &mut Criterion) {
    let mut group = c.benchmark_group("Normalization");
    // One generator is enough for the whole sweep.
    let mut rng = thread_rng();

    for &n_samples in SAMPLE_SIZES {
        for &n_features in FEATURE_SIZES {
            let data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-100.0, 100.0).expect("Operation failed"),
                &mut rng,
            );
            // Shared benchmark parameter label, e.g. "1000x50".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(BenchmarkId::new("MinMax", &shape), &data, |b, data| {
                b.iter(|| normalize_array(black_box(data), NormalizationMethod::MinMax, 0));
            });

            group.bench_with_input(BenchmarkId::new("ZScore", &shape), &data, |b, data| {
                b.iter(|| normalize_array(black_box(data), NormalizationMethod::ZScore, 0));
            });

            group.bench_with_input(BenchmarkId::new("L2", &shape), &data, |b, data| {
                b.iter(|| normalize_array(black_box(data), NormalizationMethod::L2, 1));
            });

            group.bench_with_input(BenchmarkId::new("Robust", &shape), &data, |b, data| {
                b.iter(|| normalize_array(black_box(data), NormalizationMethod::Robust, 0));
            });
        }
    }

    group.finish();
}
/// Benchmarks `simd_normalizearray` for the `MinMax` and `ZScore` methods
/// (axis argument `0`) over every `SAMPLE_SIZES` x `FEATURE_SIZES`
/// combination. Only compiled when the `simd` feature is enabled.
///
/// Input data is freshly sampled from `Uniform::new(-100.0, 100.0)` for each
/// shape, and throughput is reported in elements (`n_samples * n_features`).
///
/// Each timed closure *returns* the normalization output so that Criterion
/// consumes it; binding it to an unused local would allow the optimizer to
/// discard the computation being measured.
#[cfg(feature = "simd")]
fn bench_simd_normalization(c: &mut Criterion) {
    use scirs2_transform::normalize_simd::*;

    let mut group = c.benchmark_group("SIMD_Normalization");
    // One generator is enough for the whole sweep.
    let mut rng = thread_rng();

    for &n_samples in SAMPLE_SIZES {
        for &n_features in FEATURE_SIZES {
            let data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-100.0, 100.0).expect("Operation failed"),
                &mut rng,
            );
            // Shared benchmark parameter label, e.g. "1000x50".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(
                BenchmarkId::new("SIMD_MinMax", &shape),
                &data,
                |b, data| {
                    b.iter(|| {
                        simd_normalizearray(black_box(data), NormalizationMethod::MinMax, 0)
                    });
                },
            );

            group.bench_with_input(
                BenchmarkId::new("SIMD_ZScore", &shape),
                &data,
                |b, data| {
                    b.iter(|| {
                        simd_normalizearray(black_box(data), NormalizationMethod::ZScore, 0)
                    });
                },
            );
        }
    }

    group.finish();
}
/// Benchmarks the `transform` step of `MaxAbsScaler` and of
/// `QuantileTransformer::new(100, "uniform", false)` over every
/// `SAMPLE_SIZES` x `FEATURE_SIZES` combination.
///
/// Each transformer is constructed and fitted once per shape, outside the
/// timed closure, so only `transform` is measured. Input data is freshly
/// sampled from `Uniform::new(-100.0, 100.0)` for each shape, and throughput
/// is reported in elements (`n_samples * n_features`).
///
/// The timed closures *return* the transform output so that Criterion
/// consumes it; binding it to an unused local would allow the optimizer to
/// discard the computation being measured.
///
/// # Panics
///
/// Panics with "Operation failed" if sampling setup, transformer
/// construction, or fitting fails.
fn bench_scaling(c: &mut Criterion) {
    let mut group = c.benchmark_group("Scaling");
    // One generator is enough for the whole sweep.
    let mut rng = thread_rng();

    for &n_samples in SAMPLE_SIZES {
        for &n_features in FEATURE_SIZES {
            let data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-100.0, 100.0).expect("Operation failed"),
                &mut rng,
            );
            // Shared benchmark parameter label, e.g. "1000x50".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(
                BenchmarkId::new("MaxAbsScaler", &shape),
                &data,
                |b, data| {
                    // Fit outside the timed region.
                    let mut scaler = MaxAbsScaler::new();
                    scaler.fit(data).expect("Operation failed");
                    b.iter(|| scaler.transform(black_box(data)));
                },
            );

            group.bench_with_input(
                BenchmarkId::new("QuantileTransformer", &shape),
                &data,
                |b, data| {
                    // Fit outside the timed region.
                    let mut transformer =
                        QuantileTransformer::new(100, "uniform", false).expect("Operation failed");
                    transformer.fit(data).expect("Operation failed");
                    b.iter(|| transformer.transform(black_box(data)));
                },
            );
        }
    }

    group.finish();
}
/// Benchmarks three feature-engineering operations on arrays of
/// `{100, 1000}` x `{5, 10, 20}`:
///
/// * `PolynomialFeatures::new(2, false, false)` — `transform` only,
/// * `PowerTransformer::new("yeo-johnson", true)` — fitted once outside the
///   timed closure, `transform` measured,
/// * `binarize` with threshold argument `0.0`.
///
/// Input data is freshly sampled from `Uniform::new(-10.0, 10.0)` for each
/// shape, and throughput is reported in elements (`n_samples * n_features`).
///
/// The timed closures *return* their output so that Criterion consumes it;
/// binding it to an unused local would allow the optimizer to discard the
/// computation being measured.
///
/// # Panics
///
/// Panics with "Operation failed" if sampling setup, `PowerTransformer`
/// construction, or fitting fails.
fn bench_feature_engineering(c: &mut Criterion) {
    let mut group = c.benchmark_group("Feature_Engineering");
    // One generator is enough for the whole sweep.
    let mut rng = thread_rng();

    for &n_samples in &[100, 1000] {
        for &n_features in &[5, 10, 20] {
            let data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-10.0, 10.0).expect("Operation failed"),
                &mut rng,
            );
            // Shared benchmark parameter label, e.g. "1000x20".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(
                BenchmarkId::new("PolynomialFeatures", &shape),
                &data,
                |b, data| {
                    let poly = PolynomialFeatures::new(2, false, false);
                    b.iter(|| poly.transform(black_box(data)));
                },
            );

            group.bench_with_input(
                BenchmarkId::new("PowerTransform", &shape),
                &data,
                |b, data| {
                    // Fit outside the timed region.
                    let mut pt =
                        PowerTransformer::new("yeo-johnson", true).expect("Operation failed");
                    pt.fit(data).expect("Operation failed");
                    b.iter(|| pt.transform(black_box(data)));
                },
            );

            group.bench_with_input(BenchmarkId::new("Binarize", &shape), &data, |b, data| {
                b.iter(|| binarize(black_box(data), 0.0));
            });
        }
    }

    group.finish();
}
/// Benchmarks the `transform` step of `PCA::new(n_components, true, false)`
/// and `TruncatedSVD::new(n_components)` on arrays of `{100, 500}` x
/// `{20, 50}`, with `n_components = n_features / 2`.
///
/// Each model is constructed and fitted once per shape, outside the timed
/// closure, so only `transform` is measured. Input data is freshly sampled
/// from `Uniform::new(-10.0, 10.0)` for each shape, and throughput is
/// reported in elements (`n_samples * n_features`).
///
/// The timed closures *return* the transform output so that Criterion
/// consumes it; binding it to an unused local would allow the optimizer to
/// discard the computation being measured.
///
/// # Panics
///
/// Panics with "Operation failed" if sampling setup or fitting fails.
fn bench_dimensionality_reduction(c: &mut Criterion) {
    let mut group = c.benchmark_group("Dimensionality_Reduction");
    // One generator is enough for the whole sweep.
    let mut rng = thread_rng();

    for &n_samples in &[100, 500] {
        for &n_features in &[20, 50] {
            let data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-10.0, 10.0).expect("Operation failed"),
                &mut rng,
            );
            let n_components = n_features / 2;
            // Shared benchmark parameter label, e.g. "500x50".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(BenchmarkId::new("PCA", &shape), &data, |b, data| {
                // Fit outside the timed region.
                let mut pca = PCA::new(n_components, true, false);
                pca.fit(data).expect("Operation failed");
                b.iter(|| pca.transform(black_box(data)));
            });

            group.bench_with_input(
                BenchmarkId::new("TruncatedSVD", &shape),
                &data,
                |b, data| {
                    // Fit outside the timed region.
                    let mut svd = TruncatedSVD::new(n_components);
                    svd.fit(data).expect("Operation failed");
                    b.iter(|| svd.transform(black_box(data)));
                },
            );
        }
    }

    group.finish();
}
/// Benchmarks the `transform` step of `SimpleImputer` (mean strategy) and
/// `KNNImputer` (5 neighbours, Euclidean metric, distance weighting) on
/// arrays of `SAMPLE_SIZES` x `{10, 20}`.
///
/// Input data is freshly sampled from `Uniform::new(-10.0, 10.0)` for each
/// shape; every element is then independently replaced by `NaN` when a random
/// `f64` draw is below `0.1`. `f64::NAN` is passed to both imputers as their
/// last constructor argument. Each imputer is fitted once per shape, outside
/// the timed closure.
///
/// `KNNImputer` is only benchmarked for `n_samples <= 1000`.
///
/// The timed closures *return* the transform output so that Criterion
/// consumes it; binding it to an unused local would allow the optimizer to
/// discard the computation being measured.
///
/// # Panics
///
/// Panics with "Operation failed" if sampling setup or fitting fails.
fn bench_imputation(c: &mut Criterion) {
    let mut group = c.benchmark_group("Imputation");
    // One generator serves both data generation and NaN injection.
    let mut rng = thread_rng();

    for &n_samples in SAMPLE_SIZES {
        for &n_features in &[10, 20] {
            let mut data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-10.0, 10.0).expect("Operation failed"),
                &mut rng,
            );
            // Knock out entries at random to create missing values.
            for value in data.iter_mut() {
                if rng.random::<f64>() < 0.1 {
                    *value = f64::NAN;
                }
            }
            // Shared benchmark parameter label, e.g. "1000x20".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(
                BenchmarkId::new("SimpleImputer", &shape),
                &data,
                |b, data| {
                    // Fit outside the timed region.
                    let mut imputer = SimpleImputer::new(ImputeStrategy::Mean, f64::NAN);
                    imputer.fit(data).expect("Operation failed");
                    b.iter(|| imputer.transform(black_box(data)));
                },
            );

            // The KNN benchmark is skipped for the largest sample count.
            if n_samples <= 1000 {
                group.bench_with_input(
                    BenchmarkId::new("KNNImputer", &shape),
                    &data,
                    |b, data| {
                        // Fit outside the timed region.
                        let mut imputer = KNNImputer::new(
                            5,
                            DistanceMetric::Euclidean,
                            WeightingScheme::Distance,
                            f64::NAN,
                        );
                        imputer.fit(data).expect("Operation failed");
                        b.iter(|| imputer.transform(black_box(data)));
                    },
                );
            }
        }
    }

    group.finish();
}
/// Benchmarks a two-stage pipeline — `Normalizer` (`ZScore`, axis argument
/// `0`) followed by `PCA::new(n_features / 2, true, false)` — on arrays of
/// `{100, 1000}` x `{10, 20}`.
///
/// Unlike the single-step benchmarks, the timed closure includes cloning the
/// unfitted templates and running `fit_transform` on both stages, so fitting
/// cost is part of the measurement. Input data is freshly sampled from
/// `Uniform::new(-10.0, 10.0)` for each shape, and throughput is reported in
/// elements (`n_samples * n_features`).
///
/// The PCA output is *returned* from the timed closure so that Criterion
/// consumes it; binding it to an unused local would allow the optimizer to
/// discard the computation being measured. As before, the PCA result is not
/// unwrapped.
///
/// # Panics
///
/// Panics with "Operation failed" if sampling setup or the normalizer's
/// `fit_transform` fails.
fn bench_pipeline(c: &mut Criterion) {
    let mut group = c.benchmark_group("Pipeline");
    // One generator is enough for the whole sweep.
    let mut rng = thread_rng();

    for &n_samples in &[100, 1000] {
        for &n_features in &[10, 20] {
            let data = Array2::random_bulk(
                Ix2(n_samples, n_features),
                Uniform::new(-10.0, 10.0).expect("Operation failed"),
                &mut rng,
            );
            // Shared benchmark parameter label, e.g. "1000x20".
            let shape = format!("{}x{}", n_samples, n_features);

            group.throughput(Throughput::Elements((n_samples * n_features) as u64));

            group.bench_with_input(
                BenchmarkId::new("StandardScaler_PCA", &shape),
                &data,
                |b, data| {
                    // Unfitted templates; each iteration works on fresh clones.
                    let normalizer = Normalizer::new(NormalizationMethod::ZScore, 0);
                    let pca = PCA::new(n_features / 2, true, false);
                    b.iter(|| {
                        let mut norm = normalizer.clone();
                        let normalized = norm
                            .fit_transform(black_box(data))
                            .expect("Operation failed");
                        let mut pca_copy = pca.clone();
                        pca_copy.fit_transform(&normalized)
                    });
                },
            );
        }
    }

    group.finish();
}
// Benchmark group that is registered regardless of enabled features.
criterion_group!(
benches,
bench_normalization,
bench_scaling,
bench_feature_engineering,
bench_dimensionality_reduction,
bench_imputation,
bench_pipeline
);
// Additional group that only exists when the `simd` feature is enabled,
// matching the `cfg` gate on `bench_simd_normalization`.
#[cfg(feature = "simd")]
criterion_group!(simd_benches, bench_simd_normalization);
// Exactly one of the two `criterion_main!` invocations below is compiled:
// without `simd` only `benches` runs; with `simd` both groups run.
#[cfg(not(feature = "simd"))]
criterion_main!(benches);
#[cfg(feature = "simd")]
criterion_main!(benches, simd_benches);