// imbalanced-sampling 0.1.0
//
// Resampling algorithms for imbalanced datasets in Rust: SMOTE, ADASYN, and RandomUnderSampler.
// Documentation
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
use imbalanced_core::prelude::*;
use imbalanced_sampling::prelude::*;
use ndarray::{Array1, Array2};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

/// Builds a deterministic, synthetic two-class dataset for the benchmarks.
///
/// Rows are laid out majority-first: the leading rows carry label `0` with every feature drawn
/// uniformly from `[-1.0, 1.0)`, and the trailing rows carry label `1` with every feature drawn
/// uniformly from `[0.5, 2.0)`. The generator is a `ChaCha8Rng` seeded with `42`, so calls with
/// the same arguments produce the same data.
///
/// # Arguments
///
/// * `n_samples` - total number of rows.
/// * `n_features` - number of columns.
/// * `imbalance_ratio` - fraction of rows assigned to the minority class. The minority count is
///   `n_samples * imbalance_ratio` truncated toward zero and capped at `n_samples`, so a ratio
///   above `1.0` yields an all-minority dataset rather than underflowing the majority count.
///
/// # Returns
///
/// `(x, y)` where `x` has shape `(n_samples, n_features)` and `y` has length `n_samples`.
fn generate_imbalanced_dataset(
    n_samples: usize,
    n_features: usize,
    imbalance_ratio: f64,
) -> (Array2<f64>, Array1<i32>) {
    let mut rng = ChaCha8Rng::seed_from_u64(42);

    // Float-to-integer `as` casts saturate (negative or NaN becomes 0); the additional `min`
    // guards the subtraction below against ratios greater than 1.0.
    let minority_samples = ((n_samples as f64 * imbalance_ratio) as usize).min(n_samples);
    let majority_samples = n_samples - minority_samples;

    let mut x: Array2<f64> = Array2::zeros((n_samples, n_features));
    let mut y: Array1<i32> = Array1::zeros(n_samples);

    // Fill row by row, left to right, so the sequence of RNG draws is fixed:
    // all majority rows first, then all minority rows.
    for (i, (mut row, label)) in x.outer_iter_mut().zip(y.iter_mut()).enumerate() {
        let is_minority = i >= majority_samples;
        let (low, high) = if is_minority { (0.5, 2.0) } else { (-1.0, 1.0) };

        for value in row.iter_mut() {
            *value = rng.gen_range(low..high);
        }
        *label = i32::from(is_minority);
    }

    (x, y)
}

/// Benchmarks `SmoteStrategy::resample` as the dataset grows.
///
/// Runs one benchmark per dataset size (1000, 5000 and 10000 samples) in the `"SMOTE"` group.
/// Every dataset has 10 features and a 10% minority class; the sampler is built with
/// `SmoteStrategy::new(5)` and the default `SmoteConfig`.
fn bench_smote(c: &mut Criterion) {
    const N_FEATURES: usize = 10;
    const MINORITY_FRACTION: f64 = 0.1; // 10% minority class

    let mut group = c.benchmark_group("SMOTE");

    // Different dataset sizes
    for n_samples in [1000, 5000, 10000] {
        let dataset = generate_imbalanced_dataset(n_samples, N_FEATURES, MINORITY_FRACTION);
        let sampler = SmoteStrategy::new(5);
        let settings = imbalanced_sampling::smote::SmoteConfig::default();

        group.bench_with_input(
            BenchmarkId::new("samples", n_samples),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    black_box(
                        sampler
                            .resample(features.view(), labels.view(), &settings)
                            .unwrap(),
                    )
                })
            },
        );
    }

    group.finish();
}

/// Benchmarks `AdasynStrategy::resample` as the dataset grows.
///
/// Runs one benchmark per dataset size (1000, 5000 and 10000 samples) in the `"ADASYN"` group.
/// Every dataset has 10 features and a minority ratio of 0.1; the sampler is built with
/// `AdasynStrategy::new(5, 1.0)` and the default `AdasynConfig`.
fn bench_adasyn(c: &mut Criterion) {
    const N_FEATURES: usize = 10;
    const MINORITY_FRACTION: f64 = 0.1;

    let mut group = c.benchmark_group("ADASYN");

    for n_samples in [1000, 5000, 10000] {
        let dataset = generate_imbalanced_dataset(n_samples, N_FEATURES, MINORITY_FRACTION);
        let sampler = AdasynStrategy::new(5, 1.0);
        let settings = imbalanced_sampling::adasyn::AdasynConfig::default();

        group.bench_with_input(
            BenchmarkId::new("samples", n_samples),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    black_box(
                        sampler
                            .resample(features.view(), labels.view(), &settings)
                            .unwrap(),
                    )
                })
            },
        );
    }

    group.finish();
}

/// Benchmarks `RandomUnderSampler::resample` as the dataset grows.
///
/// Runs one benchmark per dataset size (1000, 5000 and 10000 samples) in the
/// `"RandomUnderSampler"` group. Every dataset has 10 features and a minority ratio of 0.1;
/// the sampler is built with `RandomUnderSampler::new()` and the default
/// `RandomUnderSamplerConfig`.
fn bench_random_undersampler(c: &mut Criterion) {
    const N_FEATURES: usize = 10;
    const MINORITY_FRACTION: f64 = 0.1;

    let mut group = c.benchmark_group("RandomUnderSampler");

    for n_samples in [1000, 5000, 10000] {
        let dataset = generate_imbalanced_dataset(n_samples, N_FEATURES, MINORITY_FRACTION);
        let sampler = RandomUnderSampler::new();
        let settings =
            imbalanced_sampling::random_undersampler::RandomUnderSamplerConfig::default();

        group.bench_with_input(
            BenchmarkId::new("samples", n_samples),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    black_box(
                        sampler
                            .resample(features.view(), labels.view(), &settings)
                            .unwrap(),
                    )
                })
            },
        );
    }

    group.finish();
}

/// Benchmarks `SmoteStrategy::resample` as the number of features grows.
///
/// Runs one benchmark per feature count (2, 10, 50 and 100) in the `"SMOTE_FeatureScaling"`
/// group. Every dataset has 5000 samples and a minority ratio of 0.1; the sampler is built
/// with `SmoteStrategy::new(5)` and the default `SmoteConfig`.
fn bench_feature_scaling(c: &mut Criterion) {
    const N_SAMPLES: usize = 5000;
    const MINORITY_FRACTION: f64 = 0.1;

    let mut group = c.benchmark_group("SMOTE_FeatureScaling");

    for feature_count in [2, 10, 50, 100] {
        let dataset = generate_imbalanced_dataset(N_SAMPLES, feature_count, MINORITY_FRACTION);
        let sampler = SmoteStrategy::new(5);
        let settings = imbalanced_sampling::smote::SmoteConfig::default();

        group.bench_with_input(
            BenchmarkId::new("features", feature_count),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    black_box(
                        sampler
                            .resample(features.view(), labels.view(), &settings)
                            .unwrap(),
                    )
                })
            },
        );
    }

    group.finish();
}

/// Benchmarks `SmoteStrategy::resample` for different values passed to `SmoteStrategy::new`.
///
/// Runs one benchmark per `k` (3, 5, 10 and 15) in the `"SMOTE_KNeighbors"` group, all against
/// the same dataset of 5000 samples, 10 features and a minority ratio of 0.1, using the
/// default `SmoteConfig`.
fn bench_k_neighbors_impact(c: &mut Criterion) {
    let mut group = c.benchmark_group("SMOTE_KNeighbors");

    // The dataset is identical for every `k`, so build it once and lend it to each benchmark
    // instead of deep-copying both arrays per iteration of the loop.
    let dataset = generate_imbalanced_dataset(5000, 10, 0.1);

    for k in [3, 5, 10, 15] {
        let smote = SmoteStrategy::new(k);
        let config = imbalanced_sampling::smote::SmoteConfig::default();

        group.bench_with_input(BenchmarkId::new("k", k), &dataset, |b, (x, y)| {
            b.iter(|| black_box(smote.resample(x.view(), y.view(), &config).unwrap()))
        });
    }

    group.finish();
}

/// Benchmarks `SmoteStrategy::resample` across minority-class ratios.
///
/// Runs one benchmark per ratio (0.05, 0.1, 0.2 and 0.3) in the `"SMOTE_ImbalanceRatio"`
/// group. Every dataset has 5000 samples and 10 features; the sampler is built with
/// `SmoteStrategy::new(5)` and the default `SmoteConfig`. Each benchmark is labelled with the
/// ratio expressed as a whole percentage.
fn bench_imbalance_ratio_impact(c: &mut Criterion) {
    let mut group = c.benchmark_group("SMOTE_ImbalanceRatio");

    let size = 5000;
    let features = 10;
    let ratios: [f64; 4] = [0.05, 0.1, 0.2, 0.3];

    for ratio in ratios {
        let dataset = generate_imbalanced_dataset(size, features, ratio);
        let smote = SmoteStrategy::new(5);
        let config = imbalanced_sampling::smote::SmoteConfig::default();

        // Round before casting: a bare `as u32` truncates, so a product that lands just below
        // an integer because of binary floating-point error would get the wrong label.
        let percent = (ratio * 100.0).round() as u32;

        group.bench_with_input(BenchmarkId::new("ratio", percent), &dataset, |b, (x, y)| {
            b.iter(|| black_box(smote.resample(x.view(), y.view(), &config).unwrap()))
        });
    }

    group.finish();
}

// Collect every benchmark function in this file into a Criterion group named `benches`,
// then let `criterion_main!` generate the benchmark binary's entry point for that group.
criterion_group!(
    benches,
    bench_smote,
    bench_adasyn,
    bench_random_undersampler,
    bench_feature_scaling,
    bench_k_neighbors_impact,
    bench_imbalance_ratio_impact
);
criterion_main!(benches);