//! imbalanced-sampling 0.1.0
//!
//! Resampling algorithms for imbalanced datasets in Rust - SMOTE, ADASYN, RandomUnderSampler
//! Documentation
use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
use imbalanced_core::prelude::*;
use imbalanced_sampling::prelude::*;
use ndarray::{Array1, Array2};
use rand::{Rng, SeedableRng};
use rand_chacha::ChaCha8Rng;

/// Builds a deterministic synthetic two-class dataset for the benchmarks.
///
/// The leading rows form the majority class (label `0`, every feature drawn
/// from `-1.0..1.0`); the trailing rows form the minority class (label `1`,
/// every feature drawn from `0.5..2.0`). The RNG is seeded with the fixed value
/// `42`, so repeated calls with the same arguments yield the same data.
///
/// `imbalance_ratio` is the fraction of `n_samples` assigned to the minority
/// class. The minority count is truncated toward zero and capped at
/// `n_samples`, so a ratio above `1.0` produces an all-minority dataset instead
/// of underflowing the majority count. A negative or NaN ratio produces no
/// minority rows, because the float-to-`usize` cast saturates at zero.
///
/// Returns the `(n_samples, n_features)` feature matrix and the label vector of
/// length `n_samples`.
fn generate_imbalanced_dataset(n_samples: usize, n_features: usize, imbalance_ratio: f64) -> (Array2<f64>, Array1<i32>) {
    let mut rng = ChaCha8Rng::seed_from_u64(42);

    // Cap at `n_samples` so the subtraction below can never underflow.
    let minority_samples = ((n_samples as f64 * imbalance_ratio) as usize).min(n_samples);
    let majority_samples = n_samples - minority_samples;

    let mut x: Array2<f64> = Array2::zeros((n_samples, n_features));
    let mut y: Array1<i32> = Array1::zeros(n_samples);

    // Single row-major pass: values are drawn row by row, feature by feature,
    // which keeps the RNG consumption order of filling the majority rows first
    // and the minority rows afterwards.
    for (row_idx, (mut row, label)) in x.outer_iter_mut().zip(y.iter_mut()).enumerate() {
        let is_minority = row_idx >= majority_samples;
        let (low, high) = if is_minority { (0.5, 2.0) } else { (-1.0, 1.0) };

        for value in row.iter_mut() {
            *value = rng.gen_range(low..high);
        }
        *label = i32::from(is_minority);
    }

    (x, y)
}

/// Criterion benchmark group `SMOTE_Quick`.
///
/// Times `SmoteStrategy::resample` with a default `SmoteConfig` on synthetic
/// datasets of 500, 1000 and 2000 samples (5 features, minority ratio 0.2).
/// Dataset construction happens outside the timed closure.
fn bench_smote_quick(c: &mut Criterion) {
    // Few features and a mild imbalance keep each run short.
    const N_FEATURES: usize = 5;
    const IMBALANCE_RATIO: f64 = 0.2;

    let mut group = c.benchmark_group("SMOTE_Quick");
    // Small Criterion sample count for faster execution.
    group.sample_size(10);

    for n_samples in [500, 1000, 2000] {
        let dataset = generate_imbalanced_dataset(n_samples, N_FEATURES, IMBALANCE_RATIO);
        let smote = SmoteStrategy::new(5);
        let config = imbalanced_sampling::smote::SmoteConfig::default();

        group.bench_with_input(
            BenchmarkId::new("samples", n_samples),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    let resampled = smote
                        .resample(features.view(), labels.view(), &config)
                        .unwrap();
                    black_box(resampled)
                })
            },
        );
    }

    group.finish();
}

/// Criterion benchmark group `ADASYN_Quick`.
///
/// Times `AdasynStrategy::resample` with a default `AdasynConfig` on synthetic
/// datasets of 500 and 1000 samples (5 features, minority ratio 0.2).
/// Dataset construction happens outside the timed closure.
fn bench_adasyn_quick(c: &mut Criterion) {
    const N_FEATURES: usize = 5;
    const IMBALANCE_RATIO: f64 = 0.2;

    let mut group = c.benchmark_group("ADASYN_Quick");
    group.sample_size(10);

    for n_samples in [500, 1000] {
        let dataset = generate_imbalanced_dataset(n_samples, N_FEATURES, IMBALANCE_RATIO);
        let adasyn = AdasynStrategy::new(5, 1.0);
        let config = imbalanced_sampling::adasyn::AdasynConfig::default();

        group.bench_with_input(
            BenchmarkId::new("samples", n_samples),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    let resampled = adasyn
                        .resample(features.view(), labels.view(), &config)
                        .unwrap();
                    black_box(resampled)
                })
            },
        );
    }

    group.finish();
}

/// Criterion benchmark group `RandomUnderSampler_Quick`.
///
/// Times `RandomUnderSampler::resample` with a default
/// `RandomUnderSamplerConfig` on synthetic datasets of 1000, 5000 and 10000
/// samples (10 features, minority ratio 0.1). Dataset construction happens
/// outside the timed closure.
fn bench_random_undersampler_quick(c: &mut Criterion) {
    const N_FEATURES: usize = 10;
    const IMBALANCE_RATIO: f64 = 0.1;

    let mut group = c.benchmark_group("RandomUnderSampler_Quick");
    // The undersampler is very fast, so a larger Criterion sample count is affordable.
    group.sample_size(50);

    for n_samples in [1000, 5000, 10000] {
        let dataset = generate_imbalanced_dataset(n_samples, N_FEATURES, IMBALANCE_RATIO);
        let undersampler = RandomUnderSampler::new();
        let config = imbalanced_sampling::random_undersampler::RandomUnderSamplerConfig::default();

        group.bench_with_input(
            BenchmarkId::new("samples", n_samples),
            &dataset,
            |bencher, data| {
                let (features, labels) = data;
                bencher.iter(|| {
                    let resampled = undersampler
                        .resample(features.view(), labels.view(), &config)
                        .unwrap();
                    black_box(resampled)
                })
            },
        );
    }

    group.finish();
}

// Collect the three quick benchmarks into the `benches` group, spelling out the
// default Criterion configuration that the short macro form applies implicitly,
// then let Criterion generate the benchmark entry point.
criterion_group! {
    name = benches;
    config = Criterion::default();
    targets = bench_smote_quick, bench_adasyn_quick, bench_random_undersampler_quick
}
criterion_main!(benches);