#![cfg(test)]
use std::process::Command;
use std::time::Instant;
/// Number of timed runs per implementation; the reported time is their median.
const RUNS: usize = 5;
/// Largest apr/sklearn wall-clock ratio the test accepts before failing.
const RATIO_CEILING: f64 = 0.10;
/// Sample count passed to `make_classification` on both the apr and sklearn sides.
const N_SAMPLES: usize = 500;
/// Feature count passed to `make_classification` on both sides.
const N_FEATURES: usize = 10;
/// Informative-feature count passed to `make_classification` on both sides.
const N_INFORMATIVE: usize = 8;
/// Class count passed to `make_classification` on both sides.
const N_CLASSES: usize = 3;
/// Seed passed to `make_classification` (sklearn receives it as `random_state`).
const SEED: u64 = 42;
/// Returns the median of `xs` without modifying the caller's slice.
///
/// Values are ordered with `f64::total_cmp`. For an even number of elements
/// the result is the mean of the two middle values.
///
/// # Panics
///
/// Panics if `xs` is empty.
fn median(xs: &[f64]) -> f64 {
    let mut sorted: Vec<f64> = xs.iter().copied().collect();
    sorted.sort_unstable_by(|a, b| a.total_cmp(b));

    let len = sorted.len();
    let upper = len / 2;
    match len % 2 {
        1 => sorted[upper],
        _ => {
            let lower = upper - 1;
            (sorted[lower] + sorted[upper]) / 2.0
        }
    }
}
/// Runs the apr side of the benchmark once: generate the synthetic dataset,
/// fit a `GaussianNB` on it, and predict on the same data.
///
/// # Panics
///
/// Panics if either `fit` or `predict` returns an error.
fn apr_fit_predict_workload() {
    use aprender::classification::GaussianNB;
    use aprender::datasets::make_classification;

    // Dataset shape and seed come from the file-level constants.
    let (features, labels) =
        make_classification(N_SAMPLES, N_FEATURES, N_INFORMATIVE, N_CLASSES, SEED);

    let mut model = GaussianNB::new();
    model.fit(&features, &labels).expect("apr fit");

    // Only the work matters here; the predictions themselves are unused.
    let _predictions = model.predict(&features).expect("apr predict");
}
/// Times the apr workload as a cold-start child process.
///
/// Re-executes `self_exe` (expected to be the current test binary) with
/// `APR_SKLEARN_COLDSTART_CHILD=1` so the child takes the workload branch of
/// `beat_sklearn_coldstart_speed`. One untimed run is performed first and its
/// result discarded; then `RUNS` runs are timed.
///
/// Returns the median wall-clock time in milliseconds.
///
/// # Panics
///
/// Panics if the child cannot be spawned or if a timed run exits with a
/// non-success status.
fn time_apr_process(self_exe: &std::path::Path) -> f64 {
    let run = || {
        Command::new(self_exe)
            // The workload lives inside an `#[ignore]`d test, so the child has
            // to opt in to ignored tests and select that test by name.
            // Without these arguments libtest skips it and only harness
            // start-up would be timed.
            .args(["--ignored", "beat_sklearn_coldstart_speed"])
            .env("APR_SKLEARN_COLDSTART_CHILD", "1")
            .output()
            .expect("re-exec apr child")
    };

    // Untimed first run; its output and status are intentionally ignored.
    let _ = run();

    let mut times = Vec::with_capacity(RUNS);
    for _ in 0..RUNS {
        let t = Instant::now();
        let out = run();
        // Record the elapsed time before inspecting the result so the check
        // itself is not part of the measurement.
        times.push(t.elapsed().as_secs_f64() * 1000.0);
        assert!(out.status.success(), "apr child failed");
    }
    median(&times)
}
/// Times the scikit-learn equivalent of the workload as a fresh Python process.
///
/// Builds a small Python script that generates the same-shaped dataset, fits
/// `GaussianNB`, and predicts on the training data, then runs it through
/// `uv run --with scikit-learn --with numpy python3 -c <script>`. One untimed
/// run is performed first and its result discarded; then `RUNS` runs are timed.
///
/// Returns the median wall-clock time in milliseconds.
///
/// # Panics
///
/// Panics if `uv` cannot be spawned or if a timed run exits with a non-success
/// status (the child's stderr is included in the panic message).
fn time_sklearn_process() -> f64 {
    // The script body must stay at column 0: it is passed verbatim to `python3 -c`.
    let py = format!(
        r#"
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import make_classification
X, y = make_classification(
n_samples={n}, n_features={d}, n_informative={ni},
n_redundant=0, n_classes={c}, random_state={seed})
m = GaussianNB().fit(X, y)
_ = m.predict(X)
"#,
        n = N_SAMPLES,
        d = N_FEATURES,
        ni = N_INFORMATIVE,
        c = N_CLASSES,
        seed = SEED,
    );
    let run = || {
        Command::new("uv")
            .args([
                "run",
                "--with",
                "scikit-learn",
                "--with",
                "numpy",
                "python3",
                "-c",
                &py,
            ])
            .output()
            .expect("run uv (is `uv` installed? this test is nightly-only)")
    };

    // Untimed first run; its output and status are intentionally ignored.
    let _ = run();

    let mut times = Vec::with_capacity(RUNS);
    for _ in 0..RUNS {
        let t = Instant::now();
        let out = run();
        // Record the elapsed time before inspecting the result so the check
        // itself is not part of the measurement.
        times.push(t.elapsed().as_secs_f64() * 1000.0);
        assert!(
            out.status.success(),
            "sklearn timing failed: {}",
            String::from_utf8_lossy(&out.stderr)
        );
    }
    median(&times)
}
/// Cold-start speed gate: one-shot apr fit+predict versus scikit-learn.
///
/// The function has two modes, selected by the `APR_SKLEARN_COLDSTART_CHILD`
/// environment variable:
///
/// * set: run the apr workload once and return (this is what the timed child
///   process executes);
/// * unset: time the apr child and the sklearn process, log both medians, and
///   fail if the apr/sklearn ratio exceeds `RATIO_CEILING`.
#[test]
#[ignore = "nightly-only: needs uv + scikit-learn (beat-speed-nightly.yml)"]
fn beat_sklearn_coldstart_speed() {
    // Child mode: do the work being measured and nothing else.
    let running_as_child = std::env::var("APR_SKLEARN_COLDSTART_CHILD").is_ok();
    if running_as_child {
        apr_fit_predict_workload();
        return;
    }

    // Parent mode: measure apr first, then sklearn.
    let test_binary = std::env::current_exe().expect("current_exe");
    let apr_ms = time_apr_process(&test_binary);
    let sklearn_ms = time_sklearn_process();

    // `ratio` is what the contract bounds; `speedup` is its inverse, for the log line.
    let (ratio, speedup) = (apr_ms / sklearn_ms, sklearn_ms / apr_ms);

    eprintln!(
        "BEAT-SKLEARN-COLDSTART-SPEED: apr={apr_ms:.3}ms sklearn={sklearn_ms:.1}ms \
         ratio={ratio:.5} (apr {speedup:.0}x faster), one-shot {N_SAMPLES}x{N_FEATURES} \
         GaussianNB fit+predict, median of {RUNS}"
    );

    assert!(
        ratio <= RATIO_CEILING,
        "FALSIFY-BEAT-SKLEARN-COLDSTART-SPEED: apr/sklearn ratio {ratio:.5} > {RATIO_CEILING:.2} \
         — apr lost its static-binary cold-start advantage for one-shot small fit+predict \
         (apr={apr_ms:.3}ms, sklearn={sklearn_ms:.1}ms; contract beat-sklearn-coldstart-speed-v1.yaml)"
    );
}