1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
//! FALSIFY-BEAT-SKLEARN-IRIS — the Pillar-1 beat-benchmark.
//!
//! Mission ([[project_mission_four_pillars]]): aprender must BEAT scikit-learn at
//! its canonical task, where "beat" is a *falsifiable* benchmark — apr ≥ sklearn
//! on accuracy on the SAME data/split. This gate fails CI if apr's
//! `RandomForestClassifier` drops more than 2pp below sklearn's pinned minimum
//! Iris accuracy (see the threshold below).
//!
//! ## Pinned scikit-learn baseline
//! `RandomForestClassifier(n_estimators=100)` on the canonical Iris dataset, with
//! a DETERMINISTIC split (sample index `i % 3 == 0` → test; n_train=100,
//! n_test=50). Over `random_state` 0..4: test_acc **mean 0.9560, min 0.9400,
//! max 0.9600**. Pinned 2026-06-11 via `uv run --with scikit-learn`. apr must
//! reach **≥ 0.92** (sklearn's floor minus a 2pp margin for RF-implementation
//! differences) — a fail means apr underperforms sklearn on its own hello-world.
//!
//! The same deterministic split is used on both sides, so the comparison is
//! apples-to-apples (apr's `train_test_split` is RNG-based and would NOT match
//! sklearn's, hence the explicit `i % 3` split here).
use aprender::datasets::load_iris;
use aprender::tree::RandomForestClassifier;
use aprender::Matrix;
/// Lowest test accuracy scikit-learn reached across the pinned `random_state`
/// runs on this exact split (see module docs). With 50 test samples, 0.94
/// corresponds to 47 correct predictions.
const SKLEARN_IRIS_FLOOR: f64 = 0.94;
/// Minimum accuracy apr must reach to "match/beat" sklearn: the sklearn floor
/// minus a 2pp allowance for RF-implementation differences. On the 50-sample
/// test set, 2pp is exactly one sample. The test compares with `>=`.
const BEAT_THRESHOLD: f64 = SKLEARN_IRIS_FLOOR - 0.02;
/// Trains apr's `RandomForestClassifier` on the deterministic Iris split
/// (row index divisible by 3 -> test, everything else -> train) and fails if
/// the test accuracy is below `BEAT_THRESHOLD`.
///
/// The measured accuracy is always written to stderr so the benchmark number
/// is visible even when the gate passes.
#[test]
fn beat_sklearn_iris_accuracy() {
    let (features, labels) = load_iris();
    let width = features.n_cols();

    // Decide membership by row index alone: every third row (0, 3, 6, ...) is
    // held out. Iris rows are expected to be grouped by class in blocks of 50,
    // so this draws test samples from all three classes.
    let (test_rows, train_rows): (Vec<usize>, Vec<usize>) =
        (0..features.n_rows()).partition(|&row| row % 3 == 0);

    // Copies the selected rows into a flat row-major buffer plus the matching
    // label list, preserving the original row order.
    let gather = |rows: &[usize]| {
        let mut flat: Vec<f32> = Vec::new();
        let mut targets: Vec<usize> = Vec::new();
        for &row in rows {
            for col in 0..width {
                flat.push(features.get(row, col));
            }
            targets.push(labels[row]);
        }
        (flat, targets)
    };
    let (train_flat, train_labels) = gather(&train_rows);
    let (test_flat, test_labels) = gather(&test_rows);

    // Guard the split shape before building matrices; the pinned sklearn
    // numbers are only comparable on exactly this 100/50 partition.
    let (n_train, n_test) = (train_labels.len(), test_labels.len());
    assert_eq!((n_train, n_test), (100, 50), "deterministic split shape");

    let train_matrix = Matrix::from_vec(n_train, width, train_flat).expect("train dims");
    let test_matrix = Matrix::from_vec(n_test, width, test_flat).expect("test dims");

    // 100 trees, depth capped at 10, fixed seed so the run is reproducible.
    let mut forest = RandomForestClassifier::new(100)
        .with_max_depth(10)
        .with_random_state(42);
    forest.fit(&train_matrix, &train_labels).expect("fit iris");

    // Count predictions that agree with the held-out labels.
    let predictions = forest.predict(&test_matrix);
    let mut hits = 0usize;
    for (guess, truth) in predictions.iter().zip(&test_labels) {
        if guess == truth {
            hits += 1;
        }
    }
    let acc = hits as f64 / n_test as f64;

    // Report the number unconditionally, then enforce the gate.
    eprintln!("BEAT-SKLEARN-IRIS: apr RandomForestClassifier test_acc = {acc:.4} (scikit-learn 0.9560 mean / 0.9400 floor on same split)");
    assert!(
        acc >= BEAT_THRESHOLD,
        "FALSIFY-BEAT-SKLEARN-IRIS: apr RandomForestClassifier test_acc {acc:.4} < {BEAT_THRESHOLD:.2} \
         (scikit-learn baseline 0.94-0.96 on the same deterministic i%3 split) — apr regressed below sklearn"
    );
}