use aprender::primitives::Matrix;
use aprender::tree::GradientBoostingClassifier;
use proptest::prelude::*;
proptest! {
#![proptest_config(ProptestConfig::with_cases(64))]
/// Property FALSIFY-GBM-001: after fitting on binary labels, every prediction
/// on the training matrix is either 0 or 1.
///
/// Features (in [-5, 5]) and labels (in {0, 1}) are drawn from one seeded LCG
/// stream, so a failing case is fully reproducible from `n`, `d` and `seed`.
#[test]
fn prop_predictions_binary(
    n in 30usize..50,
    d in 2usize..4,
    seed in 0u64..10000,
) {
    // One LCG stream for everything: each draw advances the state once and
    // exposes its upper 31 bits.
    let mut state = seed;
    let mut draw = || -> u64 {
        state = state.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
        state >> 33
    };

    // Row-major feature buffer: n * d values, each mapped from [0, 1] to [-5, 5].
    let feature_count = n * d;
    let mut features: Vec<f32> = Vec::with_capacity(feature_count);
    for _ in 0..feature_count {
        let unit = draw() as f32 / (u32::MAX >> 1) as f32;
        features.push(unit * 10.0 - 5.0);
    }

    // Labels continue the same stream, reduced to a single bit.
    let labels: Vec<usize> = (0..n).map(|_| draw() as usize % 2).collect();

    let x = Matrix::from_vec(n, d, features).expect("valid matrix dimensions");
    let mut model = GradientBoostingClassifier::new()
        .with_n_estimators(10)
        .with_learning_rate(0.1)
        .with_max_depth(3);
    model.fit(&x, &labels).expect("fit succeeds");

    let predictions = model.predict(&x).expect("predict succeeds");
    for (index, class) in predictions.iter().copied().enumerate() {
        let is_binary = class == 0 || class == 1;
        prop_assert!(
            is_binary,
            "FALSIFY-GBM-001: prediction[{}]={}, expected 0 or 1", index, class
        );
    }
}
/// Property FALSIFY-GBM-002: calling `predict` twice on the same fitted model
/// and the same matrix yields equal results.
///
/// The training data comes from a seeded LCG: `n * d` features in [-5, 5]
/// followed by `n` labels in {0, 1}.
#[test]
fn prop_predictions_deterministic(
    n in 30usize..50,
    d in 2usize..4,
    seed in 0u64..10000,
) {
    // Seeded LCG; each call steps the state and yields its upper 31 bits.
    let mut state = seed;
    let mut advance = || -> u64 {
        state = state.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
        state >> 33
    };

    // Largest value a draw can take, used to normalise draws into [0, 1].
    let scale = (u32::MAX >> 1) as f32;
    let features: Vec<f32> = std::iter::repeat_with(|| advance() as f32 / scale * 10.0 - 5.0)
        .take(n * d)
        .collect();
    // Labels are drawn after the features, from the same stream.
    let labels: Vec<usize> = std::iter::repeat_with(|| advance() as usize % 2)
        .take(n)
        .collect();

    let x = Matrix::from_vec(n, d, features).expect("valid matrix dimensions");
    let mut model = GradientBoostingClassifier::new()
        .with_n_estimators(10)
        .with_learning_rate(0.1)
        .with_max_depth(3);
    model.fit(&x, &labels).expect("fit succeeds");

    // Two independent prediction passes over identical input.
    let first_pass = model.predict(&x).expect("predict succeeds (1st)");
    let second_pass = model.predict(&x).expect("predict succeeds (2nd)");
    prop_assert!(
        first_pass == second_pass,
        "FALSIFY-GBM-002: predictions differ between calls"
    );
}
/// Property FALSIFY-GBM-003: on two well-separated 2-D clusters the fitted
/// model classifies more than 80% of its own training samples correctly.
///
/// Class 0 is centred at (-3, -3) and class 1 at (3, 3), with `n_per_class`
/// samples each. Every coordinate is perturbed by seeded noise in
/// [-0.5, 0.5], so the clusters never overlap.
#[test]
fn prop_separable_data_high_accuracy(
    n_per_class in 15usize..25,
    seed in 0u64..10000,
) {
    let n = 2 * n_per_class;

    // Seeded LCG noise source. `rng >> 33` keeps 31 bits, so it has to be
    // normalised by `u32::MAX >> 1` (as the other properties in this file do)
    // to cover [0, 1]; dividing by `u32::MAX` would only reach 0.5 and make
    // the noise one-sided after centring.
    let mut rng = seed;
    let mut next_noise = || -> f32 {
        rng = rng.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
        let unit = (rng >> 33) as f32 / (u32::MAX >> 1) as f32;
        unit - 0.5
    };

    // Row-major (x, y) samples: all of class 0 first, then all of class 1.
    let mut data: Vec<f32> = Vec::with_capacity(n * 2);
    let mut labels: Vec<usize> = Vec::with_capacity(n);
    for (center, class) in [(-3.0_f32, 0_usize), (3.0, 1)] {
        for _ in 0..n_per_class {
            data.push(center + next_noise());
            data.push(center + next_noise());
            labels.push(class);
        }
    }

    let x = Matrix::from_vec(n, 2, data).expect("valid matrix dimensions");
    let mut model = GradientBoostingClassifier::new()
        .with_n_estimators(10)
        .with_learning_rate(0.1)
        .with_max_depth(3);
    model.fit(&x, &labels).expect("fit succeeds");

    // Training-set accuracy: fraction of predictions equal to their label.
    let predictions = model.predict(&x).expect("predict succeeds");
    let correct: usize = predictions
        .iter()
        .zip(labels.iter())
        .filter(|&(&predicted, &expected)| predicted == expected)
        .count();
    let accuracy = correct as f32 / n as f32;
    prop_assert!(
        accuracy > 0.8,
        "FALSIFY-GBM-003: accuracy={} ({}/{}), expected > 0.8 with well-separated clusters",
        accuracy, correct, n
    );
}
/// Property FALSIFY-GBM-004: `predict` returns exactly one prediction per row
/// of the input matrix.
///
/// The model is fitted on `n` rows of `d` seeded pseudo-random features in
/// [-5, 5] with seeded labels in {0, 1}, then asked to predict those same rows.
#[test]
fn prop_prediction_count_matches_samples(
    n in 30usize..50,
    d in 2usize..4,
    seed in 0u64..10000,
) {
    // LCG shared by features and labels; yields the upper 31 bits per step.
    let mut lcg = seed;
    let mut next_bits = || -> u64 {
        lcg = lcg.wrapping_mul(6_364_136_223_846_793_005).wrapping_add(1);
        lcg >> 33
    };

    // Features first (n * d draws), then labels (n draws).
    let mut features: Vec<f32> = Vec::with_capacity(n * d);
    features.extend((0..n * d).map(|_| {
        let fraction = next_bits() as f32 / (u32::MAX >> 1) as f32;
        fraction * 10.0 - 5.0
    }));
    let mut labels: Vec<usize> = Vec::with_capacity(n);
    labels.extend((0..n).map(|_| next_bits() as usize % 2));

    let x = Matrix::from_vec(n, d, features).expect("valid matrix dimensions");
    let mut model = GradientBoostingClassifier::new()
        .with_n_estimators(10)
        .with_learning_rate(0.1)
        .with_max_depth(3);
    model.fit(&x, &labels).expect("fit succeeds");

    let predictions = model.predict(&x).expect("predict succeeds");
    let produced = predictions.len();
    prop_assert!(
        produced == n,
        "FALSIFY-GBM-004: predicted {} samples, expected {} (d={})",
        produced, n, d
    );
}
}