#![allow(clippy::disallowed_methods)]
use aprender::classification::{GaussianNB, LinearSVM};
use aprender::primitives::Matrix;
use aprender::tree::GradientBoostingClassifier;
/// Walks through gradient boosting on a small two-class Iris subset.
///
/// The output is organised in six parts followed by a summary:
/// 1. a baseline `GradientBoostingClassifier` (50 estimators, learning rate 0.1, depth 3),
/// 2. a sweep over the number of estimators,
/// 3. a sweep over the learning rate,
/// 4. a sweep over the maximum tree depth,
/// 5. a comparison with `GaussianNB` and `LinearSVM`,
/// 6. the baseline model's predicted probabilities for the first test samples.
///
/// # Errors
///
/// Returns the first error raised while loading the data or by any `fit`,
/// `predict` or `predict_proba` call.
// The body is a long, linear sequence of prints; splitting it would not make it clearer.
#[allow(clippy::too_many_lines)]
fn main() -> Result<(), Box<dyn std::error::Error>> {
    println!("=== Gradient Boosting: Iris Binary Classification ===\n");
    let (x_train, y_train, x_test, y_test) = load_binary_iris_data()?;
    let train_rows = x_train.n_rows();
    let test_rows = x_test.n_rows();
    println!(
        "Dataset: {} training samples, {} test samples",
        train_rows, test_rows
    );
    println!("Classes: 0=Setosa, 1=Versicolor\n");

    // Part 1: the baseline model. It is kept alive because Part 6 queries it again.
    println!("=== Part 1: Basic Gradient Boosting ===\n");
    let mut gbm = GradientBoostingClassifier::new()
        .with_n_estimators(50)
        .with_learning_rate(0.1)
        .with_max_depth(3);
    gbm.fit(&x_train, &y_train)?;
    let predictions = gbm.predict(&x_test)?;
    let accuracy = compute_accuracy(&predictions, &y_test);
    println!("Gradient Boosting Accuracy: {:.1}%", accuracy * 100.0);
    println!("Number of trees trained: {}\n", gbm.n_estimators());

    // Shared by the three sweeps below: train a fresh booster with the given
    // settings on the training split and return its accuracy on the test split.
    let score_gbm = |n_estimators,
                     learning_rate,
                     max_depth|
     -> Result<f32, Box<dyn std::error::Error>> {
        let mut model = GradientBoostingClassifier::new()
            .with_n_estimators(n_estimators)
            .with_learning_rate(learning_rate)
            .with_max_depth(max_depth);
        model.fit(&x_train, &y_train)?;
        let predicted = model.predict(&x_test)?;
        Ok(compute_accuracy(&predicted, &y_test))
    };

    // Part 2: vary only the number of estimators.
    println!("=== Part 2: Effect of Number of Estimators ===\n");
    for &n_est in &[10, 30, 50, 100] {
        let acc = score_gbm(n_est, 0.1, 3)?;
        println!("n_estimators={:3}: Accuracy = {:.1}%", n_est, acc * 100.0);
    }
    println!();

    // Part 3: vary only the learning rate.
    println!("=== Part 3: Effect of Learning Rate ===\n");
    for &lr in &[0.01, 0.05, 0.1, 0.5] {
        let acc = score_gbm(50, lr, 3)?;
        println!("learning_rate={:.2}: Accuracy = {:.1}%", lr, acc * 100.0);
    }
    println!();

    // Part 4: vary only the maximum tree depth.
    println!("=== Part 4: Effect of Tree Depth ===\n");
    for &depth in &[1, 2, 3, 5] {
        let acc = score_gbm(50, 0.1, depth)?;
        println!("max_depth={}: Accuracy = {:.1}%", depth, acc * 100.0);
    }
    println!();

    // Part 5: two other classifiers trained and scored on the same split.
    println!("=== Part 5: Comparison with Other Classifiers ===\n");
    let mut naive_bayes = GaussianNB::new();
    naive_bayes.fit(&x_train, &y_train)?;
    let nb_accuracy = {
        let nb_predictions = naive_bayes.predict(&x_test)?;
        compute_accuracy(&nb_predictions, &y_test)
    };
    let mut linear_svm = LinearSVM::new()
        .with_c(1.0)
        .with_max_iter(1000)
        .with_learning_rate(0.1);
    linear_svm.fit(&x_train, &y_train)?;
    let svm_accuracy = {
        let svm_predictions = linear_svm.predict(&x_test)?;
        compute_accuracy(&svm_predictions, &y_test)
    };
    println!("Classifier Accuracy");
    println!("──────────────────────────────────");
    println!("Gradient Boosting {:.1}%", accuracy * 100.0);
    println!("Naive Bayes {:.1}%", nb_accuracy * 100.0);
    println!("Linear SVM {:.1}%\n", svm_accuracy * 100.0);

    // Part 6: class probabilities from the baseline model, at most five rows.
    println!("=== Part 6: Probabilistic Predictions ===\n");
    let probas = gbm.predict_proba(&x_test)?;
    println!("Sample Predicted P(Setosa) P(Versicolor)");
    println!("────────────────────────────────────────────");
    let shown = x_test.n_rows().min(5);
    for i in 0..shown {
        let species = match predictions[i] {
            0 => "Setosa ",
            _ => "Versicolor",
        };
        let (p0, p1) = (probas[i][0], probas[i][1]);
        println!(" {i} {species} {p0:.3} {p1:.3}");
    }
    println!();

    println!("=== Summary ===\n");
    println!("Gradient Boosting Characteristics:");
    println!("✓ Sequential ensemble of weak learners");
    println!("✓ Learns from residual errors");
    println!("✓ Lower learning rate + more trees = better generalization");
    println!("✓ Shallow trees prevent overfitting");
    println!("✓ State-of-the-art for tabular data\n");
    println!("Hyperparameter Guidelines:");
    println!("- n_estimators: 50-500 (more trees = better fit, slower)");
    println!("- learning_rate: 0.01-0.3 (lower = better gen., needs more trees)");
    println!("- max_depth: 3-8 (shallow trees prevent overfitting)\n");
    println!("GBM vs Naive Bayes vs SVM:");
    println!(
        "- Accuracy: GBM {:.1}% vs NB {:.1}% vs SVM {:.1}%",
        accuracy * 100.0,
        nb_accuracy * 100.0,
        svm_accuracy * 100.0
    );
    println!("- Training: GBM iterative vs NB instant vs SVM iterative");
    println!("- Prediction: All O(trees·depth) or O(features)");
    println!("- Strength: GBM complex patterns vs NB probabilistic vs SVM margin");
    Ok(())
}
/// Builds the hard-coded two-class Iris subset used by this example.
///
/// Returns `(x_train, y_train, x_test, y_test)`. The training matrix is created
/// with dimensions 14 and 4 from 56 values, the test matrix with dimensions 6
/// and 4 from 24 values (presumably rows × columns, i.e. one sample per row).
/// Labels are `0` for the first half of each split and `1` for the second half.
///
/// # Errors
///
/// Forwards the `&'static str` error returned by `Matrix::from_vec`.
// The four-element tuple is the whole point of the helper, so the lint is silenced.
#[allow(clippy::type_complexity)]
fn load_binary_iris_data(
) -> Result<(Matrix<f32>, Vec<usize>, Matrix<f32>, Vec<usize>), &'static str> {
    // Second dimension shared by both matrices.
    let n_features = 4;

    // One sample per line: seven label-0 samples followed by seven label-1 samples.
    let train_values = vec![
        5.1, 3.5, 1.4, 0.2,
        4.9, 3.0, 1.4, 0.2,
        4.7, 3.2, 1.3, 0.2,
        4.6, 3.1, 1.5, 0.2,
        5.0, 3.6, 1.4, 0.2,
        5.4, 3.9, 1.7, 0.4,
        4.6, 3.4, 1.4, 0.3,
        7.0, 3.2, 4.7, 1.4,
        6.4, 3.2, 4.5, 1.5,
        6.9, 3.1, 4.9, 1.5,
        5.5, 2.3, 4.0, 1.3,
        6.5, 2.8, 4.6, 1.5,
        5.7, 2.8, 4.5, 1.3,
        6.3, 3.3, 4.7, 1.6,
    ];
    let x_train = Matrix::from_vec(14, n_features, train_values)?;
    // Seven zeros, then padded with ones up to fourteen labels.
    let mut y_train = vec![0; 7];
    y_train.resize(14, 1);

    // One sample per line: three label-0 samples followed by three label-1 samples.
    let test_values = vec![
        5.0, 3.3, 1.4, 0.2,
        4.4, 2.9, 1.4, 0.2,
        4.9, 3.1, 1.5, 0.1,
        4.9, 2.4, 3.3, 1.0,
        6.6, 2.9, 4.6, 1.3,
        5.2, 2.7, 3.9, 1.4,
    ];
    let x_test = Matrix::from_vec(6, n_features, test_values)?;
    // Three zeros, then padded with ones up to six labels.
    let mut y_test = vec![0; 3];
    y_test.resize(6, 1);

    Ok((x_train, y_train, x_test, y_test))
}
/// Returns the fraction of `true_labels` that are matched by the prediction at
/// the same index.
///
/// The denominator is always `true_labels.len()`. Pairs are formed with `zip`,
/// so if `predictions` is shorter the unmatched labels count as misses, and if
/// it is longer the surplus predictions are ignored.
///
/// An empty `true_labels` slice yields `0.0` rather than the `NaN` that the
/// `0.0 / 0.0` division would otherwise produce.
fn compute_accuracy(predictions: &[usize], true_labels: &[usize]) -> f32 {
    // Guard the division below: with no labels there is nothing to score.
    if true_labels.is_empty() {
        return 0.0;
    }
    let correct = predictions
        .iter()
        .zip(true_labels)
        .filter(|(predicted, actual)| predicted == actual)
        .count();
    // Sample counts in this example are tiny, so the lossy casts are exact.
    correct as f32 / true_labels.len() as f32
}