//! xgboost-rust 0.1.0
//!
//! Rust bindings for XGBoost, a gradient boosting library for machine learning. Downloads XGBoost binaries at build time for cross-platform compatibility.
#[cfg(feature = "polars")]
use polars::prelude::*;
use std::time::Instant;
use xgboost_rust::Booster;

#[cfg(feature = "polars")]
use xgboost_rust::BoosterPolarsExt;

/// Entry point. With the `polars` feature enabled, loads a pre-trained
/// demo model (if present) and benchmarks single-row prediction; when the
/// model file is missing it falls back to a synthetic conversion-overhead
/// test. Without the feature it only prints usage instructions.
fn main() {
    #[cfg(not(feature = "polars"))]
    {
        println!("This example requires the 'polars' feature");
        println!("Run with: cargo run --example single_row_performance --features polars");
        return;
    }

    #[cfg(feature = "polars")]
    {
        let model_path = "demo_model.json";

        if std::path::Path::new(model_path).exists() {
            println!("Loading model from {}...", model_path);
            let booster = Booster::load(model_path).expect("Failed to load model");
            run_real_model_test(&booster);
        } else {
            // No trained model on disk — we can still measure the pure
            // DataFrame-to-Vec conversion cost without one.
            println!("Model file not found: {}", model_path);
            println!("Please create it first by running: python train_demo_model.py");
            println!("\nRunning synthetic overhead test instead...\n");
            run_synthetic_test();
        }
    }
}

#[cfg(feature = "polars")]
fn run_real_model_test(booster: &Booster) {
    println!("\n╔════════════════════════════════════════════════════════════╗");
    println!("║     Single-Row Prediction Performance Test (XGBoost)      ║");
    println!("╚════════════════════════════════════════════════════════════╝\n");

    let features = 10;
    let iterations = 100_000;

    // Create single-row test data
    let raw_data: Vec<f32> = (0..features).map(|i| i as f32 / 10.0).collect();

    let df = df! {
        "f0" => [0.0f32],
        "f1" => [0.1f32],
        "f2" => [0.2f32],
        "f3" => [0.3f32],
        "f4" => [0.4f32],
        "f5" => [0.5f32],
        "f6" => [0.6f32],
        "f7" => [0.7f32],
        "f8" => [0.8f32],
        "f9" => [0.9f32],
    }
    .expect("Failed to create DataFrame");

    println!("Test configuration:");
    println!("  - Rows: 1");
    println!("  - Features: {}", features);
    println!("  - Iterations: {}\n", iterations);

    // Warm up
    println!("Warming up...");
    for _ in 0..1000 {
        let _ = booster.predict(&raw_data, 1, features, 0, false);
        let _ = booster.predict_dataframe(&df, 0, false);
    }

    println!("Running benchmarks...\n");

    // Test 1: Raw array prediction
    let start = Instant::now();
    for _ in 0..iterations {
        let _ = booster
            .predict(&raw_data, 1, features, 0, false)
            .expect("Prediction failed");
    }
    let raw_duration = start.elapsed();

    let raw_avg_ns = raw_duration.as_nanos() / iterations;
    let raw_throughput = (iterations as f64 / raw_duration.as_secs_f64()) as u64;

    println!("Raw Array Prediction:");
    println!("  - Average: {} ns/prediction", raw_avg_ns);
    println!("  - Throughput: {} predictions/sec", raw_throughput);
    println!();

    // Test 2: Polars DataFrame prediction
    let start = Instant::now();
    for _ in 0..iterations {
        let _ = booster
            .predict_dataframe(&df, 0, false)
            .expect("Prediction failed");
    }
    let polars_duration = start.elapsed();

    let polars_avg_ns = polars_duration.as_nanos() / iterations;
    let polars_throughput = (iterations as f64 / polars_duration.as_secs_f64()) as u64;

    println!("Polars DataFrame Prediction:");
    println!("  - Average: {} ns/prediction", polars_avg_ns);
    println!("  - Throughput: {} predictions/sec", polars_throughput);
    println!();

    // Analysis
    let overhead_ns = polars_avg_ns.saturating_sub(raw_avg_ns);
    let overhead_pct = if raw_avg_ns > 0 {
        (overhead_ns as f64 / raw_avg_ns as f64 * 100.0)
    } else {
        0.0
    };

    println!("╔════════════════════════════════════════════════════════════╗");
    println!("║                        Analysis                            ║");
    println!("╠════════════════════════════════════════════════════════════╣");
    println!(
        "║  Polars Overhead: {} ns ({:.2}%)",
        overhead_ns, overhead_pct
    );
    println!(
        "║  Throughput loss: {} pred/sec",
        raw_throughput.saturating_sub(polars_throughput)
    );
    println!("╚════════════════════════════════════════════════════════════╝\n");

    println!("Interpretation:");
    if overhead_pct < 5.0 {
        println!("  ✅ Overhead is minimal (< 5%)");
        println!("  ✅ Polars integration is suitable for most use cases");
    } else if overhead_pct < 20.0 {
        println!("  ⚠️  Moderate overhead ({:.1}%)", overhead_pct);
        println!("  ⚠️  Consider raw arrays for very high-throughput scenarios");
    } else {
        println!("  ❌ Significant overhead ({:.1}%)", overhead_pct);
        println!("  ❌ Use raw arrays for single-row predictions in hot paths");
    }

    println!("\nNote: For batch predictions (>100 rows), overhead is much lower.");
}

#[cfg(feature = "polars")]
/// Measures the pure cost of converting a one-row Polars DataFrame into a
/// row-major `Vec<f32>` — the overhead the Polars path pays before any
/// prediction happens. Runs when no trained model file is available.
#[cfg(feature = "polars")]
fn run_synthetic_test() {
    println!("╔════════════════════════════════════════════════════════════╗");
    println!("║         DataFrame Conversion Overhead Test                ║");
    println!("╚════════════════════════════════════════════════════════════╝\n");

    let iterations: u32 = 100_000;

    // Single row, 10 f32 features, values 0.0..=0.9.
    let df = df! {
        "f0" => [0.0f32],
        "f1" => [0.1f32],
        "f2" => [0.2f32],
        "f3" => [0.3f32],
        "f4" => [0.4f32],
        "f5" => [0.5f32],
        "f6" => [0.6f32],
        "f7" => [0.7f32],
        "f8" => [0.8f32],
        "f9" => [0.9f32],
    }
    .expect("Failed to create DataFrame");

    println!("Measuring just the conversion overhead (no model)...\n");

    // Warm up
    for _ in 0..1000 {
        let _: Vec<f32> = convert_dataframe_to_vec(&df);
    }

    let start = Instant::now();
    for _ in 0..iterations {
        // black_box stops the optimizer from eliding the conversion, whose
        // result was previously discarded and could be optimized away,
        // making the measurement meaningless.
        std::hint::black_box(convert_dataframe_to_vec(std::hint::black_box(&df)));
    }
    let duration = start.elapsed();

    // f64 average preserves the sub-nanosecond precision that the previous
    // integer division truncated.
    let avg_ns = duration.as_nanos() as f64 / f64::from(iterations);
    let throughput = (f64::from(iterations) / duration.as_secs_f64()) as u64;

    println!("Conversion Performance:");
    println!("  - Average: {:.1} ns/conversion", avg_ns);
    println!("  - Throughput: {} conversions/sec", throughput);
    println!("\nThis is the pure overhead of using Polars (before any prediction).");
}

#[cfg(feature = "polars")]
/// Flattens a DataFrame into a row-major `Vec<f32>` of length
/// `height * width`.
///
/// Fix: the previous version silently skipped null cells and any column
/// that was not `f32`, which shifted every subsequent value and corrupted
/// the row-major layout. Such cells now become `f32::NAN` — XGBoost's
/// conventional "missing value" marker — so each row keeps exactly
/// `width` entries.
#[cfg(feature = "polars")]
fn convert_dataframe_to_vec(df: &DataFrame) -> Vec<f32> {
    let num_rows = df.height();
    let num_cols = df.width();
    let mut data = Vec::with_capacity(num_rows * num_cols);

    for row_idx in 0..num_rows {
        for col in df.get_columns() {
            let series = col.as_materialized_series();
            // Null cells and non-f32 columns map to NaN instead of being
            // dropped, preserving row alignment.
            let value = series
                .f32()
                .ok()
                .and_then(|ca| ca.get(row_idx))
                .unwrap_or(f32::NAN);
            data.push(value);
        }
    }

    data
}