#[cfg(feature = "polars")]
use polars::prelude::*;
use std::time::Instant;
use xgboost_rust::Booster;
#[cfg(feature = "polars")]
use xgboost_rust::BoosterPolarsExt;
fn main() {
    #[cfg(not(feature = "polars"))]
    {
        // Built without Polars support: nothing to benchmark.
        println!("This example requires the 'polars' feature");
        println!("Run with: cargo run --example single_row_performance --features polars");
        return;
    }
    #[cfg(feature = "polars")]
    {
        const MODEL_PATH: &str = "demo_model.json";
        if std::path::Path::new(MODEL_PATH).exists() {
            println!("Loading model from {}...", MODEL_PATH);
            let booster = Booster::load(MODEL_PATH).expect("Failed to load model");
            run_real_model_test(&booster);
        } else {
            // No trained model on disk: fall back to measuring only the
            // DataFrame conversion overhead, which needs no model.
            println!("Model file not found: {}", MODEL_PATH);
            println!("Please create it first by running: python train_demo_model.py");
            println!("\nRunning synthetic overhead test instead...\n");
            run_synthetic_test();
        }
    }
}
/// Benchmarks single-row prediction latency of the raw-array API against the
/// Polars DataFrame API on a loaded model, then prints the per-call overhead
/// the DataFrame path adds and an interpretation of the result.
#[cfg(feature = "polars")]
fn run_real_model_test(booster: &Booster) {
    // Times `iterations` calls of `op` and returns
    // (average nanoseconds per call, calls per second).
    // Extracted because the raw-array and DataFrame benchmarks below
    // previously duplicated this timing arithmetic verbatim.
    fn measure<F: FnMut()>(iterations: u32, mut op: F) -> (u128, u64) {
        let start = Instant::now();
        for _ in 0..iterations {
            op();
        }
        let elapsed = start.elapsed();
        let avg_ns = elapsed.as_nanos() / iterations as u128;
        let throughput = (iterations as f64 / elapsed.as_secs_f64()) as u64;
        (avg_ns, throughput)
    }

    println!("\n╔════════════════════════════════════════════════════════════╗");
    println!("║ Single-Row Prediction Performance Test (XGBoost) ║");
    println!("╚════════════════════════════════════════════════════════════╝\n");
    let features = 10;
    let iterations: u32 = 100_000;
    // Same values as the DataFrame below: feature i holds i / 10.
    let raw_data: Vec<f32> = (0..features).map(|i| i as f32 / 10.0).collect();
    let df = df! {
        "f0" => [0.0f32],
        "f1" => [0.1f32],
        "f2" => [0.2f32],
        "f3" => [0.3f32],
        "f4" => [0.4f32],
        "f5" => [0.5f32],
        "f6" => [0.6f32],
        "f7" => [0.7f32],
        "f8" => [0.8f32],
        "f9" => [0.9f32],
    }
    .expect("Failed to create DataFrame");
    println!("Test configuration:");
    println!(" - Rows: 1");
    println!(" - Features: {}", features);
    println!(" - Iterations: {}\n", iterations);
    // Warm both paths up before timing so one-time initialization costs
    // (caches, lazy allocations) are not attributed to either benchmark.
    println!("Warming up...");
    for _ in 0..1000 {
        let _ = booster.predict(&raw_data, 1, features, 0, false);
        let _ = booster.predict_dataframe(&df, 0, false);
    }
    println!("Running benchmarks...\n");
    let (raw_avg_ns, raw_throughput) = measure(iterations, || {
        booster
            .predict(&raw_data, 1, features, 0, false)
            .expect("Prediction failed");
    });
    println!("Raw Array Prediction:");
    println!(" - Average: {} ns/prediction", raw_avg_ns);
    println!(" - Throughput: {} predictions/sec", raw_throughput);
    println!();
    let (polars_avg_ns, polars_throughput) = measure(iterations, || {
        booster
            .predict_dataframe(&df, 0, false)
            .expect("Prediction failed");
    });
    println!("Polars DataFrame Prediction:");
    println!(" - Average: {} ns/prediction", polars_avg_ns);
    println!(" - Throughput: {} predictions/sec", polars_throughput);
    println!();
    // saturating_sub: the DataFrame path can in principle measure faster
    // due to timing noise; clamp the overhead at zero instead of wrapping.
    let overhead_ns = polars_avg_ns.saturating_sub(raw_avg_ns);
    let overhead_pct = if raw_avg_ns > 0 {
        overhead_ns as f64 / raw_avg_ns as f64 * 100.0
    } else {
        0.0
    };
    println!("╔════════════════════════════════════════════════════════════╗");
    println!("║ Analysis ║");
    println!("╠════════════════════════════════════════════════════════════╣");
    println!(
        "║ Polars Overhead: {} ns ({:.2}%)",
        overhead_ns, overhead_pct
    );
    println!(
        "║ Throughput loss: {} pred/sec",
        raw_throughput.saturating_sub(polars_throughput)
    );
    println!("╚════════════════════════════════════════════════════════════╝\n");
    println!("Interpretation:");
    if overhead_pct < 5.0 {
        println!(" ✅ Overhead is minimal (< 5%)");
        println!(" ✅ Polars integration is suitable for most use cases");
    } else if overhead_pct < 20.0 {
        println!(" ⚠️ Moderate overhead ({:.1}%)", overhead_pct);
        println!(" ⚠️ Consider raw arrays for very high-throughput scenarios");
    } else {
        println!(" ❌ Significant overhead ({:.1}%)", overhead_pct);
        println!(" ❌ Use raw arrays for single-row predictions in hot paths");
    }
    println!("\nNote: For batch predictions (>100 rows), overhead is much lower.");
}
/// Measures only the cost of flattening a one-row DataFrame into a
/// `Vec<f32>` — the pure Polars-side overhead, with no model involved.
#[cfg(feature = "polars")]
fn run_synthetic_test() {
    println!("╔════════════════════════════════════════════════════════════╗");
    println!("║ DataFrame Conversion Overhead Test ║");
    println!("╚════════════════════════════════════════════════════════════╝\n");
    let iterations: u128 = 100_000;
    let df = df! {
        "f0" => [0.0f32],
        "f1" => [0.1f32],
        "f2" => [0.2f32],
        "f3" => [0.3f32],
        "f4" => [0.4f32],
        "f5" => [0.5f32],
        "f6" => [0.6f32],
        "f7" => [0.7f32],
        "f8" => [0.8f32],
        "f9" => [0.9f32],
    }
    .expect("Failed to create DataFrame");
    println!("Measuring just the conversion overhead (no model)...\n");
    // Warm-up pass so first-touch costs do not skew the timed loop.
    (0..1000).for_each(|_| {
        let _: Vec<f32> = convert_dataframe_to_vec(&df);
    });
    let timer = Instant::now();
    (0..iterations).for_each(|_| {
        let _: Vec<f32> = convert_dataframe_to_vec(&df);
    });
    let elapsed = timer.elapsed();
    let avg_ns = elapsed.as_nanos() / iterations;
    let throughput = (iterations as f64 / elapsed.as_secs_f64()) as u64;
    println!("Conversion Performance:");
    println!(" - Average: {} ns/conversion", avg_ns);
    println!(" - Throughput: {} conversions/sec", throughput);
    println!("\nThis is the pure overhead of using Polars (before any prediction).");
}
/// Flattens `df` into a row-major `Vec<f32>`, keeping only `f32` columns.
///
/// Non-`f32` columns and null cells are silently skipped, matching the
/// behavior this benchmark has always measured.
#[cfg(feature = "polars")]
fn convert_dataframe_to_vec(df: &DataFrame) -> Vec<f32> {
    let num_rows = df.height();
    // Downcast each column to its f32 chunked array ONCE, instead of
    // re-running the dtype check on every cell of every row (the downcast
    // is loop-invariant; the old code paid it num_rows * num_cols times).
    let f32_cols: Vec<_> = df
        .get_columns()
        .iter()
        .filter_map(|col| col.as_materialized_series().f32().ok())
        .collect();
    let mut data = Vec::with_capacity(num_rows * f32_cols.len());
    for row_idx in 0..num_rows {
        for ca in &f32_cols {
            // NOTE(review): a null cell is dropped rather than padded, which
            // would misalign the row-major layout; the benchmark's data is
            // fully non-null, so this never triggers here.
            if let Some(val) = ca.get(row_idx) {
                data.push(val);
            }
        }
    }
    data
}