#[cfg(feature = "polars")]
use polars::prelude::*;
use std::time::Instant;
/// Entry point of the conversion benchmark example.
///
/// With the `polars` feature enabled this prints a banner, warns when the
/// binary was built with debug assertions, and runs the benchmarks.
/// Without the feature it only prints instructions on how to run the example.
fn main() {
    // Exactly one of the two cfg-gated blocks below survives compilation.
    #[cfg(feature = "polars")]
    {
        let banner = [
            "╔════════════════════════════════════════════════════════════╗",
            "║ XGBoost Polars Conversion Benchmark ║",
            "╚════════════════════════════════════════════════════════════╝\n",
        ];
        for line in banner {
            println!("{}", line);
        }

        // Timings from an unoptimized build are not representative.
        if cfg!(debug_assertions) {
            println!("⚠️ WARNING: Running in DEBUG mode!");
            println!(" Use --release for accurate benchmarks\n");
        }

        run_benchmarks();
    }

    #[cfg(not(feature = "polars"))]
    {
        println!("This example requires the 'polars' feature");
        println!("Run with: cargo run --example conversion_benchmark --features polars --release");
    }
}
/// Times `convert_optimized` over several DataFrame shapes and prints the
/// average latency (ns per conversion) and throughput for each shape.
///
/// Both the input reference and the conversion result are passed through
/// `std::hint::black_box`, so the optimizer cannot hoist the conversion out of
/// the timing loop or discard work whose result is otherwise unused.
#[cfg(feature = "polars")]
fn run_benchmarks() {
    // (rows, columns, label) for every benchmarked shape.
    let test_sizes = [
        (1, 10, "Single row"),
        (100, 10, "100 rows"),
        (1000, 10, "1000 rows"),
        (10000, 10, "10k rows"),
    ];

    for (rows, cols, name) in test_sizes {
        println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");
        println!("Test: {}", name);
        println!("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━");

        let df = create_test_df(rows, cols);
        // The single-row case is so cheap that it gets more iterations.
        // u128 matches the type returned by `Duration::as_nanos`.
        let iterations: u128 = if rows == 1 { 100_000 } else { 10_000 };

        println!("\n✓ Optimized implementation (with .cast()):");
        let start = Instant::now();
        for _ in 0..iterations {
            // black_box keeps the measured work observable to the optimizer.
            std::hint::black_box(convert_optimized(std::hint::black_box(&df)));
        }
        let optimized_time = start.elapsed();

        let optimized_ns = optimized_time.as_nanos() / iterations;
        println!(" Time: {} ns/conversion", optimized_ns);
        println!(
            " Throughput: {:.1}M conversions/sec",
            iterations as f64 / optimized_time.as_secs_f64() / 1_000_000.0
        );
        println!();
    }
}
/// Builds a `rows` x `cols` DataFrame of `f32` columns named `f0`, `f1`, ...
///
/// The value at row `j` of column `i` is `(j * i) as f32`.
///
/// # Panics
///
/// Panics if `DataFrame::new` returns an error for the generated columns.
#[cfg(feature = "polars")]
fn create_test_df(rows: usize, cols: usize) -> DataFrame {
    DataFrame::new(
        (0..cols)
            .map(|col| {
                let values: Vec<f32> = (0..rows).map(|row| (row * col) as f32).collect();
                Series::new(format!("f{}", col).into(), values).into()
            })
            .collect(),
    )
    .unwrap()
}
/// Flattens `df` into a row-major `Vec<f32>` of length `height * width`.
///
/// Every column is cast to `Float32`; the value at row `r`, column `c` is
/// written to index `r * width + c` of the returned buffer. Null entries are
/// emitted as `f32::NAN` (the value XGBoost treats as missing by default)
/// instead of aborting the conversion.
///
/// # Panics
///
/// Panics if a column cannot be cast to `Float32`.
#[cfg(feature = "polars")]
fn convert_optimized(df: &DataFrame) -> Vec<f32> {
    let num_rows = df.height();
    let num_cols = df.width();
    let mut data = vec![0.0f32; num_rows * num_cols];

    for (col_idx, column) in df.get_columns().iter().enumerate() {
        let f32_series = column
            .as_materialized_series()
            .cast(&DataType::Float32)
            .expect("column must be castable to Float32");
        let ca = f32_series
            .f32()
            .expect("series was just cast to Float32");

        // Slots col_idx, col_idx + num_cols, ... are this column's cells in
        // the row-major buffer. num_cols > 0 here because the loop body only
        // runs when the frame has at least one column.
        let column_slots = data.iter_mut().skip(col_idx).step_by(num_cols);
        for (slot, opt_val) in column_slots.zip(ca.iter()) {
            *slot = opt_val.unwrap_or(f32::NAN);
        }
    }

    data
}