use polars::prelude::*;
use treeboost::{
auto_train, auto_train_quick, auto_train_thorough, auto_train_with_mode, AutoBuilder,
AutoConfig, BoostingMode, TuningLevel,
};
/// Builds a reproducible synthetic frame with three numeric features and a linear target.
///
/// Columns:
/// - `x1`: the row index divided by 10.
/// - `x2`: `rng.f64() * 100.0`.
/// - `x3`: `rng.f64() * 50.0`.
/// - `target`: `2.0 * x1 + 0.5 * x2 - 0.1 * x3 + rng.f64() * 5.0 - 2.5`.
///
/// The generator is seeded with 42, so the same `n_rows` always produces the same frame.
///
/// # Panics
/// Panics if `df!` fails to assemble the columns.
fn create_linear_regression_dataset(n_rows: usize) -> DataFrame {
    let mut rng = fastrand::Rng::with_seed(42);

    let x1: Vec<f64> = (0..n_rows).map(|row| row as f64 / 10.0).collect();

    // Draw order is part of the fixture: every x2 value first, then every x3 value,
    // and only afterwards the per-row noise terms.
    let x2: Vec<f64> = std::iter::repeat_with(|| rng.f64() * 100.0)
        .take(n_rows)
        .collect();
    let x3: Vec<f64> = std::iter::repeat_with(|| rng.f64() * 50.0)
        .take(n_rows)
        .collect();

    let mut y: Vec<f64> = Vec::with_capacity(n_rows);
    for ((&a, &b), &c) in x1.iter().zip(x2.iter()).zip(x3.iter()) {
        let signal = 2.0 * a + 0.5 * b - 0.1 * c;
        y.push(signal + rng.f64() * 5.0 - 2.5);
    }

    df!(
        "x1" => x1,
        "x2" => x2,
        "x3" => x3,
        "target" => y
    )
    .unwrap()
}
/// Builds a reproducible synthetic frame whose target is a non-linear function of two features.
///
/// `x1` and `x2` are each `rng.f64() * 10.0`; the target is
/// `x1.sin() * x2 + rng.f64() * 2.0 - 1.0`. The generator is seeded with 123.
///
/// # Panics
/// Panics if `df!` fails to assemble the columns.
fn create_nonlinear_regression_dataset(n_rows: usize) -> DataFrame {
    let mut rng = fastrand::Rng::with_seed(123);

    // Fill x1 completely before x2 so the sequence of random draws stays fixed.
    let mut x1: Vec<f64> = Vec::with_capacity(n_rows);
    for _ in 0..n_rows {
        x1.push(rng.f64() * 10.0);
    }
    let mut x2: Vec<f64> = Vec::with_capacity(n_rows);
    for _ in 0..n_rows {
        x2.push(rng.f64() * 10.0);
    }

    let mut y: Vec<f64> = Vec::with_capacity(n_rows);
    for (&a, &b) in x1.iter().zip(x2.iter()) {
        let interaction = a.sin() * b;
        y.push(interaction + rng.f64() * 2.0 - 1.0);
    }

    df!(
        "x1" => x1,
        "x2" => x2,
        "target" => y
    )
    .unwrap()
}
/// Builds a reproducible frame with one numeric column, one string column and a target.
///
/// The string column picks one of `"A"`, `"B"`, `"C"`, `"D"` per row. The target is
/// `numeric * 0.5 + effect(category) + rng.f64() * 5.0`, where the effect is
/// 10/20/30/40 for A/B/C/D. The generator is seeded with 456.
///
/// # Panics
/// Panics if `df!` fails to assemble the columns.
fn create_mixed_type_dataset(n_rows: usize) -> DataFrame {
    const CATEGORIES: [&str; 4] = ["A", "B", "C", "D"];

    /// Additive target contribution of a category label; unknown labels contribute nothing.
    fn category_effect(label: &str) -> f64 {
        match label {
            "A" => 10.0,
            "B" => 20.0,
            "C" => 30.0,
            "D" => 40.0,
            _ => 0.0,
        }
    }

    let mut rng = fastrand::Rng::with_seed(456);

    // Draw order is fixed: all numeric values, then all category picks, then the noise.
    let numeric: Vec<f64> = std::iter::repeat_with(|| rng.f64() * 100.0)
        .take(n_rows)
        .collect();
    let categorical: Vec<&str> =
        std::iter::repeat_with(|| CATEGORIES[rng.usize(..CATEGORIES.len())])
            .take(n_rows)
            .collect();

    let mut y: Vec<f64> = Vec::with_capacity(n_rows);
    for (&value, &label) in numeric.iter().zip(categorical.iter()) {
        y.push(value * 0.5 + category_effect(label) + rng.f64() * 5.0);
    }

    df!(
        "numeric_feature" => numeric,
        "categorical_feature" => categorical,
        "target" => y
    )
    .unwrap()
}
/// Builds a reproducible frame containing a sequential `id` column next to one real feature.
///
/// `id` is simply the row index, `feature` is `rng.f64() * 100.0`, and the target is
/// `feature * 2.0 + rng.f64() * 10.0`. The generator is seeded with 789.
///
/// # Panics
/// Panics if `df!` fails to assemble the columns.
fn create_dataset_with_id_column(n_rows: usize) -> DataFrame {
    let mut rng = fastrand::Rng::with_seed(789);

    let ids: Vec<i64> = (0..n_rows).map(|row| row as i64).collect();

    // All feature values are drawn before any of the target noise.
    let feature: Vec<f64> = std::iter::repeat_with(|| rng.f64() * 100.0)
        .take(n_rows)
        .collect();

    let mut target: Vec<f64> = Vec::with_capacity(feature.len());
    for &value in feature.iter() {
        target.push(value * 2.0 + rng.f64() * 10.0);
    }

    df!(
        "id" => ids,
        "feature" => feature,
        "target" => target
    )
    .unwrap()
}
/// Default `auto_train` on the linear fixture: one prediction per row and a summary
/// that carries the expected report headers.
#[test]
fn test_auto_train_linear_data() {
    let frame = create_linear_regression_dataset(500);
    let model = auto_train(&frame, "target").expect("Training should succeed");

    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 500);

    // The chosen mode is logged for inspection only; it is not asserted.
    println!("Selected mode: {:?}", model.mode());

    let report = model.summary();
    assert!(report.contains("TreeBoost Pipeline Report"));
    assert!(report.contains("MODE SELECTION"));
}
/// Default `auto_train` on the non-linear fixture must train and predict one value per row.
#[test]
fn test_auto_train_nonlinear_data() {
    let frame = create_nonlinear_regression_dataset(500);
    let model = auto_train(&frame, "target").expect("Training should succeed");

    // Logged for inspection only; the test does not pin which mode is selected.
    println!("Selected mode for non-linear data: {:?}", model.mode());

    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 500);
}
/// `auto_train_quick` must train, predict one value per row, and report a build time.
#[test]
fn test_auto_train_quick() {
    let frame = create_linear_regression_dataset(300);
    let model = auto_train_quick(&frame, "target").expect("Quick training should succeed");

    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 300);

    // The build time is printed, not asserted.
    println!("Quick build time: {:?}", model.build_time());
}
/// `auto_train_thorough` must train and predict one value per row; any LTT tuning
/// result that is present is logged.
#[test]
fn test_auto_train_thorough() {
    let frame = create_linear_regression_dataset(200);
    let model =
        auto_train_thorough(&frame, "target").expect("Thorough training should succeed");

    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 200);

    // Tuning information is optional; its absence is not treated as a failure here.
    if let Some(tuning) = model.ltt_tuning() {
        println!("LTT tuning R²: {:.4}", tuning.linear_r2);
    }
}
/// Forcing `BoostingMode::LinearThenTree` must be reflected by `mode()` on the trained
/// model, and the model must still predict one value per row.
#[test]
fn test_auto_train_with_forced_mode() {
    let frame = create_nonlinear_regression_dataset(300);

    let model = auto_train_with_mode(&frame, "target", BoostingMode::LinearThenTree)
        .expect("Training with forced mode should succeed");
    assert_eq!(model.mode(), BoostingMode::LinearThenTree);

    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 300);
}
/// Drives `AutoBuilder` through an explicit `AutoConfig` and checks that the build result
/// carries a column profile and converts into a usable `AutoModel`.
#[test]
fn test_auto_builder_with_config() {
    use treeboost::AutoModel;

    let frame = create_linear_regression_dataset(400);

    let builder = AutoBuilder::with_config(
        AutoConfig::default()
            .with_tuning(TuningLevel::Quick)
            .with_auto_features(true)
            .with_verbose(false),
    );
    let build = builder.fit(&frame, "target").expect("Fit should succeed");
    assert!(build.column_profile.is_some());

    let model = AutoModel::from_build_result(build);
    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 400);
}
/// The column profiler must list the sequential `id` column among the dropped columns.
///
/// A missing column profile is treated as a failure: silently skipping the assertion in
/// that case would let the test pass without checking anything.
#[test]
fn test_profiler_drops_id_columns() {
    let df = create_dataset_with_id_column(500);
    let model = auto_train(&df, "target").expect("Training should succeed");

    let Some(profile) = model.column_profile() else {
        panic!("Auto-trained model should expose a column profile");
    };

    let dropped_cols: Vec<_> = profile
        .drop_columns
        .iter()
        .map(|d| d.name.as_str())
        .collect();
    println!("Dropped columns: {:?}", dropped_cols);

    assert!(
        dropped_cols.contains(&"id"),
        "ID column should be automatically dropped"
    );
}
/// Training on a frame with a string column may either succeed or fail.
///
/// On success the model must predict one value per row; on failure the error text must
/// mention "numeric" or "categorical".
#[test]
fn test_mixed_type_dataset() {
    let frame = create_mixed_type_dataset(400);

    match auto_train(&frame, "target") {
        Ok(model) => {
            let preds = model.predict(&frame).expect("Prediction should succeed");
            assert_eq!(preds.len(), 400);
        }
        Err(err) => {
            println!("Mixed type dataset error (expected): {}", err);
            // Render the message once and test both keywords against it.
            let message = err.to_string();
            assert!(
                message.contains("numeric") || message.contains("categorical"),
                "Error should mention numeric or categorical features"
            );
        }
    }
}
/// Trains a default model and prints every per-phase timing it exposes.
///
/// NOTE(review): this only checks that the six fields exist and are `Debug`-printable;
/// no timing value is asserted.
#[test]
fn test_phase_times_reported() {
    let frame = create_linear_regression_dataset(300);
    let model = auto_train(&frame, "target").expect("Training should succeed");

    let timings = model.phase_times();
    println!("Profiling: {:?}", timings.profiling);
    println!("Preprocessing: {:?}", timings.preprocessing);
    println!("Feature Engineering: {:?}", timings.feature_engineering);
    println!("Analysis: {:?}", timings.analysis);
    println!("Tuning: {:?}", timings.tuning);
    println!("Training: {:?}", timings.training);
}
/// The textual summary must contain the report title plus the mode-selection, total-time
/// and phase-breakdown markers.
#[test]
fn test_summary_contains_key_info() {
    let frame = create_linear_regression_dataset(200);
    let model = auto_train(&frame, "target").expect("Training should succeed");

    let report = model.summary();
    assert!(report.contains("TreeBoost Pipeline Report"));
    assert!(report.contains("MODE SELECTION"));
    assert!(report.contains("Total Time:"));
    assert!(report.contains("Phase Breakdown:"));

    // Printed last so the full report is shown only once all markers were found.
    println!("=== AutoModel Summary ===\n{}", report);
}
/// Enabling verbose output on the builder must not cause `fit` to fail.
#[test]
fn test_autobuilder_verbose_mode() {
    let frame = create_linear_regression_dataset(200);
    let builder = AutoBuilder::new().with_verbose(true);

    let outcome = builder.fit(&frame, "target");
    assert!(outcome.is_ok(), "Verbose mode should not break training");
}
/// Trains the same fixture with the quick, default and thorough entry points, logs the
/// three build times, and checks that each model predicts one value per row.
#[test]
fn test_different_tuning_levels() {
    let frame = create_linear_regression_dataset(150);

    // Training order is quick -> standard -> thorough, same as the reporting order.
    let quick = auto_train_quick(&frame, "target").expect("Quick should succeed");
    let standard = auto_train(&frame, "target").expect("Standard should succeed");
    let thorough = auto_train_thorough(&frame, "target").expect("Thorough should succeed");

    // Build times are informational; no ordering between them is asserted.
    println!(
        "Build times - Quick: {:?}, Standard: {:?}, Thorough: {:?}",
        quick.build_time(),
        standard.build_time(),
        thorough.build_time()
    );

    assert_eq!(quick.predict(&frame).unwrap().len(), 150);
    assert_eq!(standard.predict(&frame).unwrap().len(), 150);
    assert_eq!(thorough.predict(&frame).unwrap().len(), 150);
}
/// Fits with a 15-second time budget and asserts that the wall-clock duration of `fit`
/// stays below 20 seconds, then checks the resulting model still predicts.
#[test]
fn test_time_budget_control() {
    use std::time::{Duration, Instant};
    use treeboost::AutoModel;

    let frame = create_linear_regression_dataset(300);
    let builder = AutoBuilder::new()
        .with_time_budget(Duration::from_secs(15))
        .with_verbose(true);

    // Only the `fit` call is timed; fixture and builder construction are excluded.
    let started = Instant::now();
    let build = builder
        .fit(&frame, "target")
        .expect("Training with time budget should succeed");
    let elapsed = started.elapsed();
    println!("Training with 15s budget completed in {:?}", elapsed);

    // The limit is deliberately looser than the budget itself.
    assert!(
        elapsed < Duration::from_secs(20),
        "Training took {:?}, exceeding budget tolerance",
        elapsed
    );

    let model = AutoModel::from_build_result(build);
    let preds = model.predict(&frame).expect("Prediction should succeed");
    assert_eq!(preds.len(), 300);
}
/// Fits with a 5-second budget while also requesting auto features and standard tuning;
/// training must still succeed and the model must predict one value per row.
#[test]
fn test_time_budget_adaptations() {
    use std::time::Duration;
    use treeboost::AutoModel;

    let frame = create_linear_regression_dataset(200);
    let builder = AutoBuilder::new()
        .with_time_budget(Duration::from_secs(5))
        .with_verbose(true)
        .with_auto_features(true)
        .with_tuning(TuningLevel::Standard);

    let build = builder
        .fit(&frame, "target")
        .expect("Training with tight budget should succeed");

    let model = AutoModel::from_build_result(build);
    let preds = model.predict(&frame).unwrap();
    assert_eq!(preds.len(), 200);
    println!("Tight budget model trained successfully");
}
/// A time budget supplied through `AutoConfig` (rather than on the builder) must be
/// accepted together with thorough tuning.
#[test]
fn test_time_budget_via_config() {
    use std::time::Duration;
    use treeboost::AutoModel;

    let frame = create_linear_regression_dataset(150);

    let builder = AutoBuilder::with_config(
        AutoConfig::default()
            .with_time_budget(Duration::from_secs(20))
            .with_tuning(TuningLevel::Thorough),
    );
    let build = builder.fit(&frame, "target").expect("Training should succeed");

    let model = AutoModel::from_build_result(build);
    let preds = model.predict(&frame).unwrap();
    assert_eq!(preds.len(), 150);
    println!("Time budget via config works");
}
/// Updating a trained model with 5 extra trees on a second batch must be reflected in
/// every field of the update report, and the model must still predict afterwards.
#[test]
fn test_auto_model_incremental_update() {
    use treeboost::AutoModel;

    let initial_batch = create_linear_regression_dataset(200);
    let mut model = AutoModel::train_quick(&initial_batch, "target")
        .expect("Initial training should succeed");

    let baseline_trees = model.num_trees();
    assert!(
        baseline_trees > 0,
        "Model should have trees after initial training"
    );

    // Second batch: 100 rows from the same seeded generator.
    let update_batch = create_linear_regression_dataset(100);
    let report = model
        .update(&update_batch, 5)
        .expect("Update should succeed");

    assert_eq!(report.trees_before, baseline_trees);
    assert_eq!(report.trees_added, 5);
    assert_eq!(report.trees_after, baseline_trees + 5);
    assert_eq!(report.rows_trained, 100);
    assert_eq!(report.target_column, "target");

    let preds = model
        .predict(&initial_batch)
        .expect("Prediction should work after update");
    assert_eq!(preds.len(), 200);

    println!("Incremental update report: {}", report);
}
/// Saving a model to a `.trb` file and loading it back must preserve the tree count
/// and the boosting mode.
#[test]
fn test_auto_model_trb_save_load() {
    use treeboost::AutoModel;

    let frame = create_linear_regression_dataset(200);
    let model = AutoModel::train_quick(&frame, "target").expect("Training should succeed");
    let expected_trees = model.num_trees();

    // The temporary directory guard stays bound until the end of the test.
    let tmp = tempfile::tempdir().expect("Failed to create temp dir");
    let trb_path = tmp.path().join("model.trb");

    model
        .save_trb(&trb_path, "Test model")
        .expect("Save should succeed");
    assert!(trb_path.exists(), "TRB file should exist");

    let restored = AutoModel::load_trb(&trb_path, "target").expect("Load should succeed");
    assert_eq!(
        restored.num_trees(),
        expected_trees,
        "Loaded model should have same number of trees"
    );
    assert_eq!(
        restored.mode(),
        model.mode(),
        "Loaded model should have same mode"
    );
}
/// Full persistence cycle: train, save, update in memory, append the update to the same
/// file, then reload and confirm the tree count matches the updated model.
#[test]
fn test_auto_model_trb_update_cycle() {
    use treeboost::AutoModel;

    let initial_batch = create_linear_regression_dataset(200);
    let update_batch = create_linear_regression_dataset(100);

    // Step 1: initial training and a sanity check on predictions.
    let mut model =
        AutoModel::train_quick(&initial_batch, "target").expect("Training should succeed");
    let baseline_trees = model.num_trees();
    let baseline_preds = model
        .predict(&initial_batch)
        .expect("Initial prediction should work");
    assert_eq!(baseline_preds.len(), 200);

    // Step 2: write the base file and remember its size.
    let tmp = tempfile::tempdir().expect("Failed to create temp dir");
    let trb_path = tmp.path().join("model.trb");
    model
        .save_trb(&trb_path, "Initial training")
        .expect("Save should succeed");
    let size_before = std::fs::metadata(&trb_path).unwrap().len();

    // Step 3: add 5 trees in memory.
    model.update(&update_batch, 5).expect("Update should succeed");
    let updated_trees = model.num_trees();
    assert_eq!(updated_trees, baseline_trees + 5);
    let refreshed_preds = model
        .predict(&initial_batch)
        .expect("Updated model prediction should work");
    assert_eq!(refreshed_preds.len(), 200);

    // Step 4: persist the update to the same path.
    // NOTE(review): 100 presumably mirrors the row count of the update batch — confirm
    // against the `save_trb_update` signature.
    model
        .save_trb_update(&trb_path, 100, "Update batch")
        .expect("Save update should succeed");
    let size_after = std::fs::metadata(&trb_path).unwrap().len();
    assert!(
        size_after > size_before,
        "TRB file should grow after update"
    );

    // Step 5: a fresh load must see the updated tree count.
    let restored = AutoModel::load_trb(&trb_path, "target").expect("Load should succeed");
    assert_eq!(restored.num_trees(), updated_trees);
}
/// The `Display` output of a hand-built `AutoModelUpdateReport` must mention the row
/// count, the target column and the number of trees added.
#[test]
fn test_auto_model_update_report_display() {
    use treeboost::AutoModelUpdateReport;

    let rendered = AutoModelUpdateReport {
        rows_trained: 1000,
        trees_before: 10,
        trees_after: 20,
        trees_added: 10,
        mode: BoostingMode::PureTree,
        target_column: "price".to_string(),
    }
    .to_string();

    assert!(rendered.contains("1000 rows"));
    assert!(rendered.contains("price"));
    assert!(rendered.contains("10 trees added"));
}
/// A model trained with a forced `BoostingMode::PureTree` must report the same mode
/// after an incremental update.
#[test]
fn test_auto_model_config_preserved_across_updates() {
    use treeboost::AutoModel;

    let frame = create_linear_regression_dataset(200);
    let mut model = AutoModel::train_with_mode(&frame, "target", BoostingMode::PureTree)
        .expect("Training should succeed");

    let mode_before = model.mode();
    assert_eq!(mode_before, BoostingMode::PureTree);

    // Update on the same frame with 5 additional trees.
    model.update(&frame, 5).expect("Update should succeed");

    assert_eq!(
        model.mode(),
        mode_before,
        "Mode should be preserved after update"
    );
}