//! AutoModel: Self-contained trained model from AutoBuilder
//!
//! AutoModel wraps the result of AutoBuilder training and provides a clean
//! interface for predictions and model inspection.
//!
//! # Features
//!
//! - **One-line training**: `AutoModel::train(&df, "target")?`
//! - **Easy prediction**: `model.predict(&df)?`
//! - **Training summary**: Access all metadata from the build process
//! - **Serialization**: Save/load trained models
//!
//! # Example
//!
//! ```ignore
//! use treeboost::model::AutoModel;
//! use polars::prelude::*;
//!
//! // One-line training
//! let model = AutoModel::train(&df, "target")?;
//!
//! // See what mode was selected
//! println!("Mode: {:?}", model.mode());
//! println!("Build time: {:?}", model.build_time());
//!
//! // Predict
//! let predictions = model.predict(&test_df)?;
//!
//! // Save for later
//! model.save("model.rkyv")?;
//!
//! // Load and use
//! let loaded = AutoModel::load("model.rkyv")?;
//! let preds = loaded.predict(&test_df)?;
//! ```

use crate::analysis::{Confidence, DataFrameProfile, DatasetAnalysis};
use crate::dataset::{BinnedDataset, DataPipeline};
use crate::features::FeaturePlan;
use crate::loss::MseLoss;
use crate::model::{
    AutoBuilder, AutoConfig, BoostingMode, BuildPhaseTimes, BuildResult, TreeTuningResult,
    TuningLevel, UniversalModel,
};
use crate::preprocessing::PreprocessingPlan;
use crate::tuner::ltt::LttTuningResult;
use crate::{Result, TreeBoostError};
use polars::prelude::*;
use std::time::Duration;

/// AutoModel: Trained model with full metadata
///
/// This is the main user-facing type for trained models. It wraps the
/// `UniversalModel` along with all the metadata from the training process.
pub struct AutoModel {
    /// The underlying trained model (UniversalModel handles all modes and ensembles)
    model: UniversalModel,

    /// Boosting mode that was used
    mode: BoostingMode,

    /// Target column name used during training
    target_column: String,

    /// Confidence in mode selection
    mode_confidence: Option<Confidence>,

    /// Preprocessing plan that was applied
    preprocessing_plan: Option<PreprocessingPlan>,

    /// Feature engineering plan that was applied
    feature_plan: Option<FeaturePlan>,

    /// LTT tuning result (if applicable)
    ltt_tuning: Option<LttTuningResult>,

    /// Tree tuning result (if PureTree/RandomForest mode was used)
    tree_tuning: Option<TreeTuningResult>,

    /// Column profile from analysis
    column_profile: Option<DataFrameProfile>,

    /// Dataset analysis result
    analysis: Option<DatasetAnalysis>,

    /// Fitted pipeline state (CRITICAL for inference!)
    pipeline_state: Option<crate::dataset::PipelineState>,

    /// Total build time
    build_time: Duration,

    /// Time breakdown by phase
    phase_times: BuildPhaseTimes,
}

impl AutoModel {
    /// Create an AutoModel from a BuildResult
    pub fn from_build_result(result: BuildResult) -> Self {
        Self {
            model: result.model,
            mode: result.mode,
            target_column: result.target_column,
            mode_confidence: result.mode_confidence,
            preprocessing_plan: result.preprocessing_plan,
            feature_plan: result.feature_plan,
            ltt_tuning: result.ltt_tuning,
            tree_tuning: result.tree_tuning,
            column_profile: result.column_profile,
            pipeline_state: result.pipeline_state,
            analysis: result.analysis,
            build_time: result.build_time,
            phase_times: result.phase_times,
        }
    }

    /// Train a model with default settings (the simplest API)
    ///
    /// This is the recommended entry point for most users.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let model = AutoModel::train(&df, "price")?;
    /// let predictions = model.predict(&test_df)?;
    /// ```
    pub fn train(df: &DataFrame, target_col: &str) -> Result<Self> {
        let builder = AutoBuilder::new();
        let result = builder.fit(df, target_col)?;
        Ok(Self::from_build_result(result))
    }

    /// Train with quick settings (minimal tuning, fast training)
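    ///
    /// # Example
    ///
    /// A minimal sketch (assuming `df` is a Polars `DataFrame` containing a
    /// `"target"` column); `train_thorough` below trades build time for accuracy:
    ///
    /// ```ignore
    /// // Fast pass with minimal tuning
    /// let quick = AutoModel::train_quick(&df, "target")?;
    ///
    /// // Extensive tuning when accuracy matters more than build time
    /// let thorough = AutoModel::train_thorough(&df, "target")?;
    /// ```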
    pub fn train_quick(df: &DataFrame, target_col: &str) -> Result<Self> {
        let builder = AutoBuilder::new().with_tuning(TuningLevel::Quick);
        let result = builder.fit(df, target_col)?;
        Ok(Self::from_build_result(result))
    }

    /// Train with thorough settings (extensive tuning, best accuracy)
    pub fn train_thorough(df: &DataFrame, target_col: &str) -> Result<Self> {
        let builder = AutoBuilder::new().with_tuning(TuningLevel::Thorough);
        let result = builder.fit(df, target_col)?;
        Ok(Self::from_build_result(result))
    }

    /// Train with a specific mode (bypass auto-selection)
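    ///
    /// # Example
    ///
    /// A minimal sketch forcing the hybrid `LinearThenTree` mode (assuming `df`
    /// is a Polars `DataFrame` with a `"target"` column):
    ///
    /// ```ignore
    /// use treeboost::model::{AutoModel, BoostingMode};
    ///
    /// // Skip auto-selection and use the linear-then-tree hybrid directly
    /// let model = AutoModel::train_with_mode(&df, "target", BoostingMode::LinearThenTree)?;
    /// println!("Mode: {:?}", model.mode());
    /// ```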
    pub fn train_with_mode(df: &DataFrame, target_col: &str, mode: BoostingMode) -> Result<Self> {
        let builder = AutoBuilder::new().with_mode(mode);
        let result = builder.fit(df, target_col)?;
        Ok(Self::from_build_result(result))
    }

    /// Train with custom configuration
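    ///
    /// # Example
    ///
    /// A sketch assuming `AutoConfig` provides a `Default` implementation
    /// (adjust to however the config is actually constructed):
    ///
    /// ```ignore
    /// use treeboost::model::{AutoConfig, AutoModel};
    ///
    /// let config = AutoConfig::default(); // assumed constructor for illustration
    /// let model = AutoModel::train_with_config(&df, "target", config)?;
    /// ```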
    pub fn train_with_config(df: &DataFrame, target_col: &str, config: AutoConfig) -> Result<Self> {
        let builder = AutoBuilder::with_config(config);
        let result = builder.fit(df, target_col)?;
        Ok(Self::from_build_result(result))
    }

    /// Predict on a DataFrame
    ///
    /// Re-applies the fitted preprocessing pipeline to `df`, then runs the
    /// underlying model. Returns predictions as a `Vec<f32>`.
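    ///
    /// # Example
    ///
    /// A minimal sketch (assuming `train_df` and `test_df` share the training schema):
    ///
    /// ```ignore
    /// let model = AutoModel::train(&train_df, "target")?;
    /// // One f32 per input row (assumed for a regression-style target)
    /// let preds: Vec<f32> = model.predict(&test_df)?;
    /// ```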
    pub fn predict(&self, df: &DataFrame) -> Result<Vec<f32>> {
        // Convert DataFrame to BinnedDataset for prediction
        // Also get preprocessed DataFrame with encoded categoricals
        let (preprocessed_df, dataset) = self.prepare_dataset_for_prediction(df)?;

        // For LinearThenTree, use dual-representation inference if FeatureExtractor is stored
        if matches!(self.mode, crate::model::BoostingMode::LinearThenTree) {
            if let Some(ref extractor) = self.model.feature_extractor() {
                // CRITICAL: Extract from preprocessed_df (with encoded categoricals),
                // NOT from original df (with String categoricals)
                let (raw_features, _num_features) =
                    extractor.extract(&preprocessed_df, &self.target_column)?;

                return Ok(self
                    .model
                    .predict_with_raw_features(&dataset, &raw_features));
            }
        }

        Ok(self.model.predict(&dataset))
    }

    /// Predict using only the linear component (LinearThenTree mode only)
    ///
    /// For fair comparison between linear-only and full LinearThenTree.
    /// Uses the same preprocessing pipeline as the full model.
    ///
    /// # Returns
    /// - `Ok(Vec<f32>)`: Linear-only predictions (base + linear model)
    /// - `Err`: If model is not LinearThenTree mode
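    ///
    /// # Example
    ///
    /// A sketch comparing the linear component against the full model
    /// (assuming a model trained in `LinearThenTree` mode):
    ///
    /// ```ignore
    /// let linear_preds = model.predict_linear_only(&test_df)?;
    /// let full_preds = model.predict(&test_df)?;
    /// // The per-row difference is the contribution of the tree stage.
    /// ```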
    pub fn predict_linear_only(&self, df: &DataFrame) -> Result<Vec<f32>> {
        if !matches!(self.mode, crate::model::BoostingMode::LinearThenTree) {
            return Err(TreeBoostError::Config(
                "predict_linear_only() only available for LinearThenTree mode".to_string(),
            ));
        }

        // Use same preprocessing as full prediction
        let (preprocessed_df, dataset) = self.prepare_dataset_for_prediction(df)?;

        // Extract features using FeatureExtractor
        if let Some(ref extractor) = self.model.feature_extractor() {
            let (raw_features, _num_features) =
                extractor.extract(&preprocessed_df, &self.target_column)?;

            // Get linear-only predictions from the model
            return Ok(self.model.predict_linear_only(&dataset, &raw_features)?);
        }

        Err(TreeBoostError::Config(
            "LinearThenTree model missing FeatureExtractor - cannot predict".to_string(),
        ))
    }

    /// Predict on a BinnedDataset (for advanced users)
    pub fn predict_binned(&self, dataset: &BinnedDataset) -> Vec<f32> {
        self.model.predict(dataset)
    }

    /// Get the boosting mode that was used
    pub fn mode(&self) -> BoostingMode {
        self.mode
    }

    /// Get confidence in the mode selection
    pub fn mode_confidence(&self) -> Option<Confidence> {
        self.mode_confidence
    }

    /// Get total training time
    pub fn build_time(&self) -> Duration {
        self.build_time
    }

    /// Get time breakdown by phase
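    ///
    /// # Example
    ///
    /// The field names mirror those printed in `summary()`:
    ///
    /// ```ignore
    /// let times = model.phase_times();
    /// println!("tuning: {:?}, training: {:?}", times.tuning, times.training);
    /// ```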
    pub fn phase_times(&self) -> &BuildPhaseTimes {
        &self.phase_times
    }

    /// Get the preprocessing plan that was applied
    pub fn preprocessing_plan(&self) -> Option<&PreprocessingPlan> {
        self.preprocessing_plan.as_ref()
    }

    /// Get the feature engineering plan that was applied
    pub fn feature_plan(&self) -> Option<&FeaturePlan> {
        self.feature_plan.as_ref()
    }

    /// Get the column profile from analysis
    pub fn column_profile(&self) -> Option<&DataFrameProfile> {
        self.column_profile.as_ref()
    }

    /// Get the dataset analysis result
    pub fn analysis(&self) -> Option<&DatasetAnalysis> {
        self.analysis.as_ref()
    }

    /// Get LTT tuning result (if LTT mode was used)
    pub fn ltt_tuning(&self) -> Option<&LttTuningResult> {
        self.ltt_tuning.as_ref()
    }

    /// Get tree tuning result (if PureTree/RandomForest mode was used)
    pub fn tree_tuning(&self) -> Option<&TreeTuningResult> {
        self.tree_tuning.as_ref()
    }

    /// Get number of trees in the model
    pub fn num_trees(&self) -> usize {
        self.model.num_trees()
    }

    /// Get number of features
    pub fn num_features(&self) -> usize {
        self.model.num_features()
    }

    /// Get the underlying UniversalModel
    pub fn inner(&self) -> &UniversalModel {
        &self.model
    }

    /// Get the discovered UniversalConfig
    ///
    /// This returns the config that AutoModel discovered through analysis and tuning.
    /// You can export this config to JSON and use it to retrain with UniversalModel directly.
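    ///
    /// # Example
    ///
    /// A minimal sketch (printing assumes `UniversalConfig` derives `Debug`):
    ///
    /// ```ignore
    /// let model = AutoModel::train(&df, "target")?;
    /// println!("Discovered config: {:?}", model.config());
    /// ```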
    pub fn config(&self) -> &crate::model::UniversalConfig {
        self.model.config()
    }

    /// Get a comprehensive summary of the training process
    ///
    /// This shows the full "Smart Engineer" report explaining every decision
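    ///
    /// # Example
    ///
    /// Print the report after training (assuming `df` holds the training data):
    ///
    /// ```ignore
    /// let model = AutoModel::train(&df, "target")?;
    /// println!("{}", model.summary());
    /// ```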
    pub fn summary(&self) -> String {
        let mut lines = vec![
            "┌─────────────────────────────────────────────────────────────────┐".to_string(),
            "│                  TreeBoost Pipeline Report                      │".to_string(),
            "└─────────────────────────────────────────────────────────────────┘".to_string(),
            "".to_string(),
        ];

        // Section 1: Data Profile
        if let Some(ref profile) = self.column_profile {
            lines.push("═══ DATA PROFILE ═══".to_string());
            lines.push(format!("  Rows: {}", profile.num_rows));
            lines.push(format!("  Columns: {} total", profile.columns.len()));
            lines.push(format!("    • Numeric: {}", profile.num_numeric));
            lines.push(format!("    • Categorical: {}", profile.num_categorical));
            lines.push(format!(
                "  Target: {} ({:?})",
                self.target_column, profile.task_type
            ));

            if !profile.drop_columns.is_empty() {
                lines.push("".to_string());
                lines.push(format!("  Dropped {} columns:", profile.drop_columns.len()));
                for dropped in &profile.drop_columns {
                    lines.push(format!("    • '{}' - {}", dropped.name, dropped.reason));
                }
            }
            lines.push("".to_string());
        }

        // Section 2: Preprocessing Decisions
        if let Some(ref plan) = self.preprocessing_plan {
            lines.push("═══ PREPROCESSING DECISIONS ═══".to_string());
            if !plan.reasoning.is_empty() {
                for reason in &plan.reasoning {
                    lines.push(format!("{}", reason));
                }
            } else {
                lines.push("  • No special preprocessing required".to_string());
            }
            lines.push("".to_string());
        }

        // Section 3: Feature Engineering
        if let Some(ref plan) = self.feature_plan {
            lines.push("═══ FEATURE ENGINEERING ═══".to_string());

            if !plan.polynomial_features.is_empty() {
                lines.push(format!(
                    "  Polynomial features ({}): ",
                    plan.polynomial_features.len()
                ));
                for feat in &plan.polynomial_features {
                    lines.push(format!("{}", feat));
                }
            }

            if !plan.ratio_pairs.is_empty() {
                lines.push(format!("  Ratio features ({}): ", plan.ratio_pairs.len()));
                for (f1, f2) in &plan.ratio_pairs {
                    lines.push(format!("{}/{}", f1, f2));
                }
            }

            if !plan.interaction_pairs.is_empty() {
                lines.push(format!(
                    "  Interaction features ({}): ",
                    plan.interaction_pairs.len()
                ));
                for (f1, f2) in plan.interaction_pairs.iter().take(5) {
                    lines.push(format!("{} × {}", f1, f2));
                }
                if plan.interaction_pairs.len() > 5 {
                    lines.push(format!(
                        "    ... and {} more",
                        plan.interaction_pairs.len() - 5
                    ));
                }
            }

            if !plan.reasoning.is_empty() {
                lines.push("".to_string());
                lines.push("  Reasoning:".to_string());
                for reason in &plan.reasoning {
                    lines.push(format!("{}", reason));
                }
            }
            lines.push("".to_string());
        }

        // Section 4: Mode Selection
        lines.push("═══ MODE SELECTION ═══".to_string());
        lines.push(format!("  Selected: {:?}", self.mode));
        lines.push(format!(
            "  Confidence: {}",
            self.mode_confidence
                .as_ref()
                .map(|c| format!("{:?}", c))
                .unwrap_or_else(|| "N/A".to_string())
        ));

        if let Some(ref analysis) = self.analysis {
            lines.push("".to_string());
            lines.push("  Analysis Results:".to_string());
            lines.push(format!(
                "    • Linear R²: {:.4} ({})",
                analysis.linear_r2,
                if analysis.linear_r2 > 0.5 {
                    "Strong"
                } else if analysis.linear_r2 > 0.3 {
                    "Moderate"
                } else {
                    "Weak"
                }
            ));
            lines.push(format!(
                "    • Tree Gain: {:.4} ({})",
                analysis.tree_gain,
                if analysis.tree_gain > 0.3 {
                    "Strong"
                } else if analysis.tree_gain > 0.1 {
                    "Moderate"
                } else {
                    "Weak"
                }
            ));

            // Show the recommended mode from analysis
            let recommended_mode = analysis.recommend_mode();
            let reasoning = if analysis.linear_r2 > 0.5 && analysis.tree_gain > 0.1 {
                "Strong linear trend + residual structure → Hybrid approach"
            } else if analysis.linear_r2 > 0.5 {
                "Strong linear relationship → Linear model dominates"
            } else if analysis.tree_gain > 0.1 {
                "Non-linear patterns → Tree-based approach"
            } else {
                "Moderate signals → Pure tree model"
            };

            lines.push("".to_string());
            lines.push(format!("  Recommended: {:?}", recommended_mode));
            lines.push(format!("  Reasoning: {}", reasoning));
        }
        lines.push("".to_string());

        // Section 5: Tuning Results (if applicable)
        if let Some(ref tuning) = self.ltt_tuning {
            lines.push("═══ LTT TUNING RESULTS ═══".to_string());
            lines.push("  Linear Phase:".to_string());
            lines.push(format!("    • R²: {:.4}", tuning.linear_r2));
            lines.push(format!("    • Lambda: {:.4}", tuning.linear_params.lambda));
            lines.push(format!(
                "    • L1 Ratio: {:.4} ({})",
                tuning.linear_params.l1_ratio,
                if tuning.linear_params.l1_ratio == 0.0 {
                    "Ridge"
                } else if tuning.linear_params.l1_ratio == 1.0 {
                    "LASSO"
                } else {
                    "ElasticNet"
                }
            ));
            lines.push("".to_string());
            lines.push("  Tree Phase:".to_string());
            lines.push(format!("    • Max Depth: {}", tuning.tree_params.max_depth));
            lines.push(format!(
                "    • Learning Rate: {:.4}",
                tuning.tree_params.learning_rate
            ));
            lines.push(format!(
                "    • Num Rounds: {}",
                tuning.tree_params.num_rounds
            ));
            lines.push("".to_string());
            lines.push(format!("  Final RMSE: {:.4}", tuning.final_rmse));
            lines.push("".to_string());
        }

        // Section 6: Training Summary
        lines.push("═══ TRAINING SUMMARY ═══".to_string());
        lines.push(format!(
            "  Total Time: {:.3}s",
            self.build_time.as_secs_f64()
        ));
        lines.push("".to_string());
        lines.push("  Phase Breakdown:".to_string());
        lines.push(format!("    • Profiling: {:?}", self.phase_times.profiling));
        lines.push(format!(
            "    • Preprocessing: {:?}",
            self.phase_times.preprocessing
        ));
        lines.push(format!(
            "    • Feature Engineering: {:?}",
            self.phase_times.feature_engineering
        ));
        lines.push(format!("    • Analysis: {:?}", self.phase_times.analysis));
        lines.push(format!("    • Tuning: {:?}", self.phase_times.tuning));
        lines.push(format!("    • Training: {:?}", self.phase_times.training));
        lines.push("".to_string());

        lines.push(
            "┌─────────────────────────────────────────────────────────────────┐".to_string(),
        );
        lines.push(
            "│      TreeBoost: The Smart Engineer That Explains Itself         │".to_string(),
        );
        lines.push(
            "└─────────────────────────────────────────────────────────────────┘".to_string(),
        );

        lines.join("\n")
    }

    /// Prepare a DataFrame for prediction
    fn prepare_dataset_for_prediction(&self, df: &DataFrame) -> Result<(DataFrame, BinnedDataset)> {
        // CRITICAL: Use the fitted pipeline state from training!
        // Without this, predictions will be nonsense because the model expects
        // features encoded the same way as during training.
        let pipeline_state = self.pipeline_state.as_ref().ok_or_else(|| {
            TreeBoostError::Data(
                "AutoModel missing fitted pipeline state - cannot make predictions".to_string(),
            )
        })?;

        // Use DataPipeline to transform the test data using the fitted state
        let pipeline = DataPipeline::with_defaults();

        // process_for_inference() applies the learned encodings/scalers/binners
        // Returns (preprocessed_df, dataset) where preprocessed_df has encoded categoricals
        let (preprocessed_df, dataset) =
            pipeline.process_for_inference(df.clone(), pipeline_state)?;

        Ok((preprocessed_df, dataset))
    }

    /// Export the discovered config to JSON
    ///
    /// This saves the UniversalConfig that AutoModel discovered through analysis and tuning.
    /// You can load this config and use it to retrain with UniversalModel directly.
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Train with AutoModel
    /// let model = AutoModel::train(&df, "target")?;
    ///
    /// // Export the discovered config
    /// model.save_config("optimal_config.json")?;
    ///
    /// // Later: Load and tweak the config
    /// let config = UniversalConfig::load_json("optimal_config.json")?;
    /// let tweaked = config.with_learning_rate(0.05); // Adjust as needed
    ///
    /// // Retrain with the tweaked config
    /// let new_model = UniversalModel::train(&dataset, tweaked, &loss_fn)?;
    /// ```
    pub fn save_config(&self, path: impl AsRef<std::path::Path>) -> Result<()> {
        let config = self.config();

        let json = serde_json::to_string_pretty(config).map_err(|e| {
            TreeBoostError::Serialization(format!("Failed to serialize config to JSON: {}", e))
        })?;

        std::fs::write(path, json)?;
        Ok(())
    }

    /// Save the trained model to a file
    ///
    /// This saves the underlying UniversalModel (weights, trees, ensembles, etc.) for inference.
    /// The model can be loaded later using `UniversalModel::load()`.
    ///
    /// # Example
    ///
    /// ```ignore
    /// // Train and save both config and model
    /// let model = AutoModel::train(&df, "target")?;
    /// model.save_config("config.json")?;      // For retraining with AutoML
    /// model.save("model.rkyv")?;               // For inference
    ///
    /// // Later: Load for inference only (not AutoML)
    /// let loaded = UniversalModel::load("model.rkyv")?;
    /// let preds = loaded.predict(&dataset)?;
    /// ```
    pub fn save(&self, path: impl AsRef<std::path::Path>) -> Result<()> {
        self.model.save(path)
    }

    // =========================================================================
    // Incremental Learning Support
    // =========================================================================

    /// Update the model with new training data (incremental learning)
    ///
    /// This method continues training from the current model state:
    /// 1. Uses existing pipeline_state for consistent preprocessing
    /// 2. Updates the underlying UniversalModel with new trees
    /// 3. Preserves all configuration from original training
    ///
    /// # Arguments
    /// * `df` - New data to train on (must have same columns as original)
    /// * `additional_rounds` - Number of new boosting rounds (trees) to add
    ///
    /// # Example
    /// ```ignore
    /// // Train on January data
    /// let mut model = AutoModel::train(&jan_df, "target")?;
    ///
    /// // Update with February data (10 more trees)
    /// let report = model.update(&feb_df, 10)?;
    /// println!("{}", report);
    ///
    /// // Save the updated model
    /// model.save_trb("model.trb", "Updated with February data")?;
    /// ```
    pub fn update(
        &mut self,
        df: &DataFrame,
        additional_rounds: usize,
    ) -> Result<AutoModelUpdateReport> {
        let rows_before = df.height();

        // Convert DataFrame to BinnedDataset using existing pipeline
        let (_preprocessed_df, dataset) = self.prepare_dataset_for_prediction(df)?;

        // Get targets from the dataframe
        let target_series = df.column(&self.target_column).map_err(|e| {
            TreeBoostError::Data(format!(
                "Target column '{}' not found: {}",
                self.target_column, e
            ))
        })?;

        let targets: Vec<f32> = target_series
            .cast(&polars::datatypes::DataType::Float32)
            .map_err(|e| TreeBoostError::Data(format!("Failed to cast target to f32: {}", e)))?
            .f32()
            .map_err(|e| TreeBoostError::Data(format!("Failed to get f32 values: {}", e)))?
            .into_no_null_iter()
            .collect();

        // Create dataset with actual targets (avoids clone + modify pattern)
        let update_dataset = dataset.with_targets(targets);

        // Update the model (uses MSE loss by default, same as AutoBuilder)
        let loss_fn = MseLoss::new();
        let model_report = self
            .model
            .update(&update_dataset, &loss_fn, additional_rounds)?;

        Ok(AutoModelUpdateReport {
            rows_trained: rows_before,
            trees_before: model_report.trees_before,
            trees_after: model_report.trees_after,
            trees_added: model_report.trees_added,
            mode: self.mode,
            target_column: self.target_column.clone(),
        })
    }

    /// Save model to TRB (TreeBoost) incremental format
    ///
    /// TRB format supports incremental updates without rewriting the entire file.
    /// Use this format when you plan to update the model with new data.
    ///
    /// # Example
    /// ```ignore
    /// model.save_trb("model.trb", "Initial training on January data")?;
    /// ```
    pub fn save_trb(&self, path: impl AsRef<std::path::Path>, description: &str) -> Result<()> {
        self.model.save_trb(path, description)
    }

    /// Append an update to an existing TRB file
    ///
    /// This appends a new segment without rewriting the base model.
    ///
    /// # Arguments
    /// * `path` - Path to existing .trb file
    /// * `rows_trained` - Number of rows used in this update
    /// * `description` - Description of this update
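    ///
    /// # Example
    ///
    /// A sketch that appends the latest incremental update to a file previously
    /// created with `save_trb` (`jan_df`/`feb_df` are assumed to share the training schema):
    ///
    /// ```ignore
    /// let mut model = AutoModel::train(&jan_df, "target")?;
    /// model.save_trb("model.trb", "Initial training on January data")?;
    ///
    /// // Later: train on new data and append a segment
    /// let report = model.update(&feb_df, 10)?;
    /// model.save_trb_update("model.trb", report.rows_trained, "February refresh")?;
    /// ```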
    pub fn save_trb_update(
        &self,
        path: impl AsRef<std::path::Path>,
        rows_trained: usize,
        description: &str,
    ) -> Result<()> {
        self.model.save_trb_update(path, rows_trained, description)
    }

    /// Load model from TRB format for continued training
    ///
    /// This loads the base model and all appended updates, ready for further training.
    /// The returned AutoModel carries minimal metadata: tuning results, analysis, and
    /// the fitted pipeline state are not stored in the TRB format yet, but the
    /// underlying model and its trees are fully preserved.
    ///
    /// # Example
    /// ```ignore
    /// let mut model = AutoModel::load_trb("model.trb", "target")?;
    /// model.update(&new_data, 10)?;
    /// ```
    pub fn load_trb(path: impl AsRef<std::path::Path>, target_column: &str) -> Result<Self> {
        let model = crate::model::UniversalModel::load_trb(path)?;
        let mode = model.mode();

        Ok(Self {
            model,
            mode,
            target_column: target_column.to_string(),
            mode_confidence: None,
            preprocessing_plan: None,
            feature_plan: None,
            ltt_tuning: None,
            tree_tuning: None,
            column_profile: None,
            analysis: None,
            pipeline_state: None, // Note: pipeline_state not preserved in TRB format yet
            build_time: Duration::default(),
            phase_times: BuildPhaseTimes::default(),
        })
    }

    /// Check if model is compatible with dataset for incremental update
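    ///
    /// # Example
    ///
    /// A sketch guarding an incremental update (`jan_df` and `feb_df` are assumed
    /// to share the same schema):
    ///
    /// ```ignore
    /// let mut model = AutoModel::train(&jan_df, "target")?;
    /// if model.is_compatible_for_update(&feb_df) {
    ///     let report = model.update(&feb_df, 10)?;
    ///     println!("{}", report);
    /// }
    /// ```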
    pub fn is_compatible_for_update(&self, df: &DataFrame) -> bool {
        // Check that target column exists
        if df.column(&self.target_column).is_err() {
            return false;
        }

        // Try to prepare dataset - if it fails, not compatible
        self.prepare_dataset_for_prediction(df).is_ok()
    }

    /// Get mutable reference to the underlying UniversalModel
    pub fn model_mut(&mut self) -> &mut UniversalModel {
        &mut self.model
    }
}

/// Report from an AutoModel incremental training update
#[derive(Debug, Clone)]
pub struct AutoModelUpdateReport {
    /// Number of rows in the new training data
    pub rows_trained: usize,
    /// Number of trees before update
    pub trees_before: usize,
    /// Number of trees after update
    pub trees_after: usize,
    /// Number of trees added
    pub trees_added: usize,
    /// Boosting mode
    pub mode: BoostingMode,
    /// Target column name
    pub target_column: String,
}

impl std::fmt::Display for AutoModelUpdateReport {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "AutoModel Update: {} rows on '{}', {} trees added ({} -> {}), mode={:?}",
            self.rows_trained,
            self.target_column,
            self.trees_added,
            self.trees_before,
            self.trees_after,
            self.mode
        )
    }
}

impl std::fmt::Debug for AutoModel {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("AutoModel")
            .field("mode", &self.mode)
            .field("mode_confidence", &self.mode_confidence)
            .field("build_time", &self.build_time)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    #[test]
    fn test_auto_model_from_build_result() {
        // This test just verifies the struct construction works
        // Full integration tests would require a DataFrame
    }

    #[test]
    fn test_auto_model_summary_format() {
        // Create a minimal AutoModel to test summary formatting
        // This would need a real UniversalModel in practice
    }
}