#[cfg(feature = "optimized")]
use pandrs::column::{Column, Float64Column, StringColumn};
#[cfg(feature = "optimized")]
use pandrs::error::{Error, Result};
#[cfg(feature = "optimized")]
use pandrs::ml::metrics::classification::{
accuracy_score, f1_score, precision_score, recall_score,
};
#[cfg(feature = "optimized")]
use pandrs::ml::metrics::regression::{mean_squared_error, r2_score};
#[cfg(feature = "optimized")]
use pandrs::ml::models::{LinearRegression, LogisticRegression};
#[cfg(feature = "optimized")]
use pandrs::OptimizedDataFrame;
#[cfg(feature = "optimized")]
use rand::Rng;
/// Fallback entry point compiled when the `optimized` feature is disabled.
///
/// Prints a short notice telling the user how to rebuild the example with the
/// feature turned on, then returns.
#[cfg(not(feature = "optimized"))]
fn main() {
    // Each entry is emitted as its own line, in order.
    const NOTICE: [&str; 3] = [
        "This example requires the 'optimized' feature flag to be enabled.",
        "Please recompile with:",
        " cargo run --example optimized_ml_model_example --features \"optimized\"",
    ];
    for line in &NOTICE {
        println!("{}", line);
    }
}
/// Entry point compiled when the `optimized` feature is enabled.
///
/// Prints a banner and then runs the regression, classification and
/// model-selection demos, in that order.
///
/// # Errors
///
/// Returns the first error reported by any of the demos; later demos are not
/// run once one fails.
#[cfg(feature = "optimized")]
// The pandrs `Result` alias is returned as-is, so the lint is silenced here
// rather than re-wrapping a library error type in an example.
#[allow(clippy::result_large_err)]
fn main() -> Result<()> {
    println!("Example of PandRS Model Training and Evaluation (OptimizedDataFrame version)");
    println!("==========================================");

    regression_example()?;
    classification_example()?;
    model_selection_example()?;

    Ok(())
}
/// Walks through the regression workflow on synthetic data.
///
/// Builds a data set with `create_regression_data`, prints it together with
/// the nominal 70/30 split sizes, and scores placeholder all-zero predictions
/// against the `target` column using MSE and R^2. Model fitting and the
/// feature-engineering pipeline are currently skipped; no real split is
/// performed, so the "train" and "test" views both alias the full frame.
///
/// # Errors
///
/// Propagates errors from data creation, column lookup and the metric
/// functions, and returns `Error::Type` if `target` is not a float64 column.
#[cfg(feature = "optimized")]
// The pandrs `Result` alias is returned as-is, so the lint is silenced here
// rather than re-wrapping a library error type in an example.
#[allow(clippy::result_large_err)]
fn regression_example() -> Result<()> {
    println!("\n==== Regression Model Example ====");

    let reg_df = create_regression_data()?;
    println!("Regression Data Sample:");
    println!("{:?}", reg_df);

    // The sizes are only reported; the frame itself is not split below.
    let train_size = (reg_df.row_count() as f64 * 0.7) as usize;
    let test_size = reg_df.row_count() - train_size;
    println!(
        "Training Data Size: {}, Test Data Size: {}",
        train_size, test_size
    );

    // Placeholders until the example is ported to the new DataFrame API:
    // both views borrow the complete data set.
    let _train_df = &reg_df;
    let test_df = &reg_df;
    let _features = ["feature1", "feature2", "feature3"];
    let target = "target";
    let _model = LinearRegression::new();

    println!("\nLinear Regression Model Results (skipped for now):");
    println!("Need to adapt OptimizedDataFrame to use with new DataFrame API");

    // Stand-in predictions: one zero per row.
    let predictions = vec![0.0; test_df.row_count()];
    let y_true: Vec<f64> = {
        let target_col = test_df.column(target)?;
        // Build the error lazily so the message is only allocated on failure.
        let float_col = target_col
            .as_float64()
            .ok_or_else(|| Error::Type("Expected float column".to_string()))?;
        // Rows whose lookup fails or yields no value are dropped.
        (0..test_df.row_count())
            .filter_map(|i| float_col.get(i).ok().flatten())
            .collect()
    };

    let mse = mean_squared_error(&y_true, &predictions)?;
    let r2 = r2_score(&y_true, &predictions)?;
    println!("\nModel Evaluation:");
    println!("MSE: {}", mse);
    println!("R^2: {}", r2);

    println!("\nRegression using Feature Engineering Pipeline (skipped for now):");
    println!("Pipeline API has been updated and needs to be adapted for this example");

    // Both vectors are all zeros, so these metrics are placeholders only.
    // NOTE(review): the R^2 value for a constant target depends on how
    // `r2_score` handles zero variance - confirm against pandrs.
    let poly_predictions = vec![0.0; test_df.row_count()];
    let poly_y_true = vec![0.0; test_df.row_count()];
    let poly_mse = mean_squared_error(&poly_y_true, &poly_predictions)?;
    let poly_r2 = r2_score(&poly_y_true, &poly_predictions)?;
    println!("Evaluation of Linear Regression with Polynomial Features:");
    println!("MSE: {}", poly_mse);
    println!("R^2: {}", poly_r2);

    Ok(())
}
/// Walks through the classification workflow on synthetic data.
///
/// Builds a data set with `create_classification_data`, prints it together
/// with the nominal 70/30 split sizes, and scores placeholder all-zero
/// predictions against the `target` column using accuracy, precision, recall
/// and F1. Model fitting is currently skipped; no real split is performed, so
/// the "train" and "test" views both alias the full frame.
///
/// # Errors
///
/// Propagates errors from data creation, column lookup and the metric
/// functions, and returns `Error::Type` if `target` is not a string column.
#[cfg(feature = "optimized")]
// The pandrs `Result` alias is returned as-is, so the lint is silenced here
// rather than re-wrapping a library error type in an example.
#[allow(clippy::result_large_err)]
fn classification_example() -> Result<()> {
    println!("\n==== Classification Model Example ====");

    let cls_df = create_classification_data()?;
    println!("Classification Data Sample:");
    println!("{:?}", cls_df);

    // The sizes are only reported; the frame itself is not split below.
    let train_size = (cls_df.row_count() as f64 * 0.7) as usize;
    let test_size = cls_df.row_count() - train_size;
    println!(
        "Training Data Size: {}, Test Data Size: {}",
        train_size, test_size
    );

    // Placeholders until the example is ported to the new DataFrame API:
    // both views borrow the complete data set.
    let _train_df = &cls_df;
    let test_df = &cls_df;
    let _features = ["feature1", "feature2"];
    let target = "target";
    let _model = LogisticRegression::new();

    println!("\nLogistic Regression Model Results (skipped for now):");
    println!("Need to adapt OptimizedDataFrame to use with new DataFrame API");

    // Stand-in scores: one zero per row.
    let predictions = vec![0.0; test_df.row_count()];
    let y_true: Vec<bool> = {
        let target_col = test_df.column(target)?;
        // Build the error lazily so the message is only allocated on failure.
        let string_col = target_col
            .as_string()
            .ok_or_else(|| Error::Type("Expected string column".to_string()))?;
        // The label "1" maps to `true`, any other present value to `false`;
        // rows whose lookup fails or yields no value are dropped.
        (0..test_df.row_count())
            .filter_map(|i| string_col.get(i).ok().flatten().map(|s| s == "1"))
            .collect()
    };

    // Threshold the scores at 0.5; with all-zero scores every prediction is
    // `false`.
    let pred_bool: Vec<bool> = predictions.iter().map(|&val| val > 0.5).collect();

    let accuracy = accuracy_score(&y_true, &pred_bool)?;
    let precision = precision_score(&y_true, &pred_bool)?;
    let recall = recall_score(&y_true, &pred_bool)?;
    let f1 = f1_score(&y_true, &pred_bool)?;
    println!("\nModel Evaluation:");
    println!("Accuracy: {}", accuracy);
    println!("Precision: {}", precision);
    println!("Recall: {}", recall);
    println!("F1 Score: {}", f1);

    println!("\nProbability Predictions Sample (skipped for now):");
    println!("Probability prediction would go here, but API has changed");

    Ok(())
}
/// Placeholder for the model-selection / cross-validation demo.
///
/// Generates the regression data set and prints a note explaining why
/// cross-validation is not run; no model is trained or evaluated.
///
/// # Errors
///
/// Propagates any error from `create_regression_data`.
#[cfg(feature = "optimized")]
// The pandrs `Result` alias is returned as-is, so the lint is silenced here
// rather than re-wrapping a library error type in an example.
#[allow(clippy::result_large_err)]
fn model_selection_example() -> Result<()> {
    println!("\n==== Model Selection and Evaluation Example ====");

    // Built but unused: kept so the demo still exercises data generation.
    let _reg_df = create_regression_data()?;

    println!("\nCross-Validation (5-fold) Results:");
    println!("Note: LinearRegression does not implement the Clone trait, so cross-validation cannot be performed");

    Ok(())
}
/// Builds a 100-row synthetic regression data set.
///
/// The frame has float64 columns `feature1` (uniform in `-10.0..10.0`),
/// `feature2` (uniform in `0.0..100.0`), `feature3` (uniform in `-5.0..15.0`)
/// and `target`, where
/// `target = 2.0 * feature1 + 0.5 * feature2 - 1.5 * feature3 + noise` and the
/// noise is uniform in `-5.0..5.0`. Values differ on every call because the
/// generator returned by `rand::rng()` is used.
///
/// # Errors
///
/// Propagates any error returned by `OptimizedDataFrame::add_column`.
#[cfg(feature = "optimized")]
// The pandrs `Result` alias is returned as-is, so the lint is silenced here
// rather than re-wrapping a library error type in an example.
#[allow(clippy::result_large_err)]
fn create_regression_data() -> Result<OptimizedDataFrame> {
    let mut rng = rand::rng();
    let n = 100;

    let feature1: Vec<f64> = (0..n).map(|_| rng.random_range(-10.0..10.0)).collect();
    let feature2: Vec<f64> = (0..n).map(|_| rng.random_range(0.0..100.0)).collect();
    let feature3: Vec<f64> = (0..n).map(|_| rng.random_range(-5.0..15.0)).collect();

    // Linear combination of the features plus uniform noise.
    let target: Vec<f64> = feature1
        .iter()
        .zip(feature2.iter())
        .zip(feature3.iter())
        .map(|((x1, x2), x3)| 2.0 * x1 + 0.5 * x2 - 1.5 * x3 + rng.random_range(-5.0..5.0))
        .collect();

    // Insert the columns in a fixed order; each column carries the same name
    // it is registered under in the frame.
    let mut df = OptimizedDataFrame::new();
    for (name, values) in [
        ("feature1", feature1),
        ("feature2", feature2),
        ("feature3", feature3),
        ("target", target),
    ] {
        df.add_column(name, Column::Float64(Float64Column::with_name(values, name)))?;
    }

    Ok(df)
}
/// Builds a 100-row synthetic binary-classification data set.
///
/// The frame has float64 columns `feature1` and `feature2` (both uniform in
/// `-5.0..5.0`) and a string column `target` holding `"1"` or `"0"`. Each
/// label is drawn as `"1"` with probability
/// `1 / (1 + exp(-(1.5 * feature1 - 2.0 * feature2)))`. Values differ on every
/// call because the generator returned by `rand::rng()` is used.
///
/// # Errors
///
/// Propagates any error returned by `OptimizedDataFrame::add_column`.
#[cfg(feature = "optimized")]
// The pandrs `Result` alias is returned as-is, so the lint is silenced here
// rather than re-wrapping a library error type in an example.
#[allow(clippy::result_large_err)]
fn create_classification_data() -> Result<OptimizedDataFrame> {
    let mut rng = rand::rng();
    let n = 100;

    let feature1: Vec<f64> = (0..n).map(|_| rng.random_range(-5.0..5.0)).collect();
    let feature2: Vec<f64> = (0..n).map(|_| rng.random_range(-5.0..5.0)).collect();

    let target: Vec<String> = feature1
        .iter()
        .zip(feature2.iter())
        .map(|(x1, x2)| {
            // Logistic model: squash the linear score into a probability and
            // sample the label from it.
            let z = 1.5 * x1 - 2.0 * x2;
            let p = 1.0 / (1.0 + (-z).exp());
            let label = if rng.random::<f64>() < p { "1" } else { "0" };
            label.to_string()
        })
        .collect();

    // Insert the columns in a fixed order; each column carries the same name
    // it is registered under in the frame.
    let mut df = OptimizedDataFrame::new();
    for (name, values) in [("feature1", feature1), ("feature2", feature2)] {
        df.add_column(name, Column::Float64(Float64Column::with_name(values, name)))?;
    }
    df.add_column(
        "target",
        Column::String(StringColumn::with_name(target, "target")),
    )?;

    Ok(df)
}