use crate::ml_metrics::ModelMetrics;
use serde::Serialize;
use std::collections::HashMap;
use rayon::prelude::*;
use rand::Rng;
use smartcore::{
    ensemble::random_forest_classifier::{
        RandomForestClassifier, RandomForestClassifierParameters,
    },
    linalg::basic::matrix::DenseMatrix,
};
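/// Direction of a model's recent performance, as observed by the monitoring layer.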
#[derive(Debug, Clone, PartialEq)]
pub enum PerformanceTrend {
    Improving,
    Stable,
    Degrading,
}
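/// Tunable hyperparameters explored during optimization. `learning_rate` only
/// feeds the synthetic surrogate objective; the random forest itself consumes
/// `max_depth`, `min_samples_split`, and `n_trees`.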
#[derive(Debug, Clone, Serialize)]
pub struct ModelParameters {
    pub max_depth: u16,
    pub min_samples_split: usize,
    pub learning_rate: f64,
    pub n_trees: u16,
}
impl Default for ModelParameters {
    fn default() -> Self {
        Self {
            max_depth: 10,
            min_samples_split: 2,
            learning_rate: 0.1,
            n_trees: 100,
        }
    }
}
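/// A single accepted step in the optimization history: the candidate parameters,
/// the metrics recorded for them, and when the step was taken.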
#[derive(Debug, Serialize)]
pub struct OptimizationStep {
    pub params: ModelParameters,
    pub metrics: ModelMetrics,
    pub timestamp: chrono::DateTime<chrono::Utc>,
}
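/// Searches for better hyperparameters when model performance degrades, via
/// random search (`optimize`) or an exhaustive, rayon-parallel grid search
/// (`grid_search`).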
pub struct ModelOptimizer {
    config: OptimizationConfig,
    #[allow(dead_code)]
    current_best: Option<OptimizationResult>,
    optimization_history: Vec<OptimizationStep>,
}
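/// Axes of the hyperparameter grid; the search space is their Cartesian product.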
#[derive(Debug, Clone)]
pub struct GridSearchConfig {
    pub learning_rates: Vec<f64>,
    pub max_depths: Vec<u16>,
    pub min_samples_splits: Vec<usize>,
    pub n_trees: Vec<u16>,
}
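/// Controls for the random search: the learning-rate sampling range, the
/// iteration budget, the early-stopping patience window, and the validation split.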
#[derive(Debug, Clone)]
pub struct OptimizationConfig {
    pub learning_rate_range: (f64, f64),
    pub max_iterations: usize,
    pub early_stopping_patience: usize,
    pub validation_split: f64,
}
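/// Outcome of a completed optimization run, including the full step history.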
#[derive(Debug, Serialize)]
pub struct OptimizationResult {
    pub best_params: ModelParameters,
    pub performance_improvement: f64,
    pub training_time: std::time::Duration,
    pub optimization_history: Vec<OptimizationStep>,
}
impl ModelOptimizer {
    pub fn new(config: OptimizationConfig) -> Self {
        Self {
            config,
            current_best: None,
            optimization_history: Vec::new(),
        }
    }
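    /// Decides whether re-optimization is warranted. Runs a parameter search when
    /// the trend is degrading, or when the F1 score has dropped below 0.7 regardless
    /// of trend; otherwise returns `None`.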
    pub fn optimize(
        &mut self,
        current_metrics: &ModelMetrics,
        trend: &PerformanceTrend,
    ) -> Option<ModelParameters> {
        match trend {
            PerformanceTrend::Degrading => Some(self.find_optimal_parameters(current_metrics)),
            _ if current_metrics.f1_score < 0.7 => Some(self.find_optimal_parameters(current_metrics)),
            _ => None,
        }
    }
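    /// Random search: samples candidate parameters for up to `max_iterations`
    /// rounds, keeping the best scorer and recording each improvement in the
    /// optimization history.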
    fn find_optimal_parameters(&mut self, baseline_metrics: &ModelMetrics) -> ModelParameters {
        let mut best_params = self.get_default_parameters();
        let mut best_score = baseline_metrics.f1_score;
        for i in 0..self.config.max_iterations {
            let candidate_params = self.generate_candidate_parameters();
            let performance = self.evaluate_parameters(&candidate_params);
            if performance > best_score {
                best_score = performance;
                best_params = candidate_params;
                // Only improving steps are recorded, so the early-stopping window
                // below measures stagnation across accepted improvements.
                self.optimization_history.push(OptimizationStep {
                    params: best_params.clone(),
                    metrics: ModelMetrics {
                        model_id: format!("opt_iter_{}", i),
                        timestamp: chrono::Utc::now(),
                        accuracy: performance,
                        precision: performance,
                        recall: performance,
                        f1_score: performance,
                        confusion_matrix: baseline_metrics.confusion_matrix.clone(),
                        feature_importance: HashMap::new(),
                        training_duration: std::time::Duration::from_secs(0),
                    },
                    timestamp: chrono::Utc::now(),
                });
            }
            if self.should_stop_early() {
                break;
            }
        }
        best_params
    }
    fn get_default_parameters(&self) -> ModelParameters {
        ModelParameters::default()
    }
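    /// Draws one candidate uniformly at random: the learning rate from the configured
    /// range, the remaining hyperparameters from fixed intervals.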
    fn generate_candidate_parameters(&self) -> ModelParameters {
        let mut rng = rand::thread_rng();
        ModelParameters {
            learning_rate: rng
                .gen_range(self.config.learning_rate_range.0..self.config.learning_rate_range.1),
            max_depth: rng.gen_range(5u16..20),
            min_samples_split: rng.gen_range(2..10),
            n_trees: rng.gen_range(50u16..200),
        }
    }
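    /// Synthetic surrogate objective used in place of real training: a Gaussian-shaped
    /// score that peaks at 0.7 near `learning_rate` ≈ 0.01 and `max_depth` ≈ 10.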
    fn evaluate_parameters(&self, params: &ModelParameters) -> f64 {
        let base_score = 0.7;
        // Penalize distance from the sweet spots with Gaussian falloff.
        let lr_factor = (-((params.learning_rate - 0.01).powi(2)) / 0.001).exp();
        let depth_factor = (-((params.max_depth as f64 - 10.0).powi(2)) / 100.0).exp();
        base_score * lr_factor * depth_factor
    }
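    /// Stops the search once the last `early_stopping_patience` recorded scores
    /// span less than 0.001, i.e. improvements have plateaued.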
    fn should_stop_early(&self) -> bool {
        if self.optimization_history.len() < self.config.early_stopping_patience {
            return false;
        }
        let recent_scores: Vec<f64> = self
            .optimization_history
            .iter()
            .rev()
            .take(self.config.early_stopping_patience)
            .map(|step| step.metrics.f1_score)
            .collect();
        let max_score = recent_scores.iter().fold(f64::NEG_INFINITY, |a, &b| a.max(b));
        let min_score = recent_scores.iter().fold(f64::INFINITY, |a, &b| a.min(b));
        max_score - min_score < 0.001
    }
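    /// Exhaustive grid search: evaluates every parameter combination with k-fold
    /// cross-validation, scoring combinations in parallel via rayon, and returns
    /// the best-scoring parameters.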
    pub fn grid_search(&mut self, validation_data: &ValidationData) -> ModelParameters {
        let grid_config = self.create_grid_config();
        let mut best_params = self.get_default_parameters();
        let mut best_score = 0.0;
        // Score every grid point in parallel; cross_validate only reads &self.
        let results: Vec<(ModelParameters, f64)> = grid_config
            .parameter_combinations()
            .par_iter()
            .map(|params| {
                let score = self.cross_validate(params, validation_data);
                (params.clone(), score)
            })
            .collect();
        // Sequential reduction keeps the winner deterministic.
        for (params, score) in results {
            if score > best_score {
                best_score = score;
                best_params = params;
            }
        }
        best_params
    }
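    /// Contiguous (unshuffled) k-fold cross-validation: trains on k−1 folds, tests
    /// on the held-out fold, and averages the per-fold F1 scores.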
    fn cross_validate(&self, params: &ModelParameters, data: &ValidationData) -> f64 {
        let k_folds = 5;
        // Integer division: up to k_folds - 1 trailing samples fall outside every fold.
        let fold_size = data.features.len() / k_folds;
        let mut scores = Vec::with_capacity(k_folds);
        for k in 0..k_folds {
            let start_idx = k * fold_size;
            let end_idx = start_idx + fold_size;
            let test_features: Vec<Vec<f64>> = data.features[start_idx..end_idx].to_vec();
            let test_labels: Vec<bool> = data.labels[start_idx..end_idx].to_vec();
            // Everything outside the held-out fold becomes the training set.
            let train_features: Vec<Vec<f64>> = data
                .features
                .iter()
                .enumerate()
                .filter(|(i, _)| *i < start_idx || *i >= end_idx)
                .map(|(_, f)| f.clone())
                .collect();
            let train_labels: Vec<bool> = data
                .labels
                .iter()
                .enumerate()
                .filter(|(i, _)| *i < start_idx || *i >= end_idx)
                .map(|(_, l)| *l)
                .collect();
            let score = self.train_and_evaluate(
                params,
                &train_features,
                &train_labels,
                &test_features,
                &test_labels,
            );
            scores.push(score);
        }
        scores.iter().sum::<f64>() / scores.len() as f64
    }
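    /// Fits a smartcore random forest on the training fold and returns its F1 score
    /// on the test fold. Panics if fitting or prediction fails.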
    fn train_and_evaluate(
        &self,
        params: &ModelParameters,
        train_features: &[Vec<f64>],
        train_labels: &[bool],
        test_features: &[Vec<f64>],
        test_labels: &[bool],
    ) -> f64 {
        let x = DenseMatrix::from_2d_vec(&train_features.to_vec());
        // smartcore expects integer class labels.
        let y: Vec<i32> = train_labels.iter().map(|&b| if b { 1 } else { 0 }).collect();
        let model = RandomForestClassifier::fit(
            &x,
            &y,
            RandomForestClassifierParameters {
                n_trees: params.n_trees,
                max_depth: Some(params.max_depth),
                min_samples_leaf: 5,
                min_samples_split: params.min_samples_split,
                ..Default::default()
            },
        )
        .expect("random forest training failed");
        let x_test = DenseMatrix::from_2d_vec(&test_features.to_vec());
        let predictions = model.predict(&x_test).expect("prediction failed");
        let predictions: Vec<f64> = predictions.iter().map(|&p| p as f64).collect();
        self.calculate_f1_score(&predictions, test_labels)
    }
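    /// Computes F1 from thresholded predictions (> 0.5 counts as positive),
    /// returning 0.0 when precision and recall are both undefined.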
    fn calculate_f1_score(&self, predictions: &[f64], actual: &[bool]) -> f64 {
        let mut tp = 0;
        let mut fp = 0;
        let mut fn_count = 0;
        for (pred, act) in predictions.iter().zip(actual.iter()) {
            match (*pred > 0.5, *act) {
                (true, true) => tp += 1,
                (true, false) => fp += 1,
                (false, true) => fn_count += 1,
                // True negatives do not enter precision, recall, or F1.
                _ => {}
            }
        }
        let precision = if tp + fp == 0 { 0.0 } else { tp as f64 / (tp + fp) as f64 };
        let recall = if tp + fn_count == 0 { 0.0 } else { tp as f64 / (tp + fn_count) as f64 };
        if precision + recall == 0.0 {
            0.0
        } else {
            2.0 * (precision * recall) / (precision + recall)
        }
    }
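    /// Fixed default grid: three values per axis, 81 combinations in total.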
    fn create_grid_config(&self) -> GridSearchConfig {
        GridSearchConfig {
            learning_rates: vec![0.01, 0.1, 0.5],
            max_depths: vec![5, 10, 15],
            min_samples_splits: vec![2, 5, 10],
            n_trees: vec![50, 100, 200],
        }
    }
}
impl GridSearchConfig {
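    /// Expands the grid into the full Cartesian product of its axes.
    ///
    /// A minimal sketch (`ignore`d because the doctest needs this crate in scope):
    ///
    /// ```ignore
    /// let grid = GridSearchConfig {
    ///     learning_rates: vec![0.01, 0.1],
    ///     max_depths: vec![5, 10],
    ///     min_samples_splits: vec![2],
    ///     n_trees: vec![50],
    /// };
    /// // 2 learning rates × 2 depths × 1 split × 1 tree count = 4 combinations.
    /// assert_eq!(grid.parameter_combinations().len(), 4);
    /// ```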
    pub fn parameter_combinations(&self) -> Vec<ModelParameters> {
        let mut combinations = Vec::new();
        for &lr in &self.learning_rates {
            for &md in &self.max_depths {
                for &ms in &self.min_samples_splits {
                    for &nt in &self.n_trees {
                        combinations.push(ModelParameters {
                            learning_rate: lr,
                            max_depth: md,
                            min_samples_split: ms,
                            n_trees: nt,
                        });
                    }
                }
            }
        }
        combinations
    }
}
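/// Feature rows and matching boolean labels used for cross-validation;
/// `features` and `labels` are assumed to be the same length.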
#[derive(Debug)]
pub struct ValidationData {
    pub features: Vec<Vec<f64>>,
    pub labels: Vec<bool>,
}