// roma_lib 0.1.1 - Docs.rs

use crate::observer::traits::AlgorithmObserver;
use crate::observer::AlgorithmEvent;
use crate::utils::chart::{ChartBuilder, Series};
use std::path::PathBuf;
use std::time::{SystemTime, UNIX_EPOCH};

/// Sampling stride for chart data: once a first snapshot has been recorded,
/// later state updates are kept only when their iteration number is a multiple
/// of this value.
const ITERATIONS_BETWEEN_CHART_UPDATES: usize = 15;

/// Observer that generates charts showing algorithm progress.
///
/// Snapshots of the run are accumulated in memory while the algorithm executes
/// and turned into SVG charts plus a `metrics.json` file when the run ends.
#[derive(Debug, Clone)]
pub struct ChartObserver {
    /// Label returned by the observer's `name()` accessor.
    name: String,
    /// Root directory configured at construction time.
    base_output_path: PathBuf,
    /// Directory used for the current run; `None` until a run has started.
    run_output_path: Option<PathBuf>,
    /// When `false`, output goes straight into `base_output_path`.
    use_run_subdirectory: bool,

    // Data collection: the three vectors are pushed to together, so index `i`
    // in each of them describes the same recorded snapshot.
    /// Iteration number of each recorded snapshot.
    generations: Vec<usize>,
    /// Evaluation count of each recorded snapshot.
    evaluations: Vec<usize>,
    /// Best fitness reported by each recorded snapshot.
    best_fitness_history: Vec<f64>,
    /// Sequence id of the most recently recorded snapshot, if any.
    last_snapshot_seq: Option<u64>,

    // Configuration
    /// Chart width handed to the chart builder.
    chart_width: u32,
    /// Chart height handed to the chart builder.
    chart_height: u32,
}

impl ChartObserver {
    /// Creates a new ChartObserver
    ///
    /// The observer creates a structured path per run using this format:
    /// `<base>/<algorithm_slug>/run_<timestamp_ms>_<pid>/`.
    ///
    /// # Arguments
    /// * `base_output_path` - Root directory where run folders will be created
    pub fn new(base_output_path: PathBuf) -> Self {
        ChartObserver {
            name: "ChartObserver".to_string(),
            base_output_path,
            run_output_path: None,
            use_run_subdirectory: true,
            generations: Vec::new(),
            evaluations: Vec::new(),
            best_fitness_history: Vec::new(),
            last_snapshot_seq: None,
            chart_width: 1200,
            chart_height: 800,
        }
    }

    /// Builds a `ChartObserver` rooted at the `charts` folder inside the
    /// crate's default observers output directory.
    pub fn new_default() -> Self {
        let charts_root = crate::observer::default_observers_output_path().join("charts");
        Self::new(charts_root)
    }

    /// Turns off the per-run subdirectory layout.
    ///
    /// With flat output, files are written directly into the base directory
    /// instead of a fresh `run_*` folder.
    pub fn with_flat_output(self) -> Self {
        let mut observer = self;
        observer.use_run_subdirectory = false;
        observer
    }

    /// Overrides the width and height used for every generated chart.
    pub fn with_dimensions(self, width: u32, height: u32) -> Self {
        let mut observer = self;
        observer.chart_width = width;
        observer.chart_height = height;
        observer
    }

    /// Converts an arbitrary label into a lowercase, filesystem-friendly slug.
    ///
    /// ASCII letters and digits are kept (lowercased); every other character
    /// acts as a separator. Runs of separators collapse into a single `_`, and
    /// no `_` is left at either end. Returns `"algorithm"` when nothing usable
    /// remains.
    fn sanitize_folder_component(raw: &str) -> String {
        // Splitting on separators and dropping empty pieces collapses repeated
        // separators and trims the edges in one pass.
        let slug = raw
            .split(|ch: char| !ch.is_ascii_alphanumeric())
            .filter(|word| !word.is_empty())
            .map(str::to_ascii_lowercase)
            .collect::<Vec<String>>()
            .join("_");

        if slug.is_empty() {
            String::from("algorithm")
        } else {
            slug
        }
    }

    /// Computes the directory that should hold the output of one run.
    ///
    /// With run subdirectories enabled the result is
    /// `<base>/<sanitized algorithm name>/run_<timestamp_ms>_<pid>`; otherwise
    /// it is simply the base directory. Nothing is created on disk here.
    fn build_run_output_path(&self, algorithm_name: &str) -> PathBuf {
        if self.use_run_subdirectory {
            // A clock set before the Unix epoch falls back to a zero timestamp.
            let millis_since_epoch = match SystemTime::now().duration_since(UNIX_EPOCH) {
                Ok(elapsed) => elapsed.as_millis(),
                Err(_) => 0,
            };
            let run_folder = format!("run_{}_{}", millis_since_epoch, std::process::id());
            let slug = Self::sanitize_folder_component(algorithm_name);

            self.base_output_path.join(slug).join(run_folder)
        } else {
            self.base_output_path.clone()
        }
    }

    /// Returns the directory output files should be written to: the current
    /// run directory when one has been prepared, otherwise the base directory.
    fn resolve_output_path(&self) -> PathBuf {
        match &self.run_output_path {
            Some(run_dir) => run_dir.clone(),
            None => self.base_output_path.clone(),
        }
    }

    /// Chooses the output directory for a new run and creates it on disk.
    ///
    /// Directory creation is best-effort: a failure is reported on stderr but
    /// the path is still remembered, so later writes surface their own errors
    /// instead of this method aborting the run.
    fn prepare_output_directory(&mut self, algorithm_name: &str) {
        let output_path = self.build_run_output_path(algorithm_name);

        if let Err(e) = std::fs::create_dir_all(&output_path) {
            eprintln!(
                "Error creating chart output directory '{}': {}",
                output_path.display(),
                e
            );
        }

        self.run_output_path = Some(output_path);
    }

    /// Starts a chart builder preloaded with the settings every chart shares:
    /// the given title and axis labels, the configured dimensions, and an
    /// x axis that starts at zero and is clamped to non-negative values.
    fn base_chart_builder(&self, title: &str, x_label: &str, y_label: &str) -> ChartBuilder {
        let labelled = ChartBuilder::new()
            .title(title)
            .x_label(x_label)
            .y_label(y_label);
        let sized = labelled.size(self.chart_width, self.chart_height);

        sized.x_min(0.0).x_clamp_non_negative()
    }

    /// Upper bound on the number of points drawn per series: twice the chart
    /// width, kept within the range 240..=1800.
    fn max_render_points(&self) -> usize {
        let width = self.chart_width as usize;
        let doubled = width.saturating_mul(2);
        doubled.max(240).min(1800)
    }

    /// Reduces a series to at most `max_render_points()` entries by picking
    /// evenly spaced indices; the first and last input points are always kept.
    ///
    /// Inputs that already fit within the budget are returned unchanged.
    fn downsample_points(&self, points: &[(f64, f64)]) -> Vec<(f64, f64)> {
        let budget = self.max_render_points().max(2);
        let total = points.len();
        if total <= budget {
            return points.to_vec();
        }

        let final_index = total - 1;
        // Fractional distance between consecutive picks in the source slice.
        let stride = final_index as f64 / (budget - 1) as f64;

        let mut picked: Vec<(f64, f64)> = (0..budget)
            .map(|slot| {
                let source = ((slot as f64 * stride).floor() as usize).min(final_index);
                points[source]
            })
            .collect();

        // Floating-point rounding could leave the final pick just short of the
        // end, so pin it to the true last point.
        if let Some(tail) = picked.last_mut() {
            *tail = points[final_index];
        }

        picked
    }

    /// Collapses snapshots that share a generation into a single entry.
    ///
    /// Returns two parallel vectors (generations in ascending order and the
    /// fitness kept for each). When a generation appears more than once, the
    /// largest recorded fitness wins.
    // NOTE(review): "best" is taken as the maximum here, which presumes a
    // maximization problem — confirm for minimizing algorithms.
    fn consolidate_best_by_generation(&self) -> (Vec<usize>, Vec<f64>) {
        use std::collections::BTreeMap;

        // A BTreeMap keeps the generations sorted for the final unzip.
        let mut best_per_generation: BTreeMap<usize, f64> = BTreeMap::new();

        for (&generation, &fitness) in self.generations.iter().zip(&self.best_fitness_history) {
            let slot = best_per_generation.entry(generation).or_insert(fitness);
            *slot = slot.max(fitness);
        }

        best_per_generation.into_iter().unzip()
    }

    /// Renders `convergence.svg`: best fitness plotted against generation.
    ///
    /// Does nothing (and succeeds) when no snapshot has been recorded. The
    /// y axis starts at the lowest fitness in the consolidated data.
    ///
    /// # Errors
    /// Propagates any error reported while saving the chart.
    fn generate_convergence_chart(&self) -> Result<(), Box<dyn std::error::Error>> {
        if self.generations.is_empty() {
            return Ok(());
        }

        let (generations, best_fitness) = self.consolidate_best_by_generation();

        // Build the plot points and track the lowest fitness in one pass.
        let mut lowest_fitness = f64::INFINITY;
        let mut points: Vec<(f64, f64)> = Vec::with_capacity(generations.len());
        for (&generation, &fitness) in generations.iter().zip(&best_fitness) {
            lowest_fitness = lowest_fitness.min(fitness);
            points.push((generation as f64, fitness));
        }

        let series = Series::new("Best", self.downsample_points(&points)).with_color("#2563eb");
        let target = self.resolve_output_path().join("convergence.svg");

        self.base_chart_builder("Convergence", "Generation", "Fitness")
            .y_min(lowest_fitness)
            .add_series(series)
            .build()
            .save(target)?;

        Ok(())
    }

    /// Renders `best_by_evaluations.svg`: best fitness plotted against the
    /// number of evaluations.
    ///
    /// Only the "Best" series is drawn. Does nothing (and succeeds) when no
    /// evaluation counts or fitness values have been recorded. The y axis
    /// starts at the lowest fitness among the plotted (downsampled) points.
    ///
    /// # Errors
    /// Propagates any error reported while saving the chart.
    fn generate_best_by_evaluations_chart(&self) -> Result<(), Box<dyn std::error::Error>> {
        let nothing_recorded = self.evaluations.is_empty() || self.best_fitness_history.is_empty();
        if nothing_recorded {
            return Ok(());
        }

        let full_series: Vec<(f64, f64)> = self
            .best_by_evaluations_points()
            .into_iter()
            .map(|(count, fitness)| (count as f64, fitness))
            .collect();
        let plotted = self.downsample_points(&full_series);

        let mut lowest_fitness = f64::INFINITY;
        for &(_, fitness) in &plotted {
            lowest_fitness = lowest_fitness.min(fitness);
        }

        let series = Series::new("Best", plotted).with_color("#2563eb");
        let target = self.resolve_output_path().join("best_by_evaluations.svg");

        self.base_chart_builder("Best Fitness by Evaluations", "Evaluations", "Best Fitness")
            .y_min(lowest_fitness)
            .add_series(series)
            .build()
            .save(target)?;

        Ok(())
    }

    /// Pairs each distinct evaluation count with the fitness kept for it,
    /// sorted by evaluation count.
    ///
    /// When an evaluation count appears more than once, the largest recorded
    /// fitness wins.
    // NOTE(review): "best" is taken as the maximum here, which presumes a
    // maximization problem — confirm for minimizing algorithms.
    fn best_by_evaluations_points(&self) -> Vec<(usize, f64)> {
        use std::collections::BTreeMap;

        // A BTreeMap keeps the evaluation counts sorted for the final collect.
        let mut best_per_count: BTreeMap<usize, f64> = BTreeMap::new();

        for (&count, &fitness) in self.evaluations.iter().zip(&self.best_fitness_history) {
            let slot = best_per_count.entry(count).or_insert(fitness);
            *slot = slot.max(fitness);
        }

        best_per_count.into_iter().collect()
    }

    /// Writes `metrics.json`, containing the same (downsampled) data series
    /// that back the two charts.
    ///
    /// The document has two arrays: `convergence`, with `generation`/`best`
    /// objects, and `best_by_evaluations`, with `evaluations`/`best` objects.
    /// Fitness values are printed with six decimals; non-finite values (NaN,
    /// ±infinity) are written as `null` so the file stays valid JSON.
    ///
    /// Does nothing (and succeeds) when no snapshot has been recorded.
    ///
    /// # Errors
    /// Propagates any error from writing the file.
    fn generate_metrics_json(&self) -> Result<(), Box<dyn std::error::Error>> {
        /// Appends one `{"<x_key>":<x>,"best":<y>}` line per point, comma
        /// separated, to `json`.
        fn push_points(json: &mut String, x_key: &str, points: &[(f64, f64)]) -> std::fmt::Result {
            use std::fmt::Write as _;

            for (index, (x, best)) in points.iter().enumerate() {
                let comma = if index + 1 == points.len() { "" } else { "," };

                // The x value originated from a usize, so print it as an integer.
                write!(json, "    {{\"{}\":{},\"best\":", x_key, *x as usize)?;
                if best.is_finite() {
                    write!(json, "{:.6}", best)?;
                } else {
                    // JSON has no literal for NaN or infinity.
                    json.push_str("null");
                }
                writeln!(json, "}}{}", comma)?;
            }

            Ok(())
        }

        if self.generations.is_empty() {
            return Ok(());
        }

        let output_file = self.resolve_output_path().join("metrics.json");

        let (generations, best_fitness) = self.consolidate_best_by_generation();
        let convergence_points: Vec<(f64, f64)> = generations
            .iter()
            .zip(&best_fitness)
            .map(|(&generation, &best)| (generation as f64, best))
            .collect();
        let convergence_points = self.downsample_points(&convergence_points);

        let best_by_evaluations_points: Vec<(f64, f64)> = self
            .best_by_evaluations_points()
            .into_iter()
            .map(|(evaluations, best)| (evaluations as f64, best))
            .collect();
        let best_by_evaluations_points = self.downsample_points(&best_by_evaluations_points);

        let mut json = String::from("{\n  \"convergence\": [\n");
        push_points(&mut json, "generation", &convergence_points)?;
        json.push_str("  ],\n  \"best_by_evaluations\": [\n");
        push_points(&mut json, "evaluations", &best_by_evaluations_points)?;
        json.push_str("  ]\n}\n");

        std::fs::write(output_file, json)?;
        Ok(())
    }
}

impl ChartObserver {
    /// Renders both charts and the metrics JSON, reporting each failure on
    /// stderr.
    ///
    /// Failures are logged rather than propagated so that one broken artifact
    /// does not stop the remaining ones from being attempted.
    fn render_outputs_reporting_errors(&self) {
        if let Err(e) = self.generate_convergence_chart() {
            eprintln!("Error generating convergence chart: {}", e);
        }

        if let Err(e) = self.generate_best_by_evaluations_chart() {
            eprintln!("Error generating best-by-evaluations chart: {}", e);
        }

        if let Err(e) = self.generate_metrics_json() {
            eprintln!("Error generating metrics JSON: {}", e);
        }
    }
}

impl<T, Q> AlgorithmObserver<T, Q> for ChartObserver
where
    T: Clone + Send + 'static,
    Q: Clone + Send + 'static,
{
    /// Reacts to algorithm lifecycle events.
    ///
    /// * `Start` - prepares the output directory and discards data left over
    ///   from any previous run.
    /// * `ExecutionStateUpdated` - records a snapshot (iteration, evaluations,
    ///   best fitness) subject to the sampling rule described inline.
    /// * `End` / `Failed` - renders the charts and metrics from whatever has
    ///   been recorded so far.
    ///
    /// All other events are ignored.
    fn update(&mut self, event: &AlgorithmEvent<T, Q>) {
        match event {
            AlgorithmEvent::Start { algorithm_name } => {
                println!("  ChartObserver: Monitoring algorithm '{}'", algorithm_name);
                self.prepare_output_directory(algorithm_name);
                println!(
                    "   Charts will be saved to: {}",
                    self.resolve_output_path().display()
                );

                self.generations.clear();
                self.evaluations.clear();
                self.best_fitness_history.clear();
                self.last_snapshot_seq = None;
            }
            AlgorithmEvent::ExecutionStateUpdated { state } => {
                // The first snapshot of a run is always recorded. After that, a
                // snapshot is kept only if it is newer than the last recorded
                // one and falls on the sampling stride.
                if let Some(last_seq) = self.last_snapshot_seq {
                    let is_stale = state.seq_id <= last_seq;
                    let off_stride = state.iteration % ITERATIONS_BETWEEN_CHART_UPDATES != 0;
                    if is_stale || off_stride {
                        return;
                    }
                }

                self.last_snapshot_seq = Some(state.seq_id);
                self.generations.push(state.iteration);
                self.evaluations.push(state.evaluations);
                self.best_fitness_history.push(state.best_fitness);
            }
            AlgorithmEvent::End { .. } => {
                println!("  Generating charts...");
                self.render_outputs_reporting_errors();
                println!(
                    "  Charts saved to: {}",
                    self.resolve_output_path().display()
                );
            }
            AlgorithmEvent::Failed { .. } => {
                println!("  Generating charts from partial run after failure...");
                self.render_outputs_reporting_errors();
                println!(
                    "  Partial charts saved to: {}",
                    self.resolve_output_path().display()
                );
            }
            _ => {}
        }
    }

    /// Regenerates all output files one last time, ignoring any errors.
    // NOTE(review): errors are deliberately not reported here, presumably
    // because `End`/`Failed` already reported them — confirm against the
    // trait's contract for `finalize`.
    fn finalize(&mut self) {
        self.generate_convergence_chart().ok();
        self.generate_best_by_evaluations_chart().ok();
        self.generate_metrics_json().ok();
    }

    /// Returns the observer's display name.
    fn name(&self) -> &str {
        &self.name
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::observer::ObserverState;

    /// Builds a scratch directory path under the system temp dir, made unique
    /// by appending the current time in nanoseconds to `tag`.
    fn unique_temp_base(tag: &str) -> PathBuf {
        let nanos = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map(|d| d.as_nanos())
            .unwrap_or(0);
        std::env::temp_dir().join(format!("{}_{}", tag, nanos))
    }

    /// Best-effort removal of a test's scratch directory; a failure to clean up
    /// must not fail the test itself.
    fn cleanup(base: &PathBuf) {
        let _ = std::fs::remove_dir_all(base);
    }

    /// A `Start` event must create `<base>/<slug>/run_*` on disk.
    #[test]
    fn creates_structured_run_directory_on_start() {
        let base = unique_temp_base("roma_chart_observer_test");

        let mut observer = ChartObserver::new(base.clone());
        observer.update(&AlgorithmEvent::<bool>::Start {
            algorithm_name: "My GA/Experiment #1".to_string(),
        });

        let run_path = observer
            .run_output_path
            .clone()
            .expect("Run output path should be configured after Start event");

        assert!(run_path.starts_with(&base));
        assert!(run_path.exists());

        let algorithm_folder = run_path
            .parent()
            .and_then(|p| p.file_name())
            .and_then(|n| n.to_str())
            .expect("Algorithm folder should exist");
        assert_eq!(algorithm_folder, "my_ga_experiment_1");

        cleanup(&base);
    }

    /// Both SVG charts must be written into the run directory on `End`.
    #[test]
    fn writes_chart_files_inside_run_directory() {
        let base = unique_temp_base("roma_chart_observer_files_test");

        let mut observer = ChartObserver::new(base.clone());
        observer.update(&AlgorithmEvent::<bool>::Start {
            algorithm_name: "NSGA-II".to_string(),
        });
        observer.update(&AlgorithmEvent::<bool>::ExecutionStateUpdated {
            state: ObserverState::new(0, 1, 10, 1.0, 0.8, 0.5, "selected=1/2".to_string()),
        });
        observer.update(&AlgorithmEvent::<bool>::End {
            total_generations: 1,
            total_evaluations: 10,
            termination_reason: None,
        });

        let run_path = observer
            .run_output_path
            .clone()
            .expect("Run output path should exist");

        assert!(run_path.join("convergence.svg").exists());
        assert!(run_path.join("best_by_evaluations.svg").exists());

        cleanup(&base);
    }

    /// `metrics.json` must exist and contain only the best-fitness series.
    #[test]
    fn writes_metrics_json_file_inside_run_directory() {
        let base = unique_temp_base("roma_chart_observer_metrics_test");

        let mut observer = ChartObserver::new(base.clone());
        observer.update(&AlgorithmEvent::<bool>::Start {
            algorithm_name: "NSGA-II".to_string(),
        });
        observer.update(&AlgorithmEvent::<bool>::ExecutionStateUpdated {
            state: ObserverState::new(0, 1, 10, 1.0, 0.8, 0.5, "selected=1/2".to_string()),
        });
        observer.update(&AlgorithmEvent::<bool>::ExecutionStateUpdated {
            state: ObserverState::new(1, 2, 20, 1.3, 1.0, 0.7, "selected=2/2".to_string()),
        });
        observer.update(&AlgorithmEvent::<bool>::End {
            total_generations: 2,
            total_evaluations: 20,
            termination_reason: None,
        });

        let run_path = observer
            .run_output_path
            .clone()
            .expect("Run output path should exist");

        let metrics_path = run_path.join("metrics.json");
        assert!(metrics_path.exists());

        let contents = std::fs::read_to_string(metrics_path).expect("metrics file should exist");
        assert!(contents.contains("\"convergence\""));
        assert!(contents.contains("\"best_by_evaluations\""));
        assert!(!contents.contains("\"average\""));
        assert!(!contents.contains("\"worst\""));

        cleanup(&base);
    }

    /// The convergence chart must not mention average or worst series.
    #[test]
    fn convergence_chart_excludes_average_and_worst_series() {
        let base = unique_temp_base("roma_chart_observer_best_only_test");

        let mut observer = ChartObserver::new(base.clone());
        observer.update(&AlgorithmEvent::<bool>::Start {
            algorithm_name: "NSGA-II".to_string(),
        });

        for generation in 0..20 {
            observer.update(&AlgorithmEvent::<bool>::ExecutionStateUpdated {
                state: ObserverState::new(
                    generation as u64,
                    generation,
                    (generation + 1) * 10,
                    generation as f64,
                    generation as f64 * 0.7,
                    generation as f64 * 0.2,
                    "selected=1/2".to_string(),
                ),
            });
        }

        observer.update(&AlgorithmEvent::<bool>::End {
            total_generations: 20,
            total_evaluations: 200,
            termination_reason: None,
        });

        let run_path = observer
            .run_output_path
            .clone()
            .expect("Run output path should exist");
        let convergence_svg =
            std::fs::read_to_string(run_path.join("convergence.svg")).expect("svg should exist");

        assert!(!convergence_svg.contains("Average"));
        assert!(!convergence_svg.contains("Worst"));

        cleanup(&base);
    }

    /// The rendered convergence chart must not exceed the point budget.
    // NOTE(review): `update` only records every
    // `ITERATIONS_BETWEEN_CHART_UPDATES`-th iteration, so this loop records
    // far fewer points than `max_points` and the downsampling branch is likely
    // never reached — consider feeding iterations on the sampling stride.
    #[test]
    fn downsamples_convergence_chart_points() {
        let base = unique_temp_base("roma_chart_observer_downsample_test");

        let mut observer = ChartObserver::new(base.clone()).with_dimensions(300, 200);
        let max_points = observer.max_render_points();

        observer.update(&AlgorithmEvent::<bool>::Start {
            algorithm_name: "HC".to_string(),
        });

        for generation in 0..(max_points + 1200) {
            observer.update(&AlgorithmEvent::<bool>::ExecutionStateUpdated {
                state: ObserverState::new(
                    generation as u64,
                    generation,
                    generation + 1,
                    generation as f64,
                    generation as f64,
                    generation as f64,
                    "selected=1/2".to_string(),
                ),
            });
        }

        observer.update(&AlgorithmEvent::<bool>::End {
            total_generations: max_points + 1200,
            total_evaluations: max_points + 1200,
            termination_reason: None,
        });

        let run_path = observer
            .run_output_path
            .clone()
            .expect("Run output path should exist");
        let convergence_svg =
            std::fs::read_to_string(run_path.join("convergence.svg")).expect("svg should exist");
        let circle_count = convergence_svg.matches("<circle").count();

        assert!(
            circle_count <= max_points,
            "expected at most {} circles, got {}",
            max_points,
            circle_count
        );

        cleanup(&base);
    }
}