// swarm-engine-ui 0.1.6

//! Eval Viewer - 評価シナリオの管理・実行・結果表示
//!
//! swarm-engine-eval のシナリオを可視化し、評価を実行するUIコンポーネント。
//! DashboardLayout を使用した3カラムレイアウト。

use std::cell::Cell;
use std::sync::mpsc;

use eframe::egui;
use egui_cha_ds::{
    CapacityGauge, DashboardLayout, DashboardState, SidebarConfig, Sparkline, Status,
    StatusIndicator, Theme,
};

use swarm_engine_core::agent::{DefaultBatchManagerAgent, ManagerId};
use swarm_engine_eval::prelude::EvalRunner;
use swarm_engine_eval::reporter::EvalReport;
use swarm_engine_eval::scenario::{EvalScenario, ScenarioId, ScenarioRegistry, TimeoutBehavior};
use swarm_engine_llm::{create_llm_invoker, OllamaDecider};

/// Lifecycle state of an evaluation run as tracked by the viewer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum EvalState {
    /// No evaluation in progress and no outcome to report (the default;
    /// also restored whenever a scenario is selected).
    #[default]
    Idle,
    /// A background evaluation has been started and its result is pending.
    Running,
    /// The last evaluation delivered a report.
    Completed,
    /// The last evaluation could not be started, returned an error, or its
    /// worker thread disconnected without sending a result.
    Failed,
}

/// Owned snapshot of the scenario fields shown in the main panel.
///
/// The values are copied out of the registry entry before the layout is
/// built, so the rendering closures do not have to borrow the registry.
#[derive(Debug, Clone)]
struct ScenarioDetail {
    /// Scenario display name (`meta.name`).
    name: String,
    /// Scenario version string (`meta.version`), rendered with a `v` prefix.
    version: String,
    /// Free-form description (`meta.description`); hidden when empty.
    description: String,
    /// Environment type identifier (`environment.env_type`).
    env_type: String,
    /// Sum of `count` over all worker templates.
    total_workers: usize,
    /// Number of worker template entries.
    worker_templates: usize,
    /// Number of manager entries.
    manager_count: usize,
    /// Configured tick limit (`app_config.max_ticks`).
    max_ticks: u64,
    /// Configured tick duration in milliseconds (`app_config.tick_duration_ms`).
    tick_duration_ms: u64,
    /// Number of success conditions.
    success_conditions: usize,
    /// Number of failure conditions.
    failure_conditions: usize,
    /// Human-readable label for the scenario's `on_timeout` behavior.
    timeout_behavior: String,
    /// One `(name, weight, partial)` tuple per milestone; the weight is
    /// displayed as a percentage (`weight * 100`).
    milestones: Vec<(String, f64, bool)>, // (name, weight, partial)
}

/// Eval Viewer component: lists scenarios, runs evaluations in a background
/// thread and displays the resulting report.
pub struct EvalViewer {
    /// Scenario registry (builtin and user scenarios).
    registry: ScenarioRegistry,

    /// ID of the currently selected scenario, if any.
    selected_scenario: Option<ScenarioId>,

    /// Filter text; matched case-insensitively against scenario names and tags.
    tag_filter: String,

    /// Evaluation setting: number of runs per evaluation.
    eval_runs: usize,
    /// Evaluation setting: fixed seed. `None` means the seed is derived from
    /// the system clock when the evaluation starts.
    eval_seed: Option<u64>,

    /// Current evaluation state.
    eval_state: EvalState,

    /// Evaluation result (the actual `EvalReport`).
    eval_result: Option<EvalReport>,

    /// Error message of the last failed evaluation.
    eval_error: Option<String>,

    /// Per-run history fed to the sparkline. Despite the name, the values
    /// stored here are each run's `total_ticks`, not latencies.
    latency_history: Vec<f32>,

    /// Dashboard UI state (sidebar widths, etc.)
    dashboard_state: DashboardState,

    /// Channel receiver for background evaluation results
    result_rx: Option<mpsc::Receiver<Result<EvalReport, String>>>,
}

impl EvalViewer {
    /// Create a viewer with default settings (5 runs, seed 42, nothing selected).
    ///
    /// Scenarios are loaded through `ScenarioRegistry::discover()` (builtin
    /// plus user scenarios from the default paths). If discovery fails, the
    /// viewer falls back to the builtin scenarios only.
    pub fn new() -> Self {
        let registry = match ScenarioRegistry::discover() {
            Ok(discovered) => discovered,
            Err(_) => ScenarioRegistry::with_builtin(),
        };

        // Initial sidebar widths match the sidebar configs used in `show`.
        let dashboard_state = DashboardState::new()
            .with_left_width(220.0)
            .with_right_width(280.0);

        Self {
            registry,
            dashboard_state,
            selected_scenario: None,
            tag_filter: String::new(),
            eval_runs: 5,
            eval_seed: Some(42),
            eval_state: EvalState::Idle,
            eval_result: None,
            eval_error: None,
            latency_history: Vec::new(),
            result_rx: None,
        }
    }

    /// UIを表示
    pub fn show(&mut self, ui: &mut egui::Ui) {
        let theme = Theme::dark();

        // Check for background evaluation results
        self.poll_eval_result();

        // Request repaint while running
        if self.eval_state == EvalState::Running {
            ui.ctx()
                .request_repaint_after(std::time::Duration::from_millis(100));
        }

        // Collect actions to perform after UI rendering
        let action_select_scenario: Cell<Option<ScenarioId>> = Cell::new(None);
        let action_start_eval = Cell::new(false);

        // Extract data for closures (to avoid borrow issues)
        let selected_scenario = self.selected_scenario.clone();
        let tag_filter = self.tag_filter.clone();
        let eval_state = self.eval_state;
        let eval_runs = self.eval_runs;
        let eval_seed = self.eval_seed;
        let eval_result = self.eval_result.clone();
        let eval_error = self.eval_error.clone();
        let latency_history = self.latency_history.clone();

        // Build scenario lists for display
        let builtin_scenarios: Vec<_> = self
            .registry
            .list_builtin()
            .iter()
            .filter(|s| Self::matches_filter_static(&tag_filter, s))
            .map(|s| (s.meta.id.clone(), s.meta.name.clone(), s.meta.tags.clone()))
            .collect();

        let user_scenarios: Vec<_> = self
            .registry
            .list_user()
            .iter()
            .filter(|s| Self::matches_filter_static(&tag_filter, s))
            .map(|s| (s.meta.id.clone(), s.meta.name.clone()))
            .collect();

        // Get selected scenario details
        let scenario_detail = selected_scenario.as_ref().and_then(|id| {
            self.registry.get(id).map(|s| ScenarioDetail {
                name: s.meta.name.clone(),
                version: s.meta.version.clone(),
                description: s.meta.description.clone(),
                env_type: s.environment.env_type.clone(),
                total_workers: s.agents.workers.iter().map(|w| w.count).sum(),
                worker_templates: s.agents.workers.len(),
                manager_count: s.agents.managers.len(),
                max_ticks: s.app_config.max_ticks,
                tick_duration_ms: s.app_config.tick_duration_ms,
                success_conditions: s.conditions.success.len(),
                failure_conditions: s.conditions.failure.len(),
                timeout_behavior: match s.conditions.on_timeout {
                    TimeoutBehavior::Fail => "Fail",
                    TimeoutBehavior::PartialSuccess => "Partial Success",
                    TimeoutBehavior::MilestoneScore => "Milestone Score",
                }
                .to_string(),
                milestones: s
                    .milestones
                    .iter()
                    .map(|m| (m.name.clone(), m.weight, m.partial))
                    .collect(),
            })
        });

        // Mutable state for eval config (captured by closure)
        let mut new_eval_runs = eval_runs;
        let mut new_eval_seed = eval_seed;
        let mut new_tag_filter = tag_filter.clone();

        // Action: open scenarios directory
        let action_open_dir = Cell::new(false);

        // DashboardLayout with three columns
        DashboardLayout::new()
            .state(&mut self.dashboard_state)
            .left_sidebar_with_config(SidebarConfig::new(220.0).title("Scenarios"), |ui| {
                // Filter input
                ui.horizontal(|ui| {
                    ui.label("Filter:");
                    ui.text_edit_singleline(&mut new_tag_filter);
                });
                ui.add_space(4.0);

                // Open scenarios directory button
                if ui.small_button("📂 Open Scenarios Dir").clicked() {
                    action_open_dir.set(true);
                }
                ui.add_space(8.0);

                // Builtin scenarios
                ui.label(
                    egui::RichText::new("Builtin")
                        .strong()
                        .color(theme.text_primary),
                );
                egui::ScrollArea::vertical()
                    .id_salt("builtin_scenarios")
                    .max_height(200.0)
                    .show(ui, |ui| {
                        for (id, name, tags) in &builtin_scenarios {
                            let is_selected = selected_scenario.as_ref() == Some(id);
                            if ui.selectable_label(is_selected, name).clicked() {
                                action_select_scenario.set(Some(id.clone()));
                            }

                            if !tags.is_empty() {
                                ui.horizontal(|ui| {
                                    for tag in tags {
                                        ui.label(
                                            egui::RichText::new(format!("#{}", tag))
                                                .small()
                                                .color(theme.text_muted),
                                        );
                                    }
                                });
                            }
                        }
                    });

                ui.add_space(8.0);

                // User scenarios
                if !user_scenarios.is_empty() {
                    ui.label(
                        egui::RichText::new("User")
                            .strong()
                            .color(theme.text_primary),
                    );
                    egui::ScrollArea::vertical()
                        .id_salt("user_scenarios")
                        .max_height(150.0)
                        .show(ui, |ui| {
                            for (id, name) in &user_scenarios {
                                let is_selected = selected_scenario.as_ref() == Some(id);
                                if ui.selectable_label(is_selected, name).clicked() {
                                    action_select_scenario.set(Some(id.clone()));
                                }
                            }
                        });
                }
            })
            .right_sidebar_with_config(SidebarConfig::new(280.0).title("Results"), |ui| {
                if let Some(result) = &eval_result {
                    let agg = &result.aggregated;

                    // Pass Rate
                    ui.label(
                        egui::RichText::new("Pass Rate")
                            .strong()
                            .color(theme.text_primary),
                    );
                    let pass_color = if agg.success_rate >= 0.8 {
                        theme.state_success
                    } else if agg.success_rate >= 0.5 {
                        theme.state_warning
                    } else {
                        theme.state_danger
                    };
                    ui.colored_label(
                        pass_color,
                        format!(
                            "{:.1}% ({}/{})",
                            agg.success_rate * 100.0,
                            agg.successful_runs,
                            agg.total_runs
                        ),
                    );

                    ui.add_space(8.0);

                    // pass@k metrics
                    ui.label(
                        egui::RichText::new("Pass@k")
                            .strong()
                            .color(theme.text_primary),
                    );
                    ui.label(format!("pass@1: {:.1}%", agg.pass_at_1 * 100.0));
                    ui.label(format!("pass@5: {:.1}%", agg.pass_at_5 * 100.0));
                    if let Some(pass_10) = agg.pass_at_10 {
                        ui.label(format!("pass@10: {:.1}%", pass_10 * 100.0));
                    }

                    ui.add_space(8.0);
                    ui.separator();
                    ui.add_space(8.0);

                    // Statistics
                    ui.label(
                        egui::RichText::new("Statistics")
                            .strong()
                            .color(theme.text_primary),
                    );
                    let stats = &agg.statistics;
                    ui.label(format!(
                        "Ticks: {:.1} ± {:.1}",
                        stats.total_ticks.mean, stats.total_ticks.std_dev
                    ));
                    ui.label(format!(
                        "Throughput: {:.1} (eff: {:.1}) /sec",
                        stats.raw_throughput_per_sec.mean, stats.effective_throughput_per_sec.mean
                    ));
                    ui.label(format!(
                        "Manager rate: {:.1}%",
                        stats.manager_intervention_rate.mean * 100.0
                    ));

                    // Show LLM statistics
                    if stats.total_llm_invocations > 0 {
                        let error_rate = stats.total_llm_errors as f64
                            / stats.total_llm_invocations as f64
                            * 100.0;
                        if stats.total_llm_errors > 0 {
                            ui.colored_label(
                                egui::Color32::RED,
                                format!(
                                    "⚠ LLM: {}/{} failed ({:.1}%)",
                                    stats.total_llm_errors, stats.total_llm_invocations, error_rate
                                ),
                            );
                        } else {
                            ui.colored_label(
                                egui::Color32::GREEN,
                                format!("✓ LLM: {} calls, 0 errors", stats.total_llm_invocations),
                            );
                        }
                    }

                    ui.add_space(8.0);

                    // Latency Sparkline
                    if !latency_history.is_empty() {
                        ui.label("Tick Distribution");
                        Sparkline::new(&latency_history)
                            .height(60.0)
                            .color(theme.state_info)
                            .show(ui);
                    }

                    ui.add_space(8.0);
                    ui.separator();
                    ui.add_space(8.0);

                    // Pass/Fail Capacity Gauge
                    ui.label(
                        egui::RichText::new("Pass/Fail")
                            .strong()
                            .color(theme.text_primary),
                    );
                    CapacityGauge::from_fraction(agg.successful_runs as u64, agg.total_runs as u64)
                        .thresholds(0.5, 0.8)
                        .height(16.0)
                        .show(ui);

                    ui.add_space(16.0);

                    // Run Details
                    ui.label(
                        egui::RichText::new("Run Details")
                            .strong()
                            .color(theme.text_primary),
                    );
                    egui::ScrollArea::vertical()
                        .id_salt("run_details")
                        .max_height(200.0)
                        .show(ui, |ui| {
                            for run in &result.runs {
                                ui.horizontal(|ui| {
                                    let status = if run.success {
                                        Status::Active
                                    } else {
                                        Status::Error
                                    };
                                    StatusIndicator::new(status).size(8.0).show(ui);
                                    ui.label(format!(
                                        "#{}: {} ticks, {:.1}ms, {}",
                                        run.index,
                                        run.metrics.task.total_ticks,
                                        run.metrics.performance.total_duration_ms,
                                        run.termination_reason
                                    ));
                                });
                            }
                        });
                } else if let Some(err) = &eval_error {
                    ui.colored_label(theme.state_danger, "Evaluation Failed");
                    ui.add_space(8.0);
                    ui.label(egui::RichText::new(err).color(theme.state_danger).small());
                } else {
                    ui.label(egui::RichText::new("No results yet").color(theme.text_muted));
                    ui.add_space(8.0);
                    ui.label("Run an evaluation to see results");
                }
            })
            .main(|ui| {
                if let Some(detail) = &scenario_detail {
                    // Scenario info
                    ui.heading(&detail.name);
                    ui.label(
                        egui::RichText::new(format!("v{}", detail.version))
                            .small()
                            .color(theme.text_muted),
                    );

                    if !detail.description.is_empty() {
                        ui.add_space(4.0);
                        ui.label(&detail.description);
                    }

                    ui.add_space(8.0);
                    ui.separator();
                    ui.add_space(8.0);

                    // Two-column layout for details
                    ui.columns(2, |columns| {
                        // Left column: Environment & Agents
                        columns[0].label(
                            egui::RichText::new("Environment")
                                .strong()
                                .color(theme.text_primary),
                        );
                        columns[0].label(format!("Type: {}", detail.env_type));
                        columns[0].add_space(8.0);

                        columns[0].label(
                            egui::RichText::new("Agents")
                                .strong()
                                .color(theme.text_primary),
                        );
                        columns[0].label(format!(
                            "Workers: {} ({} templates)",
                            detail.total_workers, detail.worker_templates
                        ));
                        columns[0].label(format!("Managers: {}", detail.manager_count));
                        columns[0].add_space(8.0);

                        columns[0].label(
                            egui::RichText::new("Config")
                                .strong()
                                .color(theme.text_primary),
                        );
                        columns[0].label(format!("Max Ticks: {}", detail.max_ticks));
                        columns[0].label(format!("Tick Duration: {}ms", detail.tick_duration_ms));

                        // Right column: Conditions & Milestones
                        columns[1].label(
                            egui::RichText::new("Conditions")
                                .strong()
                                .color(theme.text_primary),
                        );
                        columns[1]
                            .label(format!("Success: {} conditions", detail.success_conditions));
                        columns[1]
                            .label(format!("Failure: {} conditions", detail.failure_conditions));
                        columns[1].label(format!("On Timeout: {}", detail.timeout_behavior));
                        columns[1].add_space(8.0);

                        if !detail.milestones.is_empty() {
                            columns[1].label(
                                egui::RichText::new("Milestones")
                                    .strong()
                                    .color(theme.text_primary),
                            );
                            for (name, weight, partial) in &detail.milestones {
                                columns[1].horizontal(|ui| {
                                    ui.label(format!("{} ({:.0}%)", name, weight * 100.0));
                                    if *partial {
                                        ui.label(
                                            egui::RichText::new("partial")
                                                .small()
                                                .color(theme.state_info),
                                        );
                                    }
                                });
                            }
                        }
                    });

                    ui.add_space(16.0);
                    ui.separator();
                    ui.add_space(8.0);

                    // Eval Config
                    ui.label(
                        egui::RichText::new("Eval Config")
                            .strong()
                            .color(theme.text_primary),
                    );

                    ui.horizontal(|ui| {
                        ui.label("Runs:");
                        ui.add(egui::DragValue::new(&mut new_eval_runs).range(1..=100));
                    });

                    ui.horizontal(|ui| {
                        ui.label("Seed:");
                        let mut use_seed = new_eval_seed.is_some();
                        if ui.checkbox(&mut use_seed, "").changed() {
                            new_eval_seed = if use_seed { Some(42) } else { None };
                        }
                        if let Some(seed) = &mut new_eval_seed {
                            ui.add(egui::DragValue::new(seed).range(0..=u64::MAX));
                        } else {
                            ui.label(egui::RichText::new("random").color(theme.text_muted));
                        }
                    });

                    ui.add_space(16.0);

                    // Run button
                    ui.horizontal(|ui| {
                        let can_run = eval_state == EvalState::Idle
                            || eval_state == EvalState::Completed
                            || eval_state == EvalState::Failed;

                        if ui
                            .add_enabled(can_run, egui::Button::new("▶ Run Eval"))
                            .clicked()
                        {
                            action_start_eval.set(true);
                        }

                        if eval_state == EvalState::Running {
                            StatusIndicator::new(Status::Active)
                                .label("Running...")
                                .show(ui);
                            ui.spinner();
                        } else if eval_state == EvalState::Completed {
                            StatusIndicator::new(Status::Idle)
                                .label("Completed")
                                .show(ui);
                        } else if eval_state == EvalState::Failed {
                            StatusIndicator::new(Status::Error).label("Failed").show(ui);
                        }
                    });
                } else {
                    ui.centered_and_justified(|ui| {
                        ui.label("Select a scenario from the list");
                    });
                }
            })
            .show(ui);

        // Apply collected actions
        if let Some(id) = action_select_scenario.take() {
            self.selected_scenario = Some(id);
            self.eval_result = None;
            self.eval_error = None;
            self.eval_state = EvalState::Idle;
        }

        if action_start_eval.get() {
            self.start_eval();
        }

        if action_open_dir.get() {
            self.open_scenarios_dir();
        }

        // Update mutable state
        self.tag_filter = new_tag_filter;
        self.eval_runs = new_eval_runs;
        self.eval_seed = new_eval_seed;
    }

    /// Non-blocking check for the outcome of the background evaluation.
    ///
    /// Does nothing when no evaluation is pending or the worker has not sent
    /// anything yet. Otherwise the receiver is released and the viewer moves
    /// to `Completed` (report stored, per-run tick counts copied into
    /// `latency_history`) or `Failed` (error message stored).
    fn poll_eval_result(&mut self) {
        let Some(receiver) = self.result_rx.as_ref() else {
            return;
        };

        let outcome = match receiver.try_recv() {
            // Still running: keep the receiver and try again next frame.
            Err(mpsc::TryRecvError::Empty) => return,
            finished => finished,
        };

        // Every remaining outcome is terminal, so the channel is done.
        self.result_rx = None;

        match outcome {
            Ok(Ok(report)) => {
                // One sparkline sample per run: its total tick count.
                let ticks_per_run = report
                    .runs
                    .iter()
                    .map(|run| run.metrics.task.total_ticks as f32)
                    .collect();

                self.latency_history = ticks_per_run;
                self.eval_result = Some(report);
                self.eval_error = None;
                self.eval_state = EvalState::Completed;
            }
            Ok(Err(message)) => {
                self.eval_error = Some(message);
                self.eval_result = None;
                self.eval_state = EvalState::Failed;
            }
            // `Empty` returned above, so this is the disconnected case: the
            // sender was dropped without delivering a result.
            Err(_) => {
                self.eval_error = Some("Evaluation thread disconnected".to_string());
                self.eval_state = EvalState::Failed;
            }
        }
    }

    /// Open the user scenarios directory in the platform file manager.
    ///
    /// The directory is created first if it is missing. The launcher is
    /// `open` on macOS, `xdg-open` on Linux and `explorer` on Windows; on any
    /// other platform nothing is launched.
    ///
    /// This is best-effort: failures never abort the UI, but they are
    /// reported on stderr instead of being silently discarded.
    fn open_scenarios_dir(&self) {
        use swarm_engine_core::config::PathResolver;

        let path = PathResolver::user_eval_scenarios_dir();

        // `create_dir_all` succeeds when the directory already exists, so no
        // separate existence check is needed.
        if let Err(err) = std::fs::create_dir_all(&path) {
            eprintln!(
                "Failed to create scenarios directory {}: {}",
                path.display(),
                err
            );
        }

        #[cfg(target_os = "macos")]
        let launcher: Option<&str> = Some("open");
        #[cfg(target_os = "linux")]
        let launcher: Option<&str> = Some("xdg-open");
        #[cfg(target_os = "windows")]
        let launcher: Option<&str> = Some("explorer");
        #[cfg(not(any(target_os = "macos", target_os = "linux", target_os = "windows")))]
        let launcher: Option<&str> = None;

        if let Some(program) = launcher {
            if let Err(err) = std::process::Command::new(program).arg(&path).spawn() {
                eprintln!(
                    "Failed to launch `{}` for {}: {}",
                    program,
                    path.display(),
                    err
                );
            }
        }
    }

    /// Decide whether `scenario` passes the text `filter`.
    ///
    /// An empty filter accepts everything. Otherwise the scenario matches when
    /// the lowercased filter is a substring of its lowercased name or of any
    /// of its lowercased tags. Takes no `self` so it can be used from
    /// closures while other fields are borrowed.
    fn matches_filter_static(filter: &str, scenario: &EvalScenario) -> bool {
        if filter.is_empty() {
            return true;
        }

        let needle = filter.to_lowercase();
        let contains_needle = |text: &str| text.to_lowercase().contains(&needle);

        contains_needle(&scenario.meta.name)
            || scenario.meta.tags.iter().any(|tag| contains_needle(tag))
    }

    /// Start an evaluation of the selected scenario on a background thread.
    ///
    /// Does nothing when no scenario is selected. If the selected ID is not in
    /// the registry, the viewer goes to `Failed` with an error message.
    /// Otherwise previous results are cleared, the state becomes `Running`,
    /// and a worker thread is spawned that builds its own Tokio runtime, runs
    /// the `EvalRunner` and sends the outcome through `result_rx`.
    fn start_eval(&mut self) {
        let Some(scenario_id) = self.selected_scenario.as_ref() else {
            return;
        };

        let scenario = match self.registry.get(scenario_id) {
            Some(found) => found.clone(),
            None => {
                self.eval_error = Some(format!("Scenario not found: {}", scenario_id));
                self.eval_state = EvalState::Failed;
                return;
            }
        };

        // Reset everything that belongs to the previous run.
        self.latency_history.clear();
        self.eval_error = None;
        self.eval_result = None;
        self.eval_state = EvalState::Running;

        let runs = self.eval_runs;
        let seed = match self.eval_seed {
            Some(fixed) => fixed,
            // No fixed seed: use the current Unix time in seconds (42 if the
            // clock reads before the epoch).
            None => std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .map(|elapsed| elapsed.as_secs())
                .unwrap_or(42),
        };

        // The worker reports back through this channel.
        let (tx, rx) = mpsc::channel();
        self.result_rx = Some(rx);

        std::thread::spawn(move || {
            // The runtime is created inside the worker thread.
            let rt = match tokio::runtime::Runtime::new() {
                Err(e) => {
                    let _ = tx.send(Err(format!("Failed to create runtime: {}", e)));
                    return;
                }
                Ok(rt) => rt,
            };

            // Build OllamaConfig from scenario LLM config (same as CLI).
            // Must happen before `scenario` is moved into the runner.
            let llm_config = scenario
                .llm
                .to_ollama_config(scenario.batch_processor.max_concurrency);
            let invoker_handle = rt.handle().clone();

            let runner = EvalRunner::new(scenario, rt.handle().clone())
                .with_runs(runs)
                .with_seed(seed)
                .with_manager_factory(|| Box::new(DefaultBatchManagerAgent::new(ManagerId(0))))
                .with_batch_invoker_factory(move || {
                    Box::new(create_llm_invoker(
                        OllamaDecider::new(llm_config.clone()),
                        invoker_handle.clone(),
                    ))
                });

            let outcome = runner
                .run()
                .map_err(|e| format!("Evaluation failed: {}", e));

            // The receiver may already be gone; that is not an error here.
            let _ = tx.send(outcome);
        });
    }
}

impl Default for EvalViewer {
    fn default() -> Self {
        Self::new()
    }
}