// swarm-engine-eval 0.1.6

//! EvalRunner - runs evaluations using LearnableSwarm

use std::time::Duration;

use tokio::runtime::Handle;

use swarm_engine_core::actions::ActionDef;
use swarm_engine_core::agent::{
    BatchInvoker, DefaultBatchManagerAgent, GenericWorker, ManagementStrategy, ManagerAgent,
    ManagerId, WorkerAgent,
};
use swarm_engine_core::environment::EnvironmentBox;
use swarm_engine_core::events::TraceSubscriber;
use swarm_engine_core::exploration::{
    LearnedDependencyProvider, NodeRules, OperatorProvider, SelectResult,
};
use swarm_engine_core::extensions::Extensions;
use swarm_engine_core::learn::{
    profile_to_offline_model, CountTrigger, LearnableSwarmBuilder, LearningStore, OfflineModel,
    ScenarioProfile, TrainTrigger,
};
use swarm_engine_core::orchestrator::SwarmConfig;
use swarm_engine_core::types::{GroupId, SwarmTask};

use crate::environments::{
    CodeEnvironment, DeepSearchEnvironment, InternalDiagnosisEnvironment, MazeEnvironment,
    SearchEnvironment, TroubleshootingEnvironment,
};

use crate::aggregator::Aggregator;
use crate::config::DependencyProviderKind;
use crate::error::Result;
use crate::metrics::RunMetrics;
use crate::reporter::{ConfigSummary, EvalReport, SeedInfo};
use crate::run::{EvalRun, TerminationReason};
use crate::scenario::conditions::{ConditionValue, TimeoutBehavior};
use crate::scenario::{EvalScenario, ManagementStrategyConfig};
use crate::validation::{ScenarioValidator, WarningSeverity};

/// Evaluation seed for reproducibility.
///
/// A thin newtype around the per-run `u64` seed. `EvalRunner` inserts one
/// instance into the `Extensions` of every run so that the Environment and
/// other components can look it up by type.
#[derive(Debug, Clone, Copy)]
pub struct EvalSeed(pub u64);

/// Factory for creating a `ManagerAgent`.
///
/// A boxed `Send + Sync` closure that returns a new boxed `ManagerAgent` on
/// each call; `EvalRunner` calls it once per run.
pub type ManagerFactory = Box<dyn Fn() -> Box<dyn ManagerAgent> + Send + Sync>;

/// Factory for creating a `BatchInvoker`.
///
/// A boxed `Send + Sync` closure that returns a new boxed `BatchInvoker` on
/// each call; `EvalRunner` calls it once per run.
pub type BatchInvokerFactory = Box<dyn Fn() -> Box<dyn BatchInvoker> + Send + Sync>;

/// Factory for creating an `OperatorProvider` over `NodeRules`.
///
/// A boxed `Send + Sync` closure that returns a new boxed provider on each
/// call; `EvalRunner` calls it once per run.
pub type OperatorProviderFactory =
    Box<dyn Fn() -> Box<dyn OperatorProvider<NodeRules>> + Send + Sync>;

/// Evaluation runner that builds a swarm with `LearnableSwarmBuilder` for each run
///
/// The runner is configured through the consuming `with_*` builder methods and
/// executed with `run()`, which performs `runs` iterations and aggregates them
/// into an `EvalReport`.
///
/// # Example
///
/// ```ignore
/// let runner = EvalRunner::new(scenario, runtime.handle().clone())
///     .with_runs(5)
///     .with_seed(42)
///     .with_task(SwarmTask::new("Find the auth handler"))
///     .with_manager_factory(|| Box::new(MyManager::new()))
///     .with_batch_invoker_factory(|| Box::new(MyInvoker::new()));
///
/// let report = runner.run()?;
/// ```
pub struct EvalRunner {
    /// Scenario definition being evaluated
    scenario: EvalScenario,
    /// Tokio runtime handle handed to `LearnableSwarmBuilder`
    runtime: Handle,
    /// Number of iterations executed by `run()`
    runs: usize,
    /// Base seed; iteration `i` uses `seed.wrapping_add(i)`
    seed: u64,
    /// Task to execute (optional)
    task: Option<SwarmTask>,
    /// Manager factory (creates new instance per run)
    manager_factory: Option<ManagerFactory>,
    /// BatchInvoker factory (creates new instance per run)
    batch_invoker_factory: Option<BatchInvokerFactory>,
    /// Extensions factory (creates new instance per run)
    extensions_factory: Option<Box<dyn Fn() -> Extensions + Send + Sync>>,
    /// OperatorProvider factory (creates new instance per run)
    operator_provider_factory: Option<OperatorProviderFactory>,
    /// Verbose output (print tick snapshots)
    verbose: bool,
    /// Enable ExplorationSpaceV2 tracking
    enable_exploration: bool,
    /// Dependency graph for action sequencing
    dependency_graph: Option<swarm_engine_core::exploration::DependencyGraph>,
    /// LearningStore for cross-session learning
    learning_store: Option<LearningStore>,
    /// TrainTrigger for Learning
    ///
    /// When `None` and a learning store is configured, `run_single` falls back
    /// to `CountTrigger::new(self.runs)`.
    train_trigger: Option<std::sync::Arc<dyn TrainTrigger>>,
    /// Skip learned action order from offline model
    skip_learned_action_order: bool,
    /// Trace subscriber for ActionEvent output
    trace_subscriber: Option<std::sync::Arc<dyn TraceSubscriber>>,
    /// ScenarioProfile for applying learned parameters
    scenario_profile: Option<ScenarioProfile>,
    /// Cached OfflineModel from ScenarioProfile (to avoid repeated conversion)
    offline_model_from_profile: Option<OfflineModel>,
    /// Kind of DependencyGraph provider to create
    dependency_provider_kind: DependencyProviderKind,
}

impl EvalRunner {
    /// Create a runner for `scenario` that builds its swarms on `runtime`.
    ///
    /// Defaults: a single run, base seed `42`, no explicit task, no factories,
    /// verbose output and exploration tracking disabled, no dependency graph,
    /// no learning store / train trigger / trace subscriber / scenario profile,
    /// learned action order not skipped, and the default
    /// `DependencyProviderKind`.
    pub fn new(scenario: EvalScenario, runtime: Handle) -> Self {
        Self {
            scenario,
            runtime,
            runs: 1,
            seed: 42,
            task: None,
            manager_factory: None,
            batch_invoker_factory: None,
            extensions_factory: None,
            operator_provider_factory: None,
            verbose: false,
            enable_exploration: false,
            dependency_graph: None,
            learning_store: None,
            train_trigger: None,
            skip_learned_action_order: false,
            trace_subscriber: None,
            scenario_profile: None,
            offline_model_from_profile: None,
            dependency_provider_kind: DependencyProviderKind::default(),
        }
    }

    /// Set the DependencyGraph provider kind
    pub fn with_dependency_provider_kind(mut self, kind: DependencyProviderKind) -> Self {
        self.dependency_provider_kind = kind;
        self
    }

    /// Build the DependencyGraphProvider matching the configured provider kind.
    ///
    /// `action_order` is cloned into the returned provider.
    fn create_dependency_provider(
        &self,
        action_order: &swarm_engine_core::learn::LearnedActionOrder,
    ) -> Box<dyn swarm_engine_core::exploration::DependencyGraphProvider> {
        // Note: Smart and Learned have been unified; both use LearnedDependencyProvider.
        // Smart's extra capability (the voting strategy via select()) has already been
        // merged into LearnedDependencyProvider.
        let provider = match self.dependency_provider_kind {
            DependencyProviderKind::Smart | DependencyProviderKind::Learned => {
                LearnedDependencyProvider::new(action_order.clone())
            }
        };
        Box::new(provider)
    }

    /// Enable verbose output (print tick snapshots)
    pub fn with_verbose(mut self, verbose: bool) -> Self {
        self.verbose = verbose;
        self
    }

    /// Enable ExplorationSpace tracking
    pub fn with_exploration(mut self, enable: bool) -> Self {
        self.enable_exploration = enable;
        self
    }

    /// Set dependency graph for action sequencing
    ///
    /// The graph defines valid action transitions and terminal conditions.
    /// When set, actions will be filtered based on the graph structure.
    pub fn with_dependency_graph(
        mut self,
        graph: swarm_engine_core::exploration::DependencyGraph,
    ) -> Self {
        self.dependency_graph = Some(graph);
        self
    }

    /// Attach a LearningStore rooted at `path` for cross-session learning.
    ///
    /// If the store cannot be created, a warning is printed to stderr and the
    /// runner is returned unchanged (any previously configured store is kept).
    ///
    /// # Example
    ///
    /// ```ignore
    /// runner.with_learning_store("~/.swarm-engine/learning")
    /// ```
    pub fn with_learning_store(mut self, path: impl AsRef<std::path::Path>) -> Self {
        let store = match LearningStore::new(path) {
            Ok(created) => created,
            Err(e) => {
                eprintln!("Warning: Failed to create LearningStore: {}", e);
                return self;
            }
        };

        // Loading of prior_snapshot / offline_model is left to LearnableSwarmBuilder.
        self.learning_store = Some(store);
        self
    }

    /// Set TrainTrigger for Learning
    ///
    /// Controls when offline learning is executed after eval runs.
    /// Default: Learn after every run (if learning_store is configured).
    ///
    /// # Example
    ///
    /// ```ignore
    /// use swarm_engine_core::learn::CountTrigger;
    ///
    /// // Run learning after every 5 eval iterations
    /// runner.with_train_trigger(Arc::new(CountTrigger::new(5)))
    /// ```
    pub fn with_train_trigger(mut self, trigger: std::sync::Arc<dyn TrainTrigger>) -> Self {
        self.train_trigger = Some(trigger);
        self
    }

    /// Skip learned action order from offline model
    ///
    /// When enabled, the learned dependency graph (action_order) from the offline
    /// model will not be applied. This is useful for testing without learned priors.
    pub fn skip_learned_action_order(mut self, skip: bool) -> Self {
        self.skip_learned_action_order = skip;
        self
    }

    /// Set trace subscriber for ActionEvent output
    ///
    /// The subscriber will receive all ActionEvents during evaluation.
    /// Use InMemoryTraceSubscriber to collect events and dump them after evaluation,
    /// or JsonlTraceSubscriber for real-time output.
    ///
    /// # Example
    ///
    /// ```ignore
    /// use std::sync::Arc;
    /// use swarm_engine_core::events::InMemoryTraceSubscriber;
    ///
    /// let trace = Arc::new(InMemoryTraceSubscriber::new());
    /// runner.with_trace_subscriber(trace.clone());
    /// // ... run evaluation ...
    /// trace.dump_to_file("trace.jsonl")?;
    /// ```
    pub fn with_trace_subscriber(
        mut self,
        subscriber: std::sync::Arc<dyn TraceSubscriber>,
    ) -> Self {
        self.trace_subscriber = Some(subscriber);
        self
    }

    /// Apply a ScenarioProfile to use learned exploration parameters and strategies
    ///
    /// This converts the profile to an OfflineModel and applies it during execution.
    /// When set, the profile's learned components (exploration params, strategy config,
    /// action order) will be used instead of defaults.
    ///
    /// # Example
    ///
    /// ```ignore
    /// let store = ProfileStore::new("~/.swarm-engine/profiles")?;
    /// let profile = store.load("troubleshooting")?;
    /// runner.with_scenario_profile(profile);
    /// ```
    pub fn with_scenario_profile(mut self, profile: ScenarioProfile) -> Self {
        let offline_model = profile_to_offline_model(&profile);
        self.offline_model_from_profile = Some(offline_model);
        self.scenario_profile = Some(profile);
        self
    }

    pub fn with_runs(mut self, runs: usize) -> Self {
        self.runs = runs;
        self
    }

    pub fn with_seed(mut self, seed: u64) -> Self {
        self.seed = seed;
        self
    }

    /// Set the task to execute
    pub fn with_task(mut self, task: SwarmTask) -> Self {
        self.task = Some(task);
        self
    }

    /// Set manager factory (creates new Manager for each run)
    pub fn with_manager_factory<F>(mut self, factory: F) -> Self
    where
        F: Fn() -> Box<dyn ManagerAgent> + Send + Sync + 'static,
    {
        self.manager_factory = Some(Box::new(factory));
        self
    }

    /// Set batch invoker factory (creates new BatchInvoker for each run)
    pub fn with_batch_invoker_factory<F>(mut self, factory: F) -> Self
    where
        F: Fn() -> Box<dyn BatchInvoker> + Send + Sync + 'static,
    {
        self.batch_invoker_factory = Some(Box::new(factory));
        self
    }

    /// Set extensions factory (creates new Extensions for each run)
    pub fn with_extensions_factory<F>(mut self, factory: F) -> Self
    where
        F: Fn() -> Extensions + Send + Sync + 'static,
    {
        self.extensions_factory = Some(Box::new(factory));
        self
    }

    /// Set OperatorProvider factory (creates new provider for each run)
    ///
    /// Use this to configure the Selection strategy for exploration.
    /// Default is `AdaptiveProvider` if not specified.
    ///
    /// # Example
    ///
    /// ```ignore
    /// use swarm_engine_core::exploration::{HybridLlmProvider, ReviewPolicy};
    /// use swarm_engine_llm::LlmStrategyAdvisor;
    ///
    /// runner.with_operator_provider_factory(|| {
    ///     let advisor = LlmStrategyAdvisor::new(decider.clone(), handle.clone());
    ///     let policy = ReviewPolicy::default();
    ///     Box::new(HybridLlmProvider::new(advisor, policy))
    /// })
    /// ```
    pub fn with_operator_provider_factory<F>(mut self, factory: F) -> Self
    where
        F: Fn() -> Box<dyn OperatorProvider<NodeRules>> + Send + Sync + 'static,
    {
        self.operator_provider_factory = Some(Box::new(factory));
        self
    }

    /// Validate the scenario, execute every configured iteration and build the report.
    ///
    /// Validation findings are only logged (level depends on severity); they never
    /// abort the evaluation. Iteration `i` runs with seed `self.seed.wrapping_add(i)`.
    ///
    /// # Errors
    ///
    /// Returns the error of the first iteration that fails; later iterations are
    /// not executed.
    pub fn run(&self) -> Result<EvalReport> {
        // Log scenario validation findings before running anything.
        let findings = ScenarioValidator::validate_scenario(&self.scenario);
        for finding in &findings {
            match finding.severity() {
                WarningSeverity::High => {
                    tracing::warn!(
                        severity = %finding.severity(),
                        "Scenario validation: {}",
                        finding
                    );
                }
                WarningSeverity::Medium => {
                    tracing::info!(
                        severity = %finding.severity(),
                        "Scenario validation: {}",
                        finding
                    );
                }
                _ => {
                    tracing::debug!(
                        severity = %finding.severity(),
                        "Scenario validation: {}",
                        finding
                    );
                }
            }
        }

        // One GroupId is shared by all iterations of this eval run, so that DPO
        // learning can compare multiple executions under the same conditions.
        let group_id = GroupId::new();

        // Per-iteration seeds, derived from the base seed.
        let run_seeds: Vec<u64> = (0..self.runs)
            .map(|i| self.seed.wrapping_add(i as u64))
            .collect();

        // Execute the iterations in order, stopping at the first failure.
        let eval_runs = run_seeds
            .iter()
            .enumerate()
            .map(|(i, &run_seed)| self.run_single(i, run_seed, group_id))
            .collect::<Result<Vec<_>>>()?;

        let aggregated = Aggregator::aggregate(&eval_runs);

        // Note: action_order learning is handled by the `learn` command, not by eval.
        // If specific behavior is needed, use MockProvider via LearnableSwarmBuilder.

        let config_summary = ConfigSummary {
            scenario_name: self.scenario.meta.name.clone(),
            scenario_id: self.scenario.meta.id.to_string(),
            worker_count: self.scenario.agents.workers.iter().map(|w| w.count).sum(),
            max_ticks: self.scenario.app_config.max_ticks,
            run_count: self.runs,
        };
        let seed_info = SeedInfo {
            base_seed: self.seed,
            run_seeds,
        };

        Ok(EvalReport {
            config_summary,
            seed_info,
            runs: eval_runs,
            aggregated,
            assertion_results: vec![],
        })
    }

    /// Run a single evaluation iteration
    ///
    /// Builds a fresh swarm via `LearnableSwarmBuilder`, runs it (with a task if
    /// one is available), collects metrics from the orchestrator state and
    /// evaluates the scenario's success/failure conditions.
    ///
    /// # Arguments
    /// * `index` - The iteration index (0-based)
    /// * `seed` - The random seed for this iteration
    /// * `group_id` - The group ID shared by all iterations in this eval run.
    ///   Used for DPO learning to compare multiple executions.
    ///
    /// # Errors
    /// Propagates failures from `builder.build()` and `swarm.run_task()`.
    fn run_single(&self, index: usize, seed: u64, group_id: GroupId) -> Result<EvalRun> {
        // ========================================================================
        // Build the Swarm with LearnableSwarmBuilder
        // ========================================================================
        let workers = self.build_workers();
        let management_strategy = self.build_management_strategy();

        let swarm_config = SwarmConfig {
            tick_duration: Duration::from_millis(self.scenario.app_config.tick_duration_ms),
            max_ticks: self.scenario.app_config.max_ticks,
            management_strategy,
        };

        // Extensions (LlmConfig, ActionsConfig, EvalSeed, etc.)
        let extensions = self.build_extensions_from_scenario(seed);

        // Start building with LearnableSwarmBuilder
        let scenario_key = self.scenario.meta.id.learning_key();
        let mut builder = LearnableSwarmBuilder::new(self.runtime.clone())
            .scenario(&scenario_key)
            .swarm_config(swarm_config)
            .workers(workers)
            .extensions(extensions)
            .enable_exploration(
                self.enable_exploration || self.scenario.app_config.enable_exploration,
            );

        // Managers: from factory if provided, otherwise from scenario templates
        if let Some(factory) = &self.manager_factory {
            let manager = factory();
            builder = builder.add_manager(Box::new(DynManagerWrapper(manager)));
        } else {
            let managers = self.build_managers();
            for manager in managers {
                builder = builder.add_manager(Box::new(manager));
            }
        }

        // BatchInvoker if factory provided
        if let Some(factory) = &self.batch_invoker_factory {
            let invoker = factory();
            builder = builder.batch_invoker(Box::new(DynBatchInvokerWrapper(invoker)));
        }

        // OperatorProvider factory
        if let Some(factory) = &self.operator_provider_factory {
            let provider = factory();
            builder = builder.operator_provider(Box::new(DynOperatorProviderWrapper(provider)));
        }

        // Apply the OfflineModel from the ScenarioProfile (takes priority over the LearningStore)
        if let Some(ref model) = self.offline_model_from_profile {
            builder = builder.offline_model(model.clone());

            // Report that the offline model was applied
            if self.operator_provider_factory.is_none() {
                println!(
                    "Profile offline model applied: ucb1_c={:.3}, maturity={}, strategy={}",
                    model.parameters.ucb1_c,
                    model.strategy_config.maturity_threshold,
                    model.strategy_config.initial_strategy
                );
            }

            // Learned action order (skips automatic DependencyGraph generation)
            if !self.skip_learned_action_order {
                if let Some(ref action_order) = model.action_order {
                    let provider = self.create_dependency_provider(action_order);
                    builder = builder.dependency_provider(provider);
                    println!(
                        "Learned action order applied ({:?}): discover={:?}, not_discover={:?}",
                        self.dependency_provider_kind,
                        action_order.discover,
                        action_order.not_discover
                    );
                }
            } else if model.action_order.is_some() {
                println!("Learned action order skipped (--no-dep-graph)");
            }
        }

        // Learning setup (when a LearningStore is configured)
        // with_learning_store automatically sets prior_snapshot, offline_model, data_dir and learning_enabled
        if let Some(ref store) = self.learning_store {
            builder = builder.with_learning_store(store.clone());

            // Apply the LearningStore's OfflineModel only when no ScenarioProfile is set
            // NOTE(review): this branch duplicates the profile branch above except for
            // the first println! text; a shared helper would keep the two in sync.
            if self.offline_model_from_profile.is_none() {
                // Fetch the offline model info (used for println! and action_order handling)
                let offline_model_opt = builder.offline_model_ref().cloned();
                if let Some(ref model) = offline_model_opt {
                    // Report that the offline model was applied (only when there is no OperatorProvider factory)
                    if self.operator_provider_factory.is_none() {
                        println!(
                            "Offline model applied: ucb1_c={:.3}, maturity={}, strategy={}",
                            model.parameters.ucb1_c,
                            model.strategy_config.maturity_threshold,
                            model.strategy_config.initial_strategy
                        );
                    }

                    // Learned action order (skips automatic DependencyGraph generation)
                    if !self.skip_learned_action_order {
                        if let Some(ref action_order) = model.action_order {
                            let provider = self.create_dependency_provider(action_order);
                            builder = builder.dependency_provider(provider);
                            println!(
                                "Learned action order applied ({:?}): discover={:?}, not_discover={:?}",
                                self.dependency_provider_kind,
                                action_order.discover,
                                action_order.not_discover
                            );
                        }
                    } else if model.action_order.is_some() {
                        println!("Learned action order skipped (--no-dep-graph)");
                    }
                }
            }

            // Train trigger
            // NOTE(review): the fallback is CountTrigger::new(self.runs), while the
            // with_train_trigger docs describe the default as "after every run" —
            // confirm which is intended.
            if let Some(ref trigger) = self.train_trigger {
                builder = builder.train_trigger(std::sync::Arc::clone(trigger));
            } else {
                builder = builder.train_trigger(std::sync::Arc::new(CountTrigger::new(self.runs)));
            }
        }

        // Trace subscriber
        if let Some(ref subscriber) = self.trace_subscriber {
            builder = builder.with_trace_subscriber(std::sync::Arc::clone(subscriber));
        }

        // ========================================================================
        // Build and run the LearnableSwarm
        // ========================================================================
        let mut swarm = builder.build()?;

        // Enable partitioning when multiple managers are configured
        // (the count comes from the scenario templates, even when a manager factory is used)
        let manager_count = self
            .scenario
            .agents
            .managers
            .iter()
            .map(|t| t.count)
            .sum::<usize>();
        if manager_count > 1 {
            swarm.orchestrator_mut().enable_partitioning();
        }

        // Determine task: an explicit task wins over the scenario's task; either
        // one is tagged with the shared group id
        let task_to_run = self
            .task
            .clone()
            .or_else(|| self.build_task_from_scenario())
            .map(|task| task.with_group_id(group_id));

        // Run
        let result = if let Some(task) = task_to_run {
            swarm.run_task(task)?
        } else {
            swarm.run()
        };

        // ========================================================================
        // Collect metrics
        // ========================================================================
        let state = swarm.orchestrator().state();
        // Reaching the tick limit is treated as a timeout
        let timed_out = result.total_ticks >= self.scenario.app_config.max_ticks;
        let environment_done = state.shared.is_environment_done();
        let total_actions = state.shared.stats.total_visits() as u64;
        let successful_actions = state.shared.stats.total_successes() as u64;
        let llm_invocations = state.shared.llm_invocations();
        let llm_invoke_errors = state.shared.llm_errors();

        let metrics = RunMetrics {
            task: crate::metrics::TaskMetrics {
                total_ticks: result.total_ticks,
                total_tasks: 0,
                completed_tasks: 0,
                total_actions,
                successful_actions,
                success_rate: state.shared.stats.success_rate(),
            },
            coordination: crate::metrics::CoordinationMetrics {
                // LLM invocations are used as the measure of manager activations
                manager_activations: llm_invocations,
                manager_intervention_rate: if result.total_ticks > 0 {
                    llm_invocations as f64 / result.total_ticks as f64
                } else {
                    0.0
                },
                ..Default::default()
            },
            performance: {
                // All ratios below fall back to 0.0 instead of dividing by zero
                let llm_error_rate = if llm_invocations > 0 {
                    llm_invoke_errors as f64 / llm_invocations as f64
                } else {
                    0.0
                };
                crate::metrics::PerformanceMetrics {
                    total_duration_ms: result.total_duration.as_millis() as f64,
                    avg_tick_latency_ms: if result.total_ticks > 0 {
                        result.total_duration.as_millis() as f64 / result.total_ticks as f64
                    } else {
                        0.0
                    },
                    raw_throughput_per_sec: if result.total_duration.as_secs_f64() > 0.0 {
                        total_actions as f64 / result.total_duration.as_secs_f64()
                    } else {
                        0.0
                    },
                    effective_throughput_per_sec: if result.total_duration.as_secs_f64() > 0.0 {
                        successful_actions as f64 / result.total_duration.as_secs_f64()
                    } else {
                        0.0
                    },
                    llm_invocations,
                    llm_invoke_errors,
                    llm_error_rate,
                    ..Default::default()
                }
            },
            robustness: Default::default(),
        };

        // Evaluate success/failure conditions
        // (a run that did not complete is always a failure with reason `Stopped`)
        let (success, termination_reason) = if !result.completed {
            (false, TerminationReason::Stopped)
        } else {
            self.evaluate_conditions(&metrics, environment_done, timed_out)
        };

        // Send shutdown signal to LearningDaemon (non-blocking)
        // NOTE: block_on() inside tokio runtime causes deadlock,
        //       so we only send shutdown signal and let Drop handle cleanup
        if swarm.is_learning_enabled() {
            // Emit stats snapshot before shutdown (required for learning data to be saved)
            swarm.emit_stats_snapshot();

            // Wait for LearningEventSubscriber to flush the event to LearningDaemon
            // Default flush interval is 1000ms, but we need to ensure the event is processed
            // NOTE(review): the sleep is only 150ms although the comment above mentions a
            // 1000ms flush interval — confirm that 150ms is sufficient. This also blocks
            // the calling thread.
            std::thread::sleep(std::time::Duration::from_millis(150));

            if let Some(tx) = swarm.take_shutdown_tx() {
                // Fire-and-forget shutdown signal
                let _ = tx.try_send(());
            }
        }

        Ok(EvalRun::new(
            index,
            seed,
            success,
            termination_reason,
            metrics,
        ))
    }

    /// Create one `GenericWorker` per requested instance of every worker template.
    ///
    /// Worker ids are assigned sequentially across all templates (0, 1, 2, ...).
    /// Names come from the template's `id_pattern` with `{i}` replaced by the
    /// instance index within that template. Every worker requires guidance.
    fn build_workers(&self) -> Vec<Box<dyn WorkerAgent>> {
        self.scenario
            .agents
            .workers
            .iter()
            // Expand each template into the names of its instances.
            .flat_map(|template| {
                (0..template.count)
                    .map(move |i| template.id_pattern.replace("{i}", &i.to_string()))
            })
            // The position in the flattened sequence is the global worker id.
            .enumerate()
            .map(|(id, name)| -> Box<dyn WorkerAgent> {
                Box::new(
                    GenericWorker::new(id)
                        .with_name(name)
                        .with_require_guidance(true),
                )
            })
            .collect()
    }

    /// Create the `DefaultBatchManagerAgent`s described by the scenario's manager templates.
    ///
    /// Manager ids are assigned sequentially across all templates and every manager
    /// uses the scenario's `process_interval_ticks`. If the templates yield no
    /// manager at all, a single manager named "default_manager" is returned.
    fn build_managers(&self) -> Vec<DefaultBatchManagerAgent> {
        let interval = self.scenario.manager.process_interval_ticks;

        // Names of all managers, in template order.
        let names = self
            .scenario
            .agents
            .managers
            .iter()
            .flat_map(|template| template.generate_ids());

        let mut managers = Vec::new();
        let mut next_index = 0;
        for name in names {
            managers.push(
                DefaultBatchManagerAgent::new(ManagerId(next_index))
                    .with_name(name)
                    .with_interval(interval),
            );
            next_index += 1;
        }

        // Default: create exactly one manager when there are no manager templates.
        if managers.is_empty() {
            let fallback = DefaultBatchManagerAgent::new(ManagerId(0))
                .with_name("default_manager")
                .with_interval(interval);
            managers.push(fallback);
        }

        managers
    }

    /// Translate the scenario's `ManagementStrategyConfig` into the core `ManagementStrategy`.
    ///
    /// Mapping:
    /// * `EveryTick` -> `EveryTick`
    /// * `IntervalBased { max_interval }` -> `FixedInterval { interval: max_interval }`
    /// * `EventDriven` -> `CompletionBased` with a fixed `max_wait_ticks` of 50
    ///   (the configured triggers are not used)
    /// * `Hybrid { max_interval, .. }` -> `Hybrid` that prefers `max_interval` and forces
    ///   management after twice that many ticks (the configured triggers are not used)
    /// * `Disabled` -> `FixedInterval` with the maximum interval
    fn build_management_strategy(&self) -> ManagementStrategy {
        match &self.scenario.app_config.management_strategy {
            ManagementStrategyConfig::EveryTick {} => ManagementStrategy::EveryTick,
            ManagementStrategyConfig::IntervalBased { max_interval } => {
                ManagementStrategy::FixedInterval {
                    interval: *max_interval,
                }
            }
            ManagementStrategyConfig::EventDriven { triggers: _ } => {
                // Event-driven maps to completion-based
                ManagementStrategy::CompletionBased { max_wait_ticks: 50 }
            }
            ManagementStrategyConfig::Hybrid {
                max_interval,
                triggers: _,
            } => ManagementStrategy::Hybrid {
                preferred_interval: *max_interval,
                // Saturate instead of overflowing: a very large configured interval
                // would otherwise panic in debug builds and wrap to a tiny value in
                // release builds.
                force_after_ticks: max_interval.saturating_mul(2),
            },
            ManagementStrategyConfig::Disabled {} => {
                // Disabled = very large interval (effectively never)
                ManagementStrategy::FixedInterval { interval: u64::MAX }
            }
        }
    }

    /// Build SwarmTask from scenario task config
    ///
    /// Returns None if task goal is empty
    fn build_task_from_scenario(&self) -> Option<SwarmTask> {
        let task_config = &self.scenario.task;

        if task_config.goal.is_empty() {
            return None;
        }

        // Build context JSON object
        let mut context = serde_json::Map::new();

        if let Some(target_path) = &task_config.context.target_path {
            context.insert(
                "target_path".to_string(),
                serde_json::Value::String(target_path.clone()),
            );
        }
        if let Some(working_dir) = &task_config.context.working_dir {
            context.insert(
                "working_dir".to_string(),
                serde_json::Value::String(working_dir.clone()),
            );
        }
        if let Some(max_depth) = task_config.context.max_depth {
            context.insert(
                "max_depth".to_string(),
                serde_json::Value::Number(serde_json::Number::from(max_depth)),
            );
        }

        // Add extra context (convert toml::Value to serde_json::Value)
        for (key, value) in &task_config.context.extra {
            if let Ok(json_value) = serde_json::to_value(value) {
                context.insert(key.clone(), json_value);
            }
        }

        let task =
            SwarmTask::new(&task_config.goal).with_context(serde_json::Value::Object(context));

        Some(task)
    }

    /// Build Extensions with LlmConfig, ActionsConfig, and EvalSeed from scenario
    fn build_extensions_from_scenario(&self, seed: u64) -> Extensions {
        let mut extensions = if let Some(factory) = &self.extensions_factory {
            factory()
        } else {
            Extensions::new()
        };

        // Insert EvalSeed for reproducibility
        extensions.insert(EvalSeed(seed));

        // Insert LlmConfig for BatchInvoker/Manager to use
        extensions.insert(self.scenario.llm.clone());

        // Insert LoRA config if specified (for BatchInvoker to use)
        if let Some(ref lora) = self.scenario.llm.lora {
            extensions.insert(lora.clone());
        }

        // Insert ManagerConfig for Manager to use
        extensions.insert(self.scenario.manager.clone());

        // Insert BatchProcessorConfig for BatchInvoker to use
        extensions.insert(self.scenario.batch_processor.clone());

        // Convert EvalActionsConfig to Core ActionsConfig for Worker/Manager to use
        let core_actions_config = self.scenario.actions.to_core_config();
        extensions.insert(core_actions_config);

        // Create and insert Environment based on env_type
        let env_type = self.scenario.environment.env_type.as_str();
        let env_params = &self.scenario.environment.params;

        let env_box: Option<EnvironmentBox> = match env_type {
            "maze" => {
                let map = env_params.get("map").and_then(|v| v.as_str()).unwrap_or("");
                let worker_count = env_params
                    .get("worker_count")
                    .and_then(|v| v.as_u64())
                    .unwrap_or(1) as usize;
                Some(Box::new(MazeEnvironment::from_str(map, worker_count)))
            }
            "code" => {
                // Currently only "auth" scenario is supported, default to it
                Some(Box::new(CodeEnvironment::auth_scenario()))
            }
            "troubleshooting" => {
                let scenario_name = env_params
                    .get("scenario")
                    .and_then(|v| v.as_str())
                    .unwrap_or("memory_leak");
                let env = match scenario_name {
                    "memory_leak" => TroubleshootingEnvironment::memory_leak_scenario(),
                    "cpu_spike" => TroubleshootingEnvironment::cpu_spike_scenario(),
                    "network_timeout" => TroubleshootingEnvironment::network_timeout_scenario(),
                    "medium" => TroubleshootingEnvironment::complex_scenario(15, 3, 2, seed),
                    "high" => TroubleshootingEnvironment::complex_scenario(30, 8, 3, seed),
                    "extreme" => TroubleshootingEnvironment::complex_scenario(50, 15, 4, seed),
                    "complex" => {
                        let total_services = env_params
                            .get("total_services")
                            .and_then(|v| v.as_u64())
                            .unwrap_or(15) as usize;
                        let noise_services = env_params
                            .get("noise_services")
                            .and_then(|v| v.as_u64())
                            .unwrap_or(3) as usize;
                        let cascade_depth = env_params
                            .get("cascade_depth")
                            .and_then(|v| v.as_u64())
                            .unwrap_or(2) as usize;
                        TroubleshootingEnvironment::complex_scenario(
                            total_services,
                            noise_services,
                            cascade_depth,
                            seed,
                        )
                    }
                    _ => TroubleshootingEnvironment::memory_leak_scenario(),
                };
                Some(Box::new(env))
            }
            "search" => {
                let scenario_name = env_params
                    .get("scenario")
                    .and_then(|v| v.as_str())
                    .unwrap_or("basic");
                let env = match scenario_name {
                    "basic" => SearchEnvironment::basic_scenario(),
                    "medium" => SearchEnvironment::medium_scenario(),
                    "large" => SearchEnvironment::large_scenario(),
                    "custom" => {
                        let file_count = env_params
                            .get("file_count")
                            .and_then(|v| v.as_u64())
                            .unwrap_or(5) as usize;
                        let target_index = env_params
                            .get("target_index")
                            .and_then(|v| v.as_u64())
                            .unwrap_or(2) as usize;
                        SearchEnvironment::custom_scenario(file_count, target_index, seed)
                    }
                    _ => SearchEnvironment::basic_scenario(),
                };
                Some(Box::new(env))
            }
            "internal_diagnosis" => {
                let scenario_name = env_params
                    .get("scenario")
                    .and_then(|v| v.as_str())
                    .unwrap_or("routing");
                let env = match scenario_name {
                    "routing" => InternalDiagnosisEnvironment::routing_error_scenario(),
                    "failover" => InternalDiagnosisEnvironment::failover_error_scenario(),
                    "worker_pool" => InternalDiagnosisEnvironment::worker_pool_scenario(),
                    "strategy" => InternalDiagnosisEnvironment::strategy_mismatch_scenario(),
                    "exploration" => InternalDiagnosisEnvironment::exploration_depth_scenario(),
                    "complex" => InternalDiagnosisEnvironment::complex_scenario(seed),
                    _ => InternalDiagnosisEnvironment::routing_error_scenario(),
                };
                Some(Box::new(env))
            }
            "deep_search" => {
                // TODO: Support multiple deep_search scenarios
                let _scenario_name = env_params
                    .get("scenario")
                    .and_then(|v| v.as_str())
                    .unwrap_or("tech_question");
                let env = DeepSearchEnvironment::tech_question_scenario();
                Some(Box::new(env))
            }
            // Real filesystem environment (Bash, Read, Write, Grep, Glob)
            "default" | "realworld" => {
                use swarm_engine_core::environment::DefaultEnvironment;
                let working_dir = env_params
                    .get("working_dir")
                    .and_then(|v| v.as_str())
                    .map(std::path::PathBuf::from);
                let env = if let Some(dir) = working_dir {
                    DefaultEnvironment::with_working_dir(dir)
                } else {
                    DefaultEnvironment::new()
                };
                Some(Box::new(env))
            }
            _ => None, // Unknown env_type - no Environment inserted
        };

        if let Some(env) = env_box {
            extensions.insert(env);
        }

        // Insert DependencyGraph if specified (explicit or from scenario)
        let graph = self.dependency_graph.clone().or_else(|| {
            self.scenario.dependency_graph.as_ref().and_then(|cfg| {
                let action_names = self.scenario.actions.action_names();
                cfg.to_core_graph(&action_names)
            })
        });
        if let Some(g) = graph {
            extensions.insert(g);
        }

        // Note: prior_snapshot is inserted by LearnableSwarmBuilder.with_learning_store()

        extensions
    }

    /// Decide the outcome of a run from the scenario's conditions.
    ///
    /// The checks run in a fixed order:
    ///
    /// 1. If any failure condition evaluates to true, the result is `(false, Failure)`.
    ///    Failure conditions whose metric is unknown are skipped.
    /// 2. If `timed_out` is set, the reason is always `Timeout`; the success flag follows
    ///    `on_timeout` (only `PartialSuccess` can yield `true`, via the success conditions).
    /// 3. Otherwise the success conditions decide between `(true, Success)` and
    ///    `(false, Stopped)`.
    ///
    /// Returns `(success, termination_reason)`.
    fn evaluate_conditions(
        &self,
        metrics: &RunMetrics,
        environment_done: bool,
        timed_out: bool,
    ) -> (bool, TerminationReason) {
        let conditions = &self.scenario.conditions;

        // Step 1: a single matching failure condition is enough to fail the run.
        let failure_hit = conditions.failure.iter().any(|condition| {
            self.get_metric_value(&condition.metric, metrics, environment_done)
                .map(|actual| condition.evaluate(&actual))
                .unwrap_or(false)
        });
        if failure_hit {
            return (false, TerminationReason::Failure);
        }

        // Step 2: timeouts always report `Timeout`; only the success flag varies.
        if timed_out {
            let success = match conditions.on_timeout {
                TimeoutBehavior::Fail => false,
                TimeoutBehavior::PartialSuccess => {
                    self.check_success_conditions(metrics, environment_done)
                }
                // TODO: Implement milestone scoring
                TimeoutBehavior::MilestoneScore => false,
            };
            return (success, TerminationReason::Timeout);
        }

        // Step 3: every success condition must hold. `Stopped` covers the case where neither
        // failure nor success was reached, which is not expected after completion.
        let success = self.check_success_conditions(metrics, environment_done);
        let reason = if success {
            TerminationReason::Success
        } else {
            TerminationReason::Stopped
        };
        (success, reason)
    }

    /// Report whether every success condition of the scenario holds.
    ///
    /// An empty list of success conditions counts as success. A condition whose metric
    /// cannot be resolved counts as not satisfied.
    fn check_success_conditions(&self, metrics: &RunMetrics, environment_done: bool) -> bool {
        for condition in self.scenario.conditions.success.iter() {
            let satisfied =
                match self.get_metric_value(&condition.metric, metrics, environment_done) {
                    Some(actual) => condition.evaluate(&actual),
                    // Unknown metric: treat as unmet.
                    None => false,
                };
            if !satisfied {
                return false;
            }
        }

        // Either there were no conditions, or all of them passed.
        true
    }

    /// Resolve a metric path to the value conditions are evaluated against.
    ///
    /// Recognised paths (most also accept the short form without the section prefix):
    ///
    /// - `environment.done` → `Bool(environment_done)`
    /// - `task.total_ticks`, `task.total_actions`, `task.successful_actions` → `Integer`
    /// - `task.success_rate` → `Float`
    /// - `performance.llm_error_rate` → `Float`
    /// - `performance.llm_invocations` → `Integer`
    /// - `coordination.manager_activations` → `Integer`
    /// - `errors.count` → `Integer`, computed as total actions minus successful actions
    ///   (saturating at zero)
    ///
    /// Returns `None` for any other path.
    fn get_metric_value(
        &self,
        path: &str,
        metrics: &RunMetrics,
        environment_done: bool,
    ) -> Option<ConditionValue> {
        let task = &metrics.task;

        let value = match path {
            // Environment
            "environment.done" => ConditionValue::Bool(environment_done),

            // Task
            "task.total_ticks" | "total_ticks" => ConditionValue::Integer(task.total_ticks as i64),
            "task.success_rate" | "success_rate" => ConditionValue::Float(task.success_rate),
            "task.total_actions" | "total_actions" => {
                ConditionValue::Integer(task.total_actions as i64)
            }
            "task.successful_actions" | "successful_actions" => {
                ConditionValue::Integer(task.successful_actions as i64)
            }

            // Performance
            "performance.llm_error_rate" | "llm_error_rate" => {
                ConditionValue::Float(metrics.performance.llm_error_rate)
            }
            "performance.llm_invocations" | "llm_invocations" => {
                ConditionValue::Integer(metrics.performance.llm_invocations as i64)
            }

            // Coordination
            "coordination.manager_activations" | "manager_activations" => {
                ConditionValue::Integer(metrics.coordination.manager_activations as i64)
            }

            // Errors: actions that did not succeed
            "errors.count" => {
                let failed = task.total_actions.saturating_sub(task.successful_actions);
                ConditionValue::Integer(failed as i64)
            }

            // Anything else is not a known metric
            _ => return None,
        };

        Some(value)
    }
}

/// Adapter that exposes a boxed `dyn ManagerAgent` as a concrete `ManagerAgent` type.
///
/// Every trait method forwards straight to the boxed manager.
struct DynManagerWrapper(Box<dyn ManagerAgent>);

impl ManagerAgent for DynManagerWrapper {
    /// Forwards to the wrapped manager's `prepare`.
    fn prepare(
        &self,
        ctx: &swarm_engine_core::agent::TaskContext,
    ) -> swarm_engine_core::agent::BatchDecisionRequest {
        let Self(inner) = self;
        inner.prepare(ctx)
    }

    /// Forwards to the wrapped manager's `finalize`.
    fn finalize(
        &self,
        ctx: &swarm_engine_core::agent::TaskContext,
        worker_responses: Vec<(
            swarm_engine_core::types::WorkerId,
            swarm_engine_core::agent::DecisionResponse,
        )>,
    ) -> swarm_engine_core::agent::ManagementDecision {
        let Self(inner) = self;
        inner.finalize(ctx, worker_responses)
    }

    /// Returns the wrapped manager's id.
    fn id(&self) -> swarm_engine_core::agent::ManagerId {
        let Self(inner) = self;
        inner.id()
    }

    /// Returns the wrapped manager's name.
    fn name(&self) -> &str {
        let Self(inner) = self;
        inner.name()
    }
}

/// Adapter that exposes a boxed `dyn BatchInvoker` as a concrete `BatchInvoker` type.
///
/// Every trait method forwards straight to the boxed invoker.
struct DynBatchInvokerWrapper(Box<dyn BatchInvoker>);

impl BatchInvoker for DynBatchInvokerWrapper {
    /// Forwards to the wrapped invoker's `invoke`.
    fn invoke(
        &self,
        batch: swarm_engine_core::agent::BatchDecisionRequest,
        ext: &swarm_engine_core::extensions::Extensions,
    ) -> swarm_engine_core::agent::BatchInvokeResult {
        let Self(inner) = self;
        inner.invoke(batch, ext)
    }

    /// Forwards to the wrapped invoker's `plan_dependencies`.
    fn plan_dependencies(
        &self,
        task: &str,
        available_actions: &[ActionDef],
        select_hint: Option<&SelectResult>,
    ) -> Option<swarm_engine_core::exploration::DependencyGraph> {
        let Self(inner) = self;
        inner.plan_dependencies(task, available_actions, select_hint)
    }

    /// Returns the wrapped invoker's name.
    fn name(&self) -> &str {
        let Self(inner) = self;
        inner.name()
    }
}

/// Adapter that exposes a boxed `dyn OperatorProvider<NodeRules>` as a concrete
/// `OperatorProvider<NodeRules>` type.
///
/// Every trait method forwards straight to the boxed provider.
struct DynOperatorProviderWrapper(Box<dyn OperatorProvider<NodeRules>>);

impl OperatorProvider<NodeRules> for DynOperatorProviderWrapper {
    /// Forwards to the wrapped provider's `provide`.
    fn provide(
        &self,
        node_rules: NodeRules,
        provider_ctx: Option<
            &swarm_engine_core::exploration::ProviderContext<
                '_,
                swarm_engine_core::exploration::ActionNodeData,
                String,
                swarm_engine_core::exploration::MapNodeState,
            >,
        >,
    ) -> swarm_engine_core::exploration::ConfigurableOperator<NodeRules> {
        let Self(inner) = self;
        inner.provide(node_rules, provider_ctx)
    }

    /// Forwards to the wrapped provider's `reevaluate`.
    fn reevaluate(
        &self,
        current_operator: &mut swarm_engine_core::exploration::ConfigurableOperator<NodeRules>,
        provider_ctx: &swarm_engine_core::exploration::ProviderContext<
            '_,
            swarm_engine_core::exploration::ActionNodeData,
            String,
            swarm_engine_core::exploration::MapNodeState,
        >,
    ) {
        let Self(inner) = self;
        inner.reevaluate(current_operator, provider_ctx);
    }

    /// Returns the wrapped provider's name.
    fn name(&self) -> &str {
        let Self(inner) = self;
        inner.name()
    }
}