1use std::time::Duration;
4
5use tokio::runtime::Handle;
6
7use swarm_engine_core::actions::ActionDef;
8use swarm_engine_core::agent::{
9 BatchInvoker, DefaultBatchManagerAgent, GenericWorker, ManagementStrategy, ManagerAgent,
10 ManagerId, WorkerAgent,
11};
12use swarm_engine_core::environment::EnvironmentBox;
13use swarm_engine_core::events::TraceSubscriber;
14use swarm_engine_core::exploration::{LearnedDependencyProvider, NodeRules, OperatorProvider};
15use swarm_engine_core::extensions::Extensions;
16use swarm_engine_core::learn::{
17 profile_to_offline_model, CountTrigger, LearnableSwarmBuilder, LearningStore, OfflineModel,
18 ScenarioProfile, TrainTrigger,
19};
20use swarm_engine_core::orchestrator::SwarmConfig;
21use swarm_engine_core::types::{GroupId, SwarmTask};
22
23use crate::environments::{
24 CodeEnvironment, DeepSearchEnvironment, InternalDiagnosisEnvironment, MazeEnvironment,
25 SearchEnvironment, TroubleshootingEnvironment,
26};
27
28use crate::aggregator::Aggregator;
29use crate::error::Result;
30use crate::metrics::RunMetrics;
31use crate::reporter::{ConfigSummary, EvalReport, SeedInfo};
32use crate::run::{EvalRun, TerminationReason};
33use crate::scenario::conditions::{ConditionValue, TimeoutBehavior};
34use crate::scenario::{EvalScenario, ManagementStrategyConfig};
35use crate::validation::{ScenarioValidator, WarningSeverity};
36
/// Seed value for a single evaluation run.
///
/// `EvalRunner` inserts one of these into the per-run `Extensions` so that
/// downstream components can look the seed up by type.
///
/// The comparison and hashing derives let seeds be asserted on, deduplicated
/// and used as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EvalSeed(pub u64);
42
43pub type ManagerFactory = Box<dyn Fn() -> Box<dyn ManagerAgent> + Send + Sync>;
45
46pub type BatchInvokerFactory = Box<dyn Fn() -> Box<dyn BatchInvoker> + Send + Sync>;
48
49pub type OperatorProviderFactory =
51 Box<dyn Fn() -> Box<dyn OperatorProvider<NodeRules>> + Send + Sync>;
52
53pub struct EvalRunner {
68 scenario: EvalScenario,
69 runtime: Handle,
70 runs: usize,
71 seed: u64,
72 task: Option<SwarmTask>,
74 manager_factory: Option<ManagerFactory>,
76 batch_invoker_factory: Option<BatchInvokerFactory>,
78 extensions_factory: Option<Box<dyn Fn() -> Extensions + Send + Sync>>,
80 operator_provider_factory: Option<OperatorProviderFactory>,
82 verbose: bool,
84 enable_exploration: bool,
86 dependency_graph: Option<swarm_engine_core::exploration::DependencyGraph>,
88 learning_store: Option<LearningStore>,
90 train_trigger: Option<std::sync::Arc<dyn TrainTrigger>>,
92 skip_learned_action_order: bool,
94 trace_subscriber: Option<std::sync::Arc<dyn TraceSubscriber>>,
96 scenario_profile: Option<ScenarioProfile>,
98 offline_model_from_profile: Option<OfflineModel>,
100}
101
102impl EvalRunner {
103 pub fn new(scenario: EvalScenario, runtime: Handle) -> Self {
104 Self {
105 scenario,
106 runtime,
107 runs: 1,
108 seed: 42,
109 task: None,
110 manager_factory: None,
111 batch_invoker_factory: None,
112 extensions_factory: None,
113 operator_provider_factory: None,
114 verbose: false,
115 enable_exploration: false,
116 dependency_graph: None,
117 learning_store: None,
118 train_trigger: None,
119 skip_learned_action_order: false,
120 trace_subscriber: None,
121 scenario_profile: None,
122 offline_model_from_profile: None,
123 }
124 }
125
126 pub fn with_verbose(mut self, verbose: bool) -> Self {
128 self.verbose = verbose;
129 self
130 }
131
132 pub fn with_exploration(mut self, enable: bool) -> Self {
134 self.enable_exploration = enable;
135 self
136 }
137
138 pub fn with_dependency_graph(
143 mut self,
144 graph: swarm_engine_core::exploration::DependencyGraph,
145 ) -> Self {
146 self.dependency_graph = Some(graph);
147 self
148 }
149
150 pub fn with_learning_store(mut self, path: impl AsRef<std::path::Path>) -> Self {
161 match LearningStore::new(path) {
162 Ok(store) => {
163 self.learning_store = Some(store);
165 }
166 Err(e) => {
167 eprintln!("Warning: Failed to create LearningStore: {}", e);
168 }
169 }
170 self
171 }
172
173 pub fn with_train_trigger(mut self, trigger: std::sync::Arc<dyn TrainTrigger>) -> Self {
187 self.train_trigger = Some(trigger);
188 self
189 }
190
191 pub fn skip_learned_action_order(mut self, skip: bool) -> Self {
196 self.skip_learned_action_order = skip;
197 self
198 }
199
200 pub fn with_trace_subscriber(
218 mut self,
219 subscriber: std::sync::Arc<dyn TraceSubscriber>,
220 ) -> Self {
221 self.trace_subscriber = Some(subscriber);
222 self
223 }
224
225 pub fn with_scenario_profile(mut self, profile: ScenarioProfile) -> Self {
239 let offline_model = profile_to_offline_model(&profile);
240 self.offline_model_from_profile = Some(offline_model);
241 self.scenario_profile = Some(profile);
242 self
243 }
244
245 pub fn with_runs(mut self, runs: usize) -> Self {
246 self.runs = runs;
247 self
248 }
249
250 pub fn with_seed(mut self, seed: u64) -> Self {
251 self.seed = seed;
252 self
253 }
254
255 pub fn with_task(mut self, task: SwarmTask) -> Self {
257 self.task = Some(task);
258 self
259 }
260
261 pub fn with_manager_factory<F>(mut self, factory: F) -> Self
263 where
264 F: Fn() -> Box<dyn ManagerAgent> + Send + Sync + 'static,
265 {
266 self.manager_factory = Some(Box::new(factory));
267 self
268 }
269
270 pub fn with_batch_invoker_factory<F>(mut self, factory: F) -> Self
272 where
273 F: Fn() -> Box<dyn BatchInvoker> + Send + Sync + 'static,
274 {
275 self.batch_invoker_factory = Some(Box::new(factory));
276 self
277 }
278
279 pub fn with_extensions_factory<F>(mut self, factory: F) -> Self
281 where
282 F: Fn() -> Extensions + Send + Sync + 'static,
283 {
284 self.extensions_factory = Some(Box::new(factory));
285 self
286 }
287
288 pub fn with_operator_provider_factory<F>(mut self, factory: F) -> Self
306 where
307 F: Fn() -> Box<dyn OperatorProvider<NodeRules>> + Send + Sync + 'static,
308 {
309 self.operator_provider_factory = Some(Box::new(factory));
310 self
311 }
312
313 pub fn run(&self) -> Result<EvalReport> {
314 let warnings = ScenarioValidator::validate_scenario(&self.scenario);
316 for warning in &warnings {
317 match warning.severity() {
318 WarningSeverity::High => {
319 tracing::warn!(
320 severity = %warning.severity(),
321 "Scenario validation: {}",
322 warning
323 );
324 }
325 WarningSeverity::Medium => {
326 tracing::info!(
327 severity = %warning.severity(),
328 "Scenario validation: {}",
329 warning
330 );
331 }
332 _ => {
333 tracing::debug!(
334 severity = %warning.severity(),
335 "Scenario validation: {}",
336 warning
337 );
338 }
339 }
340 }
341
342 let mut eval_runs = Vec::with_capacity(self.runs);
343 let mut run_seeds = Vec::with_capacity(self.runs);
344
345 let group_id = GroupId::new();
348
349 for i in 0..self.runs {
350 let run_seed = self.seed.wrapping_add(i as u64);
351 run_seeds.push(run_seed);
352
353 let result = self.run_single(i, run_seed, group_id)?;
354 eval_runs.push(result);
355 }
356
357 let aggregated = Aggregator::aggregate(&eval_runs);
358
359 Ok(EvalReport {
363 config_summary: ConfigSummary {
364 scenario_name: self.scenario.meta.name.clone(),
365 scenario_id: self.scenario.meta.id.to_string(),
366 worker_count: self.scenario.agents.workers.iter().map(|w| w.count).sum(),
367 max_ticks: self.scenario.app_config.max_ticks,
368 run_count: self.runs,
369 },
370 seed_info: SeedInfo {
371 base_seed: self.seed,
372 run_seeds,
373 },
374 runs: eval_runs,
375 aggregated,
376 assertion_results: vec![],
377 })
378 }
379
380 fn run_single(&self, index: usize, seed: u64, group_id: GroupId) -> Result<EvalRun> {
388 let workers = self.build_workers();
392 let management_strategy = self.build_management_strategy();
393
394 let swarm_config = SwarmConfig {
395 tick_duration: Duration::from_millis(self.scenario.app_config.tick_duration_ms),
396 max_ticks: self.scenario.app_config.max_ticks,
397 management_strategy,
398 };
399
400 let extensions = self.build_extensions_from_scenario(seed);
402
403 let scenario_key = self.scenario.meta.id.learning_key();
405 let mut builder = LearnableSwarmBuilder::new(self.runtime.clone())
406 .scenario(&scenario_key)
407 .swarm_config(swarm_config)
408 .workers(workers)
409 .extensions(extensions)
410 .enable_exploration(
411 self.enable_exploration || self.scenario.app_config.enable_exploration,
412 );
413
414 if let Some(factory) = &self.manager_factory {
416 let manager = factory();
417 builder = builder.add_manager(Box::new(DynManagerWrapper(manager)));
418 } else {
419 let managers = self.build_managers();
420 for manager in managers {
421 builder = builder.add_manager(Box::new(manager));
422 }
423 }
424
425 if let Some(factory) = &self.batch_invoker_factory {
427 let invoker = factory();
428 builder = builder.batch_invoker(Box::new(DynBatchInvokerWrapper(invoker)));
429 }
430
431 if let Some(factory) = &self.operator_provider_factory {
433 let provider = factory();
434 builder = builder.operator_provider(Box::new(DynOperatorProviderWrapper(provider)));
435 }
436
437 if let Some(ref model) = self.offline_model_from_profile {
439 builder = builder.offline_model(model.clone());
440
441 if self.operator_provider_factory.is_none() {
443 println!(
444 "Profile offline model applied: ucb1_c={:.3}, maturity={}, strategy={}",
445 model.parameters.ucb1_c,
446 model.strategy_config.maturity_threshold,
447 model.strategy_config.initial_strategy
448 );
449 }
450
451 if !self.skip_learned_action_order {
453 if let Some(ref action_order) = model.action_order {
454 let provider = LearnedDependencyProvider::new(action_order.clone());
455 builder = builder.dependency_provider(Box::new(provider));
456 println!(
457 "Learned action order applied: discover={:?}, not_discover={:?}",
458 action_order.discover, action_order.not_discover
459 );
460 }
461 } else if model.action_order.is_some() {
462 println!("Learned action order skipped (--no-dep-graph)");
463 }
464 }
465
466 if let Some(ref store) = self.learning_store {
469 builder = builder.with_learning_store(store.clone());
470
471 if self.offline_model_from_profile.is_none() {
473 let offline_model_opt = builder.offline_model_ref().cloned();
475 if let Some(ref model) = offline_model_opt {
476 if self.operator_provider_factory.is_none() {
478 println!(
479 "Offline model applied: ucb1_c={:.3}, maturity={}, strategy={}",
480 model.parameters.ucb1_c,
481 model.strategy_config.maturity_threshold,
482 model.strategy_config.initial_strategy
483 );
484 }
485
486 if !self.skip_learned_action_order {
488 if let Some(ref action_order) = model.action_order {
489 let provider = LearnedDependencyProvider::new(action_order.clone());
490 builder = builder.dependency_provider(Box::new(provider));
491 println!(
492 "Learned action order applied: discover={:?}, not_discover={:?}",
493 action_order.discover, action_order.not_discover
494 );
495 }
496 } else if model.action_order.is_some() {
497 println!("Learned action order skipped (--no-dep-graph)");
498 }
499 }
500 }
501
502 if let Some(ref trigger) = self.train_trigger {
504 builder = builder.train_trigger(std::sync::Arc::clone(trigger));
505 } else {
506 builder = builder.train_trigger(std::sync::Arc::new(CountTrigger::new(self.runs)));
507 }
508 }
509
510 if let Some(ref subscriber) = self.trace_subscriber {
512 builder = builder.with_trace_subscriber(std::sync::Arc::clone(subscriber));
513 }
514
515 let mut swarm = builder.build()?;
519
520 let manager_count = self
522 .scenario
523 .agents
524 .managers
525 .iter()
526 .map(|t| t.count)
527 .sum::<usize>();
528 if manager_count > 1 {
529 swarm.orchestrator_mut().enable_partitioning();
530 }
531
532 let task_to_run = self
534 .task
535 .clone()
536 .or_else(|| self.build_task_from_scenario())
537 .map(|task| task.with_group_id(group_id));
538
539 let result = if let Some(task) = task_to_run {
541 swarm.run_task(task)?
542 } else {
543 swarm.run()
544 };
545
546 let state = swarm.orchestrator().state();
550 let timed_out = result.total_ticks >= self.scenario.app_config.max_ticks;
551 let environment_done = state.shared.is_environment_done();
552 let total_actions = state.shared.stats.total_visits() as u64;
553 let successful_actions = state.shared.stats.total_successes() as u64;
554 let llm_invocations = state.shared.llm_invocations();
555 let llm_invoke_errors = state.shared.llm_errors();
556
557 let metrics = RunMetrics {
558 task: crate::metrics::TaskMetrics {
559 total_ticks: result.total_ticks,
560 total_tasks: 0,
561 completed_tasks: 0,
562 total_actions,
563 successful_actions,
564 success_rate: state.shared.stats.success_rate(),
565 },
566 coordination: crate::metrics::CoordinationMetrics {
567 manager_activations: llm_invocations,
568 manager_intervention_rate: if result.total_ticks > 0 {
569 llm_invocations as f64 / result.total_ticks as f64
570 } else {
571 0.0
572 },
573 ..Default::default()
574 },
575 performance: {
576 let llm_error_rate = if llm_invocations > 0 {
577 llm_invoke_errors as f64 / llm_invocations as f64
578 } else {
579 0.0
580 };
581 crate::metrics::PerformanceMetrics {
582 total_duration_ms: result.total_duration.as_millis() as f64,
583 avg_tick_latency_ms: if result.total_ticks > 0 {
584 result.total_duration.as_millis() as f64 / result.total_ticks as f64
585 } else {
586 0.0
587 },
588 raw_throughput_per_sec: if result.total_duration.as_secs_f64() > 0.0 {
589 total_actions as f64 / result.total_duration.as_secs_f64()
590 } else {
591 0.0
592 },
593 effective_throughput_per_sec: if result.total_duration.as_secs_f64() > 0.0 {
594 successful_actions as f64 / result.total_duration.as_secs_f64()
595 } else {
596 0.0
597 },
598 llm_invocations,
599 llm_invoke_errors,
600 llm_error_rate,
601 ..Default::default()
602 }
603 },
604 robustness: Default::default(),
605 };
606
607 let (success, termination_reason) = if !result.completed {
609 (false, TerminationReason::Stopped)
610 } else {
611 self.evaluate_conditions(&metrics, environment_done, timed_out)
612 };
613
614 if swarm.is_learning_enabled() {
618 swarm.emit_stats_snapshot();
620
621 std::thread::sleep(std::time::Duration::from_millis(150));
624
625 if let Some(tx) = swarm.take_shutdown_tx() {
626 let _ = tx.try_send(());
628 }
629 }
630
631 Ok(EvalRun::new(
632 index,
633 seed,
634 success,
635 termination_reason,
636 metrics,
637 ))
638 }
639
640 fn build_workers(&self) -> Vec<Box<dyn WorkerAgent>> {
641 let mut workers: Vec<Box<dyn WorkerAgent>> = Vec::new();
642
643 for template in &self.scenario.agents.workers {
644 for i in 0..template.count {
645 let id = workers.len();
646 let name = template.id_pattern.replace("{i}", &i.to_string());
647
648 let worker = GenericWorker::new(id)
649 .with_name(name)
650 .with_require_guidance(true);
651
652 workers.push(Box::new(worker));
653 }
654 }
655
656 workers
657 }
658
659 fn build_managers(&self) -> Vec<DefaultBatchManagerAgent> {
660 let mut managers = Vec::new();
661 let mut manager_index = 0;
662
663 for template in &self.scenario.agents.managers {
664 let ids = template.generate_ids();
665 for name in ids {
666 let manager = DefaultBatchManagerAgent::new(ManagerId(manager_index))
667 .with_name(name)
668 .with_interval(self.scenario.manager.process_interval_ticks);
669
670 managers.push(manager);
671 manager_index += 1;
672 }
673 }
674
675 if managers.is_empty() {
677 managers.push(
678 DefaultBatchManagerAgent::new(ManagerId(0))
679 .with_name("default_manager")
680 .with_interval(self.scenario.manager.process_interval_ticks),
681 );
682 }
683
684 managers
685 }
686
687 fn build_management_strategy(&self) -> ManagementStrategy {
688 match &self.scenario.app_config.management_strategy {
689 ManagementStrategyConfig::EveryTick {} => ManagementStrategy::EveryTick,
690 ManagementStrategyConfig::IntervalBased { max_interval } => {
691 ManagementStrategy::FixedInterval {
692 interval: *max_interval,
693 }
694 }
695 ManagementStrategyConfig::EventDriven { triggers: _ } => {
696 ManagementStrategy::CompletionBased { max_wait_ticks: 50 }
698 }
699 ManagementStrategyConfig::Hybrid {
700 max_interval,
701 triggers: _,
702 } => ManagementStrategy::Hybrid {
703 preferred_interval: *max_interval,
704 force_after_ticks: max_interval * 2,
705 },
706 ManagementStrategyConfig::Disabled {} => {
707 ManagementStrategy::FixedInterval { interval: u64::MAX }
709 }
710 }
711 }
712
713 fn build_task_from_scenario(&self) -> Option<SwarmTask> {
717 let task_config = &self.scenario.task;
718
719 if task_config.goal.is_empty() {
720 return None;
721 }
722
723 let mut context = serde_json::Map::new();
725
726 if let Some(target_path) = &task_config.context.target_path {
727 context.insert(
728 "target_path".to_string(),
729 serde_json::Value::String(target_path.clone()),
730 );
731 }
732 if let Some(working_dir) = &task_config.context.working_dir {
733 context.insert(
734 "working_dir".to_string(),
735 serde_json::Value::String(working_dir.clone()),
736 );
737 }
738 if let Some(max_depth) = task_config.context.max_depth {
739 context.insert(
740 "max_depth".to_string(),
741 serde_json::Value::Number(serde_json::Number::from(max_depth)),
742 );
743 }
744
745 for (key, value) in &task_config.context.extra {
747 if let Ok(json_value) = serde_json::to_value(value) {
748 context.insert(key.clone(), json_value);
749 }
750 }
751
752 let task =
753 SwarmTask::new(&task_config.goal).with_context(serde_json::Value::Object(context));
754
755 Some(task)
756 }
757
758 fn build_extensions_from_scenario(&self, seed: u64) -> Extensions {
760 let mut extensions = if let Some(factory) = &self.extensions_factory {
761 factory()
762 } else {
763 Extensions::new()
764 };
765
766 extensions.insert(EvalSeed(seed));
768
769 extensions.insert(self.scenario.llm.clone());
771
772 if let Some(ref lora) = self.scenario.llm.lora {
774 extensions.insert(lora.clone());
775 }
776
777 extensions.insert(self.scenario.manager.clone());
779
780 extensions.insert(self.scenario.batch_processor.clone());
782
783 let core_actions_config = self.scenario.actions.to_core_config();
785 extensions.insert(core_actions_config);
786
787 let env_type = self.scenario.environment.env_type.as_str();
789 let env_params = &self.scenario.environment.params;
790
791 let env_box: Option<EnvironmentBox> = match env_type {
792 "maze" => {
793 let map = env_params.get("map").and_then(|v| v.as_str()).unwrap_or("");
794 let worker_count = env_params
795 .get("worker_count")
796 .and_then(|v| v.as_u64())
797 .unwrap_or(1) as usize;
798 Some(Box::new(MazeEnvironment::from_str(map, worker_count)))
799 }
800 "code" => {
801 Some(Box::new(CodeEnvironment::auth_scenario()))
803 }
804 "troubleshooting" => {
805 let scenario_name = env_params
806 .get("scenario")
807 .and_then(|v| v.as_str())
808 .unwrap_or("memory_leak");
809 let env = match scenario_name {
810 "memory_leak" => TroubleshootingEnvironment::memory_leak_scenario(),
811 "cpu_spike" => TroubleshootingEnvironment::cpu_spike_scenario(),
812 "network_timeout" => TroubleshootingEnvironment::network_timeout_scenario(),
813 "medium" => TroubleshootingEnvironment::complex_scenario(15, 3, 2, seed),
814 "high" => TroubleshootingEnvironment::complex_scenario(30, 8, 3, seed),
815 "extreme" => TroubleshootingEnvironment::complex_scenario(50, 15, 4, seed),
816 "complex" => {
817 let total_services = env_params
818 .get("total_services")
819 .and_then(|v| v.as_u64())
820 .unwrap_or(15) as usize;
821 let noise_services = env_params
822 .get("noise_services")
823 .and_then(|v| v.as_u64())
824 .unwrap_or(3) as usize;
825 let cascade_depth = env_params
826 .get("cascade_depth")
827 .and_then(|v| v.as_u64())
828 .unwrap_or(2) as usize;
829 TroubleshootingEnvironment::complex_scenario(
830 total_services,
831 noise_services,
832 cascade_depth,
833 seed,
834 )
835 }
836 _ => TroubleshootingEnvironment::memory_leak_scenario(),
837 };
838 Some(Box::new(env))
839 }
840 "search" => {
841 let scenario_name = env_params
842 .get("scenario")
843 .and_then(|v| v.as_str())
844 .unwrap_or("basic");
845 let env = match scenario_name {
846 "basic" => SearchEnvironment::basic_scenario(),
847 "medium" => SearchEnvironment::medium_scenario(),
848 "large" => SearchEnvironment::large_scenario(),
849 "custom" => {
850 let file_count = env_params
851 .get("file_count")
852 .and_then(|v| v.as_u64())
853 .unwrap_or(5) as usize;
854 let target_index = env_params
855 .get("target_index")
856 .and_then(|v| v.as_u64())
857 .unwrap_or(2) as usize;
858 SearchEnvironment::custom_scenario(file_count, target_index, seed)
859 }
860 _ => SearchEnvironment::basic_scenario(),
861 };
862 Some(Box::new(env))
863 }
864 "internal_diagnosis" => {
865 let scenario_name = env_params
866 .get("scenario")
867 .and_then(|v| v.as_str())
868 .unwrap_or("routing");
869 let env = match scenario_name {
870 "routing" => InternalDiagnosisEnvironment::routing_error_scenario(),
871 "failover" => InternalDiagnosisEnvironment::failover_error_scenario(),
872 "worker_pool" => InternalDiagnosisEnvironment::worker_pool_scenario(),
873 "strategy" => InternalDiagnosisEnvironment::strategy_mismatch_scenario(),
874 "exploration" => InternalDiagnosisEnvironment::exploration_depth_scenario(),
875 "complex" => InternalDiagnosisEnvironment::complex_scenario(seed),
876 _ => InternalDiagnosisEnvironment::routing_error_scenario(),
877 };
878 Some(Box::new(env))
879 }
880 "deep_search" => {
881 let _scenario_name = env_params
883 .get("scenario")
884 .and_then(|v| v.as_str())
885 .unwrap_or("tech_question");
886 let env = DeepSearchEnvironment::tech_question_scenario();
887 Some(Box::new(env))
888 }
889 "default" | "realworld" => {
891 use swarm_engine_core::environment::DefaultEnvironment;
892 let working_dir = env_params
893 .get("working_dir")
894 .and_then(|v| v.as_str())
895 .map(std::path::PathBuf::from);
896 let env = if let Some(dir) = working_dir {
897 DefaultEnvironment::with_working_dir(dir)
898 } else {
899 DefaultEnvironment::new()
900 };
901 Some(Box::new(env))
902 }
903 _ => None, };
905
906 if let Some(env) = env_box {
907 extensions.insert(env);
908 }
909
910 let graph = self.dependency_graph.clone().or_else(|| {
912 self.scenario.dependency_graph.as_ref().and_then(|cfg| {
913 let action_names = self.scenario.actions.action_names();
914 cfg.to_core_graph(&action_names)
915 })
916 });
917 if let Some(g) = graph {
918 extensions.insert(g);
919 }
920
921 extensions
924 }
925
926 fn evaluate_conditions(
930 &self,
931 metrics: &RunMetrics,
932 environment_done: bool,
933 timed_out: bool,
934 ) -> (bool, TerminationReason) {
935 let conditions = &self.scenario.conditions;
936
937 for condition in &conditions.failure {
939 if let Some(actual) =
940 self.get_metric_value(&condition.metric, metrics, environment_done)
941 {
942 if condition.evaluate(&actual) {
943 return (false, TerminationReason::Failure);
944 }
945 }
946 }
947
948 if timed_out {
950 return match conditions.on_timeout {
951 TimeoutBehavior::Fail => (false, TerminationReason::Timeout),
952 TimeoutBehavior::PartialSuccess => {
953 let success = self.check_success_conditions(metrics, environment_done);
955 (success, TerminationReason::Timeout)
956 }
957 TimeoutBehavior::MilestoneScore => {
958 (false, TerminationReason::Timeout)
960 }
961 };
962 }
963
964 let success = self.check_success_conditions(metrics, environment_done);
966 if success {
967 (true, TerminationReason::Success)
968 } else {
969 (false, TerminationReason::Stopped)
972 }
973 }
974
975 fn check_success_conditions(&self, metrics: &RunMetrics, environment_done: bool) -> bool {
977 let conditions = &self.scenario.conditions;
978
979 if conditions.success.is_empty() {
981 return true;
982 }
983
984 conditions.success.iter().all(|condition| {
986 self.get_metric_value(&condition.metric, metrics, environment_done)
987 .map(|actual| condition.evaluate(&actual))
988 .unwrap_or(false)
989 })
990 }
991
992 fn get_metric_value(
994 &self,
995 path: &str,
996 metrics: &RunMetrics,
997 environment_done: bool,
998 ) -> Option<ConditionValue> {
999 match path {
1000 "environment.done" => Some(ConditionValue::Bool(environment_done)),
1002
1003 "task.total_ticks" | "total_ticks" => {
1005 Some(ConditionValue::Integer(metrics.task.total_ticks as i64))
1006 }
1007 "task.success_rate" | "success_rate" => {
1008 Some(ConditionValue::Float(metrics.task.success_rate))
1009 }
1010 "task.total_actions" | "total_actions" => {
1011 Some(ConditionValue::Integer(metrics.task.total_actions as i64))
1012 }
1013 "task.successful_actions" | "successful_actions" => Some(ConditionValue::Integer(
1014 metrics.task.successful_actions as i64,
1015 )),
1016
1017 "performance.llm_error_rate" | "llm_error_rate" => {
1019 Some(ConditionValue::Float(metrics.performance.llm_error_rate))
1020 }
1021 "performance.llm_invocations" | "llm_invocations" => Some(ConditionValue::Integer(
1022 metrics.performance.llm_invocations as i64,
1023 )),
1024
1025 "coordination.manager_activations" | "manager_activations" => Some(
1027 ConditionValue::Integer(metrics.coordination.manager_activations as i64),
1028 ),
1029
1030 "errors.count" => {
1032 let failed = metrics
1033 .task
1034 .total_actions
1035 .saturating_sub(metrics.task.successful_actions);
1036 Some(ConditionValue::Integer(failed as i64))
1037 }
1038
1039 _ => None,
1041 }
1042 }
1043}
1044
1045struct DynManagerWrapper(Box<dyn ManagerAgent>);
1047
1048impl ManagerAgent for DynManagerWrapper {
1049 fn prepare(
1050 &self,
1051 context: &swarm_engine_core::agent::TaskContext,
1052 ) -> swarm_engine_core::agent::BatchDecisionRequest {
1053 self.0.prepare(context)
1054 }
1055
1056 fn finalize(
1057 &self,
1058 context: &swarm_engine_core::agent::TaskContext,
1059 responses: Vec<(
1060 swarm_engine_core::types::WorkerId,
1061 swarm_engine_core::agent::DecisionResponse,
1062 )>,
1063 ) -> swarm_engine_core::agent::ManagementDecision {
1064 self.0.finalize(context, responses)
1065 }
1066
1067 fn id(&self) -> swarm_engine_core::agent::ManagerId {
1068 self.0.id()
1069 }
1070
1071 fn name(&self) -> &str {
1072 self.0.name()
1073 }
1074}
1075
1076struct DynBatchInvokerWrapper(Box<dyn BatchInvoker>);
1078
1079impl BatchInvoker for DynBatchInvokerWrapper {
1080 fn invoke(
1081 &self,
1082 request: swarm_engine_core::agent::BatchDecisionRequest,
1083 extensions: &swarm_engine_core::extensions::Extensions,
1084 ) -> swarm_engine_core::agent::BatchInvokeResult {
1085 self.0.invoke(request, extensions)
1086 }
1087
1088 fn plan_dependencies(
1089 &self,
1090 task: &str,
1091 actions: &[ActionDef],
1092 ) -> Option<swarm_engine_core::exploration::DependencyGraph> {
1093 self.0.plan_dependencies(task, actions)
1094 }
1095
1096 fn name(&self) -> &str {
1097 self.0.name()
1098 }
1099}
1100
1101struct DynOperatorProviderWrapper(Box<dyn OperatorProvider<NodeRules>>);
1103
1104impl OperatorProvider<NodeRules> for DynOperatorProviderWrapper {
1105 fn provide(
1106 &self,
1107 rules: NodeRules,
1108 context: Option<
1109 &swarm_engine_core::exploration::ProviderContext<
1110 '_,
1111 swarm_engine_core::exploration::ActionNodeData,
1112 String,
1113 swarm_engine_core::exploration::MapNodeState,
1114 >,
1115 >,
1116 ) -> swarm_engine_core::exploration::ConfigurableOperator<NodeRules> {
1117 self.0.provide(rules, context)
1118 }
1119
1120 fn reevaluate(
1121 &self,
1122 operator: &mut swarm_engine_core::exploration::ConfigurableOperator<NodeRules>,
1123 ctx: &swarm_engine_core::exploration::ProviderContext<
1124 '_,
1125 swarm_engine_core::exploration::ActionNodeData,
1126 String,
1127 swarm_engine_core::exploration::MapNodeState,
1128 >,
1129 ) {
1130 self.0.reevaluate(operator, ctx)
1131 }
1132
1133 fn name(&self) -> &str {
1134 self.0.name()
1135 }
1136}