1use std::time::Duration;
4
5use tokio::runtime::Handle;
6
7use swarm_engine_core::actions::ActionDef;
8use swarm_engine_core::agent::{
9 BatchInvoker, DefaultBatchManagerAgent, GenericWorker, ManagementStrategy, ManagerAgent,
10 ManagerId, WorkerAgent,
11};
12use swarm_engine_core::environment::EnvironmentBox;
13use swarm_engine_core::events::TraceSubscriber;
14use swarm_engine_core::exploration::{LearnedDependencyProvider, NodeRules, OperatorProvider};
15use swarm_engine_core::extensions::Extensions;
16use swarm_engine_core::learn::{
17 profile_to_offline_model, CountTrigger, LearnableSwarmBuilder, LearningStore, OfflineModel,
18 ScenarioProfile, TrainTrigger,
19};
20use swarm_engine_core::orchestrator::SwarmConfig;
21use swarm_engine_core::types::{GroupId, SwarmTask};
22
23use crate::environments::{
24 CodeEnvironment, DeepSearchEnvironment, InternalDiagnosisEnvironment, MazeEnvironment,
25 SearchEnvironment, TroubleshootingEnvironment,
26};
27
28use crate::aggregator::Aggregator;
29use crate::error::Result;
30use crate::metrics::RunMetrics;
31use crate::reporter::{ConfigSummary, EvalReport, SeedInfo};
32use crate::run::{EvalRun, TerminationReason};
33use crate::scenario::conditions::{ConditionValue, TimeoutBehavior};
34use crate::scenario::{EvalScenario, ManagementStrategyConfig};
35
/// Seed used for one evaluation run.
///
/// `EvalRunner::build_extensions_from_scenario` inserts this value into the
/// run's `Extensions`.
#[derive(Debug, Clone, Copy)]
pub struct EvalSeed(pub u64);
41
/// Factory producing a fresh boxed [`ManagerAgent`]; called once per run when set.
pub type ManagerFactory = Box<dyn Fn() -> Box<dyn ManagerAgent> + Send + Sync>;

/// Factory producing a fresh boxed [`BatchInvoker`]; called once per run when set.
pub type BatchInvokerFactory = Box<dyn Fn() -> Box<dyn BatchInvoker> + Send + Sync>;

/// Factory producing a fresh boxed [`OperatorProvider`] over [`NodeRules`];
/// called once per run when set.
pub type OperatorProviderFactory =
    Box<dyn Fn() -> Box<dyn OperatorProvider<NodeRules>> + Send + Sync>;
51
/// Runs an [`EvalScenario`] one or more times and collects the results into an
/// [`EvalReport`].
///
/// Configure it with the `with_*` builder methods, then call [`EvalRunner::run`].
pub struct EvalRunner {
    /// Scenario describing agents, application config, environment, task and conditions.
    scenario: EvalScenario,
    /// Tokio runtime handle passed to the swarm builder for every run.
    runtime: Handle,
    /// Number of runs executed by `run` (defaults to 1).
    runs: usize,
    /// Base seed (defaults to 42); run `i` uses `seed.wrapping_add(i)`.
    seed: u64,
    /// Explicit task; when `None` the task is built from the scenario's task config.
    task: Option<SwarmTask>,
    /// Custom manager factory; when `None` managers come from the scenario templates.
    manager_factory: Option<ManagerFactory>,
    /// Optional factory for the batch invoker installed on the swarm builder.
    batch_invoker_factory: Option<BatchInvokerFactory>,
    /// Optional factory for the base `Extensions`; when `None` an empty set is used.
    extensions_factory: Option<Box<dyn Fn() -> Extensions + Send + Sync>>,
    /// Optional factory for the operator provider; when set, the offline-model
    /// summary line is not printed.
    operator_provider_factory: Option<OperatorProviderFactory>,
    /// Verbosity flag set by `with_verbose`.
    // NOTE(review): not read anywhere in this part of the file — confirm where it is consumed.
    verbose: bool,
    /// Enables exploration; OR-ed with the scenario's own `enable_exploration` setting.
    enable_exploration: bool,
    /// Explicit dependency graph; takes precedence over the scenario's graph config.
    dependency_graph: Option<swarm_engine_core::exploration::DependencyGraph>,
    /// Learning store attached to the swarm builder when present.
    learning_store: Option<LearningStore>,
    /// Training trigger used with the learning store; when `None` a `CountTrigger`
    /// built from `runs` is used.
    train_trigger: Option<std::sync::Arc<dyn TrainTrigger>>,
    /// When `true`, a learned action order is reported as skipped instead of being
    /// installed as a dependency provider.
    skip_learned_action_order: bool,
    /// Optional trace subscriber attached to the swarm builder.
    trace_subscriber: Option<std::sync::Arc<dyn TraceSubscriber>>,
    /// Scenario profile supplied through `with_scenario_profile`.
    scenario_profile: Option<ScenarioProfile>,
    /// Offline model derived from `scenario_profile` via `profile_to_offline_model`.
    offline_model_from_profile: Option<OfflineModel>,
}
100
101impl EvalRunner {
102 pub fn new(scenario: EvalScenario, runtime: Handle) -> Self {
103 Self {
104 scenario,
105 runtime,
106 runs: 1,
107 seed: 42,
108 task: None,
109 manager_factory: None,
110 batch_invoker_factory: None,
111 extensions_factory: None,
112 operator_provider_factory: None,
113 verbose: false,
114 enable_exploration: false,
115 dependency_graph: None,
116 learning_store: None,
117 train_trigger: None,
118 skip_learned_action_order: false,
119 trace_subscriber: None,
120 scenario_profile: None,
121 offline_model_from_profile: None,
122 }
123 }
124
125 pub fn with_verbose(mut self, verbose: bool) -> Self {
127 self.verbose = verbose;
128 self
129 }
130
131 pub fn with_exploration(mut self, enable: bool) -> Self {
133 self.enable_exploration = enable;
134 self
135 }
136
137 pub fn with_dependency_graph(
142 mut self,
143 graph: swarm_engine_core::exploration::DependencyGraph,
144 ) -> Self {
145 self.dependency_graph = Some(graph);
146 self
147 }
148
149 pub fn with_learning_store(mut self, path: impl AsRef<std::path::Path>) -> Self {
160 match LearningStore::new(path) {
161 Ok(store) => {
162 self.learning_store = Some(store);
164 }
165 Err(e) => {
166 eprintln!("Warning: Failed to create LearningStore: {}", e);
167 }
168 }
169 self
170 }
171
172 pub fn with_train_trigger(mut self, trigger: std::sync::Arc<dyn TrainTrigger>) -> Self {
186 self.train_trigger = Some(trigger);
187 self
188 }
189
190 pub fn skip_learned_action_order(mut self, skip: bool) -> Self {
195 self.skip_learned_action_order = skip;
196 self
197 }
198
199 pub fn with_trace_subscriber(
217 mut self,
218 subscriber: std::sync::Arc<dyn TraceSubscriber>,
219 ) -> Self {
220 self.trace_subscriber = Some(subscriber);
221 self
222 }
223
224 pub fn with_scenario_profile(mut self, profile: ScenarioProfile) -> Self {
238 let offline_model = profile_to_offline_model(&profile);
239 self.offline_model_from_profile = Some(offline_model);
240 self.scenario_profile = Some(profile);
241 self
242 }
243
244 pub fn with_runs(mut self, runs: usize) -> Self {
245 self.runs = runs;
246 self
247 }
248
249 pub fn with_seed(mut self, seed: u64) -> Self {
250 self.seed = seed;
251 self
252 }
253
254 pub fn with_task(mut self, task: SwarmTask) -> Self {
256 self.task = Some(task);
257 self
258 }
259
260 pub fn with_manager_factory<F>(mut self, factory: F) -> Self
262 where
263 F: Fn() -> Box<dyn ManagerAgent> + Send + Sync + 'static,
264 {
265 self.manager_factory = Some(Box::new(factory));
266 self
267 }
268
269 pub fn with_batch_invoker_factory<F>(mut self, factory: F) -> Self
271 where
272 F: Fn() -> Box<dyn BatchInvoker> + Send + Sync + 'static,
273 {
274 self.batch_invoker_factory = Some(Box::new(factory));
275 self
276 }
277
278 pub fn with_extensions_factory<F>(mut self, factory: F) -> Self
280 where
281 F: Fn() -> Extensions + Send + Sync + 'static,
282 {
283 self.extensions_factory = Some(Box::new(factory));
284 self
285 }
286
287 pub fn with_operator_provider_factory<F>(mut self, factory: F) -> Self
305 where
306 F: Fn() -> Box<dyn OperatorProvider<NodeRules>> + Send + Sync + 'static,
307 {
308 self.operator_provider_factory = Some(Box::new(factory));
309 self
310 }
311
312 pub fn run(&self) -> Result<EvalReport> {
313 let mut eval_runs = Vec::with_capacity(self.runs);
314 let mut run_seeds = Vec::with_capacity(self.runs);
315
316 let group_id = GroupId::new();
319
320 for i in 0..self.runs {
321 let run_seed = self.seed.wrapping_add(i as u64);
322 run_seeds.push(run_seed);
323
324 let result = self.run_single(i, run_seed, group_id)?;
325 eval_runs.push(result);
326 }
327
328 let aggregated = Aggregator::aggregate(&eval_runs);
329
330 Ok(EvalReport {
334 config_summary: ConfigSummary {
335 scenario_name: self.scenario.meta.name.clone(),
336 scenario_id: self.scenario.meta.id.to_string(),
337 worker_count: self.scenario.agents.workers.iter().map(|w| w.count).sum(),
338 max_ticks: self.scenario.app_config.max_ticks,
339 run_count: self.runs,
340 },
341 seed_info: SeedInfo {
342 base_seed: self.seed,
343 run_seeds,
344 },
345 runs: eval_runs,
346 aggregated,
347 assertion_results: vec![],
348 })
349 }
350
351 fn run_single(&self, index: usize, seed: u64, group_id: GroupId) -> Result<EvalRun> {
359 let workers = self.build_workers();
363 let management_strategy = self.build_management_strategy();
364
365 let swarm_config = SwarmConfig {
366 tick_duration: Duration::from_millis(self.scenario.app_config.tick_duration_ms),
367 max_ticks: self.scenario.app_config.max_ticks,
368 management_strategy,
369 };
370
371 let extensions = self.build_extensions_from_scenario(seed);
373
374 let scenario_key = self.scenario.meta.id.learning_key();
376 let mut builder = LearnableSwarmBuilder::new(self.runtime.clone())
377 .scenario(&scenario_key)
378 .swarm_config(swarm_config)
379 .workers(workers)
380 .extensions(extensions)
381 .enable_exploration(
382 self.enable_exploration || self.scenario.app_config.enable_exploration,
383 );
384
385 if let Some(factory) = &self.manager_factory {
387 let manager = factory();
388 builder = builder.add_manager(Box::new(DynManagerWrapper(manager)));
389 } else {
390 let managers = self.build_managers();
391 for manager in managers {
392 builder = builder.add_manager(Box::new(manager));
393 }
394 }
395
396 if let Some(factory) = &self.batch_invoker_factory {
398 let invoker = factory();
399 builder = builder.batch_invoker(Box::new(DynBatchInvokerWrapper(invoker)));
400 }
401
402 if let Some(factory) = &self.operator_provider_factory {
404 let provider = factory();
405 builder = builder.operator_provider(Box::new(DynOperatorProviderWrapper(provider)));
406 }
407
408 if let Some(ref model) = self.offline_model_from_profile {
410 builder = builder.offline_model(model.clone());
411
412 if self.operator_provider_factory.is_none() {
414 println!(
415 "Profile offline model applied: ucb1_c={:.3}, maturity={}, strategy={}",
416 model.parameters.ucb1_c,
417 model.strategy_config.maturity_threshold,
418 model.strategy_config.initial_strategy
419 );
420 }
421
422 if !self.skip_learned_action_order {
424 if let Some(ref action_order) = model.action_order {
425 let provider = LearnedDependencyProvider::new(action_order.clone());
426 builder = builder.dependency_provider(Box::new(provider));
427 println!(
428 "Learned action order applied: discover={:?}, not_discover={:?}",
429 action_order.discover, action_order.not_discover
430 );
431 }
432 } else if model.action_order.is_some() {
433 println!("Learned action order skipped (--no-dep-graph)");
434 }
435 }
436
437 if let Some(ref store) = self.learning_store {
440 builder = builder.with_learning_store(store.clone());
441
442 if self.offline_model_from_profile.is_none() {
444 let offline_model_opt = builder.offline_model_ref().cloned();
446 if let Some(ref model) = offline_model_opt {
447 if self.operator_provider_factory.is_none() {
449 println!(
450 "Offline model applied: ucb1_c={:.3}, maturity={}, strategy={}",
451 model.parameters.ucb1_c,
452 model.strategy_config.maturity_threshold,
453 model.strategy_config.initial_strategy
454 );
455 }
456
457 if !self.skip_learned_action_order {
459 if let Some(ref action_order) = model.action_order {
460 let provider = LearnedDependencyProvider::new(action_order.clone());
461 builder = builder.dependency_provider(Box::new(provider));
462 println!(
463 "Learned action order applied: discover={:?}, not_discover={:?}",
464 action_order.discover, action_order.not_discover
465 );
466 }
467 } else if model.action_order.is_some() {
468 println!("Learned action order skipped (--no-dep-graph)");
469 }
470 }
471 }
472
473 if let Some(ref trigger) = self.train_trigger {
475 builder = builder.train_trigger(std::sync::Arc::clone(trigger));
476 } else {
477 builder = builder.train_trigger(std::sync::Arc::new(CountTrigger::new(self.runs)));
478 }
479 }
480
481 if let Some(ref subscriber) = self.trace_subscriber {
483 builder = builder.with_trace_subscriber(std::sync::Arc::clone(subscriber));
484 }
485
486 let mut swarm = builder.build()?;
490
491 let manager_count = self
493 .scenario
494 .agents
495 .managers
496 .iter()
497 .map(|t| t.count)
498 .sum::<usize>();
499 if manager_count > 1 {
500 swarm.orchestrator_mut().enable_partitioning();
501 }
502
503 let task_to_run = self
505 .task
506 .clone()
507 .or_else(|| self.build_task_from_scenario())
508 .map(|task| task.with_group_id(group_id));
509
510 let result = if let Some(task) = task_to_run {
512 swarm.run_task(task)?
513 } else {
514 swarm.run()
515 };
516
517 let state = swarm.orchestrator().state();
521 let timed_out = result.total_ticks >= self.scenario.app_config.max_ticks;
522 let environment_done = state.shared.is_environment_done();
523 let total_actions = state.shared.stats.total_visits() as u64;
524 let successful_actions = state.shared.stats.total_successes() as u64;
525 let llm_invocations = state.shared.llm_invocations();
526 let llm_invoke_errors = state.shared.llm_errors();
527
528 let metrics = RunMetrics {
529 task: crate::metrics::TaskMetrics {
530 total_ticks: result.total_ticks,
531 total_tasks: 0,
532 completed_tasks: 0,
533 total_actions,
534 successful_actions,
535 success_rate: state.shared.stats.success_rate(),
536 },
537 coordination: crate::metrics::CoordinationMetrics {
538 manager_activations: llm_invocations,
539 manager_intervention_rate: if result.total_ticks > 0 {
540 llm_invocations as f64 / result.total_ticks as f64
541 } else {
542 0.0
543 },
544 ..Default::default()
545 },
546 performance: {
547 let llm_error_rate = if llm_invocations > 0 {
548 llm_invoke_errors as f64 / llm_invocations as f64
549 } else {
550 0.0
551 };
552 crate::metrics::PerformanceMetrics {
553 total_duration_ms: result.total_duration.as_millis() as f64,
554 avg_tick_latency_ms: if result.total_ticks > 0 {
555 result.total_duration.as_millis() as f64 / result.total_ticks as f64
556 } else {
557 0.0
558 },
559 raw_throughput_per_sec: if result.total_duration.as_secs_f64() > 0.0 {
560 total_actions as f64 / result.total_duration.as_secs_f64()
561 } else {
562 0.0
563 },
564 effective_throughput_per_sec: if result.total_duration.as_secs_f64() > 0.0 {
565 successful_actions as f64 / result.total_duration.as_secs_f64()
566 } else {
567 0.0
568 },
569 llm_invocations,
570 llm_invoke_errors,
571 llm_error_rate,
572 ..Default::default()
573 }
574 },
575 robustness: Default::default(),
576 };
577
578 let (success, termination_reason) = if !result.completed {
580 (false, TerminationReason::Stopped)
581 } else {
582 self.evaluate_conditions(&metrics, environment_done, timed_out)
583 };
584
585 if swarm.is_learning_enabled() {
589 if let Some(tx) = swarm.take_shutdown_tx() {
590 let _ = tx.try_send(());
592 }
593 }
594
595 Ok(EvalRun::new(
596 index,
597 seed,
598 success,
599 termination_reason,
600 metrics,
601 ))
602 }
603
604 fn build_workers(&self) -> Vec<Box<dyn WorkerAgent>> {
605 let mut workers: Vec<Box<dyn WorkerAgent>> = Vec::new();
606
607 for template in &self.scenario.agents.workers {
608 for i in 0..template.count {
609 let id = workers.len();
610 let name = template.id_pattern.replace("{i}", &i.to_string());
611
612 let worker = GenericWorker::new(id)
613 .with_name(name)
614 .with_require_guidance(true);
615
616 workers.push(Box::new(worker));
617 }
618 }
619
620 workers
621 }
622
623 fn build_managers(&self) -> Vec<DefaultBatchManagerAgent> {
624 let mut managers = Vec::new();
625 let mut manager_index = 0;
626
627 for template in &self.scenario.agents.managers {
628 let ids = template.generate_ids();
629 for name in ids {
630 let manager = DefaultBatchManagerAgent::new(ManagerId(manager_index))
631 .with_name(name)
632 .with_interval(self.scenario.manager.process_interval_ticks);
633
634 managers.push(manager);
635 manager_index += 1;
636 }
637 }
638
639 if managers.is_empty() {
641 managers.push(
642 DefaultBatchManagerAgent::new(ManagerId(0))
643 .with_name("default_manager")
644 .with_interval(self.scenario.manager.process_interval_ticks),
645 );
646 }
647
648 managers
649 }
650
651 fn build_management_strategy(&self) -> ManagementStrategy {
652 match &self.scenario.app_config.management_strategy {
653 ManagementStrategyConfig::EveryTick {} => ManagementStrategy::EveryTick,
654 ManagementStrategyConfig::IntervalBased { max_interval } => {
655 ManagementStrategy::FixedInterval {
656 interval: *max_interval,
657 }
658 }
659 ManagementStrategyConfig::EventDriven { triggers: _ } => {
660 ManagementStrategy::CompletionBased { max_wait_ticks: 50 }
662 }
663 ManagementStrategyConfig::Hybrid {
664 max_interval,
665 triggers: _,
666 } => ManagementStrategy::Hybrid {
667 preferred_interval: *max_interval,
668 force_after_ticks: max_interval * 2,
669 },
670 ManagementStrategyConfig::Disabled {} => {
671 ManagementStrategy::FixedInterval { interval: u64::MAX }
673 }
674 }
675 }
676
677 fn build_task_from_scenario(&self) -> Option<SwarmTask> {
681 let task_config = &self.scenario.task;
682
683 if task_config.goal.is_empty() {
684 return None;
685 }
686
687 let mut context = serde_json::Map::new();
689
690 if let Some(target_path) = &task_config.context.target_path {
691 context.insert(
692 "target_path".to_string(),
693 serde_json::Value::String(target_path.clone()),
694 );
695 }
696 if let Some(working_dir) = &task_config.context.working_dir {
697 context.insert(
698 "working_dir".to_string(),
699 serde_json::Value::String(working_dir.clone()),
700 );
701 }
702 if let Some(max_depth) = task_config.context.max_depth {
703 context.insert(
704 "max_depth".to_string(),
705 serde_json::Value::Number(serde_json::Number::from(max_depth)),
706 );
707 }
708
709 for (key, value) in &task_config.context.extra {
711 if let Ok(json_value) = serde_json::to_value(value) {
712 context.insert(key.clone(), json_value);
713 }
714 }
715
716 let task =
717 SwarmTask::new(&task_config.goal).with_context(serde_json::Value::Object(context));
718
719 Some(task)
720 }
721
722 fn build_extensions_from_scenario(&self, seed: u64) -> Extensions {
724 let mut extensions = if let Some(factory) = &self.extensions_factory {
725 factory()
726 } else {
727 Extensions::new()
728 };
729
730 extensions.insert(EvalSeed(seed));
732
733 extensions.insert(self.scenario.llm.clone());
735
736 if let Some(ref lora) = self.scenario.llm.lora {
738 extensions.insert(lora.clone());
739 }
740
741 extensions.insert(self.scenario.manager.clone());
743
744 extensions.insert(self.scenario.batch_processor.clone());
746
747 let core_actions_config = self.scenario.actions.to_core_config();
749 extensions.insert(core_actions_config);
750
751 let env_type = self.scenario.environment.env_type.as_str();
753 let env_params = &self.scenario.environment.params;
754
755 let env_box: Option<EnvironmentBox> = match env_type {
756 "maze" => {
757 let map = env_params.get("map").and_then(|v| v.as_str()).unwrap_or("");
758 let worker_count = env_params
759 .get("worker_count")
760 .and_then(|v| v.as_u64())
761 .unwrap_or(1) as usize;
762 Some(Box::new(MazeEnvironment::from_str(map, worker_count)))
763 }
764 "code" => {
765 Some(Box::new(CodeEnvironment::auth_scenario()))
767 }
768 "troubleshooting" => {
769 let scenario_name = env_params
770 .get("scenario")
771 .and_then(|v| v.as_str())
772 .unwrap_or("memory_leak");
773 let env = match scenario_name {
774 "memory_leak" => TroubleshootingEnvironment::memory_leak_scenario(),
775 "cpu_spike" => TroubleshootingEnvironment::cpu_spike_scenario(),
776 "network_timeout" => TroubleshootingEnvironment::network_timeout_scenario(),
777 "medium" => TroubleshootingEnvironment::complex_scenario(15, 3, 2, seed),
778 "high" => TroubleshootingEnvironment::complex_scenario(30, 8, 3, seed),
779 "extreme" => TroubleshootingEnvironment::complex_scenario(50, 15, 4, seed),
780 "complex" => {
781 let total_services = env_params
782 .get("total_services")
783 .and_then(|v| v.as_u64())
784 .unwrap_or(15) as usize;
785 let noise_services = env_params
786 .get("noise_services")
787 .and_then(|v| v.as_u64())
788 .unwrap_or(3) as usize;
789 let cascade_depth = env_params
790 .get("cascade_depth")
791 .and_then(|v| v.as_u64())
792 .unwrap_or(2) as usize;
793 TroubleshootingEnvironment::complex_scenario(
794 total_services,
795 noise_services,
796 cascade_depth,
797 seed,
798 )
799 }
800 _ => TroubleshootingEnvironment::memory_leak_scenario(),
801 };
802 Some(Box::new(env))
803 }
804 "search" => {
805 let scenario_name = env_params
806 .get("scenario")
807 .and_then(|v| v.as_str())
808 .unwrap_or("basic");
809 let env = match scenario_name {
810 "basic" => SearchEnvironment::basic_scenario(),
811 "medium" => SearchEnvironment::medium_scenario(),
812 "large" => SearchEnvironment::large_scenario(),
813 "custom" => {
814 let file_count = env_params
815 .get("file_count")
816 .and_then(|v| v.as_u64())
817 .unwrap_or(5) as usize;
818 let target_index = env_params
819 .get("target_index")
820 .and_then(|v| v.as_u64())
821 .unwrap_or(2) as usize;
822 SearchEnvironment::custom_scenario(file_count, target_index, seed)
823 }
824 _ => SearchEnvironment::basic_scenario(),
825 };
826 Some(Box::new(env))
827 }
828 "internal_diagnosis" => {
829 let scenario_name = env_params
830 .get("scenario")
831 .and_then(|v| v.as_str())
832 .unwrap_or("routing");
833 let env = match scenario_name {
834 "routing" => InternalDiagnosisEnvironment::routing_error_scenario(),
835 "failover" => InternalDiagnosisEnvironment::failover_error_scenario(),
836 "worker_pool" => InternalDiagnosisEnvironment::worker_pool_scenario(),
837 "strategy" => InternalDiagnosisEnvironment::strategy_mismatch_scenario(),
838 "exploration" => InternalDiagnosisEnvironment::exploration_depth_scenario(),
839 "complex" => InternalDiagnosisEnvironment::complex_scenario(seed),
840 _ => InternalDiagnosisEnvironment::routing_error_scenario(),
841 };
842 Some(Box::new(env))
843 }
844 "deep_search" => {
845 let _scenario_name = env_params
847 .get("scenario")
848 .and_then(|v| v.as_str())
849 .unwrap_or("tech_question");
850 let env = DeepSearchEnvironment::tech_question_scenario();
851 Some(Box::new(env))
852 }
853 "default" | "realworld" => {
855 use swarm_engine_core::environment::DefaultEnvironment;
856 let working_dir = env_params
857 .get("working_dir")
858 .and_then(|v| v.as_str())
859 .map(std::path::PathBuf::from);
860 let env = if let Some(dir) = working_dir {
861 DefaultEnvironment::with_working_dir(dir)
862 } else {
863 DefaultEnvironment::new()
864 };
865 Some(Box::new(env))
866 }
867 _ => None, };
869
870 if let Some(env) = env_box {
871 extensions.insert(env);
872 }
873
874 let graph = self.dependency_graph.clone().or_else(|| {
876 self.scenario.dependency_graph.as_ref().and_then(|cfg| {
877 let action_names = self.scenario.actions.action_names();
878 cfg.to_core_graph(&action_names)
879 })
880 });
881 if let Some(g) = graph {
882 extensions.insert(g);
883 }
884
885 extensions
888 }
889
890 fn evaluate_conditions(
894 &self,
895 metrics: &RunMetrics,
896 environment_done: bool,
897 timed_out: bool,
898 ) -> (bool, TerminationReason) {
899 let conditions = &self.scenario.conditions;
900
901 for condition in &conditions.failure {
903 if let Some(actual) =
904 self.get_metric_value(&condition.metric, metrics, environment_done)
905 {
906 if condition.evaluate(&actual) {
907 return (false, TerminationReason::Failure);
908 }
909 }
910 }
911
912 if timed_out {
914 return match conditions.on_timeout {
915 TimeoutBehavior::Fail => (false, TerminationReason::Timeout),
916 TimeoutBehavior::PartialSuccess => {
917 let success = self.check_success_conditions(metrics, environment_done);
919 (success, TerminationReason::Timeout)
920 }
921 TimeoutBehavior::MilestoneScore => {
922 (false, TerminationReason::Timeout)
924 }
925 };
926 }
927
928 let success = self.check_success_conditions(metrics, environment_done);
930 if success {
931 (true, TerminationReason::Success)
932 } else {
933 (false, TerminationReason::Stopped)
936 }
937 }
938
939 fn check_success_conditions(&self, metrics: &RunMetrics, environment_done: bool) -> bool {
941 let conditions = &self.scenario.conditions;
942
943 if conditions.success.is_empty() {
945 return true;
946 }
947
948 conditions.success.iter().all(|condition| {
950 self.get_metric_value(&condition.metric, metrics, environment_done)
951 .map(|actual| condition.evaluate(&actual))
952 .unwrap_or(false)
953 })
954 }
955
956 fn get_metric_value(
958 &self,
959 path: &str,
960 metrics: &RunMetrics,
961 environment_done: bool,
962 ) -> Option<ConditionValue> {
963 match path {
964 "environment.done" => Some(ConditionValue::Bool(environment_done)),
966
967 "task.total_ticks" | "total_ticks" => {
969 Some(ConditionValue::Integer(metrics.task.total_ticks as i64))
970 }
971 "task.success_rate" | "success_rate" => {
972 Some(ConditionValue::Float(metrics.task.success_rate))
973 }
974 "task.total_actions" | "total_actions" => {
975 Some(ConditionValue::Integer(metrics.task.total_actions as i64))
976 }
977 "task.successful_actions" | "successful_actions" => Some(ConditionValue::Integer(
978 metrics.task.successful_actions as i64,
979 )),
980
981 "performance.llm_error_rate" | "llm_error_rate" => {
983 Some(ConditionValue::Float(metrics.performance.llm_error_rate))
984 }
985 "performance.llm_invocations" | "llm_invocations" => Some(ConditionValue::Integer(
986 metrics.performance.llm_invocations as i64,
987 )),
988
989 "coordination.manager_activations" | "manager_activations" => Some(
991 ConditionValue::Integer(metrics.coordination.manager_activations as i64),
992 ),
993
994 "errors.count" => {
996 let failed = metrics
997 .task
998 .total_actions
999 .saturating_sub(metrics.task.successful_actions);
1000 Some(ConditionValue::Integer(failed as i64))
1001 }
1002
1003 _ => None,
1005 }
1006 }
1007}
1008
1009struct DynManagerWrapper(Box<dyn ManagerAgent>);
1011
1012impl ManagerAgent for DynManagerWrapper {
1013 fn prepare(
1014 &self,
1015 context: &swarm_engine_core::agent::TaskContext,
1016 ) -> swarm_engine_core::agent::BatchDecisionRequest {
1017 self.0.prepare(context)
1018 }
1019
1020 fn finalize(
1021 &self,
1022 context: &swarm_engine_core::agent::TaskContext,
1023 responses: Vec<(
1024 swarm_engine_core::types::WorkerId,
1025 swarm_engine_core::agent::DecisionResponse,
1026 )>,
1027 ) -> swarm_engine_core::agent::ManagementDecision {
1028 self.0.finalize(context, responses)
1029 }
1030
1031 fn id(&self) -> swarm_engine_core::agent::ManagerId {
1032 self.0.id()
1033 }
1034
1035 fn name(&self) -> &str {
1036 self.0.name()
1037 }
1038}
1039
1040struct DynBatchInvokerWrapper(Box<dyn BatchInvoker>);
1042
1043impl BatchInvoker for DynBatchInvokerWrapper {
1044 fn invoke(
1045 &self,
1046 request: swarm_engine_core::agent::BatchDecisionRequest,
1047 extensions: &swarm_engine_core::extensions::Extensions,
1048 ) -> swarm_engine_core::agent::BatchInvokeResult {
1049 self.0.invoke(request, extensions)
1050 }
1051
1052 fn plan_dependencies(
1053 &self,
1054 task: &str,
1055 actions: &[ActionDef],
1056 ) -> Option<swarm_engine_core::exploration::DependencyGraph> {
1057 self.0.plan_dependencies(task, actions)
1058 }
1059
1060 fn name(&self) -> &str {
1061 self.0.name()
1062 }
1063}
1064
1065struct DynOperatorProviderWrapper(Box<dyn OperatorProvider<NodeRules>>);
1067
1068impl OperatorProvider<NodeRules> for DynOperatorProviderWrapper {
1069 fn provide(
1070 &self,
1071 rules: NodeRules,
1072 context: Option<
1073 &swarm_engine_core::exploration::ProviderContext<
1074 '_,
1075 swarm_engine_core::exploration::ActionNodeData,
1076 String,
1077 swarm_engine_core::exploration::MapNodeState,
1078 >,
1079 >,
1080 ) -> swarm_engine_core::exploration::ConfigurableOperator<NodeRules> {
1081 self.0.provide(rules, context)
1082 }
1083
1084 fn reevaluate(
1085 &self,
1086 operator: &mut swarm_engine_core::exploration::ConfigurableOperator<NodeRules>,
1087 ctx: &swarm_engine_core::exploration::ProviderContext<
1088 '_,
1089 swarm_engine_core::exploration::ActionNodeData,
1090 String,
1091 swarm_engine_core::exploration::MapNodeState,
1092 >,
1093 ) {
1094 self.0.reevaluate(operator, ctx)
1095 }
1096
1097 fn name(&self) -> &str {
1098 self.0.name()
1099 }
1100}