swarm_engine_eval/
runtime.rs

1//! Runtime Task Specification
2//!
3//! CLI の `run` コマンド用のタスク仕様。
4//! EvalScenario から継承し、Task だけ上書き可能。
5//!
6//! # Example
7//!
8//! ```ignore
9//! use swarm_engine_eval::runtime::RuntimeTaskSpec;
10//! use swarm_engine_eval::scenario::EvalScenario;
11//!
12//! // シナリオファイルから読み込み
13//! let scenario: EvalScenario = toml::from_str(&content)?;
14//!
15//! // RuntimeTaskSpec に変換し、タスクを上書き
16//! let spec = RuntimeTaskSpec::from(scenario)
17//!     .with_task("Fix the authentication bug in src/auth.rs");
18//! ```
19
20use crate::scenario::{
21    AgentsConfig, AppConfigTemplate, EnvironmentConfig, EvalConditions, EvalScenario, LlmConfig,
22    ManagerConfig, ScenarioActions, ScenarioMeta, TaskConfig,
23};
24use std::path::PathBuf;
25
26/// Runtime Task Specification
27///
28/// CLI から直接タスクを指定して実行するための仕様。
29/// EvalScenario をベースに、Task 部分のみ上書き可能。
30#[derive(Debug, Clone)]
31pub struct RuntimeTaskSpec {
32    /// シナリオメタ情報
33    pub meta: ScenarioMeta,
34
35    /// タスク定義（上書き可能）
36    pub task: TaskConfig,
37
38    /// LLM 設定
39    pub llm: LlmConfig,
40
41    /// Manager 設定
42    pub manager: ManagerConfig,
43
44    /// アクション定義
45    pub actions: ScenarioActions,
46
47    /// アプリ設定
48    pub app_config: AppConfigTemplate,
49
50    /// 環境設定（上書き可能）
51    pub environment: EnvironmentConfig,
52
53    /// エージェント設定
54    pub agents: AgentsConfig,
55
56    /// 成功/失敗条件
57    pub conditions: EvalConditions,
58
59    /// 作業ディレクトリ（オプション）
60    pub working_dir: Option<PathBuf>,
61}
62
63impl RuntimeTaskSpec {
64    /// タスクの goal を上書き
65    pub fn with_task(mut self, goal: impl Into<String>) -> Self {
66        self.task.goal = goal.into();
67        self
68    }
69
70    /// Environment の env_type を上書き
71    pub fn with_env_type(mut self, env_type: impl Into<String>) -> Self {
72        self.environment.env_type = env_type.into();
73        self
74    }
75
76    /// 作業ディレクトリを設定
77    pub fn with_working_dir(mut self, dir: impl Into<PathBuf>) -> Self {
78        self.working_dir = Some(dir.into());
79        self
80    }
81
82    /// max_ticks を上書き
83    pub fn with_max_ticks(mut self, max_ticks: u64) -> Self {
84        self.app_config.max_ticks = max_ticks;
85        self
86    }
87
88    /// EvalScenario に変換（Orchestrator 実行用）
89    ///
90    /// RuntimeTaskSpec から EvalScenario を再構築する。
91    /// 評価用の条件やマイルストーンは空になる。
92    ///
93    /// Note: `working_dir` が設定されている場合、`environment.params["working_dir"]` に伝播される。
94    pub fn into_eval_scenario(mut self) -> EvalScenario {
95        // Propagate working_dir to environment.params for DefaultEnvironment
96        if let Some(ref dir) = self.working_dir {
97            let params = self.environment.params.as_object_mut();
98            if let Some(obj) = params {
99                obj.insert(
100                    "working_dir".to_string(),
101                    serde_json::Value::String(dir.display().to_string()),
102                );
103            } else {
104                // params が Object でない場合は新規作成
105                let mut obj = serde_json::Map::new();
106                obj.insert(
107                    "working_dir".to_string(),
108                    serde_json::Value::String(dir.display().to_string()),
109                );
110                self.environment.params = serde_json::Value::Object(obj);
111            }
112        }
113
114        EvalScenario {
115            meta: self.meta,
116            task: self.task,
117            llm: self.llm,
118            manager: self.manager,
119            batch_processor: Default::default(),
120            dependency_graph: None,
121            actions: self.actions,
122            app_config: self.app_config,
123            environment: self.environment,
124            agents: self.agents,
125            conditions: self.conditions,
126            milestones: vec![],
127            variants: vec![],
128        }
129    }
130
131    /// 作業ディレクトリを取得（未設定の場合は cwd）
132    pub fn resolved_working_dir(&self) -> PathBuf {
133        self.working_dir
134            .clone()
135            .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
136    }
137}
138
139impl From<EvalScenario> for RuntimeTaskSpec {
140    fn from(scenario: EvalScenario) -> Self {
141        Self {
142            meta: scenario.meta,
143            task: scenario.task,
144            llm: scenario.llm,
145            manager: scenario.manager,
146            actions: scenario.actions,
147            app_config: scenario.app_config,
148            environment: scenario.environment,
149            agents: scenario.agents,
150            conditions: scenario.conditions,
151            working_dir: None,
152        }
153    }
154}
155
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses the smallest scenario these tests need from inline TOML.
    fn minimal_scenario() -> EvalScenario {
        const SCENARIO_TOML: &str = r#"
            [meta]
            name = "Test"
            id = "test:minimal:v1"

            [task]
            goal = "Original goal"

            [llm]
            provider = "llama-server"
            model = "test-model"

            [app_config]
            max_ticks = 100

            [environment]
            env_type = "default"

            [agents]
            [[agents.workers]]
            id_pattern = "worker_{i}"
            count = 1

            [conditions]
            on_timeout = "fail"
        "#;

        toml::from_str(SCENARIO_TOML).unwrap()
    }

    /// Converting a scenario keeps its task goal and environment type.
    #[test]
    fn test_from_eval_scenario() {
        let spec: RuntimeTaskSpec = minimal_scenario().into();

        assert_eq!(spec.task.goal, "Original goal");
        assert_eq!(spec.environment.env_type, "default");
    }

    /// `with_task` replaces the goal.
    #[test]
    fn test_with_task() {
        let spec = RuntimeTaskSpec::from(minimal_scenario()).with_task("New custom goal");

        assert_eq!(spec.task.goal, "New custom goal");
    }

    /// `with_env_type` replaces the environment type.
    #[test]
    fn test_with_env_type() {
        let spec = RuntimeTaskSpec::from(minimal_scenario()).with_env_type("realworld");

        assert_eq!(spec.environment.env_type, "realworld");
    }

    /// Overrides survive the conversion back; milestones and variants come back empty.
    #[test]
    fn test_into_eval_scenario() {
        let rebuilt = RuntimeTaskSpec::from(minimal_scenario())
            .with_task("Modified goal")
            .with_max_ticks(50)
            .into_eval_scenario();

        assert_eq!(rebuilt.task.goal, "Modified goal");
        assert_eq!(rebuilt.app_config.max_ticks, 50);
        assert!(rebuilt.milestones.is_empty());
        assert!(rebuilt.variants.is_empty());
    }

    /// A configured working directory shows up in `environment.params`.
    #[test]
    fn test_working_dir_propagates_to_env_params() {
        let rebuilt = RuntimeTaskSpec::from(minimal_scenario())
            .with_working_dir("/tmp/test_workspace")
            .into_eval_scenario();

        // working_dir should be propagated to environment.params
        let propagated = rebuilt
            .environment
            .params
            .as_object()
            .unwrap()
            .get("working_dir")
            .and_then(|value| value.as_str());
        assert_eq!(propagated, Some("/tmp/test_workspace"));
    }

    /// Without an override, no `working_dir` key is written to the params.
    #[test]
    fn test_working_dir_without_override() {
        let spec = RuntimeTaskSpec::from(minimal_scenario());

        // No working_dir set
        assert!(spec.working_dir.is_none());

        let rebuilt = spec.into_eval_scenario();

        // params should not have working_dir (either not an object, or key absent)
        let entry = rebuilt
            .environment
            .params
            .as_object()
            .and_then(|obj| obj.get("working_dir"));
        assert!(entry.is_none());
    }
}
swarm_engine_eval/runtime.rs

swarm_engine_eval/
runtime.rs