Skip to main content

swarm_engine_eval/environments/
code.rs

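//! CodeEnvironment - code exploration environment
//!
//! A simulated environment for exploring a virtual codebase.
//!
//! # Actions
//!
//! - `Grep`: search for files matching a pattern
//! - `Read`: read the contents of a file
//! - `List`: list every file in the codebase
//! - `Done`: declare the task complete
//!
//! # Design
//!
//! - Virtual file system (`HashMap<path, content>`)
//! - Explore files with Grep, inspect their contents with Read
//! - Reading the goal file counts as success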
16
17use std::collections::HashMap;
18use std::sync::RwLock;
19
20use swarm_engine_core::actions::ParamResolver;
21use swarm_engine_core::agent::WorkResult;
22use swarm_engine_core::environment::Environment;
23use swarm_engine_core::types::{Action, WorkerId};
24
25// ============================================================================
26// CodeEnvironment
27// ============================================================================
28
29/// コード探索環境
30pub struct CodeEnvironment {
31    /// 仮想ファイルシステム (path -> content)
32    files: HashMap<String, String>,
33    /// 目標ファイルパス(これを Read したら成功)
34    goal_file: String,
35    /// 内部状態
36    state: RwLock<CodeState>,
37}
38
/// Per-worker bookkeeping stored behind the environment's `RwLock`.
#[derive(Debug, Default)]
struct CodeState {
    /// Paths each worker has read, in the order they were read.
    read_files: HashMap<WorkerId, Vec<String>>,
    /// Workers that have read the goal file (each recorded at most once).
    completed: Vec<WorkerId>,
}
46
47impl CodeEnvironment {
48    /// 新しいコード環境を作成
49    pub fn new(files: HashMap<String, String>, goal_file: impl Into<String>) -> Self {
50        Self {
51            files,
52            goal_file: goal_file.into(),
53            state: RwLock::new(CodeState::default()),
54        }
55    }
56
57    /// シンプルな認証コード探索シナリオを作成
58    pub fn auth_scenario() -> Self {
59        let mut files = HashMap::new();
60
61        // 構造
62        files.insert(
63            "src/main.rs".into(),
64            r#"
65mod auth;
66mod handlers;
67mod db;
68
69fn main() {
70    let app = create_app();
71    app.run();
72}
73"#
74            .into(),
75        );
76
77        files.insert(
78            "src/handlers/mod.rs".into(),
79            r#"
80pub mod user;
81pub mod admin;
82
83pub use user::*;
84pub use admin::*;
85"#
86            .into(),
87        );
88
89        files.insert(
90            "src/handlers/user.rs".into(),
91            r#"
92use crate::auth::verify_token;
93
94pub fn get_profile(token: &str) -> Result<User, Error> {
95    verify_token(token)?;
96    // ...
97}
98"#
99            .into(),
100        );
101
102        files.insert(
103            "src/auth.rs".into(),
104            r#"
105//! Authentication module
106//!
107//! Handles user authentication and token verification.
108
109pub fn verify_token(token: &str) -> Result<Claims, AuthError> {
110    let decoded = decode_jwt(token)?;
111    validate_claims(&decoded)?;
112    Ok(decoded.claims)
113}
114
115pub fn login(username: &str, password: &str) -> Result<String, AuthError> {
116    let user = find_user(username)?;
117    if verify_password(password, &user.password_hash) {
118        Ok(generate_jwt(&user))
119    } else {
120        Err(AuthError::InvalidPassword)
121    }
122}
123
124fn decode_jwt(token: &str) -> Result<TokenData, AuthError> {
125    // JWT decoding logic
126}
127"#
128            .into(),
129        );
130
131        files.insert(
132            "src/db.rs".into(),
133            r#"
134pub struct Database {
135    pool: Pool,
136}
137
138impl Database {
139    pub fn connect(url: &str) -> Self {
140        // ...
141    }
142}
143"#
144            .into(),
145        );
146
147        files.insert(
148            "config/settings.toml".into(),
149            r#"
150[server]
151port = 8080
152host = "0.0.0.0"
153
154[database]
155url = "postgres://localhost/app"
156"#
157            .into(),
158        );
159
160        Self::new(files, "src/auth.rs")
161    }
162
163    // ------------------------------------------------------------------------
164    // Action Handlers
165    // ------------------------------------------------------------------------
166
167    fn handle_grep(&self, _worker_id: WorkerId, action: &Action) -> WorkResult {
168        let resolver = ParamResolver::new(action);
169        let pattern = match resolver.require("pattern") {
170            Ok(s) => s.to_lowercase(),
171            Err(e) => return WorkResult::env_failure(format!("Grep: {}", e)),
172        };
173
174        // パターンにマッチするファイルを検索
175        let matches: Vec<&str> = self
176            .files
177            .iter()
178            .filter(|(path, content)| {
179                path.to_lowercase().contains(&pattern) || content.to_lowercase().contains(&pattern)
180            })
181            .map(|(path, _)| path.as_str())
182            .collect();
183
184        if matches.is_empty() {
185            WorkResult::env_success(format!("No files found matching '{}'", pattern))
186        } else {
187            let result = format!(
188                "Found {} file(s) matching '{}':\n{}",
189                matches.len(),
190                pattern,
191                matches.join("\n")
192            );
193            WorkResult::env_success(result)
194        }
195    }
196
197    fn handle_read(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
198        let resolver = ParamResolver::new(action);
199        let path = match resolver.require("path") {
200            Ok(s) => s,
201            Err(e) => return WorkResult::env_failure(format!("Read: {}", e)),
202        };
203
204        // ファイルを検索
205        let content = self.files.get(path);
206
207        match content {
208            Some(content) => {
209                // 読んだファイルを記録
210                {
211                    let mut state = self.state.write().unwrap();
212                    state
213                        .read_files
214                        .entry(worker_id)
215                        .or_default()
216                        .push(path.to_string());
217                }
218
219                // 目標ファイルに到達?
220                if path == self.goal_file {
221                    let mut state = self.state.write().unwrap();
222                    if !state.completed.contains(&worker_id) {
223                        state.completed.push(worker_id);
224                    }
225                    return WorkResult::done_success(format!(
226                        "Found target file: {}\n\n{}",
227                        path,
228                        content.trim()
229                    ));
230                }
231
232                WorkResult::env_success(format!("=== {} ===\n{}", path, content.trim()))
233            }
234            None => WorkResult::env_failure(format!("File not found: {}", path)),
235        }
236    }
237
238    fn handle_list(&self, _worker_id: WorkerId) -> WorkResult {
239        let paths: Vec<&str> = self.files.keys().map(|s| s.as_str()).collect();
240        let result = format!("Files in codebase:\n{}", paths.join("\n"));
241        WorkResult::env_success(result)
242    }
243}
244
245impl Environment for CodeEnvironment {
246    fn step(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
247        match action.name.as_str() {
248            "Grep" | "grep" => self.handle_grep(worker_id, action),
249            "Read" | "read" => self.handle_read(worker_id, action),
250            "List" | "list" | "ls" => self.handle_list(worker_id),
251            _ => WorkResult::unsupported(&action.name),
252        }
253    }
254
255    fn reset(&self) {
256        let mut state = self.state.write().unwrap();
257        state.read_files.clear();
258        state.completed.clear();
259    }
260
261    fn name(&self) -> &str {
262        "CodeEnvironment"
263    }
264}
265
266// ============================================================================
267// Tests
268// ============================================================================
269
#[cfg(test)]
mod tests {
    use super::*;

    /// Reports whether `result` carries a success flag that is set.
    fn is_success(result: &WorkResult) -> bool {
        match result {
            WorkResult::Done { success, .. } => *success,
            WorkResult::Acted { action_result, .. } => action_result.success,
            _ => false,
        }
    }

    /// Reports whether `result` is the `Done` variant.
    fn is_done(result: &WorkResult) -> bool {
        matches!(result, WorkResult::Done { .. })
    }

    /// Builds an action named `name` whose only parameter is `target`.
    fn targeted_action(name: &str, target: &str) -> Action {
        Action {
            name: name.into(),
            params: swarm_engine_core::types::ActionParams {
                target: Some(target.into()),
                args: HashMap::new(),
                data: vec![],
            },
        }
    }

    /// Runs one action as worker 0 against a fresh auth scenario.
    fn step_once(name: &str, target: &str) -> WorkResult {
        let env = CodeEnvironment::auth_scenario();
        let worker = WorkerId(0);
        env.step(worker, &targeted_action(name, target))
    }

    #[test]
    fn test_grep_finds_files() {
        let outcome = step_once("Grep", "auth");
        assert!(is_success(&outcome));
    }

    #[test]
    fn test_read_existing_file() {
        let outcome = step_once("Read", "src/main.rs");
        assert!(is_success(&outcome));
        assert!(!is_done(&outcome)); // main.rs is not the goal
    }

    #[test]
    fn test_read_goal_file_completes() {
        let outcome = step_once("Read", "src/auth.rs");
        assert!(is_success(&outcome));
        assert!(is_done(&outcome)); // auth.rs is the goal
    }

    #[test]
    fn test_read_nonexistent_file() {
        let outcome = step_once("Read", "src/notfound.rs");
        assert!(!is_success(&outcome));
    }
}