Skip to main content

swarm_engine_eval/environments/
code.rs

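//! CodeEnvironment - code exploration environment
//!
//! A simulated environment for exploring a virtual codebase.
//!
//! # Actions
//!
//! - `Grep`: search for files matching a pattern
//! - `Read`: read a file's contents
//! - `List`: list every file path in the codebase
//! - `Done`: declare the task complete
//!
//! # Design
//!
//! - Virtual file system (`HashMap<path, content>`)
//! - Explore files with Grep, inspect contents with Read
//! - Reading the goal file counts as success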
16
17use std::collections::HashMap;
18use std::sync::RwLock;
19
20use swarm_engine_core::agent::WorkResult;
21use swarm_engine_core::environment::Environment;
22use swarm_engine_core::types::{Action, WorkerId};
23
24// ============================================================================
25// CodeEnvironment
26// ============================================================================
27
28/// コード探索環境
29pub struct CodeEnvironment {
30    /// 仮想ファイルシステム (path -> content)
31    files: HashMap<String, String>,
32    /// 目標ファイルパス(これを Read したら成功)
33    goal_file: String,
34    /// 内部状態
35    state: RwLock<CodeState>,
36}
37
/// Bookkeeping that changes while workers interact with the environment.
#[derive(Debug, Default)]
struct CodeState {
    /// Paths each worker has successfully read, in read order (repeats are kept).
    read_files: HashMap<WorkerId, Vec<String>>,
    /// Workers that have read the goal file; each worker is recorded at most once.
    completed: Vec<WorkerId>,
}
45
46impl CodeEnvironment {
47    /// 新しいコード環境を作成
48    pub fn new(files: HashMap<String, String>, goal_file: impl Into<String>) -> Self {
49        Self {
50            files,
51            goal_file: goal_file.into(),
52            state: RwLock::new(CodeState::default()),
53        }
54    }
55
56    /// シンプルな認証コード探索シナリオを作成
57    pub fn auth_scenario() -> Self {
58        let mut files = HashMap::new();
59
60        // 構造
61        files.insert(
62            "src/main.rs".into(),
63            r#"
64mod auth;
65mod handlers;
66mod db;
67
68fn main() {
69    let app = create_app();
70    app.run();
71}
72"#
73            .into(),
74        );
75
76        files.insert(
77            "src/handlers/mod.rs".into(),
78            r#"
79pub mod user;
80pub mod admin;
81
82pub use user::*;
83pub use admin::*;
84"#
85            .into(),
86        );
87
88        files.insert(
89            "src/handlers/user.rs".into(),
90            r#"
91use crate::auth::verify_token;
92
93pub fn get_profile(token: &str) -> Result<User, Error> {
94    verify_token(token)?;
95    // ...
96}
97"#
98            .into(),
99        );
100
101        files.insert(
102            "src/auth.rs".into(),
103            r#"
104//! Authentication module
105//!
106//! Handles user authentication and token verification.
107
108pub fn verify_token(token: &str) -> Result<Claims, AuthError> {
109    let decoded = decode_jwt(token)?;
110    validate_claims(&decoded)?;
111    Ok(decoded.claims)
112}
113
114pub fn login(username: &str, password: &str) -> Result<String, AuthError> {
115    let user = find_user(username)?;
116    if verify_password(password, &user.password_hash) {
117        Ok(generate_jwt(&user))
118    } else {
119        Err(AuthError::InvalidPassword)
120    }
121}
122
123fn decode_jwt(token: &str) -> Result<TokenData, AuthError> {
124    // JWT decoding logic
125}
126"#
127            .into(),
128        );
129
130        files.insert(
131            "src/db.rs".into(),
132            r#"
133pub struct Database {
134    pool: Pool,
135}
136
137impl Database {
138    pub fn connect(url: &str) -> Self {
139        // ...
140    }
141}
142"#
143            .into(),
144        );
145
146        files.insert(
147            "config/settings.toml".into(),
148            r#"
149[server]
150port = 8080
151host = "0.0.0.0"
152
153[database]
154url = "postgres://localhost/app"
155"#
156            .into(),
157        );
158
159        Self::new(files, "src/auth.rs")
160    }
161
162    // ------------------------------------------------------------------------
163    // Action Handlers
164    // ------------------------------------------------------------------------
165
166    fn handle_grep(&self, _worker_id: WorkerId, action: &Action) -> WorkResult {
167        let pattern = action
168            .params
169            .args
170            .get("pattern")
171            .or(action.params.target.as_ref())
172            .map(|s| s.to_lowercase())
173            .unwrap_or_default();
174
175        if pattern.is_empty() {
176            return WorkResult::env_failure("Grep requires a pattern");
177        }
178
179        // パターンにマッチするファイルを検索
180        let matches: Vec<&str> = self
181            .files
182            .iter()
183            .filter(|(path, content)| {
184                path.to_lowercase().contains(&pattern) || content.to_lowercase().contains(&pattern)
185            })
186            .map(|(path, _)| path.as_str())
187            .collect();
188
189        if matches.is_empty() {
190            WorkResult::env_success(format!("No files found matching '{}'", pattern))
191        } else {
192            let result = format!(
193                "Found {} file(s) matching '{}':\n{}",
194                matches.len(),
195                pattern,
196                matches.join("\n")
197            );
198            WorkResult::env_success(result)
199        }
200    }
201
202    fn handle_read(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
203        let path = action
204            .params
205            .args
206            .get("path")
207            .or(action.params.target.as_ref())
208            .cloned()
209            .unwrap_or_default();
210
211        if path.is_empty() {
212            return WorkResult::env_failure("Read requires a file path");
213        }
214
215        // ファイルを検索
216        let content = self.files.get(&path);
217
218        match content {
219            Some(content) => {
220                // 読んだファイルを記録
221                {
222                    let mut state = self.state.write().unwrap();
223                    state
224                        .read_files
225                        .entry(worker_id)
226                        .or_default()
227                        .push(path.clone());
228                }
229
230                // 目標ファイルに到達?
231                if path == self.goal_file {
232                    let mut state = self.state.write().unwrap();
233                    if !state.completed.contains(&worker_id) {
234                        state.completed.push(worker_id);
235                    }
236                    return WorkResult::done_success(format!(
237                        "Found target file: {}\n\n{}",
238                        path,
239                        content.trim()
240                    ));
241                }
242
243                WorkResult::env_success(format!("=== {} ===\n{}", path, content.trim()))
244            }
245            None => WorkResult::env_failure(format!("File not found: {}", path)),
246        }
247    }
248
249    fn handle_list(&self, _worker_id: WorkerId) -> WorkResult {
250        let paths: Vec<&str> = self.files.keys().map(|s| s.as_str()).collect();
251        let result = format!("Files in codebase:\n{}", paths.join("\n"));
252        WorkResult::env_success(result)
253    }
254}
255
256impl Environment for CodeEnvironment {
257    fn step(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
258        match action.name.as_str() {
259            "Grep" | "grep" => self.handle_grep(worker_id, action),
260            "Read" | "read" => self.handle_read(worker_id, action),
261            "List" | "list" | "ls" => self.handle_list(worker_id),
262            _ => WorkResult::unsupported(&action.name),
263        }
264    }
265
266    fn reset(&self) {
267        let mut state = self.state.write().unwrap();
268        state.read_files.clear();
269        state.completed.clear();
270    }
271
272    fn name(&self) -> &str {
273        "CodeEnvironment"
274    }
275}
276
277// ============================================================================
278// Tests
279// ============================================================================
280
#[cfg(test)]
mod tests {
    use super::*;
    use swarm_engine_core::types::ActionParams;

    /// Whether `result` reports success, for either an acted or a done result.
    fn is_success(result: &WorkResult) -> bool {
        if let WorkResult::Acted { action_result, .. } = result {
            return action_result.success;
        }
        matches!(result, WorkResult::Done { success: true, .. })
    }

    /// Whether `result` is a `Done` result, regardless of its outcome.
    fn is_done(result: &WorkResult) -> bool {
        match result {
            WorkResult::Done { .. } => true,
            _ => false,
        }
    }

    /// Builds an action that carries only a target, with no args and no data.
    fn target_action(name: &'static str, target: &'static str) -> Action {
        Action {
            name: name.into(),
            params: ActionParams {
                target: Some(target.into()),
                args: HashMap::new(),
                data: vec![],
            },
        }
    }

    /// Runs a single action as worker 0 against a fresh auth scenario.
    fn run(name: &'static str, target: &'static str) -> WorkResult {
        let env = CodeEnvironment::auth_scenario();
        let worker = WorkerId(0);
        env.step(worker, &target_action(name, target))
    }

    #[test]
    fn test_grep_finds_files() {
        let result = run("Grep", "auth");
        assert!(is_success(&result));
    }

    #[test]
    fn test_read_existing_file() {
        let result = run("Read", "src/main.rs");
        assert!(is_success(&result));
        assert!(!is_done(&result)); // main.rs is not the goal file
    }

    #[test]
    fn test_read_goal_file_completes() {
        let result = run("Read", "src/auth.rs");
        assert!(is_success(&result));
        assert!(is_done(&result)); // auth.rs is the goal file
    }

    #[test]
    fn test_read_nonexistent_file() {
        let result = run("Read", "src/notfound.rs");
        assert!(!is_success(&result));
    }
}