swarm_engine_eval/environments/
code.rs1use std::collections::HashMap;
18use std::sync::RwLock;
19
20use swarm_engine_core::actions::ParamResolver;
21use swarm_engine_core::agent::WorkResult;
22use swarm_engine_core::environment::Environment;
23use swarm_engine_core::types::{Action, WorkerId};
24
/// In-memory codebase that workers explore through `Grep`, `Read` and `List`
/// actions.
///
/// The file set is fixed at construction time; only the per-worker bookkeeping
/// in `state` changes while the environment is stepped.
pub struct CodeEnvironment {
    /// Virtual files, keyed by path, mapping to the file's full text.
    files: HashMap<String, String>,
    /// Path of the target file; a successful read of it finishes the worker.
    goal_file: String,
    /// Mutable bookkeeping, behind a lock because `step` and `reset` only
    /// receive `&self`.
    state: RwLock<CodeState>,
}
38
/// Mutable bookkeeping recorded while workers interact with the environment.
#[derive(Debug, Default)]
struct CodeState {
    /// For each worker, the paths it has successfully read, in read order
    /// (a path appears once per read, so repeats are possible).
    read_files: HashMap<WorkerId, Vec<String>>,
    /// Workers that have read the goal file; each worker is stored at most once.
    completed: Vec<WorkerId>,
}
46
47impl CodeEnvironment {
48 pub fn new(files: HashMap<String, String>, goal_file: impl Into<String>) -> Self {
50 Self {
51 files,
52 goal_file: goal_file.into(),
53 state: RwLock::new(CodeState::default()),
54 }
55 }
56
57 pub fn auth_scenario() -> Self {
59 let mut files = HashMap::new();
60
61 files.insert(
63 "src/main.rs".into(),
64 r#"
65mod auth;
66mod handlers;
67mod db;
68
69fn main() {
70 let app = create_app();
71 app.run();
72}
73"#
74 .into(),
75 );
76
77 files.insert(
78 "src/handlers/mod.rs".into(),
79 r#"
80pub mod user;
81pub mod admin;
82
83pub use user::*;
84pub use admin::*;
85"#
86 .into(),
87 );
88
89 files.insert(
90 "src/handlers/user.rs".into(),
91 r#"
92use crate::auth::verify_token;
93
94pub fn get_profile(token: &str) -> Result<User, Error> {
95 verify_token(token)?;
96 // ...
97}
98"#
99 .into(),
100 );
101
102 files.insert(
103 "src/auth.rs".into(),
104 r#"
105//! Authentication module
106//!
107//! Handles user authentication and token verification.
108
109pub fn verify_token(token: &str) -> Result<Claims, AuthError> {
110 let decoded = decode_jwt(token)?;
111 validate_claims(&decoded)?;
112 Ok(decoded.claims)
113}
114
115pub fn login(username: &str, password: &str) -> Result<String, AuthError> {
116 let user = find_user(username)?;
117 if verify_password(password, &user.password_hash) {
118 Ok(generate_jwt(&user))
119 } else {
120 Err(AuthError::InvalidPassword)
121 }
122}
123
124fn decode_jwt(token: &str) -> Result<TokenData, AuthError> {
125 // JWT decoding logic
126}
127"#
128 .into(),
129 );
130
131 files.insert(
132 "src/db.rs".into(),
133 r#"
134pub struct Database {
135 pool: Pool,
136}
137
138impl Database {
139 pub fn connect(url: &str) -> Self {
140 // ...
141 }
142}
143"#
144 .into(),
145 );
146
147 files.insert(
148 "config/settings.toml".into(),
149 r#"
150[server]
151port = 8080
152host = "0.0.0.0"
153
154[database]
155url = "postgres://localhost/app"
156"#
157 .into(),
158 );
159
160 Self::new(files, "src/auth.rs")
161 }
162
163 fn handle_grep(&self, _worker_id: WorkerId, action: &Action) -> WorkResult {
168 let resolver = ParamResolver::new(action);
169 let pattern = match resolver.require("pattern") {
170 Ok(s) => s.to_lowercase(),
171 Err(e) => return WorkResult::env_failure(format!("Grep: {}", e)),
172 };
173
174 let matches: Vec<&str> = self
176 .files
177 .iter()
178 .filter(|(path, content)| {
179 path.to_lowercase().contains(&pattern) || content.to_lowercase().contains(&pattern)
180 })
181 .map(|(path, _)| path.as_str())
182 .collect();
183
184 if matches.is_empty() {
185 WorkResult::env_success(format!("No files found matching '{}'", pattern))
186 } else {
187 let result = format!(
188 "Found {} file(s) matching '{}':\n{}",
189 matches.len(),
190 pattern,
191 matches.join("\n")
192 );
193 WorkResult::env_success(result)
194 }
195 }
196
197 fn handle_read(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
198 let resolver = ParamResolver::new(action);
199 let path = match resolver.require("path") {
200 Ok(s) => s,
201 Err(e) => return WorkResult::env_failure(format!("Read: {}", e)),
202 };
203
204 let content = self.files.get(path);
206
207 match content {
208 Some(content) => {
209 {
211 let mut state = self.state.write().unwrap();
212 state
213 .read_files
214 .entry(worker_id)
215 .or_default()
216 .push(path.to_string());
217 }
218
219 if path == self.goal_file {
221 let mut state = self.state.write().unwrap();
222 if !state.completed.contains(&worker_id) {
223 state.completed.push(worker_id);
224 }
225 return WorkResult::done_success(format!(
226 "Found target file: {}\n\n{}",
227 path,
228 content.trim()
229 ));
230 }
231
232 WorkResult::env_success(format!("=== {} ===\n{}", path, content.trim()))
233 }
234 None => WorkResult::env_failure(format!("File not found: {}", path)),
235 }
236 }
237
238 fn handle_list(&self, _worker_id: WorkerId) -> WorkResult {
239 let paths: Vec<&str> = self.files.keys().map(|s| s.as_str()).collect();
240 let result = format!("Files in codebase:\n{}", paths.join("\n"));
241 WorkResult::env_success(result)
242 }
243}
244
245impl Environment for CodeEnvironment {
246 fn step(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
247 match action.name.as_str() {
248 "Grep" | "grep" => self.handle_grep(worker_id, action),
249 "Read" | "read" => self.handle_read(worker_id, action),
250 "List" | "list" | "ls" => self.handle_list(worker_id),
251 _ => WorkResult::unsupported(&action.name),
252 }
253 }
254
255 fn reset(&self) {
256 let mut state = self.state.write().unwrap();
257 state.read_files.clear();
258 state.completed.clear();
259 }
260
261 fn name(&self) -> &str {
262 "CodeEnvironment"
263 }
264}
265
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an action called `name` whose only parameter is `target`.
    fn targeted_action(name: &str, target: &str) -> Action {
        Action {
            name: name.into(),
            params: swarm_engine_core::types::ActionParams {
                target: Some(target.into()),
                args: HashMap::new(),
                data: vec![],
            },
        }
    }

    /// Runs one action as worker 0 against a fresh auth scenario.
    fn run_in_auth_scenario(name: &str, target: &str) -> WorkResult {
        let env = CodeEnvironment::auth_scenario();
        let worker = WorkerId(0);
        env.step(worker, &targeted_action(name, target))
    }

    /// Reports the success flag of `Acted` and `Done` results; every other
    /// variant counts as a failure.
    fn is_success(result: &WorkResult) -> bool {
        if let WorkResult::Acted { action_result, .. } = result {
            return action_result.success;
        }
        if let WorkResult::Done { success, .. } = result {
            return *success;
        }
        false
    }

    /// True when the result is the terminal `Done` variant.
    fn is_done(result: &WorkResult) -> bool {
        match result {
            WorkResult::Done { .. } => true,
            _ => false,
        }
    }

    #[test]
    fn test_grep_finds_files() {
        let result = run_in_auth_scenario("Grep", "auth");
        assert!(is_success(&result));
    }

    #[test]
    fn test_read_existing_file() {
        let result = run_in_auth_scenario("Read", "src/main.rs");
        assert!(is_success(&result));
        // A non-goal file must not finish the task.
        assert!(!is_done(&result));
    }

    #[test]
    fn test_read_goal_file_completes() {
        let result = run_in_auth_scenario("Read", "src/auth.rs");
        assert!(is_success(&result));
        // Reading the goal file finishes the task.
        assert!(is_done(&result));
    }

    #[test]
    fn test_read_nonexistent_file() {
        let result = run_in_auth_scenario("Read", "src/notfound.rs");
        assert!(!is_success(&result));
    }
}