swarm_engine_eval/environments/
code.rs1use std::collections::HashMap;
18use std::sync::RwLock;
19
20use swarm_engine_core::agent::WorkResult;
21use swarm_engine_core::environment::Environment;
22use swarm_engine_core::types::{Action, WorkerId};
23
24pub struct CodeEnvironment {
30 files: HashMap<String, String>,
32 goal_file: String,
34 state: RwLock<CodeState>,
36}
37
38#[derive(Debug, Default)]
39struct CodeState {
40 read_files: HashMap<WorkerId, Vec<String>>,
42 completed: Vec<WorkerId>,
44}
45
46impl CodeEnvironment {
47 pub fn new(files: HashMap<String, String>, goal_file: impl Into<String>) -> Self {
49 Self {
50 files,
51 goal_file: goal_file.into(),
52 state: RwLock::new(CodeState::default()),
53 }
54 }
55
/// Builds a canned scenario: a small mock web-service code base of six files
/// in which the goal is to locate and read the authentication module.
///
/// The goal file is `src/auth.rs`. The file contents are fixture text only;
/// this environment searches and returns them as strings.
56 pub fn auth_scenario() -> Self {
58 let mut files = HashMap::new();
59
// Entry point: declares the `auth`, `handlers` and `db` modules.
60 files.insert(
62 "src/main.rs".into(),
63 r#"
64mod auth;
65mod handlers;
66mod db;
67
68fn main() {
69 let app = create_app();
70 app.run();
71}
72"#
73 .into(),
74 );
75
// Handler module index: re-exports the `user` and `admin` submodules.
76 files.insert(
77 "src/handlers/mod.rs".into(),
78 r#"
79pub mod user;
80pub mod admin;
81
82pub use user::*;
83pub use admin::*;
84"#
85 .into(),
86 );
87
// A handler that mentions `auth` (via `verify_token`) without being the goal file.
88 files.insert(
89 "src/handlers/user.rs".into(),
90 r#"
91use crate::auth::verify_token;
92
93pub fn get_profile(token: &str) -> Result<User, Error> {
94 verify_token(token)?;
95 // ...
96}
97"#
98 .into(),
99 );
100
// The goal file: the authentication module itself.
101 files.insert(
102 "src/auth.rs".into(),
103 r#"
104//! Authentication module
105//!
106//! Handles user authentication and token verification.
107
108pub fn verify_token(token: &str) -> Result<Claims, AuthError> {
109 let decoded = decode_jwt(token)?;
110 validate_claims(&decoded)?;
111 Ok(decoded.claims)
112}
113
114pub fn login(username: &str, password: &str) -> Result<String, AuthError> {
115 let user = find_user(username)?;
116 if verify_password(password, &user.password_hash) {
117 Ok(generate_jwt(&user))
118 } else {
119 Err(AuthError::InvalidPassword)
120 }
121}
122
123fn decode_jwt(token: &str) -> Result<TokenData, AuthError> {
124 // JWT decoding logic
125}
126"#
127 .into(),
128 );
129
// Database module; its text does not mention authentication.
130 files.insert(
131 "src/db.rs".into(),
132 r#"
133pub struct Database {
134 pool: Pool,
135}
136
137impl Database {
138 pub fn connect(url: &str) -> Self {
139 // ...
140 }
141}
142"#
143 .into(),
144 );
145
// A non-Rust configuration file.
146 files.insert(
147 "config/settings.toml".into(),
148 r#"
149[server]
150port = 8080
151host = "0.0.0.0"
152
153[database]
154url = "postgres://localhost/app"
155"#
156 .into(),
157 );
158
// Reading `src/auth.rs` is what completes the scenario for a worker.
159 Self::new(files, "src/auth.rs")
160 }
161
162 fn handle_grep(&self, _worker_id: WorkerId, action: &Action) -> WorkResult {
167 let pattern = action
168 .params
169 .args
170 .get("pattern")
171 .or(action.params.target.as_ref())
172 .map(|s| s.to_lowercase())
173 .unwrap_or_default();
174
175 if pattern.is_empty() {
176 return WorkResult::env_failure("Grep requires a pattern");
177 }
178
179 let matches: Vec<&str> = self
181 .files
182 .iter()
183 .filter(|(path, content)| {
184 path.to_lowercase().contains(&pattern) || content.to_lowercase().contains(&pattern)
185 })
186 .map(|(path, _)| path.as_str())
187 .collect();
188
189 if matches.is_empty() {
190 WorkResult::env_success(format!("No files found matching '{}'", pattern))
191 } else {
192 let result = format!(
193 "Found {} file(s) matching '{}':\n{}",
194 matches.len(),
195 pattern,
196 matches.join("\n")
197 );
198 WorkResult::env_success(result)
199 }
200 }
201
202 fn handle_read(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
203 let path = action
204 .params
205 .args
206 .get("path")
207 .or(action.params.target.as_ref())
208 .cloned()
209 .unwrap_or_default();
210
211 if path.is_empty() {
212 return WorkResult::env_failure("Read requires a file path");
213 }
214
215 let content = self.files.get(&path);
217
218 match content {
219 Some(content) => {
220 {
222 let mut state = self.state.write().unwrap();
223 state
224 .read_files
225 .entry(worker_id)
226 .or_default()
227 .push(path.clone());
228 }
229
230 if path == self.goal_file {
232 let mut state = self.state.write().unwrap();
233 if !state.completed.contains(&worker_id) {
234 state.completed.push(worker_id);
235 }
236 return WorkResult::done_success(format!(
237 "Found target file: {}\n\n{}",
238 path,
239 content.trim()
240 ));
241 }
242
243 WorkResult::env_success(format!("=== {} ===\n{}", path, content.trim()))
244 }
245 None => WorkResult::env_failure(format!("File not found: {}", path)),
246 }
247 }
248
249 fn handle_list(&self, _worker_id: WorkerId) -> WorkResult {
250 let paths: Vec<&str> = self.files.keys().map(|s| s.as_str()).collect();
251 let result = format!("Files in codebase:\n{}", paths.join("\n"));
252 WorkResult::env_success(result)
253 }
254}
255
256impl Environment for CodeEnvironment {
257 fn step(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
258 match action.name.as_str() {
259 "Grep" | "grep" => self.handle_grep(worker_id, action),
260 "Read" | "read" => self.handle_read(worker_id, action),
261 "List" | "list" | "ls" => self.handle_list(worker_id),
262 _ => WorkResult::unsupported(&action.name),
263 }
264 }
265
266 fn reset(&self) {
267 let mut state = self.state.write().unwrap();
268 state.read_files.clear();
269 state.completed.clear();
270 }
271
272 fn name(&self) -> &str {
273 "CodeEnvironment"
274 }
275}
276
#[cfg(test)]
mod tests {
    use super::*;
    use swarm_engine_core::types::ActionParams;

    /// Steps a fresh auth scenario once, as worker 0, with an action that carries
    /// only a target (no args, no data).
    fn run(action_name: &str, target: &str) -> WorkResult {
        let env = CodeEnvironment::auth_scenario();
        let action = Action {
            name: action_name.into(),
            params: ActionParams {
                target: Some(target.into()),
                args: HashMap::new(),
                data: vec![],
            },
        };
        env.step(WorkerId(0), &action)
    }

    /// True when the result carries a success flag that is set; any other
    /// variant counts as a failure.
    fn is_success(result: &WorkResult) -> bool {
        match result {
            WorkResult::Done { success, .. } => *success,
            WorkResult::Acted { action_result, .. } => action_result.success,
            _ => false,
        }
    }

    /// True when the result is the terminal `Done` variant.
    fn is_done(result: &WorkResult) -> bool {
        matches!(result, WorkResult::Done { .. })
    }

    #[test]
    fn test_grep_finds_files() {
        let outcome = run("Grep", "auth");
        assert!(is_success(&outcome));
    }

    #[test]
    fn test_read_existing_file() {
        let outcome = run("Read", "src/main.rs");
        assert!(is_success(&outcome));
        // A non-goal file must not finish the task.
        assert!(!is_done(&outcome));
    }

    #[test]
    fn test_read_goal_file_completes() {
        let outcome = run("Read", "src/auth.rs");
        assert!(is_success(&outcome));
        // Reading the goal file finishes the task.
        assert!(is_done(&outcome));
    }

    #[test]
    fn test_read_nonexistent_file() {
        let outcome = run("Read", "src/notfound.rs");
        assert!(!is_success(&outcome));
    }
}