1use serde_json::Value;
5use std::path::Path;
6
/// Number of lines `read_file` shows when the caller gives no `end_line`.
const MAX_READ_LINES: usize = 200;
/// Maximum number of matching lines `grep_search` echoes back.
const MAX_GREP_MATCHES: usize = 30;
/// Per-stream budget for `run_command` output; applied to the UTF-8 byte length.
const MAX_OUTPUT_CHARS: usize = 4096;
/// Time limit, in seconds, for a single `run_command` invocation.
const COMMAND_TIMEOUT_SECS: u64 = 30;
11
/// Outcome of one tool invocation, as returned by [`execute`].
#[derive(Debug)]
pub struct ToolResult {
    /// Name of the tool that produced this result (e.g. `"read_file"`).
    pub tool_name: String,
    /// Human-readable output on success, or an error description on failure.
    pub content: String,
    /// Whether the tool succeeded; for `run_command` this means exit code 0.
    pub success: bool,
    /// Set by the file-modifying tools (`write_file`, `apply_edit`), including on failure.
    pub is_write: bool,
    /// Set only by the `submit` tool.
    pub is_submit: bool,
}
21
22pub async fn execute(tool_name: &str, args: &Value, workspace: &str) -> ToolResult {
24 match tool_name {
25 "read_file" => execute_read_file(args, workspace).await,
26 "grep_search" => execute_grep_search(args, workspace).await,
27 "list_directory" => execute_list_directory(args, workspace).await,
28 "run_command" => execute_run_command(args, workspace).await,
29 "write_file" => execute_write_file(args, workspace).await,
30 "apply_edit" => execute_apply_edit(args, workspace).await,
31 "submit" => execute_submit(),
32 _ => ToolResult {
33 tool_name: tool_name.to_string(),
34 content: format!("Unknown tool: {}", tool_name),
35 success: false,
36 is_write: false,
37 is_submit: false,
38 },
39 }
40}
41
42fn resolve_path(workspace: &str, relative: &str) -> Result<String, String> {
43 let cleaned = relative.trim_start_matches("./");
44 let root = Path::new(workspace);
45 let joined = crate::sandbox::validate_path_within(root, cleaned)?;
46 Ok(joined.to_string_lossy().into_owned())
47}
48
/// Returns the longest prefix of `s` that is at most `max` bytes long and ends
/// on a UTF-8 character boundary.
///
/// The whole string is returned when it already fits within `max` bytes.
fn safe_truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }
    // Walk back from `max` to the nearest boundary; index 0 always qualifies,
    // so the search cannot come up empty.
    let cut = (0..=max)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
59
60async fn execute_read_file(args: &Value, workspace: &str) -> ToolResult {
61 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
62 let start_line = args.get("start_line").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
63 let end_line = args
64 .get("end_line")
65 .and_then(|v| v.as_u64())
66 .map(|v| v as usize);
67
68 if path.is_empty() {
69 return ToolResult {
70 tool_name: "read_file".into(),
71 content: "Error: path is required".into(),
72 success: false,
73 is_write: false,
74 is_submit: false,
75 };
76 }
77
78 let full_path = match resolve_path(workspace, path) {
79 Ok(p) => p,
80 Err(e) => {
81 return ToolResult {
82 tool_name: "read_file".into(),
83 content: format!("Error: {}", e),
84 success: false,
85 is_write: false,
86 is_submit: false,
87 }
88 }
89 };
90
91 match tokio::fs::read_to_string(&full_path).await {
92 Ok(contents) => {
93 let lines: Vec<&str> = contents.lines().collect();
94 let total = lines.len();
95 let start = start_line.saturating_sub(1).min(total);
96 let end = end_line.unwrap_or(start + MAX_READ_LINES).min(total);
97
98 let mut output = format!(
99 "File: {} ({} lines total, showing {}-{})\n\n",
100 path,
101 total,
102 start + 1,
103 end
104 );
105 for (i, line) in lines[start..end].iter().enumerate() {
106 output.push_str(&format!("{:>5} | {}\n", start + i + 1, line));
107 }
108 if end < total {
109 output.push_str(&format!(
110 "\n... {} more lines. Use start_line={} to continue.\n",
111 total - end,
112 end + 1
113 ));
114 }
115
116 ToolResult {
117 tool_name: "read_file".into(),
118 content: output,
119 success: true,
120 is_write: false,
121 is_submit: false,
122 }
123 }
124 Err(e) => ToolResult {
125 tool_name: "read_file".into(),
126 content: format!("Error reading {}: {}", path, e),
127 success: false,
128 is_write: false,
129 is_submit: false,
130 },
131 }
132}
133
134async fn execute_grep_search(args: &Value, workspace: &str) -> ToolResult {
135 let pattern = args.get("pattern").and_then(|v| v.as_str()).unwrap_or("");
136 let search_path = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
137
138 if pattern.is_empty() {
139 return ToolResult {
140 tool_name: "grep_search".into(),
141 content: "Error: pattern is required".into(),
142 success: false,
143 is_write: false,
144 is_submit: false,
145 };
146 }
147
148 let full_path = match resolve_path(workspace, search_path) {
149 Ok(p) => p,
150 Err(e) => {
151 return ToolResult {
152 tool_name: "grep_search".into(),
153 content: format!("Error: {}", e),
154 success: false,
155 is_write: false,
156 is_submit: false,
157 }
158 }
159 };
160
161 let result = tokio::process::Command::new("grep")
162 .args([
163 "-rn",
164 "--include=*.py",
165 "--include=*.pyx",
166 "--include=*.pyi",
167 "--include=*.cfg",
168 "--include=*.toml",
169 "--include=*.txt",
170 "--include=*.rst",
171 "--include=*.md",
172 "--include=*.yml",
173 "--include=*.yaml",
174 "--include=*.json",
175 "--exclude-dir=.git",
176 "--exclude-dir=__pycache__",
177 "--exclude-dir=*.egg-info",
178 "--exclude-dir=.tox",
179 "--exclude-dir=build",
180 pattern,
181 &full_path,
182 ])
183 .output()
184 .await;
185
186 match result {
187 Ok(output) => {
188 let stdout = String::from_utf8_lossy(&output.stdout);
189 let lines: Vec<&str> = stdout.lines().collect();
190 let total_matches = lines.len();
191 let prefix = format!("{}/", workspace.trim_end_matches('/'));
192 let mut result_text = format!("Found {} matches for '{}'\n\n", total_matches, pattern);
193 for line in lines.iter().take(MAX_GREP_MATCHES) {
194 let clean = line.strip_prefix(&prefix).unwrap_or(line);
195 result_text.push_str(clean);
196 result_text.push('\n');
197 }
198 if total_matches > MAX_GREP_MATCHES {
199 result_text.push_str(&format!(
200 "\n... {} more matches (showing first {})\n",
201 total_matches - MAX_GREP_MATCHES,
202 MAX_GREP_MATCHES
203 ));
204 }
205 ToolResult {
206 tool_name: "grep_search".into(),
207 content: result_text,
208 success: true,
209 is_write: false,
210 is_submit: false,
211 }
212 }
213 Err(e) => ToolResult {
214 tool_name: "grep_search".into(),
215 content: format!("Error running grep: {}", e),
216 success: false,
217 is_write: false,
218 is_submit: false,
219 },
220 }
221}
222
223async fn execute_list_directory(args: &Value, workspace: &str) -> ToolResult {
224 let dir_path = args.get("path").and_then(|v| v.as_str()).unwrap_or(".");
225 let full_path = match resolve_path(workspace, dir_path) {
226 Ok(p) => p,
227 Err(e) => {
228 return ToolResult {
229 tool_name: "list_directory".into(),
230 content: format!("Error: {}", e),
231 success: false,
232 is_write: false,
233 is_submit: false,
234 }
235 }
236 };
237
238 let path = Path::new(&full_path);
239 if !path.is_dir() {
240 return ToolResult {
241 tool_name: "list_directory".into(),
242 content: format!("Not a directory: {}", dir_path),
243 success: false,
244 is_write: false,
245 is_submit: false,
246 };
247 }
248
249 let mut entries: Vec<String> = Vec::new();
250 match tokio::fs::read_dir(&full_path).await {
251 Ok(mut dir) => {
252 while let Ok(Some(entry)) = dir.next_entry().await {
253 let name = entry.file_name().to_string_lossy().to_string();
254 if name.starts_with('.')
255 || name == "__pycache__"
256 || name == "node_modules"
257 || name.ends_with(".egg-info")
258 || name == ".tox"
259 {
260 continue;
261 }
262 if let Ok(meta) = entry.metadata().await {
263 if meta.is_dir() {
264 entries.push(format!("[dir] {}/", name));
265 } else {
266 let size = meta.len();
267 let size_str = if size < 1024 {
268 format!("{} B", size)
269 } else if size < 1024 * 1024 {
270 format!("{:.1} KB", size as f64 / 1024.0)
271 } else {
272 format!("{:.1} MB", size as f64 / (1024.0 * 1024.0))
273 };
274 entries.push(format!("[file] {} ({})", name, size_str));
275 }
276 }
277 }
278 entries.sort();
279 let mut output = format!("Directory: {} ({} entries)\n\n", dir_path, entries.len());
280 for e in &entries {
281 output.push_str(e);
282 output.push('\n');
283 }
284 ToolResult {
285 tool_name: "list_directory".into(),
286 content: output,
287 success: true,
288 is_write: false,
289 is_submit: false,
290 }
291 }
292 Err(e) => ToolResult {
293 tool_name: "list_directory".into(),
294 content: format!("Error reading directory {}: {}", dir_path, e),
295 success: false,
296 is_write: false,
297 is_submit: false,
298 },
299 }
300}
301
/// Program names that `run_command` is willing to execute.
///
/// The first token of the parsed command line must equal one of these entries
/// exactly; `run_command` rewrites a bare `python` head to `python3` before
/// the lookup.
const ALLOWED_RUN_COMMAND_HEADS: &[&str] = &[
    // Python tooling
    "pytest",
    "python",
    "python3",
    "pip",
    "pip3",
    "uv",
    "ruff",
    "black",
    "mypy",
    "tox",
    "pre-commit",
    // Inspection utilities
    "ls",
    "cat",
    "head",
    "tail",
    "grep",
    "find",
    "wc",
    "echo",
    "true",
    "false",
    "test",
    // File management
    "mkdir",
    "rmdir",
    "rm",
    "touch",
    "cp",
    "mv",
    // Version control and other build tools
    "git",
    "make",
    "cargo",
    "npm",
    "yarn",
    "node",
];
340
341async fn execute_run_command(args: &Value, workspace: &str) -> ToolResult {
342 let cmd = args.get("command").and_then(|v| v.as_str()).unwrap_or("");
343 if cmd.is_empty() {
344 return ToolResult {
345 tool_name: "run_command".into(),
346 content: "Error: command is required".into(),
347 success: false,
348 is_write: false,
349 is_submit: false,
350 };
351 }
352
353 let mut argv: Vec<String> = match shell_words::split(cmd) {
358 Ok(v) if !v.is_empty() => v,
359 Ok(_) => {
360 return ToolResult {
361 tool_name: "run_command".into(),
362 content: "Error: empty command after parsing".into(),
363 success: false,
364 is_write: false,
365 is_submit: false,
366 };
367 }
368 Err(e) => {
369 return ToolResult {
370 tool_name: "run_command".into(),
371 content: format!(
372 "Error: cannot parse command (use simple argv form, no unbalanced quotes): {}",
373 e
374 ),
375 success: false,
376 is_write: false,
377 is_submit: false,
378 };
379 }
380 };
381
382 for token in &argv {
386 if matches!(
387 token.as_str(),
388 "&&" | "||" | ";" | "|" | ">" | ">>" | "<" | "<<" | "&"
389 ) {
390 return ToolResult {
391 tool_name: "run_command".into(),
392 content: format!(
393 "Error: shell metacharacter '{}' is not supported. Run as separate commands.",
394 token
395 ),
396 success: false,
397 is_write: false,
398 is_submit: false,
399 };
400 }
401 if token.contains('\0') {
402 return ToolResult {
403 tool_name: "run_command".into(),
404 content: "Error: null byte in argument".into(),
405 success: false,
406 is_write: false,
407 is_submit: false,
408 };
409 }
410 }
411
412 if argv[0] == "python" {
415 argv[0] = "python3".to_string();
416 }
417
418 let head = argv[0].clone();
419 if !ALLOWED_RUN_COMMAND_HEADS.iter().any(|a| *a == head) {
420 return ToolResult {
421 tool_name: "run_command".into(),
422 content: format!(
423 "Error: program '{}' is not in the allowlist. Edit ALLOWED_RUN_COMMAND_HEADS in swebench_tools.rs to permit it.",
424 head
425 ),
426 success: false,
427 is_write: false,
428 is_submit: false,
429 };
430 }
431
432 let result = tokio::time::timeout(
433 std::time::Duration::from_secs(COMMAND_TIMEOUT_SECS),
434 tokio::process::Command::new(&head)
435 .args(&argv[1..])
436 .current_dir(workspace)
437 .output(),
438 )
439 .await;
440
441 match result {
442 Ok(Ok(output)) => {
443 let stdout = String::from_utf8_lossy(&output.stdout);
444 let stderr = String::from_utf8_lossy(&output.stderr);
445 let exit_code = output.status.code().unwrap_or(-1);
446 let mut text = format!("Exit code: {}\n", exit_code);
447 if !stdout.is_empty() {
448 text.push_str(&format!(
449 "\nSTDOUT:\n{}",
450 safe_truncate(&stdout, MAX_OUTPUT_CHARS)
451 ));
452 if stdout.len() > MAX_OUTPUT_CHARS {
453 text.push_str(&format!("\n... truncated ({} chars total)\n", stdout.len()));
454 }
455 }
456 if !stderr.is_empty() {
457 text.push_str(&format!(
458 "\nSTDERR:\n{}",
459 safe_truncate(&stderr, MAX_OUTPUT_CHARS)
460 ));
461 }
462 ToolResult {
463 tool_name: "run_command".into(),
464 content: text,
465 success: exit_code == 0,
466 is_write: false,
467 is_submit: false,
468 }
469 }
470 Ok(Err(e)) => ToolResult {
471 tool_name: "run_command".into(),
472 content: format!("Error executing command: {}", e),
473 success: false,
474 is_write: false,
475 is_submit: false,
476 },
477 Err(_) => ToolResult {
478 tool_name: "run_command".into(),
479 content: format!("Command timed out after {}s", COMMAND_TIMEOUT_SECS),
480 success: false,
481 is_write: false,
482 is_submit: false,
483 },
484 }
485}
486
487async fn execute_write_file(args: &Value, workspace: &str) -> ToolResult {
488 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
489 let content = args.get("content").and_then(|v| v.as_str()).unwrap_or("");
490 if path.is_empty() {
491 return ToolResult {
492 tool_name: "write_file".into(),
493 content: "Error: path is required".into(),
494 success: false,
495 is_write: true,
496 is_submit: false,
497 };
498 }
499 let full_path = match resolve_path(workspace, path) {
500 Ok(p) => p,
501 Err(e) => {
502 return ToolResult {
503 tool_name: "write_file".into(),
504 content: format!("Error: {}", e),
505 success: false,
506 is_write: true,
507 is_submit: false,
508 }
509 }
510 };
511
512 if let Some(parent) = Path::new(&full_path).parent() {
513 if let Err(e) = tokio::fs::create_dir_all(parent).await {
514 return ToolResult {
515 tool_name: "write_file".into(),
516 content: format!("Error creating directories: {}", e),
517 success: false,
518 is_write: true,
519 is_submit: false,
520 };
521 }
522 }
523
524 match tokio::fs::write(&full_path, content).await {
525 Ok(()) => {
526 let lines = content.lines().count();
527 ToolResult {
528 tool_name: "write_file".into(),
529 content: format!(
530 "File written: {} ({} lines, {} bytes)",
531 path,
532 lines,
533 content.len()
534 ),
535 success: true,
536 is_write: true,
537 is_submit: false,
538 }
539 }
540 Err(e) => ToolResult {
541 tool_name: "write_file".into(),
542 content: format!("Error writing {}: {}", path, e),
543 success: false,
544 is_write: true,
545 is_submit: false,
546 },
547 }
548}
549
550async fn execute_apply_edit(args: &Value, workspace: &str) -> ToolResult {
551 let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
552 let old_text = args.get("old_text").and_then(|v| v.as_str()).unwrap_or("");
553 let new_text = args.get("new_text").and_then(|v| v.as_str()).unwrap_or("");
554
555 if path.is_empty() || old_text.is_empty() {
556 return ToolResult {
557 tool_name: "apply_edit".into(),
558 content: "Error: path and old_text are required".into(),
559 success: false,
560 is_write: true,
561 is_submit: false,
562 };
563 }
564
565 let full_path = match resolve_path(workspace, path) {
566 Ok(p) => p,
567 Err(e) => {
568 return ToolResult {
569 tool_name: "apply_edit".into(),
570 content: format!("Error: {}", e),
571 success: false,
572 is_write: true,
573 is_submit: false,
574 }
575 }
576 };
577
578 match tokio::fs::read_to_string(&full_path).await {
579 Ok(contents) => {
580 if let Some(pos) = contents.find(old_text) {
581 let new_contents = format!(
582 "{}{}{}",
583 &contents[..pos],
584 new_text,
585 &contents[pos + old_text.len()..]
586 );
587 let remaining = &new_contents[pos + new_text.len()..];
588 let extra = remaining.matches(old_text).count();
589
590 match tokio::fs::write(&full_path, &new_contents).await {
591 Ok(()) => {
592 let mut msg = format!("Edit applied to {}", path);
593 if extra > 0 {
594 msg.push_str(&format!(
595 " (warning: {} more occurrence(s) of old_text remain)",
596 extra
597 ));
598 }
599 ToolResult {
600 tool_name: "apply_edit".into(),
601 content: msg,
602 success: true,
603 is_write: true,
604 is_submit: false,
605 }
606 }
607 Err(e) => ToolResult {
608 tool_name: "apply_edit".into(),
609 content: format!("Error writing {}: {}", path, e),
610 success: false,
611 is_write: true,
612 is_submit: false,
613 },
614 }
615 } else {
616 ToolResult {
617 tool_name: "apply_edit".into(),
618 content: format!("Error: old_text not found in {}.\nold_text (first 200 chars): '{}'\nFile preview (first 500 chars):\n{}", path, safe_truncate(old_text, 200), safe_truncate(&contents, 500)),
619 success: false, is_write: true, is_submit: false,
620 }
621 }
622 }
623 Err(e) => ToolResult {
624 tool_name: "apply_edit".into(),
625 content: format!("Error reading {}: {}", path, e),
626 success: false,
627 is_write: true,
628 is_submit: false,
629 },
630 }
631}
632
633fn execute_submit() -> ToolResult {
634 ToolResult {
635 tool_name: "submit".into(),
636 content: "Submission recorded. Your changes will now be evaluated against the test suite."
637 .into(),
638 success: true,
639 is_write: false,
640 is_submit: true,
641 }
642}
643
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Runs `cmd` through `execute_run_command` in a scratch workspace under the
    /// system temp directory, named after the current process id.
    fn run(cmd: &str) -> ToolResult {
        let scratch = std::env::temp_dir().join(format!("bcf-sw-{}", std::process::id()));
        let workspace = scratch.to_string_lossy().into_owned();
        std::fs::create_dir_all(&workspace).ok();

        let args = json!({ "command": cmd });
        let runtime = tokio::runtime::Runtime::new().unwrap();
        runtime.block_on(execute_run_command(&args, &workspace))
    }

    /// `$(...)` is never expanded: the command must be refused outright.
    #[test]
    fn test_run_command_rejects_command_substitution() {
        let outcome = run("$(echo rm) -rf /");
        assert!(!outcome.success);
        let refused =
            outcome.content.contains("allowlist") || outcome.content.contains("metacharacter");
        assert!(refused, "unexpected error: {}", outcome.content);
    }

    /// Standalone shell operators are rejected with the metacharacter error.
    #[test]
    fn test_run_command_rejects_compound_metachars() {
        let compound = ["echo a && echo b", "echo a ; echo b", "echo a | grep b"];
        for cmd in compound.iter() {
            let outcome = run(cmd);
            assert!(!outcome.success, "should have rejected: {}", cmd);
            assert!(
                outcome.content.contains("metacharacter"),
                "expected metachar error for {:?}, got: {}",
                cmd,
                outcome.content
            );
        }
    }

    /// Programs outside the allowlist are refused.
    #[test]
    fn test_run_command_rejects_unallowlisted_head() {
        let outcome = run("curl http://example.com");
        assert!(!outcome.success);
        assert!(
            outcome.content.contains("allowlist"),
            "got: {}",
            outcome.content
        );
    }

    /// The `python` to `python3` rewrite applies to the head token only.
    #[test]
    fn test_run_command_python_rewrite_only_for_bare_head() {
        let bare_head = run("python --version");
        assert!(
            !bare_head.content.contains("not in the allowlist"),
            "python head should pass allowlist after rewrite: {}",
            bare_head.content
        );

        let later_token = run("echo pythonic_test_file");
        assert!(
            !later_token.content.contains("python3ic"),
            "python rewrite should not corrupt non-head tokens: {}",
            later_token.content
        );
    }

    /// Two dots inside a file name are not a traversal attempt.
    #[test]
    fn test_resolve_path_filename_with_dotdot_allowed() {
        let resolved = resolve_path("/tmp/wsroot", "file..py");
        assert!(
            resolved.is_ok(),
            "filename with double-dots should be allowed, got {:?}",
            resolved
        );
    }

    /// Parent-directory escapes and absolute paths are rejected.
    #[test]
    fn test_resolve_path_blocks_real_traversal() {
        let escapes = ["../etc/passwd", "app/../../etc/shadow", "/etc/passwd"];
        for candidate in escapes.iter() {
            assert!(resolve_path("/tmp/wsroot", candidate).is_err());
        }
    }
}