1use super::traits::{Tool, ToolResult};
2use crate::security::SecurityPolicy;
3use async_trait::async_trait;
4use serde_json::json;
5use std::sync::Arc;
6
/// Largest file, in bytes, that `file_read` will load (10 MiB).
///
/// Files whose metadata reports a greater length are rejected before any
/// content is read.
const MAX_FILE_SIZE_BYTES: u64 = 10 * (1 << 20);
8
9pub struct FileReadTool {
11 security: Arc<SecurityPolicy>,
12}
13
14impl FileReadTool {
15 pub fn new(security: Arc<SecurityPolicy>) -> Self {
16 Self { security }
17 }
18}
19
20#[async_trait]
21impl Tool for FileReadTool {
22 fn name(&self) -> &str {
23 "file_read"
24 }
25
26 fn description(&self) -> &str {
27 "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion."
28 }
29
30 fn parameters_schema(&self) -> serde_json::Value {
31 json!({
32 "type": "object",
33 "properties": {
34 "path": {
35 "type": "string",
36 "description": "Path to the file. Relative paths resolve from workspace; outside paths require policy allowlist."
37 },
38 "offset": {
39 "type": "integer",
40 "description": "Starting line number (1-based, default: 1)"
41 },
42 "limit": {
43 "type": "integer",
44 "description": "Maximum number of lines to return (default: all)"
45 }
46 },
47 "required": ["path"]
48 })
49 }
50
51 async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
52 let path = args
53 .get("path")
54 .and_then(|v| v.as_str())
55 .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;
56
57 if self.security.is_rate_limited() {
58 return Ok(ToolResult {
59 success: false,
60 output: String::new(),
61 error: Some("Rate limit exceeded: too many actions in the last hour".into()),
62 });
63 }
64
65 if !self.security.is_path_allowed(path) {
67 return Ok(ToolResult {
68 success: false,
69 output: String::new(),
70 error: Some(format!("Path not allowed by security policy: {path}")),
71 });
72 }
73
74 if !self.security.record_action() {
78 return Ok(ToolResult {
79 success: false,
80 output: String::new(),
81 error: Some("Rate limit exceeded: action budget exhausted".into()),
82 });
83 }
84
85 let full_path = self.security.resolve_tool_path(path);
86
87 let resolved_path = match tokio::fs::canonicalize(&full_path).await {
89 Ok(p) => p,
90 Err(e) => {
91 return Ok(ToolResult {
92 success: false,
93 output: String::new(),
94 error: Some(format!("Failed to resolve file path: {e}")),
95 });
96 }
97 };
98
99 if !self.security.is_resolved_path_allowed(&resolved_path) {
100 return Ok(ToolResult {
101 success: false,
102 output: String::new(),
103 error: Some(
104 self.security
105 .resolved_path_violation_message(&resolved_path),
106 ),
107 });
108 }
109
110 match tokio::fs::metadata(&resolved_path).await {
112 Ok(meta) => {
113 if meta.len() > MAX_FILE_SIZE_BYTES {
114 return Ok(ToolResult {
115 success: false,
116 output: String::new(),
117 error: Some(format!(
118 "File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
119 meta.len()
120 )),
121 });
122 }
123 }
124 Err(e) => {
125 return Ok(ToolResult {
126 success: false,
127 output: String::new(),
128 error: Some(format!("Failed to read file metadata: {e}")),
129 });
130 }
131 }
132
133 match tokio::fs::read_to_string(&resolved_path).await {
134 Ok(contents) => {
135 let lines: Vec<&str> = contents.lines().collect();
136 let total = lines.len();
137
138 if total == 0 {
139 return Ok(ToolResult {
140 success: true,
141 output: String::new(),
142 error: None,
143 });
144 }
145
146 let offset = args
147 .get("offset")
148 .and_then(|v| v.as_u64())
149 .map(|v| {
150 usize::try_from(v.max(1))
151 .unwrap_or(usize::MAX)
152 .saturating_sub(1)
153 })
154 .unwrap_or(0);
155 let start = offset.min(total);
156
157 let end = match args.get("limit").and_then(|v| v.as_u64()) {
158 Some(l) => {
159 let limit = usize::try_from(l).unwrap_or(usize::MAX);
160 (start.saturating_add(limit)).min(total)
161 }
162 None => total,
163 };
164
165 if start >= end {
166 return Ok(ToolResult {
167 success: true,
168 output: format!("[No lines in range, file has {total} lines]"),
169 error: None,
170 });
171 }
172
173 let numbered: String = lines[start..end]
174 .iter()
175 .enumerate()
176 .map(|(i, line)| format!("{}: {}", start + i + 1, line))
177 .collect::<Vec<_>>()
178 .join("\n");
179
180 let partial = start > 0 || end < total;
181 let summary = if partial {
182 format!("\n[Lines {}-{} of {total}]", start + 1, end)
183 } else {
184 format!("\n[{total} lines total]")
185 };
186
187 Ok(ToolResult {
188 success: true,
189 output: format!("{numbered}{summary}"),
190 error: None,
191 })
192 }
193 Err(_) => {
194 let bytes = tokio::fs::read(&resolved_path)
196 .await
197 .map_err(|e| anyhow::anyhow!("Failed to read file: {e}"))?;
198
199 if let Some(text) = try_extract_pdf_text(&bytes) {
200 return Ok(ToolResult {
201 success: true,
202 output: text,
203 error: None,
204 });
205 }
206
207 let lossy = String::from_utf8_lossy(&bytes).into_owned();
209 Ok(ToolResult {
210 success: true,
211 output: lossy,
212 error: None,
213 })
214 }
215 }
216 }
217}
218
/// Attempts to pull plain text out of `bytes` when they look like a PDF.
///
/// Returns `None` when the data does not begin with the `%PDF-` magic, when
/// `pdf_extract` fails, or when the extracted text is blank after trimming.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|extracted| !extracted.trim().is_empty())
}
230
/// Stand-in used when the `rag-pdf` feature is disabled: never extracts
/// anything, so callers always take their non-PDF path.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
    None
}
235
#[cfg(test)]
mod tests {
    use super::*;
    use crate::security::{AutonomyLevel, SecurityPolicy};

    /// Supervised policy rooted at `workspace`; every other field is default.
    fn test_security(workspace: std::path::PathBuf) -> Arc<SecurityPolicy> {
        let policy = SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            workspace_dir: workspace,
            ..SecurityPolicy::default()
        };
        Arc::new(policy)
    }

    /// Policy rooted at `workspace` with explicit autonomy and hourly budget.
    fn test_security_with(
        workspace: std::path::PathBuf,
        autonomy: AutonomyLevel,
        max_actions_per_hour: u32,
    ) -> Arc<SecurityPolicy> {
        let policy = SecurityPolicy {
            autonomy,
            workspace_dir: workspace,
            max_actions_per_hour,
            ..SecurityPolicy::default()
        };
        Arc::new(policy)
    }

    /// Best-effort recursive removal of `dir`; errors are ignored.
    async fn remove_tree(dir: &std::path::Path) {
        let _ = tokio::fs::remove_dir_all(dir).await;
    }

    /// Returns an empty directory named `name` under the system temp dir,
    /// discarding anything a previous run left behind.
    async fn fresh_dir(name: &str) -> std::path::PathBuf {
        let dir = std::env::temp_dir().join(name);
        remove_tree(&dir).await;
        tokio::fs::create_dir_all(&dir).await.unwrap();
        dir
    }

    #[test]
    fn file_read_name() {
        let reader = FileReadTool::new(test_security(std::env::temp_dir()));
        assert_eq!(reader.name(), "file_read");
    }

    #[test]
    fn file_read_schema_has_path() {
        let schema = FileReadTool::new(test_security(std::env::temp_dir())).parameters_schema();
        for key in ["path", "offset", "limit"] {
            assert!(schema["properties"][key].is_object());
        }
        assert!(
            schema["required"]
                .as_array()
                .unwrap()
                .contains(&json!("path"))
        );
        assert!(
            !schema["required"]
                .as_array()
                .unwrap()
                .contains(&json!("offset"))
        );
    }

    #[tokio::test]
    async fn file_read_existing_file() {
        let workspace = fresh_dir("construct_test_file_read").await;
        tokio::fs::write(workspace.join("test.txt"), "hello world")
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "test.txt"})).await.unwrap();
        assert!(outcome.success);
        assert!(outcome.output.contains("1: hello world"));
        assert!(outcome.output.contains("[1 lines total]"));
        assert!(outcome.error.is_none());

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_nonexistent_file() {
        let workspace = fresh_dir("construct_test_file_read_missing").await;

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "nope.txt"})).await.unwrap();
        assert!(!outcome.success);
        assert!(outcome.error.as_ref().unwrap().contains("Failed to resolve"));

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_blocks_path_traversal() {
        let workspace = fresh_dir("construct_test_file_read_traversal").await;

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader
            .execute(json!({"path": "../../../etc/passwd"}))
            .await
            .unwrap();
        assert!(!outcome.success);
        assert!(outcome.error.as_ref().unwrap().contains("not allowed"));

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_blocks_absolute_path() {
        let reader = FileReadTool::new(test_security(std::env::temp_dir()));
        let outcome = reader
            .execute(json!({"path": "/etc/passwd"}))
            .await
            .unwrap();
        assert!(!outcome.success);
        assert!(outcome.error.as_ref().unwrap().contains("not allowed"));
    }

    #[tokio::test]
    async fn file_read_blocks_when_rate_limited() {
        let workspace = fresh_dir("construct_test_file_read_rate_limited").await;
        tokio::fs::write(workspace.join("test.txt"), "hello world")
            .await
            .unwrap();

        // A budget of zero actions per hour means the very first call is refused.
        let policy = test_security_with(workspace.clone(), AutonomyLevel::Supervised, 0);
        let reader = FileReadTool::new(policy);
        let outcome = reader.execute(json!({"path": "test.txt"})).await.unwrap();

        assert!(!outcome.success);
        assert!(
            outcome
                .error
                .as_deref()
                .unwrap_or("")
                .contains("Rate limit exceeded")
        );

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_allows_readonly_mode() {
        let workspace = fresh_dir("construct_test_file_read_readonly").await;
        tokio::fs::write(workspace.join("test.txt"), "readonly ok")
            .await
            .unwrap();

        let policy = test_security_with(workspace.clone(), AutonomyLevel::ReadOnly, 20);
        let reader = FileReadTool::new(policy);
        let outcome = reader.execute(json!({"path": "test.txt"})).await.unwrap();

        assert!(outcome.success);
        assert!(outcome.output.contains("1: readonly ok"));

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_missing_path_param() {
        let reader = FileReadTool::new(test_security(std::env::temp_dir()));
        let outcome = reader.execute(json!({})).await;
        assert!(outcome.is_err());
    }

    #[tokio::test]
    async fn file_read_empty_file() {
        let workspace = fresh_dir("construct_test_file_read_empty").await;
        tokio::fs::write(workspace.join("empty.txt"), "")
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "empty.txt"})).await.unwrap();
        assert!(outcome.success);
        assert_eq!(outcome.output, "");

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_nested_path() {
        let workspace = std::env::temp_dir().join("construct_test_file_read_nested");
        remove_tree(&workspace).await;
        tokio::fs::create_dir_all(workspace.join("sub/dir"))
            .await
            .unwrap();
        tokio::fs::write(workspace.join("sub/dir/deep.txt"), "deep content")
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader
            .execute(json!({"path": "sub/dir/deep.txt"}))
            .await
            .unwrap();
        assert!(outcome.success);
        assert!(outcome.output.contains("1: deep content"));

        remove_tree(&workspace).await;
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn file_read_blocks_symlink_escape() {
        use std::os::unix::fs::symlink;

        let root = std::env::temp_dir().join("construct_test_file_read_symlink_escape");
        let workspace = root.join("workspace");
        let outside = root.join("outside");

        remove_tree(&root).await;
        tokio::fs::create_dir_all(&workspace).await.unwrap();
        tokio::fs::create_dir_all(&outside).await.unwrap();

        tokio::fs::write(outside.join("secret.txt"), "outside workspace")
            .await
            .unwrap();

        // The link lives inside the workspace but points at a file outside it.
        symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "escape.txt"})).await.unwrap();

        assert!(!outcome.success);
        assert!(
            outcome
                .error
                .as_deref()
                .unwrap_or("")
                .contains("escapes workspace")
        );

        remove_tree(&root).await;
    }

    #[tokio::test]
    async fn file_read_outside_workspace_allowed_when_workspace_only_disabled() {
        let root = std::env::temp_dir().join("construct_test_file_read_allowed_roots_hint");
        let workspace = root.join("workspace");
        let outside = root.join("outside");
        let outside_file = outside.join("notes.txt");

        remove_tree(&root).await;
        tokio::fs::create_dir_all(&workspace).await.unwrap();
        tokio::fs::create_dir_all(&outside).await.unwrap();
        tokio::fs::write(&outside_file, "outside").await.unwrap();

        let policy = SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            workspace_dir: workspace,
            workspace_only: false,
            forbidden_paths: vec![],
            ..SecurityPolicy::default()
        };
        let reader = FileReadTool::new(Arc::new(policy));

        let outcome = reader
            .execute(json!({"path": outside_file.to_string_lossy().to_string()}))
            .await
            .unwrap();

        assert!(outcome.success);
        assert!(outcome.error.is_none());
        assert!(outcome.output.contains("outside"));

        remove_tree(&root).await;
    }

    #[tokio::test]
    async fn file_read_nonexistent_consumes_rate_limit_budget() {
        let workspace = fresh_dir("construct_test_file_read_probe").await;

        let policy = test_security_with(workspace.clone(), AutonomyLevel::Supervised, 2);
        let reader = FileReadTool::new(policy);

        // The first two probes for missing files each spend one unit of budget.
        for missing in ["nope1.txt", "nope2.txt"] {
            let probe = reader.execute(json!({"path": missing})).await.unwrap();
            assert!(!probe.success);
            assert!(probe.error.as_ref().unwrap().contains("Failed to resolve"));
        }

        // The third probe must be refused by the rate limiter.
        let refused = reader.execute(json!({"path": "nope3.txt"})).await.unwrap();
        assert!(!refused.success);
        assert!(
            refused.error.as_ref().unwrap().contains("Rate limit"),
            "Expected rate limit error, got: {:?}",
            refused.error
        );

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_with_offset_and_limit() {
        let workspace = fresh_dir("construct_test_file_read_offset").await;
        tokio::fs::write(workspace.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));

        // Offset and limit together select a middle window.
        let windowed = reader
            .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
            .await
            .unwrap();
        assert!(windowed.success);
        assert!(windowed.output.contains("2: bbb"));
        assert!(windowed.output.contains("3: ccc"));
        assert!(!windowed.output.contains("1: aaa"));
        assert!(!windowed.output.contains("4: ddd"));
        assert!(windowed.output.contains("[Lines 2-3 of 5]"));

        // Offset alone reads through to the end.
        let tail = reader
            .execute(json!({"path": "lines.txt", "offset": 4}))
            .await
            .unwrap();
        assert!(tail.success);
        assert!(tail.output.contains("4: ddd"));
        assert!(tail.output.contains("5: eee"));
        assert!(tail.output.contains("[Lines 4-5 of 5]"));

        // Limit alone reads from the first line.
        let head = reader
            .execute(json!({"path": "lines.txt", "limit": 2}))
            .await
            .unwrap();
        assert!(head.success);
        assert!(head.output.contains("1: aaa"));
        assert!(head.output.contains("2: bbb"));
        assert!(!head.output.contains("3: ccc"));
        assert!(head.output.contains("[Lines 1-2 of 5]"));

        // Neither parameter returns the whole file.
        let whole = reader.execute(json!({"path": "lines.txt"})).await.unwrap();
        assert!(whole.success);
        assert!(whole.output.contains("1: aaa"));
        assert!(whole.output.contains("5: eee"));
        assert!(whole.output.contains("[5 lines total]"));

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_offset_beyond_end() {
        let workspace = fresh_dir("construct_test_file_read_offset_end").await;
        tokio::fs::write(workspace.join("short.txt"), "one\ntwo")
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader
            .execute(json!({"path": "short.txt", "offset": 100}))
            .await
            .unwrap();
        assert!(outcome.success);
        assert!(
            outcome
                .output
                .contains("[No lines in range, file has 2 lines]")
        );

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_rejects_oversized_file() {
        let workspace = fresh_dir("construct_test_file_read_large").await;

        // One byte over the 10 MiB limit.
        let oversized = vec![b'x'; 10 * 1024 * 1024 + 1];
        tokio::fs::write(workspace.join("huge.bin"), &oversized)
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "huge.bin"})).await.unwrap();
        assert!(!outcome.success);
        assert!(outcome.error.as_ref().unwrap().contains("File too large"));

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_extracts_pdf_text() {
        let workspace = fresh_dir("construct_test_file_read_pdf").await;

        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/test_document.pdf");
        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
            .await
            .expect("copy PDF fixture");

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "report.pdf"})).await.unwrap();

        assert!(
            outcome.success,
            "PDF read must succeed, error: {:?}",
            outcome.error
        );
        assert!(
            outcome.output.contains("Hello"),
            "extracted text must contain 'Hello', got: {}",
            outcome.output
        );

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_lossy_reads_binary_file() {
        let workspace = fresh_dir("construct_test_file_read_lossy").await;

        // Invalid UTF-8 surrounding the ASCII text "hi".
        let payload: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
        tokio::fs::write(workspace.join("data.bin"), &payload)
            .await
            .unwrap();

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader.execute(json!({"path": "data.bin"})).await.unwrap();

        assert!(
            outcome.success,
            "lossy read must succeed, error: {:?}",
            outcome.error
        );
        assert!(
            outcome.output.contains('\u{FFFD}'),
            "lossy output must contain replacement character, got: {:?}",
            outcome.output
        );
        assert!(
            outcome.output.contains("hi"),
            "lossy output must preserve valid ASCII, got: {:?}",
            outcome.output
        );

        remove_tree(&workspace).await;
    }

    /// Shared scaffolding for the agent-level tests: a scripted provider that
    /// records every request, plus no-op memory and observer instances.
    mod e2e_helpers {
        use crate::config::MemoryConfig;
        use crate::memory::{self, Memory};
        use crate::observability::{NoopObserver, Observer};
        use crate::providers::{ChatMessage, ChatRequest, ChatResponse, Provider};
        use std::sync::{Arc, Mutex};

        /// Message lists of every `chat` request, in arrival order.
        pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;

        /// Provider that replays canned responses front-to-back and logs the
        /// messages of each request it receives.
        pub struct RecordingProvider {
            responses: Mutex<Vec<ChatResponse>>,
            pub requests: SharedRequests,
        }

        impl RecordingProvider {
            /// Returns the provider together with a handle to its request log.
            pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
                let log: SharedRequests = Arc::new(Mutex::new(Vec::new()));
                let provider = Self {
                    responses: Mutex::new(responses),
                    requests: log.clone(),
                };
                (provider, log)
            }
        }

        #[async_trait::async_trait]
        impl Provider for RecordingProvider {
            async fn chat_with_system(
                &self,
                _system_prompt: Option<&str>,
                _message: &str,
                _model: &str,
                _temperature: f64,
            ) -> anyhow::Result<String> {
                Ok("fallback".into())
            }

            async fn chat(
                &self,
                request: ChatRequest<'_>,
                _model: &str,
                _temperature: f64,
            ) -> anyhow::Result<ChatResponse> {
                self.requests
                    .lock()
                    .unwrap()
                    .push(request.messages.to_vec());

                let mut scripted = self.responses.lock().unwrap();
                let reply = if scripted.is_empty() {
                    // Script exhausted: answer with a plain terminal message.
                    ChatResponse {
                        text: Some("done".into()),
                        tool_calls: vec![],
                        usage: None,
                        reasoning_content: None,
                    }
                } else {
                    scripted.remove(0)
                };
                Ok(reply)
            }
        }

        /// Memory created with the "none" backend.
        pub fn make_memory() -> Arc<dyn Memory> {
            let cfg = MemoryConfig {
                backend: "none".into(),
                ..MemoryConfig::default()
            };
            let memory = memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap();
            Arc::from(memory)
        }

        /// Observer that discards everything.
        pub fn make_observer() -> Arc<dyn Observer> {
            Arc::from(NoopObserver {})
        }
    }

    /// Two-step script: first a `file_read` tool call carrying `arguments`,
    /// then a final text answer of `reply`.
    fn tool_call_then_reply(arguments: &str, reply: &str) -> Vec<crate::providers::ChatResponse> {
        use crate::providers::{ChatResponse, ToolCall};

        vec![
            ChatResponse {
                text: Some(String::new()),
                tool_calls: vec![ToolCall {
                    id: "tc1".into(),
                    name: "file_read".into(),
                    arguments: arguments.into(),
                }],
                usage: None,
                reasoning_content: None,
            },
            ChatResponse {
                text: Some(reply.into()),
                tool_calls: vec![],
                usage: None,
                reasoning_content: None,
            },
        ]
    }

    #[tokio::test]
    async fn e2e_agent_file_read_pdf_extraction() {
        use crate::agent::agent::Agent;
        use crate::agent::dispatcher::NativeToolDispatcher;
        use crate::providers::Provider;
        use e2e_helpers::*;

        let workspace = fresh_dir("construct_test_e2e_file_read_pdf").await;

        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/test_document.pdf");
        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
            .await
            .expect("copy PDF fixture");

        let file_read_tool: Box<dyn Tool> =
            Box::new(FileReadTool::new(test_security(workspace.clone())));

        let (provider, recorded) = RecordingProvider::new(tool_call_then_reply(
            r#"{"path": "report.pdf"}"#,
            "The PDF contains a greeting: Hello PDF",
        ));

        let mut agent = Agent::builder()
            .provider(Box::new(provider) as Box<dyn Provider>)
            .tools(vec![file_read_tool])
            .memory(make_memory())
            .observer(make_observer())
            .tool_dispatcher(Box::new(NativeToolDispatcher))
            .workspace_dir(workspace.clone())
            .build()
            .unwrap();

        let response = agent
            .turn("Read report.pdf and tell me what it says")
            .await
            .unwrap();

        assert!(
            response.contains("Hello PDF"),
            "agent response must contain PDF content, got: {response}",
        );

        {
            // Scoped so the lock guard is released before the cleanup await.
            let all_requests = recorded.lock().unwrap();
            assert!(
                all_requests.len() >= 2,
                "expected at least 2 provider requests (initial + after tool), got {}",
                all_requests.len(),
            );

            let tool_result_msg = all_requests[1]
                .iter()
                .find(|m| m.role == "tool")
                .expect("second request must contain a tool result message");

            assert!(
                tool_result_msg.content.contains("Hello"),
                "tool result must contain extracted PDF text 'Hello', got: {}",
                tool_result_msg.content,
            );
        }

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn e2e_agent_file_read_lossy_binary() {
        use crate::agent::agent::Agent;
        use crate::agent::dispatcher::NativeToolDispatcher;
        use crate::providers::Provider;
        use e2e_helpers::*;

        let workspace = fresh_dir("construct_test_e2e_file_read_lossy").await;

        // Invalid UTF-8 surrounding the ASCII text "valid".
        let payload: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
        tokio::fs::write(workspace.join("data.bin"), &payload)
            .await
            .unwrap();

        let file_read_tool: Box<dyn Tool> =
            Box::new(FileReadTool::new(test_security(workspace.clone())));

        let (provider, recorded) = RecordingProvider::new(tool_call_then_reply(
            r#"{"path": "data.bin"}"#,
            "The file appears to be binary data.",
        ));

        let mut agent = Agent::builder()
            .provider(Box::new(provider) as Box<dyn Provider>)
            .tools(vec![file_read_tool])
            .memory(make_memory())
            .observer(make_observer())
            .tool_dispatcher(Box::new(NativeToolDispatcher))
            .workspace_dir(workspace.clone())
            .build()
            .unwrap();

        let response = agent.turn("Read data.bin").await.unwrap();

        assert!(
            response.contains("binary"),
            "agent response must mention binary, got: {response}",
        );

        {
            // Scoped so the lock guard is released before the cleanup await.
            let all_requests = recorded.lock().unwrap();
            assert!(
                all_requests.len() >= 2,
                "expected at least 2 provider requests, got {}",
                all_requests.len(),
            );

            let tool_result_msg = all_requests[1]
                .iter()
                .find(|m| m.role == "tool")
                .expect("second request must contain a tool result message");

            assert!(
                tool_result_msg.content.contains("valid"),
                "tool result must preserve valid ASCII from binary file, got: {}",
                tool_result_msg.content,
            );
            assert!(
                tool_result_msg.content.contains('\u{FFFD}'),
                "tool result must contain replacement character for invalid bytes, got: {}",
                tool_result_msg.content,
            );
        }

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    #[ignore = "requires valid OpenAI Codex OAuth credentials"]
    async fn e2e_live_file_read_pdf() {
        use crate::agent::agent::Agent;
        use crate::agent::dispatcher::XmlToolDispatcher;
        use crate::providers::openai_codex::OpenAiCodexProvider;
        use crate::providers::{Provider, ProviderRuntimeOptions};
        use e2e_helpers::*;

        let workspace = fresh_dir("construct_test_e2e_live_file_read_pdf").await;

        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/test_document.pdf");
        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
            .await
            .expect("copy PDF fixture");

        let file_read_tool: Box<dyn Tool> =
            Box::new(FileReadTool::new(test_security(workspace.clone())));

        let provider = OpenAiCodexProvider::new(&ProviderRuntimeOptions::default(), None)
            .expect("provider should initialize");

        let mut agent = Agent::builder()
            .provider(Box::new(provider) as Box<dyn Provider>)
            .tools(vec![file_read_tool])
            .memory(make_memory())
            .observer(make_observer())
            .tool_dispatcher(Box::new(XmlToolDispatcher))
            .workspace_dir(workspace.clone())
            .model_name("gpt-5.3-codex".to_string())
            .build()
            .unwrap();

        let response = agent
            .turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
            .await
            .unwrap();

        eprintln!("=== Live e2e response ===\n{response}\n=========================");

        // Compare case-insensitively; the live model's wording is not fixed.
        let lowered = response.to_lowercase();
        assert!(
            lowered.contains("hello"),
            "model response must reference extracted PDF text 'Hello PDF', got: {response}",
        );

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_blocks_null_byte_in_path() {
        let workspace = fresh_dir("construct_test_file_read_null_byte").await;

        let reader = FileReadTool::new(test_security(workspace.clone()));
        let outcome = reader
            .execute(json!({"path": "test\0evil.txt"}))
            .await
            .unwrap();
        assert!(!outcome.success);
        assert!(outcome.error.as_ref().unwrap().contains("not allowed"));

        remove_tree(&workspace).await;
    }

    #[tokio::test]
    async fn file_read_allowed_root_with_workspace_only() {
        let root = std::env::temp_dir().join("construct_test_file_read_allowed_root");
        let workspace = root.join("workspace");
        let allowed = root.join("allowed_dir");

        remove_tree(&root).await;
        tokio::fs::create_dir_all(&workspace).await.unwrap();
        tokio::fs::create_dir_all(&allowed).await.unwrap();
        tokio::fs::write(allowed.join("data.txt"), "allowed content")
            .await
            .unwrap();

        let policy = SecurityPolicy {
            autonomy: AutonomyLevel::Supervised,
            workspace_dir: workspace.clone(),
            workspace_only: true,
            allowed_roots: vec![allowed.clone()],
            ..SecurityPolicy::default()
        };
        let reader = FileReadTool::new(Arc::new(policy));

        // An absolute path under an allowed root is readable.
        let abs_path = allowed.join("data.txt").to_string_lossy().to_string();
        let permitted = reader.execute(json!({"path": &abs_path})).await.unwrap();

        assert!(
            permitted.success,
            "file_read with allowed_root path should succeed, error: {:?}",
            permitted.error
        );
        assert!(permitted.output.contains("allowed content"));

        // A sibling directory that is not an allowed root stays blocked.
        let outside = root.join("outside");
        tokio::fs::create_dir_all(&outside).await.unwrap();
        tokio::fs::write(outside.join("secret.txt"), "secret")
            .await
            .unwrap();
        let outside_path = outside.join("secret.txt").to_string_lossy().to_string();
        let denied = reader
            .execute(json!({"path": &outside_path}))
            .await
            .unwrap();
        assert!(!denied.success);

        remove_tree(&root).await;
    }
}