construct/tools/
file_read.rs

1use super::traits::{Tool, ToolResult};
2use crate::security::SecurityPolicy;
3use async_trait::async_trait;
4use serde_json::json;
5use std::sync::Arc;
6
7const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024;
8
9/// Read file contents with path sandboxing
10pub struct FileReadTool {
11    security: Arc<SecurityPolicy>,
12}
13
14impl FileReadTool {
15    pub fn new(security: Arc<SecurityPolicy>) -> Self {
16        Self { security }
17    }
18}
19
20#[async_trait]
21impl Tool for FileReadTool {
22    fn name(&self) -> &str {
23        "file_read"
24    }
25
26    fn description(&self) -> &str {
27        "Read file contents with line numbers. Supports partial reading via offset and limit. Extracts text from PDF; other binary files are read with lossy UTF-8 conversion."
28    }
29
30    fn parameters_schema(&self) -> serde_json::Value {
31        json!({
32            "type": "object",
33            "properties": {
34                "path": {
35                    "type": "string",
36                    "description": "Path to the file. Relative paths resolve from workspace; outside paths require policy allowlist."
37                },
38                "offset": {
39                    "type": "integer",
40                    "description": "Starting line number (1-based, default: 1)"
41                },
42                "limit": {
43                    "type": "integer",
44                    "description": "Maximum number of lines to return (default: all)"
45                }
46            },
47            "required": ["path"]
48        })
49    }
50
51    async fn execute(&self, args: serde_json::Value) -> anyhow::Result<ToolResult> {
52        let path = args
53            .get("path")
54            .and_then(|v| v.as_str())
55            .ok_or_else(|| anyhow::anyhow!("Missing 'path' parameter"))?;
56
57        if self.security.is_rate_limited() {
58            return Ok(ToolResult {
59                success: false,
60                output: String::new(),
61                error: Some("Rate limit exceeded: too many actions in the last hour".into()),
62            });
63        }
64
65        // Security check: validate path is within workspace
66        if !self.security.is_path_allowed(path) {
67            return Ok(ToolResult {
68                success: false,
69                output: String::new(),
70                error: Some(format!("Path not allowed by security policy: {path}")),
71            });
72        }
73
74        // Record action BEFORE canonicalization so that every non-trivially-rejected
75        // request consumes rate limit budget. This prevents attackers from probing
76        // path existence (via canonicalize errors) without rate limit cost.
77        if !self.security.record_action() {
78            return Ok(ToolResult {
79                success: false,
80                output: String::new(),
81                error: Some("Rate limit exceeded: action budget exhausted".into()),
82            });
83        }
84
85        let full_path = self.security.resolve_tool_path(path);
86
87        // Resolve path before reading to block symlink escapes.
88        let resolved_path = match tokio::fs::canonicalize(&full_path).await {
89            Ok(p) => p,
90            Err(e) => {
91                return Ok(ToolResult {
92                    success: false,
93                    output: String::new(),
94                    error: Some(format!("Failed to resolve file path: {e}")),
95                });
96            }
97        };
98
99        if !self.security.is_resolved_path_allowed(&resolved_path) {
100            return Ok(ToolResult {
101                success: false,
102                output: String::new(),
103                error: Some(
104                    self.security
105                        .resolved_path_violation_message(&resolved_path),
106                ),
107            });
108        }
109
110        // Check file size AFTER canonicalization to prevent TOCTOU symlink bypass
111        match tokio::fs::metadata(&resolved_path).await {
112            Ok(meta) => {
113                if meta.len() > MAX_FILE_SIZE_BYTES {
114                    return Ok(ToolResult {
115                        success: false,
116                        output: String::new(),
117                        error: Some(format!(
118                            "File too large: {} bytes (limit: {MAX_FILE_SIZE_BYTES} bytes)",
119                            meta.len()
120                        )),
121                    });
122                }
123            }
124            Err(e) => {
125                return Ok(ToolResult {
126                    success: false,
127                    output: String::new(),
128                    error: Some(format!("Failed to read file metadata: {e}")),
129                });
130            }
131        }
132
133        match tokio::fs::read_to_string(&resolved_path).await {
134            Ok(contents) => {
135                let lines: Vec<&str> = contents.lines().collect();
136                let total = lines.len();
137
138                if total == 0 {
139                    return Ok(ToolResult {
140                        success: true,
141                        output: String::new(),
142                        error: None,
143                    });
144                }
145
146                let offset = args
147                    .get("offset")
148                    .and_then(|v| v.as_u64())
149                    .map(|v| {
150                        usize::try_from(v.max(1))
151                            .unwrap_or(usize::MAX)
152                            .saturating_sub(1)
153                    })
154                    .unwrap_or(0);
155                let start = offset.min(total);
156
157                let end = match args.get("limit").and_then(|v| v.as_u64()) {
158                    Some(l) => {
159                        let limit = usize::try_from(l).unwrap_or(usize::MAX);
160                        (start.saturating_add(limit)).min(total)
161                    }
162                    None => total,
163                };
164
165                if start >= end {
166                    return Ok(ToolResult {
167                        success: true,
168                        output: format!("[No lines in range, file has {total} lines]"),
169                        error: None,
170                    });
171                }
172
173                let numbered: String = lines[start..end]
174                    .iter()
175                    .enumerate()
176                    .map(|(i, line)| format!("{}: {}", start + i + 1, line))
177                    .collect::<Vec<_>>()
178                    .join("\n");
179
180                let partial = start > 0 || end < total;
181                let summary = if partial {
182                    format!("\n[Lines {}-{} of {total}]", start + 1, end)
183                } else {
184                    format!("\n[{total} lines total]")
185                };
186
187                Ok(ToolResult {
188                    success: true,
189                    output: format!("{numbered}{summary}"),
190                    error: None,
191                })
192            }
193            Err(_) => {
194                // Not valid UTF-8 — read raw bytes and try to extract text
195                let bytes = tokio::fs::read(&resolved_path)
196                    .await
197                    .map_err(|e| anyhow::anyhow!("Failed to read file: {e}"))?;
198
199                if let Some(text) = try_extract_pdf_text(&bytes) {
200                    return Ok(ToolResult {
201                        success: true,
202                        output: text,
203                        error: None,
204                    });
205                }
206
207                // Lossy fallback — replaces invalid bytes with U+FFFD
208                let lossy = String::from_utf8_lossy(&bytes).into_owned();
209                Ok(ToolResult {
210                    success: true,
211                    output: lossy,
212                    error: None,
213                })
214            }
215        }
216    }
217}
218
/// Extracts text from `bytes` when they start with the `%PDF-` marker.
///
/// Returns `None` when the marker is absent, when extraction fails, or when
/// the extracted text contains only whitespace.
#[cfg(feature = "rag-pdf")]
fn try_extract_pdf_text(bytes: &[u8]) -> Option<String> {
    if !bytes.starts_with(b"%PDF-") {
        return None;
    }
    pdf_extract::extract_text_from_mem(bytes)
        .ok()
        .filter(|text| !text.trim().is_empty())
}
230
/// Stand-in used when the `rag-pdf` feature is disabled: never extracts
/// anything, so callers always take their fallback path.
#[cfg(not(feature = "rag-pdf"))]
fn try_extract_pdf_text(_bytes: &[u8]) -> Option<String> {
    Option::None
}
235
236#[cfg(test)]
237mod tests {
238    use super::*;
239    use crate::security::{AutonomyLevel, SecurityPolicy};
240
241    fn test_security(workspace: std::path::PathBuf) -> Arc<SecurityPolicy> {
242        Arc::new(SecurityPolicy {
243            autonomy: AutonomyLevel::Supervised,
244            workspace_dir: workspace,
245            ..SecurityPolicy::default()
246        })
247    }
248
249    fn test_security_with(
250        workspace: std::path::PathBuf,
251        autonomy: AutonomyLevel,
252        max_actions_per_hour: u32,
253    ) -> Arc<SecurityPolicy> {
254        Arc::new(SecurityPolicy {
255            autonomy,
256            workspace_dir: workspace,
257            max_actions_per_hour,
258            ..SecurityPolicy::default()
259        })
260    }
261
262    #[test]
263    fn file_read_name() {
264        let tool = FileReadTool::new(test_security(std::env::temp_dir()));
265        assert_eq!(tool.name(), "file_read");
266    }
267
268    #[test]
269    fn file_read_schema_has_path() {
270        let tool = FileReadTool::new(test_security(std::env::temp_dir()));
271        let schema = tool.parameters_schema();
272        assert!(schema["properties"]["path"].is_object());
273        assert!(schema["properties"]["offset"].is_object());
274        assert!(schema["properties"]["limit"].is_object());
275        assert!(
276            schema["required"]
277                .as_array()
278                .unwrap()
279                .contains(&json!("path"))
280        );
281        // offset and limit are optional
282        assert!(
283            !schema["required"]
284                .as_array()
285                .unwrap()
286                .contains(&json!("offset"))
287        );
288    }
289
290    #[tokio::test]
291    async fn file_read_existing_file() {
292        let dir = std::env::temp_dir().join("construct_test_file_read");
293        let _ = tokio::fs::remove_dir_all(&dir).await;
294        tokio::fs::create_dir_all(&dir).await.unwrap();
295        tokio::fs::write(dir.join("test.txt"), "hello world")
296            .await
297            .unwrap();
298
299        let tool = FileReadTool::new(test_security(dir.clone()));
300        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
301        assert!(result.success);
302        assert!(result.output.contains("1: hello world"));
303        assert!(result.output.contains("[1 lines total]"));
304        assert!(result.error.is_none());
305
306        let _ = tokio::fs::remove_dir_all(&dir).await;
307    }
308
309    #[tokio::test]
310    async fn file_read_nonexistent_file() {
311        let dir = std::env::temp_dir().join("construct_test_file_read_missing");
312        let _ = tokio::fs::remove_dir_all(&dir).await;
313        tokio::fs::create_dir_all(&dir).await.unwrap();
314
315        let tool = FileReadTool::new(test_security(dir.clone()));
316        let result = tool.execute(json!({"path": "nope.txt"})).await.unwrap();
317        assert!(!result.success);
318        assert!(result.error.as_ref().unwrap().contains("Failed to resolve"));
319
320        let _ = tokio::fs::remove_dir_all(&dir).await;
321    }
322
323    #[tokio::test]
324    async fn file_read_blocks_path_traversal() {
325        let dir = std::env::temp_dir().join("construct_test_file_read_traversal");
326        let _ = tokio::fs::remove_dir_all(&dir).await;
327        tokio::fs::create_dir_all(&dir).await.unwrap();
328
329        let tool = FileReadTool::new(test_security(dir.clone()));
330        let result = tool
331            .execute(json!({"path": "../../../etc/passwd"}))
332            .await
333            .unwrap();
334        assert!(!result.success);
335        assert!(result.error.as_ref().unwrap().contains("not allowed"));
336
337        let _ = tokio::fs::remove_dir_all(&dir).await;
338    }
339
340    #[tokio::test]
341    async fn file_read_blocks_absolute_path() {
342        let tool = FileReadTool::new(test_security(std::env::temp_dir()));
343        let result = tool.execute(json!({"path": "/etc/passwd"})).await.unwrap();
344        assert!(!result.success);
345        assert!(result.error.as_ref().unwrap().contains("not allowed"));
346    }
347
348    #[tokio::test]
349    async fn file_read_blocks_when_rate_limited() {
350        let dir = std::env::temp_dir().join("construct_test_file_read_rate_limited");
351        let _ = tokio::fs::remove_dir_all(&dir).await;
352        tokio::fs::create_dir_all(&dir).await.unwrap();
353        tokio::fs::write(dir.join("test.txt"), "hello world")
354            .await
355            .unwrap();
356
357        let tool = FileReadTool::new(test_security_with(
358            dir.clone(),
359            AutonomyLevel::Supervised,
360            0,
361        ));
362        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
363
364        assert!(!result.success);
365        assert!(
366            result
367                .error
368                .as_deref()
369                .unwrap_or("")
370                .contains("Rate limit exceeded")
371        );
372
373        let _ = tokio::fs::remove_dir_all(&dir).await;
374    }
375
376    #[tokio::test]
377    async fn file_read_allows_readonly_mode() {
378        let dir = std::env::temp_dir().join("construct_test_file_read_readonly");
379        let _ = tokio::fs::remove_dir_all(&dir).await;
380        tokio::fs::create_dir_all(&dir).await.unwrap();
381        tokio::fs::write(dir.join("test.txt"), "readonly ok")
382            .await
383            .unwrap();
384
385        let tool = FileReadTool::new(test_security_with(dir.clone(), AutonomyLevel::ReadOnly, 20));
386        let result = tool.execute(json!({"path": "test.txt"})).await.unwrap();
387
388        assert!(result.success);
389        assert!(result.output.contains("1: readonly ok"));
390
391        let _ = tokio::fs::remove_dir_all(&dir).await;
392    }
393
394    #[tokio::test]
395    async fn file_read_missing_path_param() {
396        let tool = FileReadTool::new(test_security(std::env::temp_dir()));
397        let result = tool.execute(json!({})).await;
398        assert!(result.is_err());
399    }
400
401    #[tokio::test]
402    async fn file_read_empty_file() {
403        let dir = std::env::temp_dir().join("construct_test_file_read_empty");
404        let _ = tokio::fs::remove_dir_all(&dir).await;
405        tokio::fs::create_dir_all(&dir).await.unwrap();
406        tokio::fs::write(dir.join("empty.txt"), "").await.unwrap();
407
408        let tool = FileReadTool::new(test_security(dir.clone()));
409        let result = tool.execute(json!({"path": "empty.txt"})).await.unwrap();
410        assert!(result.success);
411        assert_eq!(result.output, "");
412
413        let _ = tokio::fs::remove_dir_all(&dir).await;
414    }
415
416    #[tokio::test]
417    async fn file_read_nested_path() {
418        let dir = std::env::temp_dir().join("construct_test_file_read_nested");
419        let _ = tokio::fs::remove_dir_all(&dir).await;
420        tokio::fs::create_dir_all(dir.join("sub/dir"))
421            .await
422            .unwrap();
423        tokio::fs::write(dir.join("sub/dir/deep.txt"), "deep content")
424            .await
425            .unwrap();
426
427        let tool = FileReadTool::new(test_security(dir.clone()));
428        let result = tool
429            .execute(json!({"path": "sub/dir/deep.txt"}))
430            .await
431            .unwrap();
432        assert!(result.success);
433        assert!(result.output.contains("1: deep content"));
434
435        let _ = tokio::fs::remove_dir_all(&dir).await;
436    }
437
438    #[cfg(unix)]
439    #[tokio::test]
440    async fn file_read_blocks_symlink_escape() {
441        use std::os::unix::fs::symlink;
442
443        let root = std::env::temp_dir().join("construct_test_file_read_symlink_escape");
444        let workspace = root.join("workspace");
445        let outside = root.join("outside");
446
447        let _ = tokio::fs::remove_dir_all(&root).await;
448        tokio::fs::create_dir_all(&workspace).await.unwrap();
449        tokio::fs::create_dir_all(&outside).await.unwrap();
450
451        tokio::fs::write(outside.join("secret.txt"), "outside workspace")
452            .await
453            .unwrap();
454
455        symlink(outside.join("secret.txt"), workspace.join("escape.txt")).unwrap();
456
457        let tool = FileReadTool::new(test_security(workspace.clone()));
458        let result = tool.execute(json!({"path": "escape.txt"})).await.unwrap();
459
460        assert!(!result.success);
461        assert!(
462            result
463                .error
464                .as_deref()
465                .unwrap_or("")
466                .contains("escapes workspace")
467        );
468
469        let _ = tokio::fs::remove_dir_all(&root).await;
470    }
471
472    #[tokio::test]
473    async fn file_read_outside_workspace_allowed_when_workspace_only_disabled() {
474        let root = std::env::temp_dir().join("construct_test_file_read_allowed_roots_hint");
475        let workspace = root.join("workspace");
476        let outside = root.join("outside");
477        let outside_file = outside.join("notes.txt");
478
479        let _ = tokio::fs::remove_dir_all(&root).await;
480        tokio::fs::create_dir_all(&workspace).await.unwrap();
481        tokio::fs::create_dir_all(&outside).await.unwrap();
482        tokio::fs::write(&outside_file, "outside").await.unwrap();
483
484        let security = Arc::new(SecurityPolicy {
485            autonomy: AutonomyLevel::Supervised,
486            workspace_dir: workspace,
487            workspace_only: false,
488            forbidden_paths: vec![],
489            ..SecurityPolicy::default()
490        });
491        let tool = FileReadTool::new(security);
492
493        let result = tool
494            .execute(json!({"path": outside_file.to_string_lossy().to_string()}))
495            .await
496            .unwrap();
497
498        assert!(result.success);
499        assert!(result.error.is_none());
500        assert!(result.output.contains("outside"));
501
502        let _ = tokio::fs::remove_dir_all(&root).await;
503    }
504
505    #[tokio::test]
506    async fn file_read_nonexistent_consumes_rate_limit_budget() {
507        let dir = std::env::temp_dir().join("construct_test_file_read_probe");
508        let _ = tokio::fs::remove_dir_all(&dir).await;
509        tokio::fs::create_dir_all(&dir).await.unwrap();
510
511        // Allow only 2 actions total
512        let tool = FileReadTool::new(test_security_with(
513            dir.clone(),
514            AutonomyLevel::Supervised,
515            2,
516        ));
517
518        // Both reads fail (file doesn't exist) but should consume budget
519        let r1 = tool.execute(json!({"path": "nope1.txt"})).await.unwrap();
520        assert!(!r1.success);
521        assert!(r1.error.as_ref().unwrap().contains("Failed to resolve"));
522
523        let r2 = tool.execute(json!({"path": "nope2.txt"})).await.unwrap();
524        assert!(!r2.success);
525        assert!(r2.error.as_ref().unwrap().contains("Failed to resolve"));
526
527        // Third attempt should be rate limited even though file doesn't exist
528        let r3 = tool.execute(json!({"path": "nope3.txt"})).await.unwrap();
529        assert!(!r3.success);
530        assert!(
531            r3.error.as_ref().unwrap().contains("Rate limit"),
532            "Expected rate limit error, got: {:?}",
533            r3.error
534        );
535
536        let _ = tokio::fs::remove_dir_all(&dir).await;
537    }
538
539    #[tokio::test]
540    async fn file_read_with_offset_and_limit() {
541        let dir = std::env::temp_dir().join("construct_test_file_read_offset");
542        let _ = tokio::fs::remove_dir_all(&dir).await;
543        tokio::fs::create_dir_all(&dir).await.unwrap();
544        tokio::fs::write(dir.join("lines.txt"), "aaa\nbbb\nccc\nddd\neee")
545            .await
546            .unwrap();
547
548        let tool = FileReadTool::new(test_security(dir.clone()));
549
550        // Read lines 2-3
551        let result = tool
552            .execute(json!({"path": "lines.txt", "offset": 2, "limit": 2}))
553            .await
554            .unwrap();
555        assert!(result.success);
556        assert!(result.output.contains("2: bbb"));
557        assert!(result.output.contains("3: ccc"));
558        assert!(!result.output.contains("1: aaa"));
559        assert!(!result.output.contains("4: ddd"));
560        assert!(result.output.contains("[Lines 2-3 of 5]"));
561
562        // Read from offset 4 to end
563        let result = tool
564            .execute(json!({"path": "lines.txt", "offset": 4}))
565            .await
566            .unwrap();
567        assert!(result.success);
568        assert!(result.output.contains("4: ddd"));
569        assert!(result.output.contains("5: eee"));
570        assert!(result.output.contains("[Lines 4-5 of 5]"));
571
572        // Limit only (first 2 lines)
573        let result = tool
574            .execute(json!({"path": "lines.txt", "limit": 2}))
575            .await
576            .unwrap();
577        assert!(result.success);
578        assert!(result.output.contains("1: aaa"));
579        assert!(result.output.contains("2: bbb"));
580        assert!(!result.output.contains("3: ccc"));
581        assert!(result.output.contains("[Lines 1-2 of 5]"));
582
583        // Full read (no offset/limit) shows all lines
584        let result = tool.execute(json!({"path": "lines.txt"})).await.unwrap();
585        assert!(result.success);
586        assert!(result.output.contains("1: aaa"));
587        assert!(result.output.contains("5: eee"));
588        assert!(result.output.contains("[5 lines total]"));
589
590        let _ = tokio::fs::remove_dir_all(&dir).await;
591    }
592
593    #[tokio::test]
594    async fn file_read_offset_beyond_end() {
595        let dir = std::env::temp_dir().join("construct_test_file_read_offset_end");
596        let _ = tokio::fs::remove_dir_all(&dir).await;
597        tokio::fs::create_dir_all(&dir).await.unwrap();
598        tokio::fs::write(dir.join("short.txt"), "one\ntwo")
599            .await
600            .unwrap();
601
602        let tool = FileReadTool::new(test_security(dir.clone()));
603        let result = tool
604            .execute(json!({"path": "short.txt", "offset": 100}))
605            .await
606            .unwrap();
607        assert!(result.success);
608        assert!(
609            result
610                .output
611                .contains("[No lines in range, file has 2 lines]")
612        );
613
614        let _ = tokio::fs::remove_dir_all(&dir).await;
615    }
616
617    #[tokio::test]
618    async fn file_read_rejects_oversized_file() {
619        let dir = std::env::temp_dir().join("construct_test_file_read_large");
620        let _ = tokio::fs::remove_dir_all(&dir).await;
621        tokio::fs::create_dir_all(&dir).await.unwrap();
622
623        // Create a file just over 10 MB
624        let big = vec![b'x'; 10 * 1024 * 1024 + 1];
625        tokio::fs::write(dir.join("huge.bin"), &big).await.unwrap();
626
627        let tool = FileReadTool::new(test_security(dir.clone()));
628        let result = tool.execute(json!({"path": "huge.bin"})).await.unwrap();
629        assert!(!result.success);
630        assert!(result.error.as_ref().unwrap().contains("File too large"));
631
632        let _ = tokio::fs::remove_dir_all(&dir).await;
633    }
634
635    /// PDF files should be readable via pdf-extract text extraction.
636    #[tokio::test]
637    async fn file_read_extracts_pdf_text() {
638        let dir = std::env::temp_dir().join("construct_test_file_read_pdf");
639        let _ = tokio::fs::remove_dir_all(&dir).await;
640        tokio::fs::create_dir_all(&dir).await.unwrap();
641
642        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
643            .join("tests/fixtures/test_document.pdf");
644        tokio::fs::copy(&fixture, dir.join("report.pdf"))
645            .await
646            .expect("copy PDF fixture");
647
648        let tool = FileReadTool::new(test_security(dir.clone()));
649        let result = tool.execute(json!({"path": "report.pdf"})).await.unwrap();
650
651        assert!(
652            result.success,
653            "PDF read must succeed, error: {:?}",
654            result.error
655        );
656        assert!(
657            result.output.contains("Hello"),
658            "extracted text must contain 'Hello', got: {}",
659            result.output
660        );
661
662        let _ = tokio::fs::remove_dir_all(&dir).await;
663    }
664
665    /// Non-UTF-8 binary files should be read with lossy conversion.
666    #[tokio::test]
667    async fn file_read_lossy_reads_binary_file() {
668        let dir = std::env::temp_dir().join("construct_test_file_read_lossy");
669        let _ = tokio::fs::remove_dir_all(&dir).await;
670        tokio::fs::create_dir_all(&dir).await.unwrap();
671
672        // Write bytes that are not valid UTF-8 and not a PDF
673        let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'h', b'i', 0x80];
674        tokio::fs::write(dir.join("data.bin"), &binary_data)
675            .await
676            .unwrap();
677
678        let tool = FileReadTool::new(test_security(dir.clone()));
679        let result = tool.execute(json!({"path": "data.bin"})).await.unwrap();
680
681        assert!(
682            result.success,
683            "lossy read must succeed, error: {:?}",
684            result.error
685        );
686        assert!(
687            result.output.contains('\u{FFFD}'),
688            "lossy output must contain replacement character, got: {:?}",
689            result.output
690        );
691        assert!(
692            result.output.contains("hi"),
693            "lossy output must preserve valid ASCII, got: {:?}",
694            result.output
695        );
696
697        let _ = tokio::fs::remove_dir_all(&dir).await;
698    }
699
700    // ── E2E: full agent pipeline with real FileReadTool + PDF extraction ──
701
702    mod e2e_helpers {
703        use crate::config::MemoryConfig;
704        use crate::memory::{self, Memory};
705        use crate::observability::{NoopObserver, Observer};
706        use crate::providers::{ChatMessage, ChatRequest, ChatResponse, Provider};
707        use std::sync::{Arc, Mutex};
708
709        pub type SharedRequests = Arc<Mutex<Vec<Vec<ChatMessage>>>>;
710
711        pub struct RecordingProvider {
712            responses: Mutex<Vec<ChatResponse>>,
713            pub requests: SharedRequests,
714        }
715
716        impl RecordingProvider {
717            pub fn new(responses: Vec<ChatResponse>) -> (Self, SharedRequests) {
718                let requests: SharedRequests = Arc::new(Mutex::new(Vec::new()));
719                let provider = Self {
720                    responses: Mutex::new(responses),
721                    requests: requests.clone(),
722                };
723                (provider, requests)
724            }
725        }
726
727        #[async_trait::async_trait]
728        impl Provider for RecordingProvider {
729            async fn chat_with_system(
730                &self,
731                _system_prompt: Option<&str>,
732                _message: &str,
733                _model: &str,
734                _temperature: f64,
735            ) -> anyhow::Result<String> {
736                Ok("fallback".into())
737            }
738
739            async fn chat(
740                &self,
741                request: ChatRequest<'_>,
742                _model: &str,
743                _temperature: f64,
744            ) -> anyhow::Result<ChatResponse> {
745                self.requests
746                    .lock()
747                    .unwrap()
748                    .push(request.messages.to_vec());
749
750                let mut guard = self.responses.lock().unwrap();
751                if guard.is_empty() {
752                    return Ok(ChatResponse {
753                        text: Some("done".into()),
754                        tool_calls: vec![],
755                        usage: None,
756                        reasoning_content: None,
757                    });
758                }
759                Ok(guard.remove(0))
760            }
761        }
762
763        pub fn make_memory() -> Arc<dyn Memory> {
764            let cfg = MemoryConfig {
765                backend: "none".into(),
766                ..MemoryConfig::default()
767            };
768            Arc::from(memory::create_memory(&cfg, &std::env::temp_dir(), None).unwrap())
769        }
770
771        pub fn make_observer() -> Arc<dyn Observer> {
772            Arc::from(NoopObserver {})
773        }
774    }
775
776    /// End-to-end test: scripted provider calls `file_read` on a real PDF
777    /// fixture, the tool extracts text via pdf-extract, and the extracted
778    /// content reaches the provider in the tool result message.
779    #[tokio::test]
780    async fn e2e_agent_file_read_pdf_extraction() {
781        use crate::agent::agent::Agent;
782        use crate::agent::dispatcher::NativeToolDispatcher;
783        use crate::providers::{ChatResponse, Provider, ToolCall};
784        use e2e_helpers::*;
785
786        // ── Set up workspace with PDF fixture ──
787        let workspace = std::env::temp_dir().join("construct_test_e2e_file_read_pdf");
788        let _ = tokio::fs::remove_dir_all(&workspace).await;
789        tokio::fs::create_dir_all(&workspace).await.unwrap();
790
791        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
792            .join("tests/fixtures/test_document.pdf");
793        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
794            .await
795            .expect("copy PDF fixture");
796
797        // ── Build real FileReadTool ──
798        let security = Arc::new(SecurityPolicy {
799            autonomy: AutonomyLevel::Supervised,
800            workspace_dir: workspace.clone(),
801            ..SecurityPolicy::default()
802        });
803        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
804
805        // ── Script provider: call file_read → then answer ──
806        let (provider, recorded) = RecordingProvider::new(vec![
807            // Turn 1 response: provider asks to read the PDF
808            ChatResponse {
809                text: Some(String::new()),
810                tool_calls: vec![ToolCall {
811                    id: "tc1".into(),
812                    name: "file_read".into(),
813                    arguments: r#"{"path": "report.pdf"}"#.into(),
814                }],
815                usage: None,
816                reasoning_content: None,
817            },
818            // Turn 1 continued: provider sees tool result and answers
819            ChatResponse {
820                text: Some("The PDF contains a greeting: Hello PDF".into()),
821                tool_calls: vec![],
822                usage: None,
823                reasoning_content: None,
824            },
825        ]);
826
827        let mut agent = Agent::builder()
828            .provider(Box::new(provider) as Box<dyn Provider>)
829            .tools(vec![file_read_tool])
830            .memory(make_memory())
831            .observer(make_observer())
832            .tool_dispatcher(Box::new(NativeToolDispatcher))
833            .workspace_dir(workspace.clone())
834            .build()
835            .unwrap();
836
837        // ── Execute ──
838        let response = agent
839            .turn("Read report.pdf and tell me what it says")
840            .await
841            .unwrap();
842
843        // ── Verify final response ──
844        assert!(
845            response.contains("Hello PDF"),
846            "agent response must contain PDF content, got: {response}",
847        );
848
849        // ── Verify provider received extracted PDF text in tool result ──
850        {
851            let all_requests = recorded.lock().unwrap();
852            assert!(
853                all_requests.len() >= 2,
854                "expected at least 2 provider requests (initial + after tool), got {}",
855                all_requests.len(),
856            );
857
858            let second_request = &all_requests[1];
859            let tool_result_msg = second_request
860                .iter()
861                .find(|m| m.role == "tool")
862                .expect("second request must contain a tool result message");
863
864            assert!(
865                tool_result_msg.content.contains("Hello"),
866                "tool result must contain extracted PDF text 'Hello', got: {}",
867                tool_result_msg.content,
868            );
869        }
870
871        let _ = tokio::fs::remove_dir_all(&workspace).await;
872    }
873
874    /// End-to-end test: agent calls `file_read` on a binary file, gets
875    /// lossy UTF-8 output with replacement characters in the tool result.
876    #[tokio::test]
877    async fn e2e_agent_file_read_lossy_binary() {
878        use crate::agent::agent::Agent;
879        use crate::agent::dispatcher::NativeToolDispatcher;
880        use crate::providers::{ChatResponse, Provider, ToolCall};
881        use e2e_helpers::*;
882
883        // ── Set up workspace with binary file ──
884        let workspace = std::env::temp_dir().join("construct_test_e2e_file_read_lossy");
885        let _ = tokio::fs::remove_dir_all(&workspace).await;
886        tokio::fs::create_dir_all(&workspace).await.unwrap();
887
888        let binary_data: Vec<u8> = vec![0x00, 0x80, 0xFF, 0xFE, b'v', b'a', b'l', b'i', b'd', 0x80];
889        tokio::fs::write(workspace.join("data.bin"), &binary_data)
890            .await
891            .unwrap();
892
893        let security = Arc::new(SecurityPolicy {
894            autonomy: AutonomyLevel::Supervised,
895            workspace_dir: workspace.clone(),
896            ..SecurityPolicy::default()
897        });
898        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
899
900        let (provider, recorded) = RecordingProvider::new(vec![
901            ChatResponse {
902                text: Some(String::new()),
903                tool_calls: vec![ToolCall {
904                    id: "tc1".into(),
905                    name: "file_read".into(),
906                    arguments: r#"{"path": "data.bin"}"#.into(),
907                }],
908                usage: None,
909                reasoning_content: None,
910            },
911            ChatResponse {
912                text: Some("The file appears to be binary data.".into()),
913                tool_calls: vec![],
914                usage: None,
915                reasoning_content: None,
916            },
917        ]);
918
919        let mut agent = Agent::builder()
920            .provider(Box::new(provider) as Box<dyn Provider>)
921            .tools(vec![file_read_tool])
922            .memory(make_memory())
923            .observer(make_observer())
924            .tool_dispatcher(Box::new(NativeToolDispatcher))
925            .workspace_dir(workspace.clone())
926            .build()
927            .unwrap();
928
929        let response = agent.turn("Read data.bin").await.unwrap();
930
931        assert!(
932            response.contains("binary"),
933            "agent response must mention binary, got: {response}",
934        );
935
936        // Verify tool result contains lossy output with replacement chars
937        {
938            let all_requests = recorded.lock().unwrap();
939            assert!(
940                all_requests.len() >= 2,
941                "expected at least 2 provider requests, got {}",
942                all_requests.len(),
943            );
944
945            let tool_result_msg = all_requests[1]
946                .iter()
947                .find(|m| m.role == "tool")
948                .expect("second request must contain a tool result message");
949
950            assert!(
951                tool_result_msg.content.contains("valid"),
952                "tool result must preserve valid ASCII from binary file, got: {}",
953                tool_result_msg.content,
954            );
955            assert!(
956                tool_result_msg.content.contains('\u{FFFD}'),
957                "tool result must contain replacement character for invalid bytes, got: {}",
958                tool_result_msg.content,
959            );
960        }
961
962        let _ = tokio::fs::remove_dir_all(&workspace).await;
963    }
964
965    /// Live e2e: real OpenAI Codex provider + real FileReadTool + PDF fixture.
966    /// Verifies the model receives extracted PDF text and responds meaningfully.
967    ///
968    /// Requires valid OAuth credentials in `~/.construct/`.
969    /// Run: `cargo test --lib -- tools::file_read::tests::e2e_live_file_read_pdf --ignored --nocapture`
970    #[tokio::test]
971    #[ignore = "requires valid OpenAI Codex OAuth credentials"]
972    async fn e2e_live_file_read_pdf() {
973        use crate::agent::agent::Agent;
974        use crate::agent::dispatcher::XmlToolDispatcher;
975        use crate::providers::openai_codex::OpenAiCodexProvider;
976        use crate::providers::{Provider, ProviderRuntimeOptions};
977        use e2e_helpers::*;
978
979        // ── Set up workspace with PDF fixture ──
980        let workspace = std::env::temp_dir().join("construct_test_e2e_live_file_read_pdf");
981        let _ = tokio::fs::remove_dir_all(&workspace).await;
982        tokio::fs::create_dir_all(&workspace).await.unwrap();
983
984        let fixture = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
985            .join("tests/fixtures/test_document.pdf");
986        tokio::fs::copy(&fixture, workspace.join("report.pdf"))
987            .await
988            .expect("copy PDF fixture");
989
990        // ── Build real FileReadTool ──
991        let security = Arc::new(SecurityPolicy {
992            autonomy: AutonomyLevel::Supervised,
993            workspace_dir: workspace.clone(),
994            ..SecurityPolicy::default()
995        });
996        let file_read_tool: Box<dyn Tool> = Box::new(FileReadTool::new(security));
997
998        // ── Real provider (OpenAI Codex uses XML tool dispatch) ──
999        let provider = OpenAiCodexProvider::new(&ProviderRuntimeOptions::default(), None)
1000            .expect("provider should initialize");
1001
1002        let mut agent = Agent::builder()
1003            .provider(Box::new(provider) as Box<dyn Provider>)
1004            .tools(vec![file_read_tool])
1005            .memory(make_memory())
1006            .observer(make_observer())
1007            .tool_dispatcher(Box::new(XmlToolDispatcher))
1008            .workspace_dir(workspace.clone())
1009            .model_name("gpt-5.3-codex".to_string())
1010            .build()
1011            .unwrap();
1012
1013        // ── Execute ──
1014        let response = agent
1015            .turn("Use the file_read tool to read report.pdf, then tell me what text it contains. Be concise.")
1016            .await
1017            .unwrap();
1018
1019        eprintln!("=== Live e2e response ===\n{response}\n=========================");
1020
1021        // ── Verify model saw the actual PDF content ("Hello PDF") ──
1022        let lower = response.to_lowercase();
1023        assert!(
1024            lower.contains("hello"),
1025            "model response must reference extracted PDF text 'Hello PDF', got: {response}",
1026        );
1027
1028        let _ = tokio::fs::remove_dir_all(&workspace).await;
1029    }
1030
1031    #[tokio::test]
1032    async fn file_read_blocks_null_byte_in_path() {
1033        let dir = std::env::temp_dir().join("construct_test_file_read_null_byte");
1034        let _ = tokio::fs::remove_dir_all(&dir).await;
1035        tokio::fs::create_dir_all(&dir).await.unwrap();
1036
1037        let tool = FileReadTool::new(test_security(dir.clone()));
1038        let result = tool
1039            .execute(json!({"path": "test\0evil.txt"}))
1040            .await
1041            .unwrap();
1042        assert!(!result.success);
1043        assert!(result.error.as_ref().unwrap().contains("not allowed"));
1044
1045        let _ = tokio::fs::remove_dir_all(&dir).await;
1046    }
1047
1048    #[tokio::test]
1049    async fn file_read_allowed_root_with_workspace_only() {
1050        let root = std::env::temp_dir().join("construct_test_file_read_allowed_root");
1051        let workspace = root.join("workspace");
1052        let allowed = root.join("allowed_dir");
1053
1054        let _ = tokio::fs::remove_dir_all(&root).await;
1055        tokio::fs::create_dir_all(&workspace).await.unwrap();
1056        tokio::fs::create_dir_all(&allowed).await.unwrap();
1057        tokio::fs::write(allowed.join("data.txt"), "allowed content")
1058            .await
1059            .unwrap();
1060
1061        let security = Arc::new(SecurityPolicy {
1062            autonomy: AutonomyLevel::Supervised,
1063            workspace_dir: workspace.clone(),
1064            workspace_only: true,
1065            allowed_roots: vec![allowed.clone()],
1066            ..SecurityPolicy::default()
1067        });
1068        let tool = FileReadTool::new(security);
1069
1070        // Absolute path under allowed_root should succeed
1071        let abs_path = allowed.join("data.txt").to_string_lossy().to_string();
1072        let result = tool.execute(json!({"path": &abs_path})).await.unwrap();
1073
1074        assert!(
1075            result.success,
1076            "file_read with allowed_root path should succeed, error: {:?}",
1077            result.error
1078        );
1079        assert!(result.output.contains("allowed content"));
1080
1081        // Path outside both workspace and allowed_roots should still fail
1082        let outside = root.join("outside");
1083        tokio::fs::create_dir_all(&outside).await.unwrap();
1084        tokio::fs::write(outside.join("secret.txt"), "secret")
1085            .await
1086            .unwrap();
1087        let outside_path = outside.join("secret.txt").to_string_lossy().to_string();
1088        let result = tool.execute(json!({"path": &outside_path})).await.unwrap();
1089        assert!(!result.success);
1090
1091        let _ = tokio::fs::remove_dir_all(&root).await;
1092    }
1093}
construct/tools/file_read.rs

construct/tools/
file_read.rs