Skip to main content

agentzero_tools/
read_file.rs

1use agentzero_core::{Tool, ToolContext, ToolResult};
2use anyhow::{anyhow, Context};
3use async_trait::async_trait;
4use std::path::{Component, Path, PathBuf};
5use tokio::fs;
6use tokio::io::AsyncReadExt;
7
/// Default upper bound on the size of a file the tool will return: 256 KiB.
const DEFAULT_MAX_READ_BYTES: u64 = 1 << 18;
9
/// Configuration that constrains what the file-reading tool may return.
///
/// The policy is consumed by `ReadFileTool::new`, which copies each field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ReadFilePolicy {
    /// Directory that every resolved file path must be located under.
    pub allowed_root: PathBuf,
    /// Largest file size, in bytes, that may be read; bigger files are rejected.
    pub max_read_bytes: u64,
    /// When `false`, content that looks binary is rejected instead of returned.
    pub allow_binary: bool,
}
16
17impl ReadFilePolicy {
18    pub fn default_for_root(allowed_root: PathBuf) -> Self {
19        Self {
20            allowed_root,
21            max_read_bytes: DEFAULT_MAX_READ_BYTES,
22            allow_binary: false,
23        }
24    }
25}
26
/// Tool that reads a text file located under a configured root directory.
///
/// The fields mirror those of `ReadFilePolicy`; they are copied from the
/// policy when the tool is constructed.
#[derive(Debug)]
pub struct ReadFileTool {
    /// Directory that every resolved file path must be located under.
    allowed_root: PathBuf,
    /// Largest file size, in bytes, that may be read.
    max_read_bytes: u64,
    /// Whether content that looks binary may be returned.
    allow_binary: bool,
}
32
33impl ReadFileTool {
34    pub fn new(policy: ReadFilePolicy) -> Self {
35        Self {
36            allowed_root: policy.allowed_root,
37            max_read_bytes: policy.max_read_bytes,
38            allow_binary: policy.allow_binary,
39        }
40    }
41
42    fn resolve_safe(&self, input_path: &str, workspace_root: &str) -> anyhow::Result<PathBuf> {
43        if input_path.trim().is_empty() {
44            return Err(anyhow!("file path is required"));
45        }
46
47        let input = Path::new(input_path);
48        if input.is_absolute() {
49            return Err(anyhow!("absolute paths are not allowed"));
50        }
51        if input
52            .components()
53            .any(|c| matches!(c, Component::ParentDir))
54        {
55            return Err(anyhow!("path traversal is not allowed"));
56        }
57
58        let joined = Path::new(workspace_root).join(input_path);
59        let normalized = joined
60            .canonicalize()
61            .with_context(|| format!("unable to resolve file path: {input_path}"))?;
62        let canonical_allowed_root = self
63            .allowed_root
64            .canonicalize()
65            .context("unable to resolve allowed root")?;
66        if !normalized.starts_with(canonical_allowed_root) {
67            return Err(anyhow!("path is outside allowed root"));
68        }
69        Ok(normalized)
70    }
71
72    fn validate_file_policy(&self, raw: &[u8]) -> anyhow::Result<()> {
73        if raw.len() as u64 > self.max_read_bytes {
74            return Err(anyhow!(
75                "file is too large (max {} bytes)",
76                self.max_read_bytes
77            ));
78        }
79
80        if !self.allow_binary && Self::looks_binary(raw) {
81            return Err(anyhow!("binary files are not allowed"));
82        }
83
84        Ok(())
85    }
86
87    fn looks_binary(raw: &[u8]) -> bool {
88        if raw.is_empty() {
89            return false;
90        }
91        if raw.contains(&0) {
92            return true;
93        }
94        if std::str::from_utf8(raw).is_err() {
95            return true;
96        }
97
98        // Fallback heuristic for control-character-heavy payloads.
99        let control_count = raw
100            .iter()
101            .filter(|b| **b < 0x09 || (**b > 0x0D && **b < 0x20))
102            .count();
103        control_count * 10 > raw.len()
104    }
105
106    async fn read_limited(path: &Path, max_bytes: u64) -> anyhow::Result<Vec<u8>> {
107        let mut file = fs::File::open(path).await.context("failed to open file")?;
108        let mut bytes = Vec::new();
109        (&mut file)
110            .take(max_bytes + 1)
111            .read_to_end(&mut bytes)
112            .await
113            .context("failed to read file")?;
114        Ok(bytes)
115    }
116}
117
118#[async_trait]
119impl Tool for ReadFileTool {
120    fn name(&self) -> &'static str {
121        "read_file"
122    }
123
124    fn description(&self) -> &'static str {
125        "Read the contents of a file at the given path. Returns the file text or an error if the path is outside the workspace or the file is too large."
126    }
127
128    fn input_schema(&self) -> Option<serde_json::Value> {
129        Some(serde_json::json!({
130            "type": "object",
131            "properties": {
132                "path": {
133                    "type": "string",
134                    "description": "Relative path to the file to read"
135                }
136            },
137            "required": ["path"]
138        }))
139    }
140
141    async fn execute(&self, input: &str, ctx: &ToolContext) -> anyhow::Result<ToolResult> {
142        let safe_path = self.resolve_safe(input, &ctx.workspace_root)?;
143
144        // B7: Hard-link guard — refuse files with multiple hard links.
145        crate::autonomy::AutonomyPolicy::check_hard_links(&safe_path.to_string_lossy())?;
146
147        // B7: Sensitive file detection — block unless explicitly allowed.
148        if !ctx.allow_sensitive_file_reads
149            && crate::autonomy::is_sensitive_path(&safe_path.to_string_lossy())
150        {
151            return Err(anyhow!(
152                "refusing to read sensitive file: {}",
153                safe_path.display()
154            ));
155        }
156
157        let metadata = fs::metadata(&safe_path)
158            .await
159            .context("failed to read file metadata")?;
160        if metadata.len() > self.max_read_bytes {
161            return Err(anyhow!(
162                "file is too large (max {} bytes)",
163                self.max_read_bytes
164            ));
165        }
166
167        let raw = Self::read_limited(&safe_path, self.max_read_bytes).await?;
168        self.validate_file_policy(&raw)?;
169        let content = String::from_utf8(raw).context("only UTF-8 text files are supported")?;
170        Ok(ToolResult { output: content })
171    }
172}
173
#[cfg(test)]
mod tests {
    use super::{ReadFilePolicy, ReadFileTool};
    use agentzero_core::{Tool, ToolContext};
    use std::fmt::{Debug, Display};
    use std::fs;
    use std::path::{Path, PathBuf};
    use std::sync::atomic::{AtomicU64, Ordering};
    use std::time::{SystemTime, UNIX_EPOCH};

    /// Per-process sequence number that keeps temp directory names unique.
    static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);

    /// Creates a uniquely named directory under the system temp directory.
    fn temp_dir() -> PathBuf {
        let since_epoch = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .expect("clock should be after unix epoch");
        let nanos = since_epoch.as_nanos();
        let seq = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
        let name = format!("agentzero-tools-{}-{nanos}-{seq}", std::process::id());
        let dir = std::env::temp_dir().join(name);
        fs::create_dir_all(&dir).expect("temp dir should be created");
        dir
    }

    /// Tool using the default policy rooted at `root`.
    fn default_tool(root: &Path) -> ReadFileTool {
        ReadFileTool::new(ReadFilePolicy::default_for_root(root.to_path_buf()))
    }

    /// Tool context whose workspace root is `root`.
    fn context_for(root: &Path) -> ToolContext {
        ToolContext::new(root.to_string_lossy().to_string())
    }

    /// Asserts that `outcome` is an error whose message contains `needle`.
    #[track_caller]
    fn assert_rejected<T: Debug, E: Display>(outcome: Result<T, E>, why: &str, needle: &str) {
        assert!(outcome.is_err());
        assert!(outcome.expect_err(why).to_string().contains(needle));
    }

    #[tokio::test]
    async fn read_file_allows_text_within_root() {
        let root = temp_dir();
        fs::write(root.join("note.txt"), "hello").expect("test file should be written");

        let tool = default_tool(&root);
        let ctx = context_for(&root);
        let result = tool
            .execute("note.txt", &ctx)
            .await
            .expect("read_file should succeed");

        assert_eq!(result.output, "hello");
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    #[tokio::test]
    async fn read_file_rejects_binary_content() {
        let root = temp_dir();
        fs::write(root.join("blob.bin"), [0_u8, 159, 146, 150])
            .expect("binary test file should be written");

        let tool = default_tool(&root);
        let ctx = context_for(&root);
        let outcome = tool.execute("blob.bin", &ctx).await;

        assert_rejected(
            outcome,
            "binary should be rejected",
            "binary files are not allowed",
        );
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    #[tokio::test]
    async fn read_file_rejects_path_traversal_outside_allowlist() {
        let root = temp_dir();
        let sibling = temp_dir();
        fs::write(sibling.join("outside.txt"), "nope")
            .expect("outside test file should be written");

        let tool = default_tool(&root);
        let ctx = context_for(&root);
        let outcome = tool.execute("../outside.txt", &ctx).await;

        assert_rejected(
            outcome,
            "traversal should be denied",
            "path traversal is not allowed",
        );
        fs::remove_dir_all(root).expect("temp dir should be removed");
        fs::remove_dir_all(sibling).expect("sibling temp dir should be removed");
    }

    #[tokio::test]
    async fn read_file_rejects_oversized_file() {
        let root = temp_dir();
        fs::write(root.join("large.txt"), "x".repeat(32)).expect("large file should be written");

        // Cap far below the 32 bytes written above.
        let policy = ReadFilePolicy {
            allowed_root: root.clone(),
            max_read_bytes: 8,
            allow_binary: false,
        };
        let tool = ReadFileTool::new(policy);
        let ctx = context_for(&root);
        let outcome = tool.execute("large.txt", &ctx).await;

        assert_rejected(
            outcome,
            "oversized file should be rejected",
            "file is too large",
        );
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    #[tokio::test]
    async fn read_file_rejects_invalid_utf8_as_binary_fallback() {
        let root = temp_dir();
        fs::write(root.join("non_utf8.bin"), [0xFF_u8, 0xFE, 0xFD, 0xFC])
            .expect("binary bytes should be written");

        let tool = default_tool(&root);
        let ctx = context_for(&root);
        let outcome = tool.execute("non_utf8.bin", &ctx).await;

        assert_rejected(
            outcome,
            "invalid utf8 should be treated as binary",
            "binary files are not allowed",
        );
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    // B7: Hard-link guard tests

    #[cfg(unix)]
    #[tokio::test]
    async fn read_file_rejects_hard_linked_file() {
        let root = temp_dir();
        let original = root.join("original.txt");
        fs::write(&original, "secret").expect("original file should be written");
        fs::hard_link(&original, root.join("hardlink.txt")).expect("hard link should be created");

        let tool = default_tool(&root);
        let ctx = context_for(&root);
        let outcome = tool.execute("hardlink.txt", &ctx).await;

        assert_rejected(outcome, "hard-linked file should be rejected", "hard link");
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    // B7: Sensitive file detection tests

    #[tokio::test]
    async fn read_file_blocks_sensitive_path() {
        let root = temp_dir();
        fs::write(root.join(".env"), "SECRET_KEY=abc123")
            .expect("sensitive file should be written");

        let tool = default_tool(&root);
        let ctx = context_for(&root);
        let outcome = tool.execute(".env", &ctx).await;

        assert_rejected(
            outcome,
            "sensitive file should be blocked",
            "refusing to read sensitive file",
        );
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    #[tokio::test]
    async fn read_file_allows_sensitive_path_when_configured() {
        let root = temp_dir();
        fs::write(root.join(".env"), "SECRET_KEY=abc123")
            .expect("sensitive file should be written");

        let tool = default_tool(&root);
        let mut ctx = context_for(&root);
        ctx.allow_sensitive_file_reads = true;
        let result = tool
            .execute(".env", &ctx)
            .await
            .expect("sensitive file should be allowed when configured");

        assert!(result.output.contains("SECRET_KEY=abc123"));
        fs::remove_dir_all(root).expect("temp dir should be removed");
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn read_file_rejects_symlink_pointing_outside_allowed_root() {
        let allowed_root = temp_dir();
        let outside_root = temp_dir();
        let outside_file = outside_root.join("outside.txt");
        fs::write(&outside_file, "outside").expect("outside file should be written");

        // The link lives inside the allowed root but targets a file outside it.
        std::os::unix::fs::symlink(&outside_file, allowed_root.join("link.txt"))
            .expect("symlink should be created");

        let tool = default_tool(&allowed_root);
        let ctx = context_for(&allowed_root);
        let outcome = tool.execute("link.txt", &ctx).await;

        assert_rejected(
            outcome,
            "outside symlink should be denied",
            "path is outside allowed root",
        );

        fs::remove_dir_all(allowed_root).expect("allowed root temp dir should be removed");
        fs::remove_dir_all(outside_root).expect("outside root temp dir should be removed");
    }
}