spec_ai_core/tools/builtin/
file_read.rs

1use crate::tools::{Tool, ToolResult};
2use anyhow::{anyhow, Context, Result};
3use async_trait::async_trait;
4use base64::{engine::general_purpose, Engine as _};
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::fs;
9use std::io::{BufRead, BufReader};
10use std::path::PathBuf;
11
/// Byte cap applied when a tool is built with `FileReadTool::new` (1 MiB).
const DEFAULT_MAX_BYTES: usize = 1024 * 1024;
13
14#[derive(Debug, Clone, Copy, Deserialize)]
15#[serde(rename_all = "snake_case")]
16#[derive(Default)]
17enum FileReadFormat {
18    #[default]
19    Text,
20    Base64,
21}
22
23#[derive(Debug, Deserialize)]
24struct FileReadArgs {
25    path: String,
26    #[serde(default)]
27    include_metadata: bool,
28    #[serde(default)]
29    format: FileReadFormat,
30    max_bytes: Option<usize>,
31    /// Read only the first N lines
32    head: Option<usize>,
33    /// Read only the last N lines
34    tail: Option<usize>,
35    /// Skip the first N lines (used with limit)
36    offset: Option<usize>,
37    /// Read at most N lines (used with offset)
38    limit: Option<usize>,
39}
40
41#[derive(Debug, Serialize)]
42struct FileMetadata {
43    size_bytes: u64,
44    modified: Option<String>,
45    created: Option<String>,
46}
47
48#[derive(Debug, Serialize)]
49struct FileReadOutput {
50    path: String,
51    encoding: &'static str,
52    bytes: usize,
53    content: String,
54    metadata: Option<FileMetadata>,
55}
56
/// Tool for safely reading files from disk.
///
/// `max_bytes` is the upper bound on how many bytes a single call may
/// return; a per-call request can lower it but never raise it.
#[derive(Debug, Clone)]
pub struct FileReadTool {
    /// Hard upper bound, in bytes, for a single read.
    max_bytes: usize,
}
61
62impl FileReadTool {
63    pub fn new() -> Self {
64        Self {
65            max_bytes: DEFAULT_MAX_BYTES,
66        }
67    }
68
69    pub fn with_max_bytes(mut self, max_bytes: usize) -> Self {
70        self.max_bytes = max_bytes;
71        self
72    }
73
74    fn ensure_within_limit(&self, requested: Option<usize>) -> usize {
75        requested
76            .map(|req| req.min(self.max_bytes))
77            .unwrap_or(self.max_bytes)
78    }
79
80    fn normalize_path(&self, input: &str) -> Result<PathBuf> {
81        if input.trim().is_empty() {
82            return Err(anyhow!("file_read requires a valid path"));
83        }
84
85        Ok(PathBuf::from(input))
86    }
87
88    fn serialize_metadata(metadata: &fs::Metadata) -> FileMetadata {
89        let modified = metadata.modified().ok().map(|time| {
90            let datetime: DateTime<Utc> = time.into();
91            datetime.to_rfc3339()
92        });
93        let created = metadata.created().ok().map(|time| {
94            let datetime: DateTime<Utc> = time.into();
95            datetime.to_rfc3339()
96        });
97
98        FileMetadata {
99            size_bytes: metadata.len(),
100            modified,
101            created,
102        }
103    }
104}
105
106impl Default for FileReadTool {
107    fn default() -> Self {
108        Self::new()
109    }
110}
111
112#[async_trait]
113impl Tool for FileReadTool {
114    fn name(&self) -> &str {
115        "file_read"
116    }
117
118    fn description(&self) -> &str {
119        "Reads files from disk with optional metadata and size limits"
120    }
121
122    fn parameters(&self) -> Value {
123        serde_json::json!({
124            "type": "object",
125            "properties": {
126                "path": {
127                    "type": "string",
128                    "description": "Relative or absolute file path to read"
129                },
130                "include_metadata": {
131                    "type": "boolean",
132                    "description": "Return file metadata (size, timestamps)",
133                    "default": false
134                },
135                "format": {
136                    "type": "string",
137                    "enum": ["text", "base64"],
138                    "description": "Return format for file contents",
139                    "default": "text"
140                },
141                "max_bytes": {
142                    "type": "integer",
143                    "description": "Override default read limit (bytes)",
144                    "minimum": 1
145                },
146                "head": {
147                    "type": "integer",
148                    "description": "Read only the first N lines (text format only)",
149                    "minimum": 1
150                },
151                "tail": {
152                    "type": "integer",
153                    "description": "Read only the last N lines (text format only)",
154                    "minimum": 1
155                },
156                "offset": {
157                    "type": "integer",
158                    "description": "Skip the first N lines (text format only, use with limit)",
159                    "minimum": 0
160                },
161                "limit": {
162                    "type": "integer",
163                    "description": "Read at most N lines (text format only, use with offset)",
164                    "minimum": 1
165                }
166            },
167            "required": ["path"]
168        })
169    }
170
171    async fn execute(&self, args: Value) -> Result<ToolResult> {
172        let args: FileReadArgs =
173            serde_json::from_value(args).context("Failed to parse file_read arguments")?;
174
175        let path = self.normalize_path(&args.path)?;
176        let file_metadata =
177            fs::metadata(&path).with_context(|| format!("File not found: {}", path.display()))?;
178
179        if !file_metadata.is_file() {
180            return Ok(ToolResult::failure(format!(
181                "{} is not a regular file",
182                path.display()
183            )));
184        }
185
186        // Check if line-based operations are requested
187        let use_line_mode = args.head.is_some()
188            || args.tail.is_some()
189            || args.offset.is_some()
190            || args.limit.is_some();
191
192        // Validate that line-based operations are only used with text format
193        if use_line_mode && !matches!(args.format, FileReadFormat::Text) {
194            return Ok(ToolResult::failure(
195                "Line-based operations (head, tail, offset, limit) are only supported with text format".to_string()
196            ));
197        }
198
199        // For line-based operations, we can bypass the byte limit check
200        // as we'll only read specific lines
201        let limit = self.ensure_within_limit(args.max_bytes);
202
203        if !use_line_mode && file_metadata.len() as usize > limit {
204            // Estimate lines for better error message
205            let estimated_lines = (file_metadata.len() / 80).max(1); // Assume ~80 chars per line
206            return Ok(ToolResult::failure(format!(
207                "File exceeds maximum allowed size of {} bytes (file is {} bytes). \
208                 Consider using line-based reading:\n\
209                 - Use 'head: N' to read first N lines\n\
210                 - Use 'tail: N' to read last N lines\n\
211                 - Use 'offset: M' with 'limit: N' to read N lines starting from line M\n\
212                 Estimated lines in file: ~{}",
213                limit,
214                file_metadata.len(),
215                estimated_lines
216            )));
217        }
218
219        let (encoding, content, actual_bytes) = if use_line_mode {
220            // Handle line-based reading
221            let file = fs::File::open(&path)
222                .with_context(|| format!("Failed to open file {}", path.display()))?;
223            let reader = BufReader::new(file);
224
225            let processed_content = if let Some(n) = args.head {
226                // Read first N lines
227                reader
228                    .lines()
229                    .take(n)
230                    .collect::<Result<Vec<_>, _>>()
231                    .context("Failed to read lines")?
232                    .join("\n")
233            } else if let Some(n) = args.tail {
234                // Read last N lines
235                let all_lines: Vec<String> = reader
236                    .lines()
237                    .collect::<Result<Vec<_>, _>>()
238                    .context("Failed to read lines")?;
239                let start = all_lines.len().saturating_sub(n);
240                all_lines[start..].join("\n")
241            } else {
242                // Handle offset and limit
243                let offset = args.offset.unwrap_or(0);
244                let limit = args.limit.unwrap_or(usize::MAX);
245
246                reader
247                    .lines()
248                    .skip(offset)
249                    .take(limit)
250                    .collect::<Result<Vec<_>, _>>()
251                    .context("Failed to read lines")?
252                    .join("\n")
253            };
254
255            let bytes = processed_content.as_bytes().len();
256            ("utf-8", processed_content, bytes)
257        } else {
258            // Read entire file (existing behavior)
259            let bytes = fs::read(&path)
260                .with_context(|| format!("Failed to read file {}", path.display()))?;
261            let actual_bytes = bytes.len();
262
263            match args.format {
264                FileReadFormat::Text => {
265                    let text = String::from_utf8_lossy(&bytes).to_string();
266                    ("utf-8", text, actual_bytes)
267                }
268                FileReadFormat::Base64 => (
269                    "base64",
270                    general_purpose::STANDARD.encode(&bytes),
271                    actual_bytes,
272                ),
273            }
274        };
275
276        let metadata = if args.include_metadata {
277            Some(Self::serialize_metadata(&file_metadata))
278        } else {
279            None
280        };
281
282        let output = FileReadOutput {
283            path: path.to_string_lossy().into_owned(),
284            encoding,
285            bytes: actual_bytes,
286            content,
287            metadata,
288        };
289
290        Ok(ToolResult::success(
291            serde_json::to_string(&output).context("Failed to serialize file_read output")?,
292        ))
293    }
294}
295
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temp file holding `line1` through `line5`, one per line.
    fn five_line_file() -> NamedTempFile {
        let mut tmp = NamedTempFile::new().unwrap();
        for index in 1..=5 {
            writeln!(tmp, "line{}", index).unwrap();
        }
        tmp
    }

    /// Executes a default-configured tool with `args`.
    async fn run(args: serde_json::Value) -> ToolResult {
        FileReadTool::new().execute(args).await.unwrap()
    }

    /// Parses the JSON payload of a successful result.
    fn payload(result: &ToolResult) -> serde_json::Value {
        serde_json::from_str(&result.output).unwrap()
    }

    #[tokio::test]
    async fn test_file_read_text() {
        let mut tmp = NamedTempFile::new().unwrap();
        writeln!(tmp, "hello world").unwrap();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "include_metadata": true
        }))
        .await;

        assert!(result.success);
        let value = payload(&result);
        assert_eq!(value["encoding"], "utf-8");
        assert!(value["metadata"]["size_bytes"].is_number());
    }

    #[tokio::test]
    async fn test_file_read_binary_base64() {
        let tmp = NamedTempFile::new().unwrap();
        fs::write(tmp.path(), vec![0, 159, 146, 150]).unwrap();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "format": "base64"
        }))
        .await;

        assert!(result.success);
        let value = payload(&result);
        assert_eq!(value["encoding"], "base64");
        assert!(!value["content"].as_str().unwrap().is_empty());
    }

    #[tokio::test]
    async fn test_file_read_too_large() {
        let tmp = NamedTempFile::new().unwrap();
        fs::write(tmp.path(), vec![1; DEFAULT_MAX_BYTES + 1]).unwrap();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy()
        }))
        .await;

        assert!(!result.success);
        assert!(result.error.is_some());
        let message = result.error.unwrap();
        assert!(message.contains("Consider using line-based reading"));
    }

    #[tokio::test]
    async fn test_file_read_head() {
        let tmp = five_line_file();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "head": 3
        }))
        .await;

        assert!(result.success);
        let value = payload(&result);
        assert_eq!(value["content"].as_str().unwrap(), "line1\nline2\nline3");
    }

    #[tokio::test]
    async fn test_file_read_tail() {
        let tmp = five_line_file();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "tail": 2
        }))
        .await;

        assert!(result.success);
        let value = payload(&result);
        assert_eq!(value["content"].as_str().unwrap(), "line4\nline5");
    }

    #[tokio::test]
    async fn test_file_read_offset_limit() {
        let tmp = five_line_file();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "offset": 1,
            "limit": 3
        }))
        .await;

        assert!(result.success);
        let value = payload(&result);
        assert_eq!(value["content"].as_str().unwrap(), "line2\nline3\nline4");
    }

    #[tokio::test]
    async fn test_file_read_line_mode_with_base64_fails() {
        let mut tmp = NamedTempFile::new().unwrap();
        writeln!(tmp, "test").unwrap();

        let result = run(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "format": "base64",
            "head": 10
        }))
        .await;

        assert!(!result.success);
        assert!(result.error.is_some());
        let message = result.error.unwrap();
        assert!(message.contains("only supported with text format"));
    }
}