spec_ai_core/tools/builtin/
file_read.rs1use crate::tools::{Tool, ToolResult};
2use anyhow::{anyhow, Context, Result};
3use async_trait::async_trait;
4use base64::{engine::general_purpose, Engine as _};
5use chrono::{DateTime, Utc};
6use serde::{Deserialize, Serialize};
7use serde_json::Value;
8use std::fs;
9use std::io::{BufRead, BufReader};
10use std::path::PathBuf;
11
12const DEFAULT_MAX_BYTES: usize = 1_048_576; #[derive(Debug, Clone, Copy, Deserialize)]
15#[serde(rename_all = "snake_case")]
16#[derive(Default)]
17enum FileReadFormat {
18 #[default]
19 Text,
20 Base64,
21}
22
23#[derive(Debug, Deserialize)]
24struct FileReadArgs {
25 path: String,
26 #[serde(default)]
27 include_metadata: bool,
28 #[serde(default)]
29 format: FileReadFormat,
30 max_bytes: Option<usize>,
31 head: Option<usize>,
33 tail: Option<usize>,
35 offset: Option<usize>,
37 limit: Option<usize>,
39}
40
41#[derive(Debug, Serialize)]
42struct FileMetadata {
43 size_bytes: u64,
44 modified: Option<String>,
45 created: Option<String>,
46}
47
48#[derive(Debug, Serialize)]
49struct FileReadOutput {
50 path: String,
51 encoding: &'static str,
52 bytes: usize,
53 content: String,
54 metadata: Option<FileMetadata>,
55}
56
/// Tool that reads a file from disk, either whole (subject to a byte limit)
/// or as a selection of lines.
pub struct FileReadTool {
    /// Largest number of bytes a whole-file read may return.
    max_bytes: usize,
}
61
62impl FileReadTool {
63 pub fn new() -> Self {
64 Self {
65 max_bytes: DEFAULT_MAX_BYTES,
66 }
67 }
68
69 pub fn with_max_bytes(mut self, max_bytes: usize) -> Self {
70 self.max_bytes = max_bytes;
71 self
72 }
73
74 fn ensure_within_limit(&self, requested: Option<usize>) -> usize {
75 requested
76 .map(|req| req.min(self.max_bytes))
77 .unwrap_or(self.max_bytes)
78 }
79
80 fn normalize_path(&self, input: &str) -> Result<PathBuf> {
81 if input.trim().is_empty() {
82 return Err(anyhow!("file_read requires a valid path"));
83 }
84
85 Ok(PathBuf::from(input))
86 }
87
88 fn serialize_metadata(metadata: &fs::Metadata) -> FileMetadata {
89 let modified = metadata.modified().ok().map(|time| {
90 let datetime: DateTime<Utc> = time.into();
91 datetime.to_rfc3339()
92 });
93 let created = metadata.created().ok().map(|time| {
94 let datetime: DateTime<Utc> = time.into();
95 datetime.to_rfc3339()
96 });
97
98 FileMetadata {
99 size_bytes: metadata.len(),
100 modified,
101 created,
102 }
103 }
104}
105
106impl Default for FileReadTool {
107 fn default() -> Self {
108 Self::new()
109 }
110}
111
112#[async_trait]
113impl Tool for FileReadTool {
114 fn name(&self) -> &str {
115 "file_read"
116 }
117
118 fn description(&self) -> &str {
119 "Reads files from disk with optional metadata and size limits"
120 }
121
122 fn parameters(&self) -> Value {
123 serde_json::json!({
124 "type": "object",
125 "properties": {
126 "path": {
127 "type": "string",
128 "description": "Relative or absolute file path to read"
129 },
130 "include_metadata": {
131 "type": "boolean",
132 "description": "Return file metadata (size, timestamps)",
133 "default": false
134 },
135 "format": {
136 "type": "string",
137 "enum": ["text", "base64"],
138 "description": "Return format for file contents",
139 "default": "text"
140 },
141 "max_bytes": {
142 "type": "integer",
143 "description": "Override default read limit (bytes)",
144 "minimum": 1
145 },
146 "head": {
147 "type": "integer",
148 "description": "Read only the first N lines (text format only)",
149 "minimum": 1
150 },
151 "tail": {
152 "type": "integer",
153 "description": "Read only the last N lines (text format only)",
154 "minimum": 1
155 },
156 "offset": {
157 "type": "integer",
158 "description": "Skip the first N lines (text format only, use with limit)",
159 "minimum": 0
160 },
161 "limit": {
162 "type": "integer",
163 "description": "Read at most N lines (text format only, use with offset)",
164 "minimum": 1
165 }
166 },
167 "required": ["path"]
168 })
169 }
170
171 async fn execute(&self, args: Value) -> Result<ToolResult> {
172 let args: FileReadArgs =
173 serde_json::from_value(args).context("Failed to parse file_read arguments")?;
174
175 let path = self.normalize_path(&args.path)?;
176 let file_metadata =
177 fs::metadata(&path).with_context(|| format!("File not found: {}", path.display()))?;
178
179 if !file_metadata.is_file() {
180 return Ok(ToolResult::failure(format!(
181 "{} is not a regular file",
182 path.display()
183 )));
184 }
185
186 let use_line_mode = args.head.is_some()
188 || args.tail.is_some()
189 || args.offset.is_some()
190 || args.limit.is_some();
191
192 if use_line_mode && !matches!(args.format, FileReadFormat::Text) {
194 return Ok(ToolResult::failure(
195 "Line-based operations (head, tail, offset, limit) are only supported with text format".to_string()
196 ));
197 }
198
199 let limit = self.ensure_within_limit(args.max_bytes);
202
203 if !use_line_mode && file_metadata.len() as usize > limit {
204 let estimated_lines = (file_metadata.len() / 80).max(1); return Ok(ToolResult::failure(format!(
207 "File exceeds maximum allowed size of {} bytes (file is {} bytes). \
208 Consider using line-based reading:\n\
209 - Use 'head: N' to read first N lines\n\
210 - Use 'tail: N' to read last N lines\n\
211 - Use 'offset: M' with 'limit: N' to read N lines starting from line M\n\
212 Estimated lines in file: ~{}",
213 limit,
214 file_metadata.len(),
215 estimated_lines
216 )));
217 }
218
219 let (encoding, content, actual_bytes) = if use_line_mode {
220 let file = fs::File::open(&path)
222 .with_context(|| format!("Failed to open file {}", path.display()))?;
223 let reader = BufReader::new(file);
224
225 let processed_content = if let Some(n) = args.head {
226 reader
228 .lines()
229 .take(n)
230 .collect::<Result<Vec<_>, _>>()
231 .context("Failed to read lines")?
232 .join("\n")
233 } else if let Some(n) = args.tail {
234 let all_lines: Vec<String> = reader
236 .lines()
237 .collect::<Result<Vec<_>, _>>()
238 .context("Failed to read lines")?;
239 let start = all_lines.len().saturating_sub(n);
240 all_lines[start..].join("\n")
241 } else {
242 let offset = args.offset.unwrap_or(0);
244 let limit = args.limit.unwrap_or(usize::MAX);
245
246 reader
247 .lines()
248 .skip(offset)
249 .take(limit)
250 .collect::<Result<Vec<_>, _>>()
251 .context("Failed to read lines")?
252 .join("\n")
253 };
254
255 let bytes = processed_content.as_bytes().len();
256 ("utf-8", processed_content, bytes)
257 } else {
258 let bytes = fs::read(&path)
260 .with_context(|| format!("Failed to read file {}", path.display()))?;
261 let actual_bytes = bytes.len();
262
263 match args.format {
264 FileReadFormat::Text => {
265 let text = String::from_utf8_lossy(&bytes).to_string();
266 ("utf-8", text, actual_bytes)
267 }
268 FileReadFormat::Base64 => (
269 "base64",
270 general_purpose::STANDARD.encode(&bytes),
271 actual_bytes,
272 ),
273 }
274 };
275
276 let metadata = if args.include_metadata {
277 Some(Self::serialize_metadata(&file_metadata))
278 } else {
279 None
280 };
281
282 let output = FileReadOutput {
283 path: path.to_string_lossy().into_owned(),
284 encoding,
285 bytes: actual_bytes,
286 content,
287 metadata,
288 };
289
290 Ok(ToolResult::success(
291 serde_json::to_string(&output).context("Failed to serialize file_read output")?,
292 ))
293 }
294}
295
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temp file containing `line1` through `line5`, one per line.
    fn five_line_file() -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        for index in 1..=5 {
            writeln!(file, "line{}", index).unwrap();
        }
        file
    }

    /// Runs a default-configured tool with `args`, panicking on a hard error.
    async fn run_tool(args: serde_json::Value) -> ToolResult {
        FileReadTool::new().execute(args).await.unwrap()
    }

    /// Asserts the call succeeded and parses its JSON output.
    fn parse_success(result: &ToolResult) -> serde_json::Value {
        assert!(result.success);
        serde_json::from_str(&result.output).unwrap()
    }

    /// Asserts the call failed with an error message and returns that message.
    fn failure_message(result: ToolResult) -> String {
        assert!(!result.success);
        assert!(result.error.is_some());
        result.error.unwrap()
    }

    #[tokio::test]
    async fn test_file_read_text() {
        let mut tmp = NamedTempFile::new().unwrap();
        writeln!(tmp, "hello world").unwrap();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "include_metadata": true
        }))
        .await;

        let value = parse_success(&result);
        assert_eq!(value["encoding"], "utf-8");
        assert!(value["metadata"]["size_bytes"].is_number());
    }

    #[tokio::test]
    async fn test_file_read_binary_base64() {
        let tmp = NamedTempFile::new().unwrap();
        let payload: Vec<u8> = vec![0, 159, 146, 150];
        fs::write(tmp.path(), payload).unwrap();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "format": "base64"
        }))
        .await;

        let value = parse_success(&result);
        assert_eq!(value["encoding"], "base64");
        assert!(!value["content"].as_str().unwrap().is_empty());
    }

    #[tokio::test]
    async fn test_file_read_too_large() {
        let tmp = NamedTempFile::new().unwrap();
        // One byte over the default limit forces the size guard to trip.
        fs::write(tmp.path(), vec![1u8; DEFAULT_MAX_BYTES + 1]).unwrap();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy()
        }))
        .await;

        let message = failure_message(result);
        assert!(message.contains("Consider using line-based reading"));
    }

    #[tokio::test]
    async fn test_file_read_head() {
        let tmp = five_line_file();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "head": 3
        }))
        .await;

        let value = parse_success(&result);
        assert_eq!(value["content"].as_str().unwrap(), "line1\nline2\nline3");
    }

    #[tokio::test]
    async fn test_file_read_tail() {
        let tmp = five_line_file();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "tail": 2
        }))
        .await;

        let value = parse_success(&result);
        assert_eq!(value["content"].as_str().unwrap(), "line4\nline5");
    }

    #[tokio::test]
    async fn test_file_read_offset_limit() {
        let tmp = five_line_file();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "offset": 1,
            "limit": 3
        }))
        .await;

        let value = parse_success(&result);
        assert_eq!(value["content"].as_str().unwrap(), "line2\nline3\nline4");
    }

    #[tokio::test]
    async fn test_file_read_line_mode_with_base64_fails() {
        let mut tmp = NamedTempFile::new().unwrap();
        writeln!(tmp, "test").unwrap();

        let result = run_tool(serde_json::json!({
            "path": tmp.path().to_string_lossy(),
            "format": "base64",
            "head": 10
        }))
        .await;

        let message = failure_message(result);
        assert!(message.contains("only supported with text format"));
    }
}