Skip to main content

astrid_tools/
read_file.rs

//! Read file tool — reads a text file and returns its contents with `cat -n` style line numbers.
2
3use std::fmt::Write;
4
5use crate::{BuiltinTool, ToolContext, ToolError, ToolResult};
6use serde_json::Value;
7
/// Maximum number of lines returned when the caller does not supply `limit`.
const DEFAULT_LINE_LIMIT: usize = 2000;
/// Maximum length of a single output line, in bytes, before it is truncated.
///
/// The comparison is made against `str::len()`, so multi-byte characters count
/// once per byte, not once per character.
const MAX_LINE_LENGTH: usize = 2000;
12
/// Built-in tool for reading files.
///
/// This is a stateless unit struct; all behavior lives in its `BuiltinTool`
/// implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct ReadFileTool;
15
16#[async_trait::async_trait]
17impl BuiltinTool for ReadFileTool {
18    fn name(&self) -> &'static str {
19        "read_file"
20    }
21
22    fn description(&self) -> &'static str {
23        "Reads a file from the filesystem. Returns contents with line numbers (cat -n format). \
24         Default reads up to 2000 lines. Use offset and limit for large files. \
25         Lines longer than 2000 characters are truncated."
26    }
27
28    fn input_schema(&self) -> Value {
29        serde_json::json!({
30            "type": "object",
31            "properties": {
32                "file_path": {
33                    "type": "string",
34                    "description": "Absolute path to the file to read"
35                },
36                "offset": {
37                    "type": "integer",
38                    "description": "Line number to start reading from (1-based). Only provide for large files."
39                },
40                "limit": {
41                    "type": "integer",
42                    "description": "Number of lines to read. Only provide for large files."
43                }
44            },
45            "required": ["file_path"]
46        })
47    }
48
49    async fn execute(&self, args: Value, _ctx: &ToolContext) -> ToolResult {
50        let file_path = args
51            .get("file_path")
52            .and_then(Value::as_str)
53            .ok_or_else(|| ToolError::InvalidArguments("file_path is required".into()))?;
54
55        let offset = args
56            .get("offset")
57            .and_then(Value::as_u64)
58            .map(|v| usize::try_from(v).unwrap_or(usize::MAX));
59
60        let limit = args
61            .get("limit")
62            .and_then(Value::as_u64)
63            .map_or(DEFAULT_LINE_LIMIT, |v| {
64                usize::try_from(v).unwrap_or(usize::MAX)
65            });
66
67        let path = std::path::Path::new(file_path);
68        if !path.is_absolute() {
69            return Err(ToolError::InvalidArguments(
70                "file_path must be an absolute path".into(),
71            ));
72        }
73        if !path.exists() {
74            return Err(ToolError::PathNotFound(file_path.to_string()));
75        }
76
77        // Binary detection: read first 8KB and check for null bytes
78        let raw = tokio::fs::read(path).await?;
79        let check_len = raw.len().min(8192);
80        if raw[..check_len].contains(&0) {
81            return Err(ToolError::ExecutionFailed(format!(
82                "{file_path} appears to be a binary file"
83            )));
84        }
85
86        let content = String::from_utf8(raw)
87            .map_err(|_| ToolError::ExecutionFailed(format!("{file_path} is not valid UTF-8")))?;
88
89        let lines: Vec<&str> = content.lines().collect();
90        let total_lines = lines.len();
91
92        // Apply offset (1-based)
93        let start = offset.map_or(0, |o| o.saturating_sub(1));
94        let end = start.saturating_add(limit).min(total_lines);
95
96        if start >= total_lines {
97            return Ok(format!(
98                "(file has {total_lines} lines, offset {start} is past end)"
99            ));
100        }
101
102        let mut output = String::new();
103        for (idx, &line) in lines[start..end].iter().enumerate() {
104            // Safety: start and idx are bounded by total_lines, +1 for 1-based display
105            #[allow(clippy::arithmetic_side_effects)]
106            let line_num = start + idx + 1;
107            let display_line = if line.len() > MAX_LINE_LENGTH {
108                // Find a safe truncation point at a char boundary
109                let mut end = MAX_LINE_LENGTH;
110                while end > 0 && !line.is_char_boundary(end) {
111                    end = end.saturating_sub(1);
112                }
113                &line[..end]
114            } else {
115                line
116            };
117            let _ = writeln!(output, "{line_num:>6}\t{display_line}");
118        }
119
120        if end < total_lines {
121            let _ = write!(
122                output,
123                "\n(showing lines {}-{} of {total_lines}; use offset/limit for more)",
124                start.saturating_add(1),
125                end
126            );
127        }
128
129        Ok(output)
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as IoWrite;
    use tempfile::NamedTempFile;

    /// Builds a tool context rooted at the system temp directory.
    fn ctx() -> ToolContext {
        ToolContext::new(std::env::temp_dir(), None)
    }

    /// A small file is returned in full, each line prefixed with its number.
    #[tokio::test]
    async fn test_read_file_basic() {
        let mut f = NamedTempFile::new().unwrap();
        writeln!(f, "line one").unwrap();
        writeln!(f, "line two").unwrap();
        writeln!(f, "line three").unwrap();

        let result = ReadFileTool
            .execute(
                serde_json::json!({"file_path": f.path().to_str().unwrap()}),
                &ctx(),
            )
            .await
            .unwrap();

        assert!(result.contains("line one"));
        assert!(result.contains("line two"));
        assert!(result.contains("line three"));
        assert!(result.contains("     1\t"));
        assert!(result.contains("     2\t"));
        assert!(result.contains("     3\t"));
    }

    /// A missing file is reported as `PathNotFound`.
    #[tokio::test]
    async fn test_read_file_not_found() {
        // Use a path inside a fresh temp directory: it is absolute on every
        // platform and guaranteed not to exist.
        let dir = tempfile::tempdir().unwrap();
        let missing = dir.path().join("astrid_nonexistent_12345.txt");

        let result = ReadFileTool
            .execute(
                serde_json::json!({"file_path": missing.to_str().unwrap()}),
                &ctx(),
            )
            .await;

        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), ToolError::PathNotFound(_)));
    }

    /// `offset` and `limit` select a window of lines from the middle of the file.
    #[tokio::test]
    async fn test_read_file_with_offset_and_limit() {
        let mut f = NamedTempFile::new().unwrap();
        for i in 1..=20 {
            writeln!(f, "line {i}").unwrap();
        }

        let result = ReadFileTool
            .execute(
                serde_json::json!({
                    "file_path": f.path().to_str().unwrap(),
                    "offset": 5,
                    "limit": 3
                }),
                &ctx(),
            )
            .await
            .unwrap();

        assert!(result.contains("     5\t"));
        assert!(result.contains("line 5"));
        assert!(result.contains("line 7"));
        assert!(!result.contains("line 8"));
    }

    /// A file containing a NUL byte is rejected as binary.
    #[tokio::test]
    async fn test_read_binary_file() {
        let mut f = NamedTempFile::new().unwrap();
        f.write_all(&[0x00, 0x01, 0x02, 0xFF]).unwrap();

        let result = ReadFileTool
            .execute(
                serde_json::json!({"file_path": f.path().to_str().unwrap()}),
                &ctx(),
            )
            .await;

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("binary file"));
    }

    /// Omitting `file_path` yields `InvalidArguments`.
    #[tokio::test]
    async fn test_read_file_missing_arg() {
        let result = ReadFileTool.execute(serde_json::json!({}), &ctx()).await;

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ToolError::InvalidArguments(_)
        ));
    }

    /// Truncation must back up to a character boundary instead of slicing
    /// through a multi-byte character.
    #[tokio::test]
    async fn test_read_file_multibyte_truncation() {
        // One ASCII byte followed by 500 four-byte emojis = 2001 bytes, which
        // exceeds MAX_LINE_LENGTH (2000). Character boundaries fall at bytes
        // 1, 5, 9, ..., so byte 2000 lies inside the 500th emoji (bytes
        // 1997-2000) and a naive `&line[..2000]` would panic. The correct cut
        // is at byte 1997, leaving the ASCII byte plus 499 emojis.
        let mut f = NamedTempFile::new().unwrap();
        let line = format!("a{}", "\u{1F525}".repeat(500));
        writeln!(f, "{line}").unwrap();

        let result = ReadFileTool
            .execute(
                serde_json::json!({"file_path": f.path().to_str().unwrap()}),
                &ctx(),
            )
            .await
            .unwrap();

        // Should not panic, and should keep only whole characters.
        assert!(result.contains("\ta\u{1F525}"));
        assert_eq!(result.matches('\u{1F525}').count(), 499);
    }

    /// A relative `file_path` is rejected with `InvalidArguments`.
    #[tokio::test]
    async fn test_read_file_rejects_relative_path() {
        let result = ReadFileTool
            .execute(
                serde_json::json!({"file_path": "relative/path.txt"}),
                &ctx(),
            )
            .await;

        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            ToolError::InvalidArguments(_)
        ));
    }
}