1use std::fmt::Write;
4use std::path::Path;
5
6use async_trait::async_trait;
7use schemars::JsonSchema;
8use serde::Deserialize;
9
10use super::SchemaTool;
11use super::context::ExecutionContext;
12use crate::types::ToolResult;
13
/// File size in bytes (10 MiB) above which `warn_if_large_file` emits a warning.
const LARGE_FILE_THRESHOLD: u64 = 10 * 1024 * 1024;

/// Input parameters accepted by the `Read` tool.
///
/// Unknown fields are rejected in the generated JSON schema.
#[derive(Debug, Deserialize, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct ReadInput {
    /// Path of the file to read.
    pub file_path: String,
    /// Number of leading lines to skip before reading. Only used for text
    /// files; treated as 0 when omitted.
    #[serde(default)]
    pub offset: Option<usize>,
    /// Maximum number of lines to return. Only used for text files; treated
    /// as 2000 when omitted.
    #[serde(default)]
    pub limit: Option<usize>,
}
28
/// Stateless tool that reads a file and returns its contents as a `ToolResult`.
#[derive(Debug, Clone, Copy, Default)]
pub struct ReadTool;
31
/// How a file should be decoded, decided purely from its extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileType {
    /// Anything without a recognised extension; read as UTF-8 text.
    Text,
    /// `.pdf` documents (only with the `multimedia` feature).
    #[cfg(feature = "multimedia")]
    Pdf,
    /// Raster image formats (only with the `multimedia` feature).
    #[cfg(feature = "multimedia")]
    Image,
    /// `.ipynb` Jupyter notebooks.
    Jupyter,
}

/// Classifies `path` by its file extension.
///
/// The comparison is ASCII case-insensitive, so `photo.PNG` and `nb.IPYNB`
/// are recognised the same way as their lowercase spellings. Paths with no
/// extension, a non-UTF-8 extension, or an unrecognised one are treated as
/// [`FileType::Text`].
fn detect_file_type(path: &Path) -> FileType {
    // Normalise once so the match arms can stay lowercase literals.
    let extension = path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        #[cfg(feature = "multimedia")]
        Some("pdf") => FileType::Pdf,
        #[cfg(feature = "multimedia")]
        Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "bmp" | "ico" | "tiff") => FileType::Image,
        Some("ipynb") => FileType::Jupyter,
        _ => FileType::Text,
    }
}
51
52async fn read_text(path: &Path, offset: usize, limit: usize) -> ToolResult {
53 let content = match tokio::fs::read_to_string(path).await {
54 Ok(c) => c,
55 Err(e) => return ToolResult::error(format!("Failed to read file: {}", e)),
56 };
57
58 let lines: Vec<&str> = content.lines().collect();
59 let total_lines = lines.len();
60 let selected_lines: Vec<&str> = lines.into_iter().skip(offset).take(limit).collect();
61
62 if selected_lines.is_empty() {
63 return ToolResult::success(format!(
64 "File is empty or offset {} exceeds file length {}",
65 offset, total_lines
66 ));
67 }
68
69 let estimated_capacity: usize = selected_lines
70 .iter()
71 .map(|line| 8 + line.len().min(2003))
72 .sum();
73 let mut output = String::with_capacity(estimated_capacity);
74
75 for (i, line) in selected_lines.iter().enumerate() {
76 if i > 0 {
77 output.push('\n');
78 }
79 let line_num = offset + i + 1;
80 if line.len() > 2000 {
81 let _ = write!(output, "{:>6}\t{}...", line_num, &line[..2000]);
82 } else {
83 let _ = write!(output, "{:>6}\t{}", line_num, line);
84 }
85 }
86
87 ToolResult::success(output)
88}
89
/// Loads a PDF from disk and returns the text extracted from it.
///
/// Produces an error result if the file cannot be read or if text extraction
/// fails.
#[cfg(feature = "multimedia")]
async fn read_pdf(path: &Path) -> ToolResult {
    let raw = match tokio::fs::read(path).await {
        Err(err) => return ToolResult::error(format!("Failed to read PDF: {}", err)),
        Ok(raw) => raw,
    };

    pdf_extract::extract_text_from_mem(&raw).map_or_else(
        |err| ToolResult::error(format!("Failed to extract PDF text: {}", err)),
        |text| ToolResult::success(text),
    )
}
102
/// Loads an image from disk and returns it as a base64 `data:` URI.
///
/// The MIME type is guessed from the path; when no guess is available,
/// `application/octet-stream` is used. Produces an error result if the file
/// cannot be read.
#[cfg(feature = "multimedia")]
async fn read_image(path: &Path) -> ToolResult {
    use base64::Engine;

    let raw = match tokio::fs::read(path).await {
        Err(err) => return ToolResult::error(format!("Failed to read image: {}", err)),
        Ok(raw) => raw,
    };

    let mime = match mime_guess::from_path(path).first() {
        Some(guess) => guess.to_string(),
        None => "application/octet-stream".to_string(),
    };
    let payload = base64::engine::general_purpose::STANDARD.encode(&raw);

    ToolResult::success(format!("data:{};base64,{}", mime, payload))
}
120
121async fn read_jupyter(path: &Path) -> ToolResult {
122 let content = match tokio::fs::read_to_string(path).await {
123 Ok(c) => c,
124 Err(e) => return ToolResult::error(format!("Failed to read notebook: {}", e)),
125 };
126
127 let notebook: serde_json::Value = match serde_json::from_str(&content) {
128 Ok(v) => v,
129 Err(e) => return ToolResult::error(format!("Invalid notebook JSON: {}", e)),
130 };
131
132 let cells = match notebook.get("cells").and_then(|c| c.as_array()) {
133 Some(c) => c,
134 None => return ToolResult::error("Invalid notebook: no cells array"),
135 };
136
137 let mut output = String::new();
138 for (i, cell) in cells.iter().enumerate() {
139 let cell_type = cell
140 .get("cell_type")
141 .and_then(|t| t.as_str())
142 .unwrap_or("unknown");
143 let source = cell.get("source").map(extract_source).unwrap_or_default();
144
145 let _ = writeln!(output, "--- Cell {} [{}] ---", i + 1, cell_type);
146 let _ = writeln!(output, "{}", source);
147
148 if cell_type == "code"
149 && let Some(outputs) = cell.get("outputs").and_then(|o| o.as_array())
150 {
151 for out in outputs {
152 if let Some(text) = out.get("text") {
153 let _ = writeln!(output, "[Output]\n{}", extract_source(text));
154 } else if let Some(data) = out.get("data")
155 && let Some(text) = data.get("text/plain")
156 {
157 let _ = writeln!(output, "[Output]\n{}", extract_source(text));
158 }
159 }
160 }
161 output.push('\n');
162 }
163
164 ToolResult::success(output)
165}
166
167fn extract_source(value: &serde_json::Value) -> String {
168 match value {
169 serde_json::Value::String(s) => s.clone(),
170 serde_json::Value::Array(arr) => arr
171 .iter()
172 .filter_map(|v| v.as_str())
173 .collect::<Vec<_>>()
174 .join(""),
175 _ => String::new(),
176 }
177}
178
179async fn warn_if_large_file(path: &Path) {
180 if let Ok(meta) = tokio::fs::metadata(path).await
181 && meta.len() > LARGE_FILE_THRESHOLD
182 {
183 tracing::warn!(
184 path = %path.display(),
185 size_mb = meta.len() / (1024 * 1024),
186 "Reading large file into memory"
187 );
188 }
189}
190
191#[async_trait]
192impl SchemaTool for ReadTool {
193 type Input = ReadInput;
194
195 const NAME: &'static str = "Read";
196
197 const DESCRIPTION: &'static str = r#"Reads a file from the local filesystem. You can access any file directly by using this tool.
198Assume this tool is able to read all files on the machine. If a path to a file is provided assume that path is valid. It is okay to read a file that does not exist; an error will be returned.
199
200Usage:
201- The file_path parameter must be an absolute path, not a relative path
202- By default, it reads up to 2000 lines starting from the beginning of the file
203- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
204- Any lines longer than 2000 characters will be truncated
205- Results are returned using cat -n format, with line numbers starting at 1
206- This tool can read images (eg PNG, JPG, etc). When reading an image file the contents are returned as base64-encoded data URI for multimodal processing.
207- This tool can read PDF files (.pdf). PDFs are processed page by page, extracting both text and visual content for analysis.
208- This tool can read Jupyter notebooks (.ipynb files) and returns all cells with their outputs, combining code, text, and visualizations.
209- This tool can only read files, not directories. To read a directory, use an ls command via the Bash tool.
210- You can call multiple tools in a single response. It is always better to speculatively read multiple potentially useful files in parallel.
211- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents."#;
212
213 async fn handle(&self, input: ReadInput, context: &ExecutionContext) -> ToolResult {
214 let path = match context.try_resolve_for(Self::NAME, &input.file_path) {
215 Ok(p) => p,
216 Err(e) => return e,
217 };
218
219 let file_type = detect_file_type(path.as_path());
220
221 if !matches!(file_type, FileType::Text) {
222 warn_if_large_file(path.as_path()).await;
223 }
224
225 match file_type {
226 FileType::Text => {
227 let offset = input.offset.unwrap_or(0);
228 let limit = input.limit.unwrap_or(2000);
229 read_text(path.as_path(), offset, limit).await
230 }
231 #[cfg(feature = "multimedia")]
232 FileType::Pdf => read_pdf(path.as_path()).await,
233 #[cfg(feature = "multimedia")]
234 FileType::Image => read_image(path.as_path()).await,
235 FileType::Jupyter => read_jupyter(path.as_path()).await,
236 }
237 }
238}
239
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::Tool;
    use crate::types::ToolOutput;
    use tempfile::tempdir;
    use tokio::fs;

    /// A plain text file read with default parameters returns all its lines.
    #[tokio::test]
    async fn test_read_file() {
        let dir = tempdir().unwrap();
        // Canonicalise so the file path and the context root agree even when
        // the temp dir sits behind a symlink.
        let root = std::fs::canonicalize(dir.path()).unwrap();
        let file_path = root.join("test.txt");
        fs::write(&file_path, "line 1\nline 2\nline 3")
            .await
            .unwrap();

        let test_context = ExecutionContext::from_path(&root).unwrap();
        let tool = ReadTool;

        let result = tool
            .execute(
                serde_json::json!({"file_path": file_path.to_str().unwrap()}),
                &test_context,
            )
            .await;

        match &result.output {
            ToolOutput::Success(content) => {
                assert!(content.contains("line 1"));
                assert!(content.contains("line 2"));
                assert!(content.contains("line 3"));
            }
            _ => panic!("Expected success"),
        }
    }

    /// A notebook is rendered with cell sources and code-cell outputs.
    #[tokio::test]
    async fn test_read_jupyter_notebook() {
        let dir = tempdir().unwrap();
        let root = std::fs::canonicalize(dir.path()).unwrap();
        let file_path = root.join("test.ipynb");

        let notebook = serde_json::json!({
            "cells": [
                {
                    "cell_type": "markdown",
                    "source": ["# Title"]
                },
                {
                    "cell_type": "code",
                    "source": ["print('hello')"],
                    "outputs": [{"text": ["hello\n"]}]
                }
            ]
        });

        fs::write(&file_path, serde_json::to_string(&notebook).unwrap())
            .await
            .unwrap();

        let test_context = ExecutionContext::from_path(&root).unwrap();
        let tool = ReadTool;

        let result = tool
            .execute(
                serde_json::json!({"file_path": file_path.to_str().unwrap()}),
                &test_context,
            )
            .await;

        match &result.output {
            ToolOutput::Success(content) => {
                assert!(content.contains("# Title"));
                assert!(content.contains("print('hello')"));
                assert!(content.contains("[Output]"));
            }
            _ => panic!("Expected success"),
        }
    }

    /// A relative path that climbs out of the context root is rejected.
    #[tokio::test]
    async fn test_read_path_traversal_blocked() {
        let dir = tempdir().unwrap();
        let test_context = ExecutionContext::from_path(dir.path()).unwrap();
        let tool = ReadTool;

        let result = tool
            .execute(
                serde_json::json!({"file_path": "../../../etc/passwd"}),
                &test_context,
            )
            .await;

        assert!(result.is_error());
    }

    /// `offset` skips leading lines and `limit` caps how many are returned.
    #[tokio::test]
    async fn test_read_with_offset_and_limit() {
        let dir = tempdir().unwrap();
        let root = std::fs::canonicalize(dir.path()).unwrap();
        let file_path = root.join("test.txt");
        fs::write(&file_path, "line 1\nline 2\nline 3\nline 4\nline 5")
            .await
            .unwrap();

        let test_context = ExecutionContext::from_path(&root).unwrap();
        let tool = ReadTool;

        let result = tool
            .execute(
                serde_json::json!({
                    "file_path": file_path.to_str().unwrap(),
                    "offset": 1,
                    "limit": 2
                }),
                &test_context,
            )
            .await;

        match &result.output {
            ToolOutput::Success(content) => {
                assert!(!content.contains("line 1"));
                assert!(content.contains("line 2"));
                assert!(content.contains("line 3"));
                assert!(!content.contains("line 4"));
            }
            _ => panic!("Expected success"),
        }
    }
}