claude_agent/tools/
read.rs

1//! Read tool - reads file contents with TOCTOU protection and multimedia support.
2
3use std::fmt::Write;
4use std::path::Path;
5
6use async_trait::async_trait;
7use schemars::JsonSchema;
8use serde::Deserialize;
9
10use super::SchemaTool;
11use super::context::ExecutionContext;
12use crate::types::ToolResult;
13
/// File size in bytes (10 MiB) above which `warn_if_large_file` logs a warning.
const LARGE_FILE_THRESHOLD: u64 = 10 * 1024 * 1024;
15
// Arguments accepted by the Read tool, deserialized from the tool-call JSON.
//
// NOTE(review): this type derives `JsonSchema`, and schemars normally copies
// `///` doc comments into the generated schema as descriptions. Treat the
// field docs below as user-visible text rather than as ordinary comments.
#[derive(Debug, Deserialize, JsonSchema)]
#[schemars(deny_unknown_fields)]
pub struct ReadInput {
    /// The absolute path to the file to read
    pub file_path: String,
    /// The line number to start reading from. Only provide if the file is too large to read at once
    // `handle` substitutes 0 when this is absent; `read_text` uses the value
    // as the number of leading lines to skip.
    #[serde(default)]
    pub offset: Option<usize>,
    /// The number of lines to read. Only provide if the file is too large to read at once.
    // `handle` substitutes 2000 when this is absent.
    #[serde(default)]
    pub limit: Option<usize>,
}
28
/// Stateless marker type for the `Read` tool.
///
/// All behavior lives in its `SchemaTool` implementation; the value itself
/// carries no data.
#[derive(Debug, Clone, Copy, Default)]
pub struct ReadTool;
31
/// How a file should be decoded, chosen from its extension by
/// [`detect_file_type`].
///
/// The `Pdf` and `Image` variants exist only when the `multimedia` feature is
/// enabled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum FileType {
    Text,
    #[cfg(feature = "multimedia")]
    Pdf,
    #[cfg(feature = "multimedia")]
    Image,
    Jupyter,
}

/// Classifies `path` by its file extension.
///
/// The extension is compared case-insensitively (ASCII), so `REPORT.PDF` and
/// `Notebook.IPYNB` are recognised the same way as their lowercase forms.
/// Paths with no extension, a non-UTF-8 extension, or an unrecognised
/// extension are classified as [`FileType::Text`].
fn detect_file_type(path: &Path) -> FileType {
    // Normalise once so the match arms can stay simple lowercase literals.
    let extension = path
        .extension()
        .and_then(|e| e.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        #[cfg(feature = "multimedia")]
        Some("pdf") => FileType::Pdf,
        #[cfg(feature = "multimedia")]
        Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "bmp" | "ico" | "tiff") => FileType::Image,
        Some("ipynb") => FileType::Jupyter,
        _ => FileType::Text,
    }
}
51
52async fn read_text(path: &Path, offset: usize, limit: usize) -> ToolResult {
53    let content = match tokio::fs::read_to_string(path).await {
54        Ok(c) => c,
55        Err(e) => return ToolResult::error(format!("Failed to read file: {}", e)),
56    };
57
58    let lines: Vec<&str> = content.lines().collect();
59    let total_lines = lines.len();
60    let selected_lines: Vec<&str> = lines.into_iter().skip(offset).take(limit).collect();
61
62    if selected_lines.is_empty() {
63        return ToolResult::success(format!(
64            "File is empty or offset {} exceeds file length {}",
65            offset, total_lines
66        ));
67    }
68
69    let estimated_capacity: usize = selected_lines
70        .iter()
71        .map(|line| 8 + line.len().min(2003))
72        .sum();
73    let mut output = String::with_capacity(estimated_capacity);
74
75    for (i, line) in selected_lines.iter().enumerate() {
76        if i > 0 {
77            output.push('\n');
78        }
79        let line_num = offset + i + 1;
80        if line.len() > 2000 {
81            let _ = write!(output, "{:>6}\t{}...", line_num, &line[..2000]);
82        } else {
83            let _ = write!(output, "{:>6}\t{}", line_num, line);
84        }
85    }
86
87    ToolResult::success(output)
88}
89
/// Reads the PDF at `path` and returns its extracted text.
///
/// The file is loaded asynchronously. Text extraction is synchronous,
/// CPU-bound work, so it runs on tokio's blocking thread pool instead of on
/// the async executor. A panic inside the extractor therefore surfaces as an
/// error result rather than unwinding through the calling task.
///
/// Returns an error result if the file cannot be read or extraction fails.
#[cfg(feature = "multimedia")]
async fn read_pdf(path: &Path) -> ToolResult {
    let bytes = match tokio::fs::read(path).await {
        Ok(b) => b,
        Err(e) => return ToolResult::error(format!("Failed to read PDF: {}", e)),
    };

    // The error is stringified inside the closure so the value crossing the
    // thread boundary is a plain `String`.
    let extraction = tokio::task::spawn_blocking(move || {
        pdf_extract::extract_text_from_mem(&bytes).map_err(|e| e.to_string())
    })
    .await;

    match extraction {
        Ok(Ok(text)) => ToolResult::success(text),
        Ok(Err(e)) => ToolResult::error(format!("Failed to extract PDF text: {}", e)),
        // The blocking task panicked or was cancelled.
        Err(e) => ToolResult::error(format!("Failed to extract PDF text: {}", e)),
    }
}
102
/// Reads the image at `path` and returns it as a `data:` URI.
///
/// The MIME type is guessed from the path, falling back to
/// `application/octet-stream`, and the file bytes are encoded with standard
/// base64. Returns an error result if the file cannot be read.
#[cfg(feature = "multimedia")]
async fn read_image(path: &Path) -> ToolResult {
    use base64::Engine;

    let raw = match tokio::fs::read(path).await {
        Err(err) => return ToolResult::error(format!("Failed to read image: {}", err)),
        Ok(data) => data,
    };

    let mime = match mime_guess::from_path(path).first() {
        Some(guess) => guess.to_string(),
        None => "application/octet-stream".to_string(),
    };

    let payload = base64::engine::general_purpose::STANDARD.encode(&raw);
    ToolResult::success(format!("data:{};base64,{}", mime, payload))
}
120
121async fn read_jupyter(path: &Path) -> ToolResult {
122    let content = match tokio::fs::read_to_string(path).await {
123        Ok(c) => c,
124        Err(e) => return ToolResult::error(format!("Failed to read notebook: {}", e)),
125    };
126
127    let notebook: serde_json::Value = match serde_json::from_str(&content) {
128        Ok(v) => v,
129        Err(e) => return ToolResult::error(format!("Invalid notebook JSON: {}", e)),
130    };
131
132    let cells = match notebook.get("cells").and_then(|c| c.as_array()) {
133        Some(c) => c,
134        None => return ToolResult::error("Invalid notebook: no cells array"),
135    };
136
137    let mut output = String::new();
138    for (i, cell) in cells.iter().enumerate() {
139        let cell_type = cell
140            .get("cell_type")
141            .and_then(|t| t.as_str())
142            .unwrap_or("unknown");
143        let source = cell.get("source").map(extract_source).unwrap_or_default();
144
145        let _ = writeln!(output, "--- Cell {} [{}] ---", i + 1, cell_type);
146        let _ = writeln!(output, "{}", source);
147
148        if cell_type == "code"
149            && let Some(outputs) = cell.get("outputs").and_then(|o| o.as_array())
150        {
151            for out in outputs {
152                if let Some(text) = out.get("text") {
153                    let _ = writeln!(output, "[Output]\n{}", extract_source(text));
154                } else if let Some(data) = out.get("data")
155                    && let Some(text) = data.get("text/plain")
156                {
157                    let _ = writeln!(output, "[Output]\n{}", extract_source(text));
158                }
159            }
160        }
161        output.push('\n');
162    }
163
164    ToolResult::success(output)
165}
166
167fn extract_source(value: &serde_json::Value) -> String {
168    match value {
169        serde_json::Value::String(s) => s.clone(),
170        serde_json::Value::Array(arr) => arr
171            .iter()
172            .filter_map(|v| v.as_str())
173            .collect::<Vec<_>>()
174            .join(""),
175        _ => String::new(),
176    }
177}
178
179async fn warn_if_large_file(path: &Path) {
180    if let Ok(meta) = tokio::fs::metadata(path).await
181        && meta.len() > LARGE_FILE_THRESHOLD
182    {
183        tracing::warn!(
184            path = %path.display(),
185            size_mb = meta.len() / (1024 * 1024),
186            "Reading large file into memory"
187        );
188    }
189}
190
191#[async_trait]
192impl SchemaTool for ReadTool {
193    type Input = ReadInput;
194
195    const NAME: &'static str = "Read";
196
197    const DESCRIPTION: &'static str = r#"Reads a file from the local filesystem. You can access any file directly by using this tool.
198Assume this tool is able to read all files on the machine. If a path to a file is provided assume that path is valid. It is okay to read a file that does not exist; an error will be returned.
199
200Usage:
201- The file_path parameter must be an absolute path, not a relative path
202- By default, it reads up to 2000 lines starting from the beginning of the file
203- You can optionally specify a line offset and limit (especially handy for long files), but it's recommended to read the whole file by not providing these parameters
204- Any lines longer than 2000 characters will be truncated
205- Results are returned using cat -n format, with line numbers starting at 1
206- This tool can read images (eg PNG, JPG, etc). When reading an image file the contents are returned as base64-encoded data URI for multimodal processing.
207- This tool can read PDF files (.pdf). PDFs are processed page by page, extracting both text and visual content for analysis.
208- This tool can read Jupyter notebooks (.ipynb files) and returns all cells with their outputs, combining code, text, and visualizations.
209- This tool can only read files, not directories. To read a directory, use an ls command via the Bash tool.
210- You can call multiple tools in a single response. It is always better to speculatively read multiple potentially useful files in parallel.
211- If you read a file that exists but has empty contents you will receive a system reminder warning in place of file contents."#;
212
213    async fn handle(&self, input: ReadInput, context: &ExecutionContext) -> ToolResult {
214        let path = match context.try_resolve_for(Self::NAME, &input.file_path) {
215            Ok(p) => p,
216            Err(e) => return e,
217        };
218
219        let file_type = detect_file_type(path.as_path());
220
221        if !matches!(file_type, FileType::Text) {
222            warn_if_large_file(path.as_path()).await;
223        }
224
225        match file_type {
226            FileType::Text => {
227                let offset = input.offset.unwrap_or(0);
228                let limit = input.limit.unwrap_or(2000);
229                read_text(path.as_path(), offset, limit).await
230            }
231            #[cfg(feature = "multimedia")]
232            FileType::Pdf => read_pdf(path.as_path()).await,
233            #[cfg(feature = "multimedia")]
234            FileType::Image => read_image(path.as_path()).await,
235            FileType::Jupyter => read_jupyter(path.as_path()).await,
236        }
237    }
238}
239
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::Tool;
    use crate::types::ToolOutput;
    use tempfile::tempdir;
    use tokio::fs;

    /// A plain text file read with default arguments returns every line.
    #[tokio::test]
    async fn test_read_file() {
        let tmp = tempdir().unwrap();
        let workspace = std::fs::canonicalize(tmp.path()).unwrap();
        let target = workspace.join("test.txt");
        fs::write(&target, "line 1\nline 2\nline 3")
            .await
            .unwrap();

        let ctx = ExecutionContext::from_path(&workspace).unwrap();
        let reader = ReadTool;

        let args = serde_json::json!({"file_path": target.to_str().unwrap()});
        let result = reader.execute(args, &ctx).await;

        let ToolOutput::Success(text) = &result.output else {
            panic!("Expected success");
        };
        assert!(text.contains("line 1"));
        assert!(text.contains("line 2"));
        assert!(text.contains("line 3"));
    }

    /// A notebook is rendered with its markdown source, code source and outputs.
    #[tokio::test]
    async fn test_read_jupyter_notebook() {
        let tmp = tempdir().unwrap();
        let workspace = std::fs::canonicalize(tmp.path()).unwrap();
        let target = workspace.join("test.ipynb");

        let notebook = serde_json::json!({
            "cells": [
                {
                    "cell_type": "markdown",
                    "source": ["# Title"]
                },
                {
                    "cell_type": "code",
                    "source": ["print('hello')"],
                    "outputs": [{"text": ["hello\n"]}]
                }
            ]
        });
        let serialized = serde_json::to_string(&notebook).unwrap();
        fs::write(&target, serialized).await.unwrap();

        let ctx = ExecutionContext::from_path(&workspace).unwrap();
        let reader = ReadTool;

        let args = serde_json::json!({"file_path": target.to_str().unwrap()});
        let result = reader.execute(args, &ctx).await;

        let ToolOutput::Success(text) = &result.output else {
            panic!("Expected success");
        };
        assert!(text.contains("# Title"));
        assert!(text.contains("print('hello')"));
        assert!(text.contains("[Output]"));
    }

    /// A relative path that climbs out of the workspace is rejected.
    #[tokio::test]
    async fn test_read_path_traversal_blocked() {
        let tmp = tempdir().unwrap();
        let ctx = ExecutionContext::from_path(tmp.path()).unwrap();
        let reader = ReadTool;

        let args = serde_json::json!({"file_path": "../../../etc/passwd"});
        let result = reader.execute(args, &ctx).await;

        assert!(result.is_error());
    }

    /// `offset` skips leading lines and `limit` caps how many are returned.
    #[tokio::test]
    async fn test_read_with_offset_and_limit() {
        let tmp = tempdir().unwrap();
        let workspace = std::fs::canonicalize(tmp.path()).unwrap();
        let target = workspace.join("test.txt");
        fs::write(&target, "line 1\nline 2\nline 3\nline 4\nline 5")
            .await
            .unwrap();

        let ctx = ExecutionContext::from_path(&workspace).unwrap();
        let reader = ReadTool;

        let args = serde_json::json!({
            "file_path": target.to_str().unwrap(),
            "offset": 1,
            "limit": 2
        });
        let result = reader.execute(args, &ctx).await;

        let ToolOutput::Success(text) = &result.output else {
            panic!("Expected success");
        };
        assert!(!text.contains("line 1"));
        assert!(text.contains("line 2"));
        assert!(text.contains("line 3"));
        assert!(!text.contains("line 4"));
    }
}