Skip to main content

agent_code_lib/tools/
file_read.rs

1//! FileRead tool: read file contents with optional line ranges.
2
3use async_trait::async_trait;
4use serde_json::json;
5use std::path::PathBuf;
6
7use super::{Tool, ToolContext, ToolResult};
8use crate::error::ToolError;
9
10pub struct FileReadTool;
11
12#[async_trait]
13impl Tool for FileReadTool {
14    fn name(&self) -> &'static str {
15        "FileRead"
16    }
17
18    fn description(&self) -> &'static str {
19        "Reads a file from the filesystem. Returns contents with line numbers."
20    }
21
22    fn input_schema(&self) -> serde_json::Value {
23        json!({
24            "type": "object",
25            "required": ["file_path"],
26            "properties": {
27                "file_path": {
28                    "type": "string",
29                    "description": "Absolute path to the file"
30                },
31                "offset": {
32                    "type": "integer",
33                    "description": "Line number to start reading from (1-based)"
34                },
35                "limit": {
36                    "type": "integer",
37                    "description": "Number of lines to read"
38                },
39                "pages": {
40                    "type": "string",
41                    "description": "Page range for PDF files (e.g., \"1-5\", \"3\", \"10-20\"). Max 20 pages per request."
42                }
43            }
44        })
45    }
46
47    fn is_read_only(&self) -> bool {
48        true
49    }
50
51    fn is_concurrency_safe(&self) -> bool {
52        true
53    }
54
55    fn get_path(&self, input: &serde_json::Value) -> Option<PathBuf> {
56        input
57            .get("file_path")
58            .and_then(|v| v.as_str())
59            .map(PathBuf::from)
60    }
61
62    async fn call(
63        &self,
64        input: serde_json::Value,
65        _ctx: &ToolContext,
66    ) -> Result<ToolResult, ToolError> {
67        let file_path = input
68            .get("file_path")
69            .and_then(|v| v.as_str())
70            .ok_or_else(|| ToolError::InvalidInput("'file_path' is required".into()))?;
71
72        let offset = input.get("offset").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
73
74        let limit = input.get("limit").and_then(|v| v.as_u64()).unwrap_or(2000) as usize;
75
76        let path = std::path::Path::new(file_path);
77
78        // Block device and virtual filesystem paths.
79        const BLOCKED_PREFIXES: &[&str] = &["/dev/", "/proc/", "/sys/"];
80        if BLOCKED_PREFIXES
81            .iter()
82            .any(|prefix| file_path.starts_with(prefix))
83        {
84            return Err(ToolError::InvalidInput(format!(
85                "Cannot read virtual/device file: {file_path}"
86            )));
87        }
88
89        let pages = input
90            .get("pages")
91            .and_then(|v| v.as_str())
92            .map(|s| s.to_string());
93
94        // Handle binary/special file types.
95        match path.extension().and_then(|e| e.to_str()) {
96            Some("pdf") => {
97                return read_pdf(file_path, pages.as_deref()).await;
98            }
99            Some("ipynb") => {
100                return read_notebook(file_path).await;
101            }
102            Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "svg" | "ico" | "bmp") => {
103                let meta = tokio::fs::metadata(file_path).await.ok();
104                let size = meta.map(|m| m.len()).unwrap_or(0);
105
106                // For small images (< 5MB), embed as base64 for vision models.
107                if size < 5 * 1024 * 1024
108                    && crate::llm::message::image_block_from_file(path).is_ok()
109                {
110                    return Ok(ToolResult::success(format!(
111                        "(Image: {file_path}, {size} bytes — loaded for vision analysis)"
112                    )));
113                }
114
115                return Ok(ToolResult::success(format!(
116                    "(Image file: {file_path}, {size} bytes — \
117                     too large for inline embedding)"
118                )));
119            }
120            Some("wasm" | "exe" | "dll" | "so" | "dylib" | "o" | "a") => {
121                let meta = tokio::fs::metadata(file_path).await.ok();
122                let size = meta.map(|m| m.len()).unwrap_or(0);
123                return Ok(ToolResult::success(format!(
124                    "(Binary file: {file_path}, {size} bytes)"
125                )));
126            }
127            _ => {}
128        }
129
130        // Try to read as text; if it fails (binary content), report the file type.
131        let content = match tokio::fs::read_to_string(file_path).await {
132            Ok(c) => c,
133            Err(e) => {
134                // May be binary — try to read size at least.
135                if let Ok(meta) = tokio::fs::metadata(file_path).await {
136                    return Ok(ToolResult::success(format!(
137                        "(Binary or unreadable file: {file_path}, {} bytes: {e})",
138                        meta.len()
139                    )));
140                }
141                return Err(ToolError::ExecutionFailed(format!(
142                    "Failed to read {file_path}: {e}"
143                )));
144            }
145        };
146
147        // Apply line range and add line numbers (1-indexed).
148        let lines: Vec<&str> = content.lines().collect();
149        let start = (offset.saturating_sub(1)).min(lines.len());
150        let end = (start + limit).min(lines.len());
151
152        let mut output = String::new();
153        for (i, line) in lines[start..end].iter().enumerate() {
154            let line_num = start + i + 1;
155            output.push_str(&format!("{line_num}\t{line}\n"));
156        }
157
158        if output.is_empty() {
159            output = "(empty file)".to_string();
160        }
161
162        // Token-based size limit: estimate ~4 bytes per token, cap at 100K tokens.
163        const MAX_TOKENS: usize = 100_000;
164        const BYTES_PER_TOKEN: usize = 4;
165        let max_bytes = MAX_TOKENS * BYTES_PER_TOKEN;
166        if output.len() > max_bytes {
167            output.truncate(max_bytes);
168            // Avoid splitting a multi-byte character.
169            while !output.is_char_boundary(output.len()) {
170                output.pop();
171            }
172            output.push_str(&format!(
173                "\n\n(File content truncated: exceeded ~{MAX_TOKENS} token estimate. \
174                 Use offset/limit to read specific sections.)"
175            ));
176        }
177
178        Ok(ToolResult::success(output))
179    }
180}
181
182/// Extract text from a PDF file using pdftotext (poppler-utils).
183async fn read_pdf(file_path: &str, pages: Option<&str>) -> Result<ToolResult, ToolError> {
184    // Build pdftotext command with optional page range.
185    let mut cmd = tokio::process::Command::new("pdftotext");
186
187    if let Some(page_spec) = pages {
188        // Parse page spec like "1-5", "3", "10-20".
189        let (first, last) = if let Some((start, end)) = page_spec.split_once('-') {
190            (start.trim().to_string(), end.trim().to_string())
191        } else {
192            let page = page_spec.trim().to_string();
193            (page.clone(), page)
194        };
195        cmd.arg("-f").arg(&first).arg("-l").arg(&last);
196    }
197
198    cmd.arg(file_path).arg("-");
199    let output = cmd.output().await;
200
201    match output {
202        Ok(out) if out.status.success() => {
203            let text = String::from_utf8_lossy(&out.stdout).to_string();
204            if text.trim().is_empty() {
205                Ok(ToolResult::success(format!(
206                    "(PDF file: {file_path} — extracted but contains no text. \
207                     May be image-based; OCR would be needed.)"
208                )))
209            } else {
210                // Truncate very large PDFs.
211                let display = if text.len() > 100_000 {
212                    format!(
213                        "{}\n\n(PDF truncated: {} chars total)",
214                        &text[..100_000],
215                        text.len()
216                    )
217                } else {
218                    text
219                };
220                Ok(ToolResult::success(display))
221            }
222        }
223        _ => {
224            // pdftotext not available — report file info.
225            let meta = tokio::fs::metadata(file_path).await.ok();
226            let size = meta.map(|m| m.len()).unwrap_or(0);
227            Ok(ToolResult::success(format!(
228                "(PDF file: {file_path}, {size} bytes. \
229                 Install poppler-utils for text extraction: \
230                 apt install poppler-utils / brew install poppler)"
231            )))
232        }
233    }
234}
235
236/// Render a Jupyter notebook (.ipynb) as readable text.
237async fn read_notebook(file_path: &str) -> Result<ToolResult, ToolError> {
238    let content = tokio::fs::read_to_string(file_path)
239        .await
240        .map_err(|e| ToolError::ExecutionFailed(format!("Failed to read {file_path}: {e}")))?;
241
242    let notebook: serde_json::Value = serde_json::from_str(&content)
243        .map_err(|e| ToolError::ExecutionFailed(format!("Invalid notebook JSON: {e}")))?;
244
245    let cells = notebook
246        .get("cells")
247        .and_then(|v| v.as_array())
248        .ok_or_else(|| ToolError::ExecutionFailed("Notebook has no 'cells' array".into()))?;
249
250    let mut output = String::new();
251    for (i, cell) in cells.iter().enumerate() {
252        let cell_type = cell
253            .get("cell_type")
254            .and_then(|v| v.as_str())
255            .unwrap_or("unknown");
256
257        output.push_str(&format!("--- Cell {} ({}) ---\n", i + 1, cell_type));
258
259        // Source lines.
260        if let Some(source) = cell.get("source") {
261            let text = match source {
262                serde_json::Value::Array(lines) => lines
263                    .iter()
264                    .filter_map(|l| l.as_str())
265                    .collect::<Vec<_>>()
266                    .join(""),
267                serde_json::Value::String(s) => s.clone(),
268                _ => String::new(),
269            };
270            output.push_str(&text);
271            if !text.ends_with('\n') {
272                output.push('\n');
273            }
274        }
275
276        // Outputs (for code cells).
277        if cell_type == "code"
278            && let Some(outputs) = cell.get("outputs").and_then(|v| v.as_array())
279        {
280            for out in outputs {
281                if let Some(text) = out.get("text").and_then(|v| v.as_array()) {
282                    output.push_str("Output:\n");
283                    for line in text {
284                        if let Some(s) = line.as_str() {
285                            output.push_str(s);
286                        }
287                    }
288                }
289                if let Some(data) = out.get("data")
290                    && let Some(plain) = data.get("text/plain").and_then(|v| v.as_array())
291                {
292                    output.push_str("Output:\n");
293                    for line in plain {
294                        if let Some(s) = line.as_str() {
295                            output.push_str(s);
296                        }
297                    }
298                }
299            }
300        }
301
302        output.push('\n');
303    }
304
305    Ok(ToolResult::success(output))
306}