Skip to main content

agent_code_lib/tools/
file_read.rs

1//! FileRead tool: read file contents with optional line ranges.
2
3use async_trait::async_trait;
4use serde_json::json;
5use std::path::PathBuf;
6
7use super::{Tool, ToolContext, ToolResult};
8use crate::error::ToolError;
9
/// Stateless unit struct that implements [`Tool`] under the name `"FileRead"`.
///
/// The implementation reads text files with optional line ranges and has
/// dedicated handling for PDF, Jupyter notebook, image and binary extensions.
pub struct FileReadTool;
11
12#[async_trait]
13impl Tool for FileReadTool {
14    fn name(&self) -> &'static str {
15        "FileRead"
16    }
17
18    fn description(&self) -> &'static str {
19        "Reads a file from the filesystem. Returns contents with line numbers."
20    }
21
22    fn input_schema(&self) -> serde_json::Value {
23        json!({
24            "type": "object",
25            "required": ["file_path"],
26            "properties": {
27                "file_path": {
28                    "type": "string",
29                    "description": "Absolute path to the file"
30                },
31                "offset": {
32                    "type": "integer",
33                    "description": "Line number to start reading from (1-based)"
34                },
35                "limit": {
36                    "type": "integer",
37                    "description": "Number of lines to read"
38                },
39                "pages": {
40                    "type": "string",
41                    "description": "Page range for PDF files (e.g., \"1-5\", \"3\", \"10-20\"). Max 20 pages per request."
42                }
43            }
44        })
45    }
46
47    fn is_read_only(&self) -> bool {
48        true
49    }
50
51    fn is_concurrency_safe(&self) -> bool {
52        true
53    }
54
55    fn get_path(&self, input: &serde_json::Value) -> Option<PathBuf> {
56        input
57            .get("file_path")
58            .and_then(|v| v.as_str())
59            .map(PathBuf::from)
60    }
61
62    async fn call(
63        &self,
64        input: serde_json::Value,
65        _ctx: &ToolContext,
66    ) -> Result<ToolResult, ToolError> {
67        let file_path = input
68            .get("file_path")
69            .and_then(|v| v.as_str())
70            .ok_or_else(|| ToolError::InvalidInput("'file_path' is required".into()))?;
71
72        let offset = input.get("offset").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
73
74        let limit = input.get("limit").and_then(|v| v.as_u64()).unwrap_or(2000) as usize;
75
76        let path = std::path::Path::new(file_path);
77
78        // Block device and virtual filesystem paths.
79        const BLOCKED_PREFIXES: &[&str] = &["/dev/", "/proc/", "/sys/"];
80        if BLOCKED_PREFIXES
81            .iter()
82            .any(|prefix| file_path.starts_with(prefix))
83        {
84            return Err(ToolError::InvalidInput(format!(
85                "Cannot read virtual/device file: {file_path}"
86            )));
87        }
88
89        let pages = input
90            .get("pages")
91            .and_then(|v| v.as_str())
92            .map(|s| s.to_string());
93
94        // Handle binary/special file types.
95        match path.extension().and_then(|e| e.to_str()) {
96            Some("pdf") => {
97                return read_pdf(file_path, pages.as_deref()).await;
98            }
99            Some("ipynb") => {
100                return read_notebook(file_path).await;
101            }
102            Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "svg" | "ico" | "bmp") => {
103                let meta = tokio::fs::metadata(file_path).await.ok();
104                let size = meta.map(|m| m.len()).unwrap_or(0);
105
106                // For small images (< 5MB), embed as base64 for vision models.
107                if size < 5 * 1024 * 1024
108                    && crate::llm::message::image_block_from_file(path).is_ok()
109                {
110                    return Ok(ToolResult::success(format!(
111                        "(Image: {file_path}, {size} bytes — loaded for vision analysis)"
112                    )));
113                }
114
115                return Ok(ToolResult::success(format!(
116                    "(Image file: {file_path}, {size} bytes — \
117                     too large for inline embedding)"
118                )));
119            }
120            Some("wasm" | "exe" | "dll" | "so" | "dylib" | "o" | "a") => {
121                let meta = tokio::fs::metadata(file_path).await.ok();
122                let size = meta.map(|m| m.len()).unwrap_or(0);
123                return Ok(ToolResult::success(format!(
124                    "(Binary file: {file_path}, {size} bytes)"
125                )));
126            }
127            _ => {}
128        }
129
130        // Try to read as text; if it fails (binary content), report the file type.
131        let content = match tokio::fs::read_to_string(file_path).await {
132            Ok(c) => c,
133            Err(e) => {
134                // May be binary — try to read size at least.
135                if let Ok(meta) = tokio::fs::metadata(file_path).await {
136                    return Ok(ToolResult::success(format!(
137                        "(Binary or unreadable file: {file_path}, {} bytes: {e})",
138                        meta.len()
139                    )));
140                }
141                return Err(ToolError::ExecutionFailed(format!(
142                    "Failed to read {file_path}: {e}"
143                )));
144            }
145        };
146
147        // Apply line range and add line numbers (1-indexed).
148        let lines: Vec<&str> = content.lines().collect();
149        let start = (offset.saturating_sub(1)).min(lines.len());
150        let end = (start + limit).min(lines.len());
151
152        let mut output = String::new();
153        for (i, line) in lines[start..end].iter().enumerate() {
154            let line_num = start + i + 1;
155            output.push_str(&format!("{line_num}\t{line}\n"));
156        }
157
158        if output.is_empty() {
159            output = "(empty file)".to_string();
160        }
161
162        Ok(ToolResult::success(output))
163    }
164}
165
166/// Extract text from a PDF file using pdftotext (poppler-utils).
167async fn read_pdf(file_path: &str, pages: Option<&str>) -> Result<ToolResult, ToolError> {
168    // Build pdftotext command with optional page range.
169    let mut cmd = tokio::process::Command::new("pdftotext");
170
171    if let Some(page_spec) = pages {
172        // Parse page spec like "1-5", "3", "10-20".
173        let (first, last) = if let Some((start, end)) = page_spec.split_once('-') {
174            (start.trim().to_string(), end.trim().to_string())
175        } else {
176            let page = page_spec.trim().to_string();
177            (page.clone(), page)
178        };
179        cmd.arg("-f").arg(&first).arg("-l").arg(&last);
180    }
181
182    cmd.arg(file_path).arg("-");
183    let output = cmd.output().await;
184
185    match output {
186        Ok(out) if out.status.success() => {
187            let text = String::from_utf8_lossy(&out.stdout).to_string();
188            if text.trim().is_empty() {
189                Ok(ToolResult::success(format!(
190                    "(PDF file: {file_path} — extracted but contains no text. \
191                     May be image-based; OCR would be needed.)"
192                )))
193            } else {
194                // Truncate very large PDFs.
195                let display = if text.len() > 100_000 {
196                    format!(
197                        "{}\n\n(PDF truncated: {} chars total)",
198                        &text[..100_000],
199                        text.len()
200                    )
201                } else {
202                    text
203                };
204                Ok(ToolResult::success(display))
205            }
206        }
207        _ => {
208            // pdftotext not available — report file info.
209            let meta = tokio::fs::metadata(file_path).await.ok();
210            let size = meta.map(|m| m.len()).unwrap_or(0);
211            Ok(ToolResult::success(format!(
212                "(PDF file: {file_path}, {size} bytes. \
213                 Install poppler-utils for text extraction: \
214                 apt install poppler-utils / brew install poppler)"
215            )))
216        }
217    }
218}
219
220/// Render a Jupyter notebook (.ipynb) as readable text.
221async fn read_notebook(file_path: &str) -> Result<ToolResult, ToolError> {
222    let content = tokio::fs::read_to_string(file_path)
223        .await
224        .map_err(|e| ToolError::ExecutionFailed(format!("Failed to read {file_path}: {e}")))?;
225
226    let notebook: serde_json::Value = serde_json::from_str(&content)
227        .map_err(|e| ToolError::ExecutionFailed(format!("Invalid notebook JSON: {e}")))?;
228
229    let cells = notebook
230        .get("cells")
231        .and_then(|v| v.as_array())
232        .ok_or_else(|| ToolError::ExecutionFailed("Notebook has no 'cells' array".into()))?;
233
234    let mut output = String::new();
235    for (i, cell) in cells.iter().enumerate() {
236        let cell_type = cell
237            .get("cell_type")
238            .and_then(|v| v.as_str())
239            .unwrap_or("unknown");
240
241        output.push_str(&format!("--- Cell {} ({}) ---\n", i + 1, cell_type));
242
243        // Source lines.
244        if let Some(source) = cell.get("source") {
245            let text = match source {
246                serde_json::Value::Array(lines) => lines
247                    .iter()
248                    .filter_map(|l| l.as_str())
249                    .collect::<Vec<_>>()
250                    .join(""),
251                serde_json::Value::String(s) => s.clone(),
252                _ => String::new(),
253            };
254            output.push_str(&text);
255            if !text.ends_with('\n') {
256                output.push('\n');
257            }
258        }
259
260        // Outputs (for code cells).
261        if cell_type == "code"
262            && let Some(outputs) = cell.get("outputs").and_then(|v| v.as_array())
263        {
264            for out in outputs {
265                if let Some(text) = out.get("text").and_then(|v| v.as_array()) {
266                    output.push_str("Output:\n");
267                    for line in text {
268                        if let Some(s) = line.as_str() {
269                            output.push_str(s);
270                        }
271                    }
272                }
273                if let Some(data) = out.get("data")
274                    && let Some(plain) = data.get("text/plain").and_then(|v| v.as_array())
275                {
276                    output.push_str("Output:\n");
277                    for line in plain {
278                        if let Some(s) = line.as_str() {
279                            output.push_str(s);
280                        }
281                    }
282                }
283            }
284        }
285
286        output.push('\n');
287    }
288
289    Ok(ToolResult::success(output))
290}