agent_code_lib/tools/
file_read.rs1use async_trait::async_trait;
4use serde_json::json;
5use std::path::PathBuf;
6
7use super::{Tool, ToolContext, ToolResult};
8use crate::error::ToolError;
9
/// Filesystem read tool: returns text files as numbered lines, with
/// specialised handling (see the `Tool` impl) for PDFs, Jupyter notebooks,
/// images, and known binary formats.
pub struct FileReadTool;
11
12#[async_trait]
13impl Tool for FileReadTool {
14 fn name(&self) -> &'static str {
15 "FileRead"
16 }
17
18 fn description(&self) -> &'static str {
19 "Reads a file from the filesystem. Returns contents with line numbers."
20 }
21
22 fn input_schema(&self) -> serde_json::Value {
23 json!({
24 "type": "object",
25 "required": ["file_path"],
26 "properties": {
27 "file_path": {
28 "type": "string",
29 "description": "Absolute path to the file"
30 },
31 "offset": {
32 "type": "integer",
33 "description": "Line number to start reading from (1-based)"
34 },
35 "limit": {
36 "type": "integer",
37 "description": "Number of lines to read"
38 },
39 "pages": {
40 "type": "string",
41 "description": "Page range for PDF files (e.g., \"1-5\", \"3\", \"10-20\"). Max 20 pages per request."
42 }
43 }
44 })
45 }
46
47 fn is_read_only(&self) -> bool {
48 true
49 }
50
51 fn is_concurrency_safe(&self) -> bool {
52 true
53 }
54
55 fn get_path(&self, input: &serde_json::Value) -> Option<PathBuf> {
56 input
57 .get("file_path")
58 .and_then(|v| v.as_str())
59 .map(PathBuf::from)
60 }
61
62 async fn call(
63 &self,
64 input: serde_json::Value,
65 _ctx: &ToolContext,
66 ) -> Result<ToolResult, ToolError> {
67 let file_path = input
68 .get("file_path")
69 .and_then(|v| v.as_str())
70 .ok_or_else(|| ToolError::InvalidInput("'file_path' is required".into()))?;
71
72 let offset = input.get("offset").and_then(|v| v.as_u64()).unwrap_or(1) as usize;
73
74 let limit = input.get("limit").and_then(|v| v.as_u64()).unwrap_or(2000) as usize;
75
76 let path = std::path::Path::new(file_path);
77
78 const BLOCKED_PREFIXES: &[&str] = &["/dev/", "/proc/", "/sys/"];
80 if BLOCKED_PREFIXES
81 .iter()
82 .any(|prefix| file_path.starts_with(prefix))
83 {
84 return Err(ToolError::InvalidInput(format!(
85 "Cannot read virtual/device file: {file_path}"
86 )));
87 }
88
89 let pages = input
90 .get("pages")
91 .and_then(|v| v.as_str())
92 .map(|s| s.to_string());
93
94 match path.extension().and_then(|e| e.to_str()) {
96 Some("pdf") => {
97 return read_pdf(file_path, pages.as_deref()).await;
98 }
99 Some("ipynb") => {
100 return read_notebook(file_path).await;
101 }
102 Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "svg" | "ico" | "bmp") => {
103 let meta = tokio::fs::metadata(file_path).await.ok();
104 let size = meta.map(|m| m.len()).unwrap_or(0);
105
106 if size < 5 * 1024 * 1024
108 && crate::llm::message::image_block_from_file(path).is_ok()
109 {
110 return Ok(ToolResult::success(format!(
111 "(Image: {file_path}, {size} bytes — loaded for vision analysis)"
112 )));
113 }
114
115 return Ok(ToolResult::success(format!(
116 "(Image file: {file_path}, {size} bytes — \
117 too large for inline embedding)"
118 )));
119 }
120 Some("wasm" | "exe" | "dll" | "so" | "dylib" | "o" | "a") => {
121 let meta = tokio::fs::metadata(file_path).await.ok();
122 let size = meta.map(|m| m.len()).unwrap_or(0);
123 return Ok(ToolResult::success(format!(
124 "(Binary file: {file_path}, {size} bytes)"
125 )));
126 }
127 _ => {}
128 }
129
130 let content = match tokio::fs::read_to_string(file_path).await {
132 Ok(c) => c,
133 Err(e) => {
134 if let Ok(meta) = tokio::fs::metadata(file_path).await {
136 return Ok(ToolResult::success(format!(
137 "(Binary or unreadable file: {file_path}, {} bytes: {e})",
138 meta.len()
139 )));
140 }
141 return Err(ToolError::ExecutionFailed(format!(
142 "Failed to read {file_path}: {e}"
143 )));
144 }
145 };
146
147 let lines: Vec<&str> = content.lines().collect();
149 let start = (offset.saturating_sub(1)).min(lines.len());
150 let end = (start + limit).min(lines.len());
151
152 let mut output = String::new();
153 for (i, line) in lines[start..end].iter().enumerate() {
154 let line_num = start + i + 1;
155 output.push_str(&format!("{line_num}\t{line}\n"));
156 }
157
158 if output.is_empty() {
159 output = "(empty file)".to_string();
160 }
161
162 const MAX_TOKENS: usize = 100_000;
164 const BYTES_PER_TOKEN: usize = 4;
165 let max_bytes = MAX_TOKENS * BYTES_PER_TOKEN;
166 if output.len() > max_bytes {
167 output.truncate(max_bytes);
168 while !output.is_char_boundary(output.len()) {
170 output.pop();
171 }
172 output.push_str(&format!(
173 "\n\n(File content truncated: exceeded ~{MAX_TOKENS} token estimate. \
174 Use offset/limit to read specific sections.)"
175 ));
176 }
177
178 Ok(ToolResult::success(output))
179 }
180}
181
182async fn read_pdf(file_path: &str, pages: Option<&str>) -> Result<ToolResult, ToolError> {
184 let mut cmd = tokio::process::Command::new("pdftotext");
186
187 if let Some(page_spec) = pages {
188 let (first, last) = if let Some((start, end)) = page_spec.split_once('-') {
190 (start.trim().to_string(), end.trim().to_string())
191 } else {
192 let page = page_spec.trim().to_string();
193 (page.clone(), page)
194 };
195 cmd.arg("-f").arg(&first).arg("-l").arg(&last);
196 }
197
198 cmd.arg(file_path).arg("-");
199 let output = cmd.output().await;
200
201 match output {
202 Ok(out) if out.status.success() => {
203 let text = String::from_utf8_lossy(&out.stdout).to_string();
204 if text.trim().is_empty() {
205 Ok(ToolResult::success(format!(
206 "(PDF file: {file_path} — extracted but contains no text. \
207 May be image-based; OCR would be needed.)"
208 )))
209 } else {
210 let display = if text.len() > 100_000 {
212 format!(
213 "{}\n\n(PDF truncated: {} chars total)",
214 &text[..100_000],
215 text.len()
216 )
217 } else {
218 text
219 };
220 Ok(ToolResult::success(display))
221 }
222 }
223 _ => {
224 let meta = tokio::fs::metadata(file_path).await.ok();
226 let size = meta.map(|m| m.len()).unwrap_or(0);
227 Ok(ToolResult::success(format!(
228 "(PDF file: {file_path}, {size} bytes. \
229 Install poppler-utils for text extraction: \
230 apt install poppler-utils / brew install poppler)"
231 )))
232 }
233 }
234}
235
236async fn read_notebook(file_path: &str) -> Result<ToolResult, ToolError> {
238 let content = tokio::fs::read_to_string(file_path)
239 .await
240 .map_err(|e| ToolError::ExecutionFailed(format!("Failed to read {file_path}: {e}")))?;
241
242 let notebook: serde_json::Value = serde_json::from_str(&content)
243 .map_err(|e| ToolError::ExecutionFailed(format!("Invalid notebook JSON: {e}")))?;
244
245 let cells = notebook
246 .get("cells")
247 .and_then(|v| v.as_array())
248 .ok_or_else(|| ToolError::ExecutionFailed("Notebook has no 'cells' array".into()))?;
249
250 let mut output = String::new();
251 for (i, cell) in cells.iter().enumerate() {
252 let cell_type = cell
253 .get("cell_type")
254 .and_then(|v| v.as_str())
255 .unwrap_or("unknown");
256
257 output.push_str(&format!("--- Cell {} ({}) ---\n", i + 1, cell_type));
258
259 if let Some(source) = cell.get("source") {
261 let text = match source {
262 serde_json::Value::Array(lines) => lines
263 .iter()
264 .filter_map(|l| l.as_str())
265 .collect::<Vec<_>>()
266 .join(""),
267 serde_json::Value::String(s) => s.clone(),
268 _ => String::new(),
269 };
270 output.push_str(&text);
271 if !text.ends_with('\n') {
272 output.push('\n');
273 }
274 }
275
276 if cell_type == "code"
278 && let Some(outputs) = cell.get("outputs").and_then(|v| v.as_array())
279 {
280 for out in outputs {
281 if let Some(text) = out.get("text").and_then(|v| v.as_array()) {
282 output.push_str("Output:\n");
283 for line in text {
284 if let Some(s) = line.as_str() {
285 output.push_str(s);
286 }
287 }
288 }
289 if let Some(data) = out.get("data")
290 && let Some(plain) = data.get("text/plain").and_then(|v| v.as_array())
291 {
292 output.push_str("Output:\n");
293 for line in plain {
294 if let Some(s) = line.as_str() {
295 output.push_str(s);
296 }
297 }
298 }
299 }
300 }
301
302 output.push('\n');
303 }
304
305 Ok(ToolResult::success(output))
306}