Skip to main content

imp_core/tools/
read.rs

1use std::path::Path;
2
3use async_trait::async_trait;
4use serde_json::json;
5
6use super::{suggest_similar_files, truncate_head, Tool, ToolContext, ToolOutput};
7use crate::error::Result;
8
/// Byte budget handed to `truncate_head` when line truncation is enabled.
const MAX_BYTES: usize = 50_000;

/// Lower-case file extensions that are routed to `read_image` instead of being read as text.
const IMAGE_EXTENSIONS: &[&str] = &["png", "jpg", "jpeg", "gif", "webp", "svg"];

/// The `read` tool: a stateless unit struct whose behavior lives in its `Tool` implementation.
pub struct ReadTool;
14
15#[async_trait]
16impl Tool for ReadTool {
17    fn name(&self) -> &str {
18        "read"
19    }
20    fn label(&self) -> &str {
21        "Read File"
22    }
23    fn description(&self) -> &str {
24        "Read a file with stable line-oriented output. Supports offset/limit and images."
25    }
26    fn parameters(&self) -> serde_json::Value {
27        json!({
28            "type": "object",
29            "properties": {
30                "path": { "type": "string" },
31                "offset": { "type": "number" },
32                "limit": { "type": "number" },
33                "anchors": {
34                    "type": "boolean",
35                    "description": "When true, include opaque per-line anchors for stale-safe anchored edits. Anchors are session-local integrity markers, not security tokens."
36                }
37            },
38            "required": ["path"]
39        })
40    }
41    fn is_readonly(&self) -> bool {
42        true
43    }
44
45    async fn execute(
46        &self,
47        _call_id: &str,
48        params: serde_json::Value,
49        ctx: ToolContext,
50    ) -> Result<ToolOutput> {
51        let raw_path = params["path"]
52            .as_str()
53            .unwrap_or("")
54            .trim_start_matches('@');
55
56        if raw_path.is_empty() {
57            return Ok(ToolOutput::error("Missing required parameter: path"));
58        }
59
60        let path = super::resolve_path(&ctx.cwd, raw_path);
61
62        if !path.exists() {
63            let suggestions = suggest_similar_files(&ctx.cwd, raw_path);
64            let mut msg = format!("File not found: {}", path.display());
65            if !suggestions.is_empty() {
66                msg.push_str("\n\nDid you mean:");
67                for s in &suggestions {
68                    msg.push_str(&format!("\n  {s}"));
69                }
70            }
71            return Ok(ToolOutput::error(msg));
72        }
73
74        // Check for image files
75        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
76            if IMAGE_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
77                return read_image(&path).await;
78            }
79        }
80
81        // Read raw bytes and check for binary
82        let bytes = tokio::fs::read(&path).await?;
83        let check_len = bytes.len().min(8192);
84        if bytes[..check_len].contains(&0) {
85            return Ok(ToolOutput::error(format!(
86                "Binary file detected: {}. Cannot display binary content.",
87                path.display()
88            )));
89        }
90
91        let content = String::from_utf8_lossy(&bytes).into_owned();
92
93        // Apply offset/limit
94        let offset = params["offset"].as_u64().map(|v| v as usize);
95        let limit = params["limit"].as_u64().map(|v| v as usize);
96        let include_anchors = params["anchors"].as_bool().unwrap_or(false);
97
98        let sliced = apply_offset_limit(&content, offset, limit);
99        let start_line = offset.unwrap_or(1);
100
101        // Apply truncation
102        let max_lines = ctx.read_max_lines;
103        let result = if max_lines == 0 {
104            super::TruncationResult {
105                content: sliced.clone(),
106                truncated: false,
107                output_lines: sliced.lines().count(),
108                total_lines: sliced.lines().count(),
109                output_bytes: sliced.len(),
110                total_bytes: sliced.len(),
111                temp_file: None,
112            }
113        } else {
114            truncate_head(&sliced, max_lines, MAX_BYTES)
115        };
116
117        let mut output = result.content.clone();
118        let mut anchors_json = serde_json::Value::Null;
119        if include_anchors {
120            let visible_lines = result.content.lines().collect::<Vec<_>>();
121            let anchors = ctx.anchor_store.record_lines(
122                &path,
123                super::stable_hash(&content),
124                start_line,
125                &visible_lines,
126            );
127            anchors_json = json!(anchors
128                .iter()
129                .map(|anchor| json!({
130                    "line": anchor.line,
131                    "anchor": anchor.id,
132                    "content_hash": format!("{:016x}", anchor.content_hash),
133                }))
134                .collect::<Vec<_>>());
135            if !anchors.is_empty() {
136                output.push_str("\n\nAnchors:");
137                for anchor in &anchors {
138                    output.push_str(&format!("\n{:>6} {}", anchor.line, anchor.id));
139                }
140            }
141        }
142        if result.truncated {
143            let note = format!(
144                "\n[…truncated: showing {}/{} lines, {}/{} bytes",
145                result.output_lines, result.total_lines, result.output_bytes, result.total_bytes,
146            );
147            if let Some(ref tf) = result.temp_file {
148                output.push_str(&format!("{note}, full output: {}]", tf.display()));
149            } else {
150                output.push_str(&format!("{note}]"));
151            }
152        }
153
154        // Record that this file was read (for staleness and unread-edit detection).
155        if let Ok(mut tracker) = ctx.file_tracker.lock() {
156            tracker.record_read(&path);
157        }
158
159        Ok(ToolOutput {
160            content: vec![imp_llm::ContentBlock::Text { text: output }],
161            details: json!({
162                "path": path.display().to_string(),
163                "truncated": result.truncated,
164                "lines": result.output_lines,
165                "total_lines": result.total_lines,
166                "anchors": anchors_json,
167            }),
168            is_error: false,
169        })
170    }
171}
172
/// Returns the lines of `content` selected by a 1-indexed `offset` and a `limit`,
/// joined with `\n` (no trailing newline).
///
/// * `offset` of `None`, `Some(0)` or `Some(1)` all start at the first line.
/// * `limit` of `None` means "through the end of the content".
/// * An offset past the last line yields an empty string.
///
/// Implemented with `skip`/`take` so that arbitrarily large `offset`/`limit` values
/// cannot overflow or produce an out-of-range slice.
fn apply_offset_limit(content: &str, offset: Option<usize>, limit: Option<usize>) -> String {
    // Convert the 1-indexed offset into a number of lines to skip.
    let skip = offset.map_or(0, |o| o.saturating_sub(1));
    let take = limit.unwrap_or(usize::MAX);

    content
        .lines()
        .skip(skip)
        .take(take)
        .collect::<Vec<_>>()
        .join("\n")
}
187
188async fn read_image(path: &Path) -> Result<ToolOutput> {
189    let bytes = tokio::fs::read(path).await?;
190    let ext = path
191        .extension()
192        .and_then(|e| e.to_str())
193        .unwrap_or("png")
194        .to_lowercase();
195
196    let media_type = match ext.as_str() {
197        "png" => "image/png",
198        "jpg" | "jpeg" => "image/jpeg",
199        "gif" => "image/gif",
200        "webp" => "image/webp",
201        "svg" => "image/svg+xml",
202        _ => "application/octet-stream",
203    };
204
205    use std::io::Write;
206    let mut encoded = Vec::new();
207    {
208        let mut encoder = base64_encoder(&mut encoded);
209        encoder.write_all(&bytes)?;
210        encoder.finish()?;
211    }
212    let data = String::from_utf8(encoded).unwrap_or_default();
213
214    Ok(ToolOutput {
215        content: vec![imp_llm::ContentBlock::Image {
216            media_type: media_type.to_string(),
217            data,
218        }],
219        details: json!({
220            "path": path.display().to_string(),
221            "media_type": media_type,
222            "bytes": bytes.len(),
223        }),
224        is_error: false,
225    })
226}
227
/// Standard base64 alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`), indexed by 6-bit value.
const BASE64_CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Creates a streaming base64 encoder that appends its ASCII output to `output`.
///
/// Hand-rolled to avoid pulling in a dependency; it is only needed for images.
/// Call [`Base64Writer::finish`] after the last write to emit the padded tail.
fn base64_encoder(output: &mut Vec<u8>) -> Base64Writer<'_> {
    Base64Writer {
        buffer_len: 0,
        buffer: [0u8; 3],
        output,
    }
}

/// Incremental base64 encoder: collects input in 3-byte groups and emits 4 symbols per group.
struct Base64Writer<'a> {
    /// Destination for the encoded ASCII bytes.
    output: &'a mut Vec<u8>,
    /// Input bytes of the group currently being filled.
    buffer: [u8; 3],
    /// Number of valid bytes in `buffer` (0..=2 between calls).
    buffer_len: usize,
}

impl<'a> std::io::Write for Base64Writer<'a> {
    /// Consumes all of `buf`, encoding every completed 3-byte group immediately.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        buf.iter().for_each(|&incoming| {
            self.buffer[self.buffer_len] = incoming;
            self.buffer_len += 1;
            if self.buffer_len == self.buffer.len() {
                self.encode_block();
            }
        });
        Ok(buf.len())
    }

    /// No-op: a partial group can only be emitted by `finish`, which adds padding.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

impl<'a> Base64Writer<'a> {
    /// Encodes the full 3-byte group in `buffer` as four symbols and resets the group.
    fn encode_block(&mut self) {
        let [first, second, third] = self.buffer;
        // Pack the three bytes into one 24-bit value, then peel off 6 bits at a time.
        let group = (u32::from(first) << 16) | (u32::from(second) << 8) | u32::from(third);
        let symbols =
            [18u32, 12, 6, 0].map(|shift| BASE64_CHARS[((group >> shift) & 0x3f) as usize]);
        self.output.extend_from_slice(&symbols);
        self.buffer_len = 0;
    }

    /// Emits the final partial group (one or two leftover bytes) with `=` padding.
    ///
    /// Writes nothing when the input length was a multiple of three.
    fn finish(self) -> std::io::Result<()> {
        let symbol = |index: u8| BASE64_CHARS[index as usize];
        let [first, second, _] = self.buffer;

        if self.buffer_len == 1 {
            self.output.extend_from_slice(&[
                symbol(first >> 2),
                symbol((first & 0x03) << 4),
                b'=',
                b'=',
            ]);
        } else if self.buffer_len == 2 {
            self.output.extend_from_slice(&[
                symbol(first >> 2),
                symbol((first & 0x03) << 4 | second >> 4),
                symbol((second & 0x0f) << 2),
                b'=',
            ]);
        }
        Ok(())
    }
}
296
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::ToolContext;
    use std::sync::Arc;

    /// Builds a `ToolContext` rooted at `dir` with fresh stores and a 500-line read limit.
    fn test_ctx(dir: &Path) -> ToolContext {
        let (update_tx, _update_rx) = tokio::sync::mpsc::channel(16);
        let (command_tx, _command_rx) = tokio::sync::mpsc::channel(16);
        ToolContext {
            cwd: dir.to_path_buf(),
            cancelled: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            update_tx,
            command_tx,
            ui: Arc::new(crate::ui::NullInterface),
            file_cache: Arc::new(crate::tools::FileCache::new()),
            checkpoint_state: Arc::new(crate::tools::CheckpointState::new()),
            file_tracker: Arc::new(std::sync::Mutex::new(crate::tools::FileTracker::new())),
            anchor_store: Arc::new(crate::tools::AnchorStore::new()),
            lua_tool_loader: None,
            mode: crate::config::AgentMode::Full,
            read_max_lines: 500,
            turn_mana_review: Arc::new(std::sync::Mutex::new(
                crate::mana_review::TurnManaReviewAccumulator::default(),
            )),
            config: Arc::new(crate::config::Config::default()),
        }
    }

    /// Runs `ReadTool::execute` with the given arguments and unwraps the result.
    async fn run_read(call_id: &str, params: serde_json::Value, ctx: ToolContext) -> ToolOutput {
        let tool = ReadTool;
        tool.execute(call_id, params, ctx).await.unwrap()
    }

    /// Produces `count` lines of the form `line {i}\n`, numbered from 0.
    fn numbered_lines(count: usize) -> String {
        (0..count).map(|i| format!("line {i}\n")).collect()
    }

    /// Concatenates every text block of `output`, separated by newlines.
    fn extract_text(output: &ToolOutput) -> String {
        let mut parts = Vec::new();
        for block in &output.content {
            if let imp_llm::ContentBlock::Text { text } = block {
                parts.push(text.as_str());
            }
        }
        parts.join("\n")
    }

    #[tokio::test]
    async fn read_known_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("hello.txt"), "line1\nline2\nline3\n").unwrap();

        let result = run_read("c1", json!({"path": "hello.txt"}), test_ctx(dir.path())).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("line1"));
        assert!(text.contains("line3"));
    }

    #[tokio::test]
    async fn read_offset_limit() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("data.txt"), "a\nb\nc\nd\ne\n").unwrap();

        let params = json!({"path": "data.txt", "offset": 2, "limit": 2});
        let result = run_read("c2", params, test_ctx(dir.path())).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("b"));
        assert!(text.contains("c"));
        assert!(!text.contains("a"));
        assert!(!text.contains("d"));
    }

    #[tokio::test]
    async fn read_file_not_found_suggestions() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("hello.txt"), "hi").unwrap();

        let result = run_read("c3", json!({"path": "helo.txt"}), test_ctx(dir.path())).await;

        assert!(result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("File not found"));
        assert!(text.contains("hello.txt"));
    }

    #[tokio::test]
    async fn read_binary_file_rejected() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("data.bin"), b"\x00\x01\x02\x03").unwrap();

        let result = run_read("c4", json!({"path": "data.bin"}), test_ctx(dir.path())).await;

        assert!(result.is_error);
        assert!(extract_text(&result).contains("Binary file"));
    }

    #[tokio::test]
    async fn read_strips_at_prefix() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("test.txt"), "content").unwrap();

        let result = run_read("c5", json!({"path": "@test.txt"}), test_ctx(dir.path())).await;

        assert!(!result.is_error);
        assert!(extract_text(&result).contains("content"));
    }

    #[tokio::test]
    async fn read_empty_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("empty.txt"), "").unwrap();

        let result = run_read("c6", json!({"path": "empty.txt"}), test_ctx(dir.path())).await;

        assert!(!result.is_error);
    }

    #[tokio::test]
    async fn read_large_file_truncated() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("big.txt"), numbered_lines(3000)).unwrap();

        let result = run_read("c7", json!({"path": "big.txt"}), test_ctx(dir.path())).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("truncated"));
        // The head of the file must be present.
        assert!(text.contains("line 0"));
        // The structured details must report the truncation as well.
        assert_eq!(result.details["truncated"], true);
    }

    #[tokio::test]
    async fn read_respects_configured_line_limit() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("limited.txt"), numbered_lines(800)).unwrap();

        let mut ctx = test_ctx(dir.path());
        ctx.read_max_lines = 500;
        let result = run_read("c7b", json!({"path": "limited.txt"}), ctx).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("truncated"));
        assert!(text.contains("showing 500/800 lines"));
        assert_eq!(result.details["lines"], 500);
        assert_eq!(result.details["total_lines"], 800);
    }

    #[tokio::test]
    async fn read_zero_line_limit_disables_line_truncation() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("unlimited.txt"), numbered_lines(800)).unwrap();

        let mut ctx = test_ctx(dir.path());
        ctx.read_max_lines = 0;
        let result = run_read("c7c", json!({"path": "unlimited.txt"}), ctx).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(!text.contains("truncated"));
        assert!(text.contains("line 799"));
        assert_eq!(result.details["truncated"], false);
        assert_eq!(result.details["lines"], 800);
        assert_eq!(result.details["total_lines"], 800);
        let reported_path = result.details["path"].as_str().unwrap();
        assert!(reported_path.contains("unlimited.txt"));
    }

    #[tokio::test]
    async fn read_directory_error() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::create_dir(dir.path().join("subdir")).unwrap();

        let tool = ReadTool;
        let outcome = tool
            .execute("c8", json!({"path": "subdir"}), test_ctx(dir.path()))
            .await;

        // Reading a directory may fail outright or come back as an error output;
        // both are acceptable, but a successful non-error output is not.
        match outcome {
            Ok(output) => assert!(output.is_error),
            Err(_) => {}
        }
    }

    #[tokio::test]
    async fn read_can_emit_line_anchors() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("anchored.txt");
        std::fs::write(&target, "alpha\nbeta\ngamma\n").unwrap();

        let ctx = test_ctx(dir.path());
        let params = json!({"path": "anchored.txt", "offset": 2, "limit": 1, "anchors": true});
        let result = run_read("c-anchors", params, ctx.clone()).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("Anchors:"));
        let anchors = result.details["anchors"].as_array().unwrap();
        assert_eq!(anchors.len(), 1);
        assert_eq!(anchors[0]["line"], 2);
        let anchor = anchors[0]["anchor"].as_str().unwrap();
        assert!(ctx.anchor_store.get(&target, anchor).is_some());
    }
}