Skip to main content

imp_core/tools/
read.rs

1use std::path::Path;
2
3use async_trait::async_trait;
4use serde_json::json;
5
6use super::{suggest_similar_files, truncate_head, Tool, ToolContext, ToolOutput};
7use crate::error::Result;
8
/// Byte budget handed to `truncate_head` for the text returned by one read.
const MAX_BYTES: usize = 50_000;
/// Largest text file (5 MiB) that may be read without an explicit line range.
const MAX_TEXT_BYTES: u64 = 5 * 1024 * 1024;
/// Largest image file (10 MiB) that will be loaded and base64-encoded.
const MAX_IMAGE_BYTES: u64 = 10 * 1024 * 1024;

/// Lower-case file extensions that are returned as image blocks instead of text.
const IMAGE_EXTENSIONS: &[&str] = &["png", "jpg", "jpeg", "gif", "webp", "svg"];
14
15pub struct ReadTool;
16
17#[async_trait]
18impl Tool for ReadTool {
19    fn name(&self) -> &str {
20        "read"
21    }
22    fn label(&self) -> &str {
23        "Read File"
24    }
25    fn description(&self) -> &str {
26        "Read a file with stable line-oriented output. Supports start_line/end_line ranges, anchors, and images."
27    }
28    fn parameters(&self) -> serde_json::Value {
29        json!({
30            "type": "object",
31            "properties": {
32                "path": { "type": "string" },
33                "start_line": {
34                    "type": "integer",
35                    "minimum": 1,
36                    "description": "1-indexed first line to read."
37                },
38                "end_line": {
39                    "type": "integer",
40                    "minimum": 1,
41                    "description": "1-indexed inclusive last line to read."
42                },
43                "anchors": {
44                    "type": "boolean",
45                    "description": "When true, include opaque per-line anchors for stale-safe anchored edits. Anchors are session-local integrity markers, not security tokens."
46                }
47            },
48            "required": ["path"]
49        })
50    }
51    fn is_readonly(&self) -> bool {
52        true
53    }
54
55    async fn execute(
56        &self,
57        _call_id: &str,
58        params: serde_json::Value,
59        ctx: ToolContext,
60    ) -> Result<ToolOutput> {
61        let raw_path = params["path"]
62            .as_str()
63            .unwrap_or("")
64            .trim_start_matches('@');
65
66        if raw_path.is_empty() {
67            return Ok(ToolOutput::error("Missing required parameter: path"));
68        }
69
70        let path = super::resolve_path(&ctx.cwd, raw_path);
71        let range = parse_line_range(&params)?;
72
73        if !path.exists() {
74            let suggestions = suggest_similar_files(&ctx.cwd, raw_path);
75            let mut msg = format!("File not found: {}", path.display());
76            if !suggestions.is_empty() {
77                msg.push_str("\n\nDid you mean:");
78                for s in &suggestions {
79                    msg.push_str(&format!("\n  {s}"));
80                }
81            }
82            return Ok(ToolOutput::error(msg));
83        }
84
85        if path.is_dir() {
86            return Ok(ToolOutput::error(format!(
87                "Path is a directory, not a file: {}",
88                path.display()
89            )));
90        }
91
92        // Check for image files
93        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
94            if IMAGE_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
95                return read_image(&path).await;
96            }
97        }
98
99        let metadata = tokio::fs::metadata(&path).await?;
100        if metadata.len() > MAX_TEXT_BYTES && range.is_none() {
101            return Ok(ToolOutput::error(format!(
102                "File is too large to read without a line range: {} ({} bytes). Use start_line/end_line to read a smaller range.",
103                path.display(),
104                metadata.len()
105            )));
106        }
107
108        // Read raw bytes and check for binary
109        let bytes = tokio::fs::read(&path).await?;
110        let check_len = bytes.len().min(8192);
111        if bytes[..check_len].contains(&0) {
112            return Ok(ToolOutput::error(format!(
113                "Binary file detected: {}. Cannot display binary content.",
114                path.display()
115            )));
116        }
117
118        let content = String::from_utf8_lossy(&bytes).into_owned();
119
120        // Apply line range.
121        let include_anchors = params["anchors"].as_bool().unwrap_or(false);
122
123        let sliced = apply_line_range(&content, range);
124        let start_line = range.map(|r| r.start).unwrap_or(1);
125        let requested_end_line = range.and_then(|r| r.end);
126        let total_file_lines = content.lines().count();
127        let line_ending = detect_line_ending(&content);
128
129        // Apply truncation
130        let max_lines = ctx.read_max_lines;
131        let result = if max_lines == 0 {
132            super::TruncationResult {
133                content: sliced.clone(),
134                truncated: false,
135                output_lines: sliced.lines().count(),
136                total_lines: sliced.lines().count(),
137                output_bytes: sliced.len(),
138                total_bytes: sliced.len(),
139                temp_file: None,
140            }
141        } else {
142            truncate_head(&sliced, max_lines, MAX_BYTES)
143        };
144
145        let mut output = result.content.clone();
146        let mut anchors_json = serde_json::Value::Null;
147        if include_anchors {
148            let visible_lines = result.content.lines().collect::<Vec<_>>();
149            let anchors = ctx.anchor_store.record_lines(
150                &path,
151                super::stable_hash(&content),
152                start_line,
153                &visible_lines,
154            );
155            anchors_json = json!(anchors
156                .iter()
157                .map(|anchor| json!({
158                    "line": anchor.line,
159                    "anchor": anchor.id,
160                    "content_hash": format!("{:016x}", anchor.content_hash),
161                }))
162                .collect::<Vec<_>>());
163            if !anchors.is_empty() {
164                output.push_str("\n\nAnchors:");
165                for anchor in &anchors {
166                    output.push_str(&format!("\n{:>6} {}", anchor.line, anchor.id));
167                }
168            }
169        }
170        if result.truncated {
171            let note = format!(
172                "\n[…truncated: showing {}/{} lines, {}/{} bytes",
173                result.output_lines, result.total_lines, result.output_bytes, result.total_bytes,
174            );
175            if let Some(ref tf) = result.temp_file {
176                output.push_str(&format!("{note}, full output: {}]", tf.display()));
177            } else {
178                output.push_str(&format!("{note}]"));
179            }
180        }
181
182        // Record that this file was read (for staleness and unread-edit detection).
183        if let Ok(mut tracker) = ctx.file_tracker.lock() {
184            tracker.record_read(&path);
185        }
186
187        Ok(ToolOutput {
188            content: vec![imp_llm::ContentBlock::Text { text: output }],
189            details: json!({
190                "action": "read",
191                "path": path.display().to_string(),
192                "start_line": start_line,
193                "end_line": if result.output_lines == 0 { start_line.saturating_sub(1) } else { start_line + result.output_lines - 1 },
194                "requested_end_line": requested_end_line,
195                "truncated": result.truncated,
196                "lines": result.output_lines,
197                "total_lines": total_file_lines,
198                "range_total_lines": result.total_lines,
199                "bytes": result.output_bytes,
200                "total_bytes": metadata.len(),
201                "range_total_bytes": result.total_bytes,
202                "temp_file": result.temp_file.as_ref().map(|path| path.display().to_string()),
203                "encoding": "utf-8-lossy",
204                "line_ending": line_ending,
205                "anchors": anchors_json,
206                "anchor_count": anchors_json.as_array().map(|anchors| anchors.len()).unwrap_or(0),
207            }),
208            is_error: false,
209        })
210    }
211}
212
/// A 1-indexed, inclusive line selection within a file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct LineRange {
    /// First line to include (1-indexed).
    start: usize,
    /// Last line to include (1-indexed, inclusive); `None` means "through the end of the file".
    end: Option<usize>,
}
218
219fn parse_line_range(params: &serde_json::Value) -> Result<Option<LineRange>> {
220    let start_line = parse_positive_usize(params.get("start_line"), "start_line")?;
221    let end_line = parse_positive_usize(params.get("end_line"), "end_line")?;
222
223    if let (Some(start), Some(end)) = (start_line, end_line) {
224        if start > end {
225            return Err(crate::error::Error::Tool(
226                "start_line must be <= end_line".to_string(),
227            ));
228        }
229    }
230
231    Ok(match (start_line, end_line) {
232        (None, None) => None,
233        (Some(start), end) => Some(LineRange { start, end }),
234        (None, Some(end)) => Some(LineRange {
235            start: 1,
236            end: Some(end),
237        }),
238    })
239}
240
241fn parse_positive_usize(value: Option<&serde_json::Value>, field: &str) -> Result<Option<usize>> {
242    let Some(value) = value else {
243        return Ok(None);
244    };
245    if value.is_null() {
246        return Ok(None);
247    }
248    let Some(number) = value.as_u64() else {
249        return Err(crate::error::Error::Tool(format!(
250            "{field} must be a positive integer"
251        )));
252    };
253    if number == 0 {
254        return Err(crate::error::Error::Tool(format!("{field} must be >= 1")));
255    }
256    Ok(Some(number as usize))
257}
258
259fn apply_line_range(content: &str, range: Option<LineRange>) -> String {
260    let lines: Vec<&str> = content.lines().collect();
261    let start = range
262        .map(|range| range.start.saturating_sub(1))
263        .unwrap_or(0);
264    if start >= lines.len() {
265        return String::new();
266    }
267    let end = range
268        .and_then(|range| range.end)
269        .map(|end| end.min(lines.len()))
270        .unwrap_or(lines.len());
271
272    lines[start..end].join("\n")
273}
274
/// Classifies the line terminator used in `content` as `"crlf"`, `"cr"` or `"lf"`.
///
/// Any `\r\n` pair makes the result `"crlf"`; otherwise any `\r` makes it `"cr"`. Content
/// without carriage returns (including empty content) is reported as `"lf"`.
fn detect_line_ending(content: &str) -> &'static str {
    match content {
        text if text.contains("\r\n") => "crlf",
        text if text.contains('\r') => "cr",
        _ => "lf",
    }
}
284
285async fn read_image(path: &Path) -> Result<ToolOutput> {
286    let metadata = tokio::fs::metadata(path).await?;
287    if metadata.len() > MAX_IMAGE_BYTES {
288        return Ok(ToolOutput::error(format!(
289            "Image is too large to read: {} ({} bytes, max {} bytes)",
290            path.display(),
291            metadata.len(),
292            MAX_IMAGE_BYTES
293        )));
294    }
295
296    let bytes = tokio::fs::read(path).await?;
297    let ext = path
298        .extension()
299        .and_then(|e| e.to_str())
300        .unwrap_or("png")
301        .to_lowercase();
302
303    let media_type = match ext.as_str() {
304        "png" => "image/png",
305        "jpg" | "jpeg" => "image/jpeg",
306        "gif" => "image/gif",
307        "webp" => "image/webp",
308        "svg" => "image/svg+xml",
309        _ => "application/octet-stream",
310    };
311
312    use std::io::Write;
313    let mut encoded = Vec::new();
314    {
315        let mut encoder = base64_encoder(&mut encoded);
316        encoder.write_all(&bytes)?;
317        encoder.finish()?;
318    }
319    let data = String::from_utf8(encoded).unwrap_or_default();
320
321    Ok(ToolOutput {
322        content: vec![imp_llm::ContentBlock::Image {
323            media_type: media_type.to_string(),
324            data,
325        }],
326        details: json!({
327            "action": "read",
328            "path": path.display().to_string(),
329            "media_type": media_type,
330            "bytes": bytes.len(),
331            "total_bytes": metadata.len(),
332        }),
333        is_error: false,
334    })
335}
336
/// Creates a small streaming base64 encoder that appends to `output`, so no extra dependency
/// is needed. It is only used for images. Call `finish` after the last write to emit the
/// trailing partial group and its `=` padding.
fn base64_encoder(output: &mut Vec<u8>) -> Base64Writer<'_> {
    Base64Writer {
        output,
        buffer: [0; 3],
        buffer_len: 0,
    }
}

/// Standard base64 alphabet, indexed by 6-bit value.
const BASE64_CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Incremental base64 encoder that buffers input until a full three-byte group is available.
struct Base64Writer<'a> {
    /// Destination for the encoded ASCII bytes.
    output: &'a mut Vec<u8>,
    /// Input bytes of the group currently being assembled.
    buffer: [u8; 3],
    /// Number of valid bytes in `buffer`.
    buffer_len: usize,
}

impl std::io::Write for Base64Writer<'_> {
    /// Consumes all of `buf`, encoding every completed three-byte group immediately.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        for byte in buf.iter().copied() {
            let slot = self.buffer_len;
            self.buffer[slot] = byte;
            self.buffer_len = slot + 1;
            if self.buffer_len == self.buffer.len() {
                self.encode_block();
            }
        }
        Ok(buf.len())
    }

    /// Nothing to flush: a partial group is only emitted by `finish`.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

impl Base64Writer<'_> {
    /// Encodes the full three-byte buffer as four characters and empties the buffer.
    fn encode_block(&mut self) {
        let [first, second, third] = self.buffer;
        let sextets = [
            first >> 2,
            (first & 0x03) << 4 | second >> 4,
            (second & 0x0f) << 2 | third >> 6,
            third & 0x3f,
        ];
        self.output.extend(
            sextets
                .iter()
                .map(|&sextet| BASE64_CHARS[usize::from(sextet)]),
        );
        self.buffer_len = 0;
    }

    /// Emits the final one- or two-byte group with `=` padding, if any bytes are pending.
    fn finish(self) -> std::io::Result<()> {
        let symbol = |sextet: u8| BASE64_CHARS[usize::from(sextet)];
        // Only the pending bytes are inspected; stale bytes from earlier groups are ignored.
        match self.buffer[..self.buffer_len] {
            [only] => self.output.extend_from_slice(&[
                symbol(only >> 2),
                symbol((only & 0x03) << 4),
                b'=',
                b'=',
            ]),
            [first, second] => self.output.extend_from_slice(&[
                symbol(first >> 2),
                symbol((first & 0x03) << 4 | second >> 4),
                symbol((second & 0x0f) << 2),
                b'=',
            ]),
            _ => {}
        }
        Ok(())
    }
}
405
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::ToolContext;
    use std::sync::Arc;

    /// Builds a fully populated `ToolContext` rooted at `dir` with a 500-line read limit.
    fn test_ctx(dir: &Path) -> ToolContext {
        let (update_tx, _update_rx) = tokio::sync::mpsc::channel(16);
        let (command_tx, _command_rx) = tokio::sync::mpsc::channel(16);
        ToolContext {
            cwd: dir.to_path_buf(),
            cancelled: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            update_tx,
            command_tx,
            ui: Arc::new(crate::ui::NullInterface),
            file_cache: Arc::new(crate::tools::FileCache::new()),
            checkpoint_state: Arc::new(crate::tools::CheckpointState::new()),
            file_tracker: Arc::new(std::sync::Mutex::new(crate::tools::FileTracker::new())),
            anchor_store: Arc::new(crate::tools::AnchorStore::new()),
            lua_tool_loader: None,
            mode: crate::config::AgentMode::Full,
            read_max_lines: 500,
            turn_mana_review: Arc::new(std::sync::Mutex::new(
                crate::mana_review::TurnManaReviewAccumulator::default(),
            )),
            config: Arc::new(crate::config::Config::default()),
            run_policy: Default::default(),
            supporting_provenance: Vec::new(),
        }
    }

    /// Runs the read tool with a default context rooted at `dir`; panics if the call errors.
    async fn run_read(dir: &Path, call_id: &str, params: serde_json::Value) -> ToolOutput {
        ReadTool
            .execute(call_id, params, test_ctx(dir))
            .await
            .unwrap()
    }

    /// Produces `count` lines of the form `line {i}`, each terminated by a newline.
    fn numbered_lines(count: usize) -> String {
        (0..count).map(|i| format!("line {i}\n")).collect()
    }

    /// Joins every text block of `output` with newlines, ignoring non-text blocks.
    fn extract_text(output: &ToolOutput) -> String {
        let mut texts = Vec::new();
        for block in &output.content {
            if let imp_llm::ContentBlock::Text { text } = block {
                texts.push(text.as_str());
            }
        }
        texts.join("\n")
    }

    #[tokio::test]
    async fn read_known_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("hello.txt"), "line1\nline2\nline3\n").unwrap();

        let result = run_read(dir.path(), "c1", json!({"path": "hello.txt"})).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("line1"));
        assert!(text.contains("line3"));
    }

    #[tokio::test]
    async fn read_start_end_lines() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("data.txt"), "a\nb\nc\nd\ne\n").unwrap();

        let params = json!({"path": "data.txt", "start_line": 2, "end_line": 3});
        let result = run_read(dir.path(), "c2", params).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("b"));
        assert!(text.contains("c"));
        assert!(!text.contains("a"));
        assert!(!text.contains("d"));
        assert_eq!(result.details["start_line"], 2);
        assert_eq!(result.details["end_line"], 3);
    }

    #[tokio::test]
    async fn read_file_not_found_suggestions() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("hello.txt"), "hi").unwrap();

        let result = run_read(dir.path(), "c3", json!({"path": "helo.txt"})).await;

        assert!(result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("File not found"));
        assert!(text.contains("hello.txt"));
    }

    #[tokio::test]
    async fn read_binary_file_rejected() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("data.bin"), b"\x00\x01\x02\x03").unwrap();

        let result = run_read(dir.path(), "c4", json!({"path": "data.bin"})).await;

        assert!(result.is_error);
        assert!(extract_text(&result).contains("Binary file"));
    }

    #[tokio::test]
    async fn read_strips_at_prefix() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("test.txt"), "content").unwrap();

        let result = run_read(dir.path(), "c5", json!({"path": "@test.txt"})).await;

        assert!(!result.is_error);
        assert!(extract_text(&result).contains("content"));
    }

    #[tokio::test]
    async fn read_empty_file() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("empty.txt"), "").unwrap();

        let result = run_read(dir.path(), "c6", json!({"path": "empty.txt"})).await;

        assert!(!result.is_error);
    }

    #[tokio::test]
    async fn read_large_file_truncated() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("big.txt"), numbered_lines(3000)).unwrap();

        let result = run_read(dir.path(), "c7", json!({"path": "big.txt"})).await;

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("truncated"));
        // The head of the file must survive truncation.
        assert!(text.contains("line 0"));
        // The structured details must report the truncation as well.
        assert_eq!(result.details["truncated"], true);
    }

    #[tokio::test]
    async fn read_respects_configured_line_limit() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("limited.txt"), numbered_lines(800)).unwrap();

        let mut ctx = test_ctx(dir.path());
        ctx.read_max_lines = 500;
        let result = ReadTool
            .execute("c7b", json!({"path": "limited.txt"}), ctx)
            .await
            .unwrap();

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("truncated"));
        assert!(text.contains("showing 500/800 lines"));
        assert_eq!(result.details["lines"], 500);
        assert_eq!(result.details["total_lines"], 800);
    }

    #[tokio::test]
    async fn read_zero_line_limit_disables_line_truncation() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("unlimited.txt"), numbered_lines(800)).unwrap();

        let mut ctx = test_ctx(dir.path());
        ctx.read_max_lines = 0;
        let result = ReadTool
            .execute("c7c", json!({"path": "unlimited.txt"}), ctx)
            .await
            .unwrap();

        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(!text.contains("truncated"));
        assert!(text.contains("line 799"));
        assert_eq!(result.details["truncated"], false);
        assert_eq!(result.details["lines"], 800);
        assert_eq!(result.details["total_lines"], 800);
        let reported_path = result.details["path"].as_str().unwrap();
        assert!(reported_path.contains("unlimited.txt"));
    }

    #[tokio::test]
    async fn read_directory_error() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::create_dir(dir.path().join("subdir")).unwrap();

        let outcome = ReadTool
            .execute("c8", json!({"path": "subdir"}), test_ctx(dir.path()))
            .await;

        // Reading a directory should either error or produce an error output.
        match outcome {
            Ok(output) => assert!(output.is_error),
            Err(_) => {}
        }
    }

    #[tokio::test]
    async fn read_can_emit_line_anchors() {
        let dir = tempfile::tempdir().unwrap();
        let path = dir.path().join("anchored.txt");
        std::fs::write(&path, "alpha\nbeta\ngamma\n").unwrap();

        let ctx = test_ctx(dir.path());
        let params =
            json!({"path": "anchored.txt", "start_line": 2, "end_line": 2, "anchors": true});
        let result = ReadTool
            .execute("c-anchors", params, ctx.clone())
            .await
            .unwrap();

        assert!(!result.is_error);
        assert!(extract_text(&result).contains("Anchors:"));

        let anchors = result.details["anchors"].as_array().unwrap();
        assert_eq!(anchors.len(), 1);
        assert_eq!(anchors[0]["line"], 2);

        // The anchor handed back to the caller must be resolvable through the shared store.
        let anchor = anchors[0]["anchor"].as_str().unwrap();
        assert!(ctx.anchor_store.get(&path, anchor).is_some());
    }
}