Skip to main content

oxi_agent/tools/
read.rs

//! Read file tool.
//!
//! Reads file contents with support for:
//! - Text files with line numbers, offset/limit, and truncation
//! - Image files (jpg/png/gif/webp) returned as base64-encoded content blocks
//! - Binary file detection
//! - Snapshot tag emission for hashline editing
7use super::path_security::PathGuard;
8use super::truncate::{self, TruncationOptions};
9use super::{AgentTool, AgentToolResult, ProgressCallback, ToolContext, ToolError};
10use async_trait::async_trait;
11use base64::Engine;
12use oxi_ai::{ContentBlock, ImageContent, TextContent};
13use oxi_hashline::format::{compute_file_hash, format_hashline_header};
14use oxi_hashline::normalize::{normalize_to_lf, strip_bom};
15use oxi_hashline::snapshots::SnapshotStore;
16use serde_json::{Value, json};
17use std::path::{Path, PathBuf};
18use std::sync::{Arc, Mutex};
19use tokio::fs;
20use tokio::io::AsyncReadExt;
/// Upper bound on how many leading bytes of a file are sampled when deciding
/// whether the file is binary.
const BINARY_DETECT_BYTES: usize = 8 * 1024;
23
/// Lookup table of supported image types.
///
/// Each entry pairs a lowercase file extension (without the leading dot) with
/// the MIME type reported for files carrying that extension.
const IMAGE_EXTENSIONS: &[(&str, &str)] = &[
    ("jpg", "image/jpeg"),
    ("jpeg", "image/jpeg"),
    ("png", "image/png"),
    ("gif", "image/gif"),
    ("webp", "image/webp"),
];
32
/// Agent tool that reads a file and returns its contents.
///
/// Files whose extension is a supported image type are returned as
/// base64-encoded image content blocks; every other file is read as text and
/// rendered with line numbers.
pub struct ReadTool {
    /// Optional fixed root directory used for path validation. When `None`,
    /// the root is taken from the `ToolContext` passed to `execute`.
    root_dir: Option<PathBuf>,
    /// Slot holding the progress callback installed through `on_progress`;
    /// `execute` clones its current value before reading.
    progress_callback: Arc<Mutex<Option<ProgressCallback>>>,
}
38
39impl ReadTool {
40    /// Create with no explicit root (uses ToolContext.workspace_dir at runtime).
41    pub fn new() -> Self {
42        Self {
43            root_dir: None,
44            progress_callback: Arc::new(Mutex::new(None)),
45        }
46    }
47
48    /// Create with a specific working directory (overrides ToolContext).
49    pub fn with_cwd(cwd: PathBuf) -> Self {
50        Self {
51            root_dir: Some(cwd),
52            progress_callback: Arc::new(Mutex::new(None)),
53        }
54    }
55
56    /// Determine if a file extension corresponds to a supported image type.
57    /// Returns the MIME type if it's a supported image.
58    fn image_mime_type(path: &Path) -> Option<&'static str> {
59        let ext = path.extension()?.to_str()?.to_lowercase();
60        IMAGE_EXTENSIONS
61            .iter()
62            .find(|(e, _)| *e == ext)
63            .map(|(_, mime)| *mime)
64    }
65
66    /// Check if data appears to be binary by looking for null bytes in the first chunk.
67    fn is_binary(data: &[u8]) -> bool {
68        data.contains(&0)
69    }
70
71    /// Read an image file and return it as a base64-encoded content block.
72    async fn read_image(
73        path: &Path,
74        progress_cb: &Option<ProgressCallback>,
75    ) -> Result<AgentToolResult, ToolError> {
76        let display_path = path.display();
77
78        if let Some(cb) = progress_cb {
79            cb(format!("Reading image: {}", display_path));
80        }
81
82        let data = fs::read(path)
83            .await
84            .map_err(|e| format!("Cannot read image file: {}", e))?;
85
86        if let Some(cb) = progress_cb {
87            cb(format!("Read {} bytes, encoding as base64", data.len()));
88        }
89
90        let mime_type = Self::image_mime_type(path).unwrap_or("application/octet-stream");
91        let encoded = base64::engine::general_purpose::STANDARD.encode(&data);
92
93        // Build a text summary and an image content block
94        let summary = format!(
95            "Image file: {} ({} bytes, {})",
96            display_path,
97            data.len(),
98            mime_type
99        );
100
101        let image_block = ContentBlock::Image(ImageContent::new(encoded, mime_type));
102        let text_block = ContentBlock::Text(TextContent::new(summary.clone()));
103
104        Ok(AgentToolResult::success(summary).with_content_blocks(vec![text_block, image_block]))
105    }
106
107    /// Read a text file with optional offset/limit, line numbers, and truncation.
108    /// When `snapshot_store` is provided and the file is fully read without
109    /// offset, records a snapshot and emits a `[path#TAG]` header for hashline.
110    async fn read_text(
111        path: &Path,
112        offset: Option<usize>,
113        limit: Option<usize>,
114        progress_cb: &Option<ProgressCallback>,
115        snapshot_store: Option<(Arc<dyn SnapshotStore>, PathBuf)>,
116    ) -> Result<AgentToolResult, ToolError> {
117        let display_path = path.display();
118
119        // Check file metadata
120        let file_size = match fs::metadata(path).await {
121            Ok(meta) => meta.len(),
122            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
123                return Err(format!("File not found: {}", display_path));
124            }
125            Err(e) => {
126                return Err(format!("Cannot access file: {}", e));
127            }
128        };
129
130        if let Some(cb) = progress_cb {
131            cb(format!(
132                "Reading file: {} ({} bytes)",
133                display_path, file_size
134            ));
135        }
136
137        // Open and read file
138        let mut file = fs::File::open(path)
139            .await
140            .map_err(|e| format!("Cannot open file: {}", e))?;
141
142        // Read a chunk for binary detection
143        let mut detect_buf = vec![0u8; BINARY_DETECT_BYTES.min(file_size as usize)];
144        let n = file
145            .read(&mut detect_buf)
146            .await
147            .map_err(|e| format!("Cannot read file: {}", e))?;
148
149        if Self::is_binary(&detect_buf[..n]) {
150            return Ok(AgentToolResult::error(format!(
151                "File appears to be binary: {} ({} bytes). Cannot display as text.",
152                display_path, file_size
153            )));
154        }
155
156        // Now read the full content: what we already read + the rest
157        let mut content = String::from_utf8_lossy(&detect_buf[..n]).into_owned();
158        let mut buffer = vec![0u8; 8192];
159        loop {
160            let n = file
161                .read(&mut buffer)
162                .await
163                .map_err(|e| format!("Cannot read file: {}", e))?;
164            if n == 0 {
165                break;
166            }
167            content.push_str(&String::from_utf8_lossy(&buffer[..n]));
168        }
169
170        if let Some(cb) = progress_cb {
171            cb(format!("Completed reading {} bytes", content.len()));
172        }
173
174        // ── Snapshot recording for hashline ──
175        // Normalize content (LF, strip BOM) for hash computation. The hash must
176        // be derived from the full text even when only a subset of lines is shown
177        // (partial read via offset/limit), so that the tag always names the
178        // canonical file version the model is referencing.
179        let snap_data: Option<(Arc<dyn SnapshotStore>, PathBuf, String, String)> = snapshot_store
180            .map(|(store, canonical)| {
181                let normalized = normalize_to_lf(strip_bom(&content).text);
182                let hash = compute_file_hash(&normalized);
183                (store, canonical, hash, normalized)
184            });
185
186        // Split into lines for offset/limit/numbering
187        let all_lines: Vec<&str> = content.lines().collect();
188        let total_lines = all_lines.len();
189
190        // Apply offset (1-indexed) and limit
191        let start_idx = offset
192            .map(|o| if o == 0 { 0 } else { o - 1 }) // Convert 1-indexed to 0-indexed
193            .unwrap_or(0);
194
195        if start_idx >= total_lines && total_lines > 0 {
196            return Ok(AgentToolResult::error(format!(
197                "Offset {} exceeds file length ({} lines). Use offset=1 to {}.",
198                offset.unwrap_or(1),
199                total_lines,
200                total_lines
201            )));
202        }
203
204        let effective_limit = limit.unwrap_or(usize::MAX);
205        let end_idx = if effective_limit > total_lines - start_idx {
206            total_lines
207        } else {
208            start_idx + effective_limit
209        };
210        let selected_lines = &all_lines[start_idx..end_idx];
211        let selected_count = selected_lines.len();
212
213        // Apply truncation if no explicit limit was provided
214        let (output_lines, truncated) = if limit.is_none() {
215            let trunc_opts = TruncationOptions::default();
216            let max_lines = trunc_opts.max_lines.unwrap_or(truncate::DEFAULT_MAX_LINES);
217            let max_bytes = trunc_opts.max_bytes.unwrap_or(truncate::DEFAULT_MAX_BYTES);
218
219            // Count bytes as we add lines
220            let mut byte_count: usize = 0;
221            let mut line_count: usize = 0;
222            for line in selected_lines {
223                // line number prefix + content + newline
224                let prefix_len = format!("{}", start_idx + line_count + 1).len() + 2; // "  " separator
225                byte_count += prefix_len + line.len() + 1;
226                if line_count >= max_lines || byte_count > max_bytes {
227                    break;
228                }
229                line_count += 1;
230            }
231
232            if line_count < selected_count {
233                (line_count, true)
234            } else {
235                (selected_count, false)
236            }
237        } else {
238            (selected_count, false)
239        };
240
241        // Build numbered output
242        let mut output = String::new();
243        for (i, line) in selected_lines.iter().enumerate().take(output_lines) {
244            let line_num = start_idx + i + 1; // 1-indexed
245            output.push_str(&format!("{:>6}\t{}", line_num, line));
246            if i < output_lines - 1 || !content.ends_with('\n') {
247                output.push('\n');
248            }
249        }
250
251        // Add truncation notice
252        if truncated {
253            let next_offset = start_idx + output_lines + 1;
254            output.push_str(&format!(
255                "\n... [truncated: {} of {} lines shown. Use offset={} to continue]",
256                output_lines,
257                total_lines - start_idx,
258                next_offset
259            ));
260        }
261
262        // If offset was used, add context header
263        if start_idx > 0 {
264            output = format!(
265                "Showing lines {}-{} of {}:\n",
266                start_idx + 1,
267                start_idx + output_lines,
268                total_lines
269            ) + &output;
270        }
271
272        // ── Emit hashline header and record snapshot ──
273        if let Some((store, canonical, hash, normalized)) = snap_data {
274            // Prepend [path#TAG] header so the model can anchor edits.
275            let header = format_hashline_header(&canonical.to_string_lossy(), &hash);
276            output = format!("{}\n{}", header, output);
277
278            // Record seen lines: 1-indexed line numbers actually displayed.
279            let seen: Vec<u32> =
280                (start_idx as u32 + 1..=start_idx as u32 + output_lines as u32).collect();
281            store.record(&canonical.to_string_lossy(), &normalized, Some(&seen));
282        }
283
284        Ok(AgentToolResult::success(output))
285    }
286}
287
288impl Default for ReadTool {
289    fn default() -> Self {
290        Self::new()
291    }
292}
293
294#[async_trait]
295impl AgentTool for ReadTool {
    /// Identifier under which this tool is registered: `"read"`.
    fn name(&self) -> &str {
        "read"
    }
299
    /// Human-readable label for the tool: `"Read File"`.
    fn label(&self) -> &str {
        "Read File"
    }
303
    /// Reports this tool as essential (always `true`).
    // NOTE(review): what "essential" implies is defined by the `AgentTool`
    // trait, which is not visible in this file.
    fn essential(&self) -> bool {
        true
    }
    /// Static description of the tool's capabilities and limits.
    // NOTE(review): the limits quoted in the text (2000 lines / 50KB) are
    // presumably `truncate::DEFAULT_MAX_LINES` / `DEFAULT_MAX_BYTES` — confirm
    // they stay in sync with those constants.
    fn description(&self) -> &str {
        "Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, output is truncated to 2000 lines or 50KB (whichever is hit first). Use offset/limit for large files. When reading with offset, line numbering starts from 1."
    }
310
    /// JSON schema describing the arguments accepted by `execute`.
    ///
    /// - `path` (string, required): file path or internal URL.
    /// - `offset` (number, optional): 1-indexed line to start reading from.
    /// - `limit` (number, optional): maximum number of lines to read.
    fn parameters_schema(&self) -> Value {
        json!({
            "type": "object",
            "properties": {
                "path": {
                    "type": "string",
                    "description": "Path to the file to read (relative or absolute), or an internal URL (issue://N, pr://owner/repo/N, skill://name/SKILL.md, agent://id, etc.)"
                },
                "offset": {
                    "type": "number",
                    "description": "Line number to start reading from (1-indexed)"
                },
                "limit": {
                    "type": "number",
                    "description": "Maximum number of lines to read"
                }
            },
            "required": ["path"]
        })
    }
331
332    async fn execute(
333        &self,
334        _tool_call_id: &str,
335        params: Value,
336        _signal: Option<tokio::sync::oneshot::Receiver<()>>,
337        ctx: &ToolContext,
338    ) -> Result<AgentToolResult, ToolError> {
339        let path_str = params
340            .get("path")
341            .and_then(|v: &Value| v.as_str())
342            .ok_or_else(|| "Missing required parameter: path".to_string())?;
343
344        let offset = params
345            .get("offset")
346            .and_then(|v| v.as_u64())
347            .map(|n| n as usize);
348
349        let limit = params
350            .get("limit")
351            .and_then(|v| v.as_u64())
352            .map(|n| n as usize);
353
354        // ── Internal URL dispatch ──
355        // If the input looks like an internal URL (scheme://…), try the
356        // configured resolver before falling through to file-system read.
357        if let Some(ref resolver) = ctx.url_resolver
358            && resolver.can_resolve(path_str)
359        {
360            let resolved = resolver.resolve(path_str).await?;
361            return Ok(AgentToolResult::success(resolved.content));
362        }
363
364        // Security: validate path with PathGuard (use root_dir if set, else ctx)
365        let root = self.root_dir.as_deref().unwrap_or(ctx.root());
366        let guard = PathGuard::new(root);
367        let validated = guard
368            .validate_traversal(Path::new(path_str))
369            .map_err(|e| e.to_string())?;
370        let path = validated.as_path();
371
372        // Check if path exists and is a directory
373        match fs::metadata(path).await {
374            Ok(meta) if meta.is_dir() => {
375                return Err("Cannot read a directory, use read_dir instead".to_string());
376            }
377            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
378                return Err(format!("File not found: {}", path.display()));
379            }
380            Err(e) => {
381                return Err(format!("Cannot access file: {}", e));
382            }
383            _ => {}
384        }
385
386        let progress_cb = self
387            .progress_callback
388            .lock()
389            .expect("progress callback lock poisoned")
390            .clone();
391
392        // Check if it's an image file
393        if Self::image_mime_type(path).is_some() {
394            return Self::read_image(path, &progress_cb).await;
395        }
396
397        // Otherwise, read as text (with snapshot if available for hashline)
398        let snap = ctx.snapshot_store.as_ref().map(|s| {
399            let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
400            (s.clone(), canonical)
401        });
402        Self::read_text(path, offset, limit, &progress_cb, snap).await
403    }
404
405    fn on_progress(&self, callback: ProgressCallback) {
406        let cb = self.progress_callback.clone();
407        let mut guard = cb.lock().expect("progress callback lock poisoned");
408        *guard = Some(callback);
409    }
410}
411
// Tests for `ReadTool`: text rendering, offset/limit handling, truncation,
// path validation, binary detection and image handling.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as IoWrite;
    use tempfile::NamedTempFile;

    /// Create a temporary file containing `content`; the file lives as long as
    /// the returned handle.
    fn make_text_file(content: &str) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        f.write_all(content.as_bytes()).unwrap();
        f.flush().unwrap();
        f
    }

    // A plain two-line file is read successfully and both lines appear.
    #[tokio::test]
    async fn test_read_simple_text() {
        let f = make_text_file("hello\nworld\n");
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("hello"));
        assert!(result.output.contains("world"));
    }

    // Each rendered line is prefixed with its number followed by a tab.
    #[tokio::test]
    async fn test_read_with_line_numbers() {
        let f = make_text_file("line1\nline2\nline3\n");
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        // Should contain line numbers
        assert!(result.output.contains("1"));
        assert!(result.output.contains("2"));
        assert!(result.output.contains("3"));
        // Should contain tab-separated line numbers
        assert!(result.output.contains("\tline1"));
        assert!(result.output.contains("\tline2"));
    }

    // An offset skips the earlier lines and adds a "Showing lines" header.
    #[tokio::test]
    async fn test_read_with_offset() {
        let f = make_text_file("line1\nline2\nline3\nline4\nline5\n");
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap(), "offset": 3});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        // Should show lines 3 onwards
        assert!(result.output.contains("Showing lines 3-5 of 5"));
        assert!(result.output.contains("\tline3"));
        assert!(result.output.contains("\tline4"));
        assert!(result.output.contains("\tline5"));
        // Should NOT contain line1 or line2
        assert!(!result.output.contains("\tline1"));
        assert!(!result.output.contains("\tline2"));
    }

    // Offset plus limit selects exactly the requested window of lines.
    #[tokio::test]
    async fn test_read_with_offset_and_limit() {
        let f = make_text_file("line1\nline2\nline3\nline4\nline5\n");
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap(), "offset": 2, "limit": 2});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("\tline2"));
        assert!(result.output.contains("\tline3"));
        assert!(!result.output.contains("\tline4"));
    }

    // An offset past the last line yields an error result, not an `Err`.
    #[tokio::test]
    async fn test_read_offset_beyond_file() {
        let f = make_text_file("line1\nline2\n");
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap(), "offset": 999});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result.output.contains("exceeds file length"));
    }

    // Reading a long file without a limit appends a truncation notice that
    // tells the caller which offset to continue from.
    #[tokio::test]
    async fn test_read_truncation_notice() {
        // Create a file with many lines to trigger truncation
        let content: Vec<String> = (1..3000).map(|i| format!("line {}", i)).collect();
        let f = make_text_file(&content.join("\n"));
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("truncated"));
        assert!(result.output.contains("Use offset="));
    }

    // A `..`-based path is rejected with an error mentioning path traversal.
    #[tokio::test]
    async fn test_read_path_traversal_rejected() {
        let tool = ReadTool::new();
        let params = json!({"path": "../../etc/passwd"});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Path traversal"));
    }

    // A missing file produces either an `Err` or an unsuccessful result.
    #[tokio::test]
    async fn test_read_nonexistent_file() {
        let tool = ReadTool::new();
        let params = json!({"path": "/nonexistent/path/file.txt"});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await;
        assert!(result.is_err() || !result.unwrap().success);
    }

    // Content containing NUL bytes is reported as binary, not displayed.
    #[tokio::test]
    async fn test_read_binary_detection() {
        let mut f = NamedTempFile::new().unwrap();
        // Write bytes with null bytes
        f.write_all(b"hello\x00world\x00binary").unwrap();
        f.flush().unwrap();
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result.output.contains("binary"));
    }

    // A `.png` file is returned with the PNG MIME type and an image block.
    #[tokio::test]
    async fn test_read_image_file() {
        let mut f = NamedTempFile::with_suffix(".png").unwrap();
        // Write a fake PNG-like header + data
        f.write_all(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00])
            .unwrap();
        f.flush().unwrap();
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("image/png"));
        // Should have content blocks with image
        let blocks = result.content_blocks.unwrap();
        assert!(blocks.iter().any(|b| matches!(b, ContentBlock::Image(_))));
    }

    // A `.jpg` file is returned with the JPEG MIME type and an image block.
    #[tokio::test]
    async fn test_read_image_jpg() {
        let mut f = NamedTempFile::with_suffix(".jpg").unwrap();
        f.write_all(b"\xFF\xD8\xFF\xE0").unwrap();
        f.flush().unwrap();
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("image/jpeg"));
        let blocks = result.content_blocks.unwrap();
        assert!(blocks.iter().any(|b| matches!(b, ContentBlock::Image(_))));
    }

    // A `.webp` file is routed to the image path even though its bytes
    // contain NULs (the extension check happens before binary detection).
    #[tokio::test]
    async fn test_read_image_webp() {
        let mut f = NamedTempFile::with_suffix(".webp").unwrap();
        f.write_all(b"RIFF\x00\x00\x00\x00WEBP").unwrap();
        f.flush().unwrap();
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("image/webp"));
    }

    // An empty file reads successfully.
    #[tokio::test]
    async fn test_read_empty_file() {
        let f = make_text_file("");
        let tool = ReadTool::new();
        let params = json!({"path": f.path().to_str().unwrap()});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await
            .unwrap();
        assert!(result.success);
    }

    // A missing file under /tmp is reported as "File not found" when the
    // failure surfaces as an `Err`; an unsuccessful result is also accepted.
    #[tokio::test]
    async fn test_read_file_not_found() {
        let tool = ReadTool::new();
        let params = json!({"path": "/tmp/nonexistent_oxi_test_file_12345.txt"});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await;
        match result {
            Err(e) => assert!(e.contains("File not found")),
            Ok(r) => assert!(!r.success),
        }
    }

    // Pointing the tool at a directory must not yield a successful read.
    #[tokio::test]
    async fn test_read_directory_error() {
        let tool = ReadTool::new();
        let params = json!({"path": "/tmp"});
        let result = tool
            .execute("test", params, None, &ToolContext::default())
            .await;
        match result {
            Err(e) => assert!(e.contains("directory")),
            Ok(r) => assert!(!r.success || r.output.contains("directory")),
        }
    }

    // Extension-to-MIME lookup covers every supported extension and returns
    // `None` for unsupported or missing extensions.
    #[test]
    fn test_image_mime_type_detection() {
        assert_eq!(
            ReadTool::image_mime_type(Path::new("photo.jpg")),
            Some("image/jpeg")
        );
        assert_eq!(
            ReadTool::image_mime_type(Path::new("photo.jpeg")),
            Some("image/jpeg")
        );
        assert_eq!(
            ReadTool::image_mime_type(Path::new("icon.png")),
            Some("image/png")
        );
        assert_eq!(
            ReadTool::image_mime_type(Path::new("anim.gif")),
            Some("image/gif")
        );
        assert_eq!(
            ReadTool::image_mime_type(Path::new("img.webp")),
            Some("image/webp")
        );
        assert_eq!(ReadTool::image_mime_type(Path::new("file.txt")), None);
        assert_eq!(ReadTool::image_mime_type(Path::new("noext")), None);
    }

    // The NUL-byte heuristic flags binary data and accepts text and empty input.
    #[test]
    fn test_binary_detection() {
        assert!(ReadTool::is_binary(b"hello\x00world"));
        assert!(!ReadTool::is_binary(b"hello world\nfoo bar\n"));
        assert!(!ReadTool::is_binary(b""));
        assert!(!ReadTool::is_binary(b"pure ascii text"));
    }
}