1use super::path_security::PathGuard;
8use super::truncate::{self, TruncationOptions};
9use super::{AgentTool, AgentToolResult, ProgressCallback, ToolContext, ToolError};
10use async_trait::async_trait;
11use base64::Engine;
12use oxi_ai::{ContentBlock, ImageContent, TextContent};
13use oxi_hashline::format::{compute_file_hash, format_hashline_header};
14use oxi_hashline::normalize::{normalize_to_lf, strip_bom};
15use oxi_hashline::snapshots::SnapshotStore;
16use serde_json::{Value, json};
17use std::path::{Path, PathBuf};
18use std::sync::{Arc, Mutex};
19use tokio::fs;
20use tokio::io::AsyncReadExt;
/// Maximum number of leading bytes of a file that are inspected for NUL bytes
/// when deciding whether the file is binary.
const BINARY_DETECT_BYTES: usize = 8192;
23
/// File extensions that are treated as images, each paired with the MIME type
/// reported for it. Extensions are stored in lower case.
const IMAGE_EXTENSIONS: &[(&str, &str)] = &[
    ("jpg", "image/jpeg"),
    ("jpeg", "image/jpeg"),
    ("png", "image/png"),
    ("gif", "image/gif"),
    ("webp", "image/webp"),
];
32
33pub struct ReadTool {
35 root_dir: Option<PathBuf>,
36 progress_callback: Arc<Mutex<Option<ProgressCallback>>>,
37}
38
39impl ReadTool {
40 pub fn new() -> Self {
42 Self {
43 root_dir: None,
44 progress_callback: Arc::new(Mutex::new(None)),
45 }
46 }
47
48 pub fn with_cwd(cwd: PathBuf) -> Self {
50 Self {
51 root_dir: Some(cwd),
52 progress_callback: Arc::new(Mutex::new(None)),
53 }
54 }
55
56 fn image_mime_type(path: &Path) -> Option<&'static str> {
59 let ext = path.extension()?.to_str()?.to_lowercase();
60 IMAGE_EXTENSIONS
61 .iter()
62 .find(|(e, _)| *e == ext)
63 .map(|(_, mime)| *mime)
64 }
65
66 fn is_binary(data: &[u8]) -> bool {
68 data.contains(&0)
69 }
70
71 async fn read_image(
73 path: &Path,
74 progress_cb: &Option<ProgressCallback>,
75 ) -> Result<AgentToolResult, ToolError> {
76 let display_path = path.display();
77
78 if let Some(cb) = progress_cb {
79 cb(format!("Reading image: {}", display_path));
80 }
81
82 let data = fs::read(path)
83 .await
84 .map_err(|e| format!("Cannot read image file: {}", e))?;
85
86 if let Some(cb) = progress_cb {
87 cb(format!("Read {} bytes, encoding as base64", data.len()));
88 }
89
90 let mime_type = Self::image_mime_type(path).unwrap_or("application/octet-stream");
91 let encoded = base64::engine::general_purpose::STANDARD.encode(&data);
92
93 let summary = format!(
95 "Image file: {} ({} bytes, {})",
96 display_path,
97 data.len(),
98 mime_type
99 );
100
101 let image_block = ContentBlock::Image(ImageContent::new(encoded, mime_type));
102 let text_block = ContentBlock::Text(TextContent::new(summary.clone()));
103
104 Ok(AgentToolResult::success(summary).with_content_blocks(vec![text_block, image_block]))
105 }
106
107 async fn read_text(
111 path: &Path,
112 offset: Option<usize>,
113 limit: Option<usize>,
114 progress_cb: &Option<ProgressCallback>,
115 snapshot_store: Option<(Arc<dyn SnapshotStore>, PathBuf)>,
116 ) -> Result<AgentToolResult, ToolError> {
117 let display_path = path.display();
118
119 let file_size = match fs::metadata(path).await {
121 Ok(meta) => meta.len(),
122 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
123 return Err(format!("File not found: {}", display_path));
124 }
125 Err(e) => {
126 return Err(format!("Cannot access file: {}", e));
127 }
128 };
129
130 if let Some(cb) = progress_cb {
131 cb(format!(
132 "Reading file: {} ({} bytes)",
133 display_path, file_size
134 ));
135 }
136
137 let mut file = fs::File::open(path)
139 .await
140 .map_err(|e| format!("Cannot open file: {}", e))?;
141
142 let mut detect_buf = vec![0u8; BINARY_DETECT_BYTES.min(file_size as usize)];
144 let n = file
145 .read(&mut detect_buf)
146 .await
147 .map_err(|e| format!("Cannot read file: {}", e))?;
148
149 if Self::is_binary(&detect_buf[..n]) {
150 return Ok(AgentToolResult::error(format!(
151 "File appears to be binary: {} ({} bytes). Cannot display as text.",
152 display_path, file_size
153 )));
154 }
155
156 let mut content = String::from_utf8_lossy(&detect_buf[..n]).into_owned();
158 let mut buffer = vec![0u8; 8192];
159 loop {
160 let n = file
161 .read(&mut buffer)
162 .await
163 .map_err(|e| format!("Cannot read file: {}", e))?;
164 if n == 0 {
165 break;
166 }
167 content.push_str(&String::from_utf8_lossy(&buffer[..n]));
168 }
169
170 if let Some(cb) = progress_cb {
171 cb(format!("Completed reading {} bytes", content.len()));
172 }
173
174 let snap_data: Option<(Arc<dyn SnapshotStore>, PathBuf, String, String)> = snapshot_store
180 .map(|(store, canonical)| {
181 let normalized = normalize_to_lf(strip_bom(&content).text);
182 let hash = compute_file_hash(&normalized);
183 (store, canonical, hash, normalized)
184 });
185
186 let all_lines: Vec<&str> = content.lines().collect();
188 let total_lines = all_lines.len();
189
190 let start_idx = offset
192 .map(|o| if o == 0 { 0 } else { o - 1 }) .unwrap_or(0);
194
195 if start_idx >= total_lines && total_lines > 0 {
196 return Ok(AgentToolResult::error(format!(
197 "Offset {} exceeds file length ({} lines). Use offset=1 to {}.",
198 offset.unwrap_or(1),
199 total_lines,
200 total_lines
201 )));
202 }
203
204 let effective_limit = limit.unwrap_or(usize::MAX);
205 let end_idx = if effective_limit > total_lines - start_idx {
206 total_lines
207 } else {
208 start_idx + effective_limit
209 };
210 let selected_lines = &all_lines[start_idx..end_idx];
211 let selected_count = selected_lines.len();
212
213 let (output_lines, truncated) = if limit.is_none() {
215 let trunc_opts = TruncationOptions::default();
216 let max_lines = trunc_opts.max_lines.unwrap_or(truncate::DEFAULT_MAX_LINES);
217 let max_bytes = trunc_opts.max_bytes.unwrap_or(truncate::DEFAULT_MAX_BYTES);
218
219 let mut byte_count: usize = 0;
221 let mut line_count: usize = 0;
222 for line in selected_lines {
223 let prefix_len = format!("{}", start_idx + line_count + 1).len() + 2; byte_count += prefix_len + line.len() + 1;
226 if line_count >= max_lines || byte_count > max_bytes {
227 break;
228 }
229 line_count += 1;
230 }
231
232 if line_count < selected_count {
233 (line_count, true)
234 } else {
235 (selected_count, false)
236 }
237 } else {
238 (selected_count, false)
239 };
240
241 let mut output = String::new();
243 for (i, line) in selected_lines.iter().enumerate().take(output_lines) {
244 let line_num = start_idx + i + 1; output.push_str(&format!("{:>6}\t{}", line_num, line));
246 if i < output_lines - 1 || !content.ends_with('\n') {
247 output.push('\n');
248 }
249 }
250
251 if truncated {
253 let next_offset = start_idx + output_lines + 1;
254 output.push_str(&format!(
255 "\n... [truncated: {} of {} lines shown. Use offset={} to continue]",
256 output_lines,
257 total_lines - start_idx,
258 next_offset
259 ));
260 }
261
262 if start_idx > 0 {
264 output = format!(
265 "Showing lines {}-{} of {}:\n",
266 start_idx + 1,
267 start_idx + output_lines,
268 total_lines
269 ) + &output;
270 }
271
272 if let Some((store, canonical, hash, normalized)) = snap_data {
274 let header = format_hashline_header(&canonical.to_string_lossy(), &hash);
276 output = format!("{}\n{}", header, output);
277
278 let seen: Vec<u32> =
280 (start_idx as u32 + 1..=start_idx as u32 + output_lines as u32).collect();
281 store.record(&canonical.to_string_lossy(), &normalized, Some(&seen));
282 }
283
284 Ok(AgentToolResult::success(output))
285 }
286}
287
288impl Default for ReadTool {
289 fn default() -> Self {
290 Self::new()
291 }
292}
293
294#[async_trait]
295impl AgentTool for ReadTool {
296 fn name(&self) -> &str {
297 "read"
298 }
299
300 fn label(&self) -> &str {
301 "Read File"
302 }
303
304 fn essential(&self) -> bool {
305 true
306 }
307 fn description(&self) -> &str {
308 "Read the contents of a file. Supports text files and images (jpg, png, gif, webp). Images are sent as attachments. For text files, output is truncated to 2000 lines or 50KB (whichever is hit first). Use offset/limit for large files. When reading with offset, line numbering starts from 1."
309 }
310
311 fn parameters_schema(&self) -> Value {
312 json!({
313 "type": "object",
314 "properties": {
315 "path": {
316 "type": "string",
317 "description": "Path to the file to read (relative or absolute), or an internal URL (issue://N, pr://owner/repo/N, skill://name/SKILL.md, agent://id, etc.)"
318 },
319 "offset": {
320 "type": "number",
321 "description": "Line number to start reading from (1-indexed)"
322 },
323 "limit": {
324 "type": "number",
325 "description": "Maximum number of lines to read"
326 }
327 },
328 "required": ["path"]
329 })
330 }
331
332 async fn execute(
333 &self,
334 _tool_call_id: &str,
335 params: Value,
336 _signal: Option<tokio::sync::oneshot::Receiver<()>>,
337 ctx: &ToolContext,
338 ) -> Result<AgentToolResult, ToolError> {
339 let path_str = params
340 .get("path")
341 .and_then(|v: &Value| v.as_str())
342 .ok_or_else(|| "Missing required parameter: path".to_string())?;
343
344 let offset = params
345 .get("offset")
346 .and_then(|v| v.as_u64())
347 .map(|n| n as usize);
348
349 let limit = params
350 .get("limit")
351 .and_then(|v| v.as_u64())
352 .map(|n| n as usize);
353
354 if let Some(ref resolver) = ctx.url_resolver
358 && resolver.can_resolve(path_str)
359 {
360 let resolved = resolver.resolve(path_str).await?;
361 return Ok(AgentToolResult::success(resolved.content));
362 }
363
364 let root = self.root_dir.as_deref().unwrap_or(ctx.root());
366 let guard = PathGuard::new(root);
367 let validated = guard
368 .validate_traversal(Path::new(path_str))
369 .map_err(|e| e.to_string())?;
370 let path = validated.as_path();
371
372 match fs::metadata(path).await {
374 Ok(meta) if meta.is_dir() => {
375 return Err("Cannot read a directory, use read_dir instead".to_string());
376 }
377 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
378 return Err(format!("File not found: {}", path.display()));
379 }
380 Err(e) => {
381 return Err(format!("Cannot access file: {}", e));
382 }
383 _ => {}
384 }
385
386 let progress_cb = self
387 .progress_callback
388 .lock()
389 .expect("progress callback lock poisoned")
390 .clone();
391
392 if Self::image_mime_type(path).is_some() {
394 return Self::read_image(path, &progress_cb).await;
395 }
396
397 let snap = ctx.snapshot_store.as_ref().map(|s| {
399 let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
400 (s.clone(), canonical)
401 });
402 Self::read_text(path, offset, limit, &progress_cb, snap).await
403 }
404
405 fn on_progress(&self, callback: ProgressCallback) {
406 let cb = self.progress_callback.clone();
407 let mut guard = cb.lock().expect("progress callback lock poisoned");
408 *guard = Some(callback);
409 }
410}
411
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as IoWrite;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and returns its handle.
    fn make_text_file(content: &str) -> NamedTempFile {
        let mut f = NamedTempFile::new().unwrap();
        f.write_all(content.as_bytes()).unwrap();
        f.flush().unwrap();
        f
    }

    /// Writes `bytes` to a fresh temporary file whose name ends in `suffix`.
    fn make_suffixed_file(suffix: &str, bytes: &[u8]) -> NamedTempFile {
        let mut f = NamedTempFile::with_suffix(suffix).unwrap();
        f.write_all(bytes).unwrap();
        f.flush().unwrap();
        f
    }

    /// Path of `file` as a UTF-8 string, for use in tool parameters.
    fn path_of(file: &NamedTempFile) -> &str {
        file.path().to_str().unwrap()
    }

    /// Runs a default `ReadTool` with `params` against a default context.
    async fn run_read(params: Value) -> Result<AgentToolResult, ToolError> {
        let tool = ReadTool::new();
        let ctx = ToolContext::default();
        tool.execute("test", params, None, &ctx).await
    }

    #[tokio::test]
    async fn test_read_simple_text() {
        let f = make_text_file("hello\nworld\n");
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
        assert!(result.output.contains("hello"));
        assert!(result.output.contains("world"));
    }

    #[tokio::test]
    async fn test_read_with_line_numbers() {
        let f = make_text_file("line1\nline2\nline3\n");
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
        // Line numbers are present.
        assert!(result.output.contains("1"));
        assert!(result.output.contains("2"));
        assert!(result.output.contains("3"));
        // Each line's text follows a tab separator.
        assert!(result.output.contains("\tline1"));
        assert!(result.output.contains("\tline2"));
    }

    #[tokio::test]
    async fn test_read_with_offset() {
        let f = make_text_file("line1\nline2\nline3\nline4\nline5\n");
        let result = run_read(json!({"path": path_of(&f), "offset": 3}))
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("Showing lines 3-5 of 5"));
        for shown in ["\tline3", "\tline4", "\tline5"] {
            assert!(result.output.contains(shown));
        }
        for skipped in ["\tline1", "\tline2"] {
            assert!(!result.output.contains(skipped));
        }
    }

    #[tokio::test]
    async fn test_read_with_offset_and_limit() {
        let f = make_text_file("line1\nline2\nline3\nline4\nline5\n");
        let result = run_read(json!({"path": path_of(&f), "offset": 2, "limit": 2}))
            .await
            .unwrap();
        assert!(result.success);
        assert!(result.output.contains("\tline2"));
        assert!(result.output.contains("\tline3"));
        assert!(!result.output.contains("\tline4"));
    }

    #[tokio::test]
    async fn test_read_offset_beyond_file() {
        let f = make_text_file("line1\nline2\n");
        let result = run_read(json!({"path": path_of(&f), "offset": 999}))
            .await
            .unwrap();
        assert!(!result.success);
        assert!(result.output.contains("exceeds file length"));
    }

    #[tokio::test]
    async fn test_read_truncation_notice() {
        // More lines than the default line limit allows.
        let content: Vec<String> = (1..3000).map(|i| format!("line {}", i)).collect();
        let f = make_text_file(&content.join("\n"));
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
        assert!(result.output.contains("truncated"));
        assert!(result.output.contains("Use offset="));
    }

    #[tokio::test]
    async fn test_read_path_traversal_rejected() {
        let result = run_read(json!({"path": "../../etc/passwd"})).await;
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("Path traversal"));
    }

    #[tokio::test]
    async fn test_read_nonexistent_file() {
        let result = run_read(json!({"path": "/nonexistent/path/file.txt"})).await;
        assert!(result.is_err() || !result.unwrap().success);
    }

    #[tokio::test]
    async fn test_read_binary_detection() {
        let mut f = NamedTempFile::new().unwrap();
        // NUL bytes mark the content as binary.
        f.write_all(b"hello\x00world\x00binary").unwrap();
        f.flush().unwrap();
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(!result.success);
        assert!(result.output.contains("binary"));
    }

    #[tokio::test]
    async fn test_read_image_file() {
        // PNG signature followed by two padding bytes.
        let f = make_suffixed_file(
            ".png",
            &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, 0x00, 0x00],
        );
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
        assert!(result.output.contains("image/png"));
        let blocks = result.content_blocks.unwrap();
        assert!(blocks.iter().any(|b| matches!(b, ContentBlock::Image(_))));
    }

    #[tokio::test]
    async fn test_read_image_jpg() {
        let f = make_suffixed_file(".jpg", b"\xFF\xD8\xFF\xE0");
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
        assert!(result.output.contains("image/jpeg"));
        let blocks = result.content_blocks.unwrap();
        assert!(blocks.iter().any(|b| matches!(b, ContentBlock::Image(_))));
    }

    #[tokio::test]
    async fn test_read_image_webp() {
        let f = make_suffixed_file(".webp", b"RIFF\x00\x00\x00\x00WEBP");
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
        assert!(result.output.contains("image/webp"));
    }

    #[tokio::test]
    async fn test_read_empty_file() {
        let f = make_text_file("");
        let result = run_read(json!({"path": path_of(&f)})).await.unwrap();
        assert!(result.success);
    }

    #[tokio::test]
    async fn test_read_file_not_found() {
        let result = run_read(json!({"path": "/tmp/nonexistent_oxi_test_file_12345.txt"})).await;
        match result {
            Err(e) => assert!(e.contains("File not found")),
            Ok(r) => assert!(!r.success),
        }
    }

    #[tokio::test]
    async fn test_read_directory_error() {
        let result = run_read(json!({"path": "/tmp"})).await;
        match result {
            Err(e) => assert!(e.contains("directory")),
            Ok(r) => assert!(!r.success || r.output.contains("directory")),
        }
    }

    #[test]
    fn test_image_mime_type_detection() {
        let cases = [
            ("photo.jpg", Some("image/jpeg")),
            ("photo.jpeg", Some("image/jpeg")),
            ("icon.png", Some("image/png")),
            ("anim.gif", Some("image/gif")),
            ("img.webp", Some("image/webp")),
            ("file.txt", None),
            ("noext", None),
        ];
        for (file_name, expected) in cases {
            assert_eq!(ReadTool::image_mime_type(Path::new(file_name)), expected);
        }
    }

    #[test]
    fn test_binary_detection() {
        assert!(ReadTool::is_binary(b"hello\x00world"));
        for text in [&b"hello world\nfoo bar\n"[..], &b""[..], &b"pure ascii text"[..]] {
            assert!(!ReadTool::is_binary(text));
        }
    }
}