1use anyhow::{Context, Result};
2use async_trait::async_trait;
3use serde::Deserialize;
4use serde_json::json;
5
6use super::{ApprovalRequirement, Tool, ToolContext, ToolDef, ToolResult};
7
/// Line-count cutoff for whole-file reads: a file with more lines than this that is
/// read without `offset` and without `limit` is answered with a structural skeleton
/// instead of its full content.
pub(crate) const SKELETON_LINE_THRESHOLD: usize = 300;
13
/// The `read_file` tool. It carries no state of its own; everything it needs is
/// passed in through the call arguments and the `ToolContext`.
pub struct ReadFileTool;
15
16fn deserialize_lenient_usize<'de, D>(
18 deserializer: D,
19) -> std::result::Result<Option<usize>, D::Error>
20where
21 D: serde::Deserializer<'de>,
22{
23 use serde::de;
24 struct V;
25 impl<'de> de::Visitor<'de> for V {
26 type Value = Option<usize>;
27 fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
28 f.write_str("usize or string")
29 }
30 fn visit_none<E: de::Error>(self) -> std::result::Result<Self::Value, E> {
31 Ok(None)
32 }
33 fn visit_unit<E: de::Error>(self) -> std::result::Result<Self::Value, E> {
34 Ok(None)
35 }
36 fn visit_u64<E: de::Error>(self, v: u64) -> std::result::Result<Self::Value, E> {
37 Ok(Some(v as usize))
38 }
39 fn visit_i64<E: de::Error>(self, v: i64) -> std::result::Result<Self::Value, E> {
40 if v >= 0 {
41 Ok(Some(v as usize))
42 } else {
43 Ok(None)
44 }
45 }
46 fn visit_f64<E: de::Error>(self, v: f64) -> std::result::Result<Self::Value, E> {
47 Ok(Some(v as usize))
48 }
49 fn visit_str<E: de::Error>(self, v: &str) -> std::result::Result<Self::Value, E> {
50 if let Ok(n) = v.trim().parse::<usize>() {
52 return Ok(Some(n));
53 }
54 if let Ok(f) = v.trim().parse::<f64>() {
55 return Ok(Some(f as usize));
56 }
57 Ok(None)
58 }
59 }
60 deserializer.deserialize_any(V)
61}
62
/// Arguments of a `read_file` call, deserialized from the call's JSON payload.
#[derive(Deserialize)]
struct ReadFileArgs {
    /// Path of the file (or directory) to read, exactly as supplied by the caller.
    file_path: String,
    /// Optional 1-based start line. Parsed with `deserialize_lenient_usize`;
    /// `None` when the key is absent.
    #[serde(default, deserialize_with = "deserialize_lenient_usize")]
    offset: Option<usize>,
    /// Optional maximum number of lines to return. Parsed with
    /// `deserialize_lenient_usize`; `None` when the key is absent.
    #[serde(default, deserialize_with = "deserialize_lenient_usize")]
    limit: Option<usize>,
}
71
72#[async_trait]
73impl Tool for ReadFileTool {
74 fn definition(&self) -> ToolDef {
75 ToolDef {
76 name: "read_file",
77 description: "Read a file. Returns full content with line numbers.\n\
78 Large files return a skeleton (structure overview) — use offset/limit to read sections.\n\
79 NEVER use bash (cat/head/tail) to read files.".to_string(),
80 parameters: json!({
81 "type": "object",
82 "properties": {
83 "file_path": { "type": "string", "description": "Absolute path to the file to read" },
84 "offset": { "type": "integer", "description": "Start line (1-based). Omit to read from beginning." },
85 "limit": { "type": "integer", "description": "Max lines to read. Defaults to full file." }
86 },
87 "required": ["file_path"]
88 }),
89 }
90 }
91
92 fn approval(&self, _args: &str) -> ApprovalRequirement {
93 ApprovalRequirement::AutoApprove
94 }
95
96 fn approval_with_context(&self, args: &str, ctx: &ToolContext) -> ApprovalRequirement {
97 let parsed = match serde_json::from_str::<ReadFileArgs>(args) {
98 Ok(parsed) => parsed,
99 Err(_) => return self.approval(args),
100 };
101 let working_dir = match ctx.working_dir.try_read() {
102 Ok(wd) => wd.clone(),
103 Err(_) => return self.approval(args),
104 };
105 match super::approval_for_path(
106 &parsed.file_path,
107 &working_dir,
108 super::ExternalPathAction::Read,
109 ) {
110 Ok(approval) => approval,
111 Err(_) => self.approval(args),
112 }
113 }
114
115 async fn execute(&self, args: &str, ctx: &ToolContext) -> Result<ToolResult> {
116 let parsed: ReadFileArgs = serde_json::from_str(args)?;
117 let working_dir = ctx.working_dir.read().await.clone();
118 let path = match super::inspect_path_access(&parsed.file_path, &working_dir) {
119 Ok(access) => access.path,
120 Err(err) => {
121 return Ok(ToolResult {
122 call_id: String::new(),
123 output: err.to_string(),
124 success: false,
125 });
126 }
127 };
128 let path_ref = path.as_path();
129
130 let cache_key: crate::tool::ReadCacheKey = (path.clone(), parsed.offset, parsed.limit);
139 let disk_mtime = tokio::fs::metadata(&path)
140 .await
141 .ok()
142 .and_then(|m| m.modified().ok());
143 if let Some(mtime) = disk_mtime {
144 let cached = ctx.read_cache.read().await.get(&cache_key).cloned();
145 if let Some((cached_mtime, cached_output, _)) = cached {
146 if cached_mtime == mtime {
147 return Ok(ToolResult {
148 call_id: String::new(),
149 output: cached_output,
150 success: true,
151 });
152 }
153 }
154 }
155
156 if path_ref.is_dir() {
158 let mut entries: Vec<String> = Vec::new();
159 if let Ok(mut rd) = tokio::fs::read_dir(path_ref).await {
160 while let Ok(Some(entry)) = rd.next_entry().await {
161 let name = entry.file_name().to_string_lossy().to_string();
162 let is_dir = entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false);
163 entries.push(if is_dir { format!("{}/", name) } else { name });
164 }
165 }
166 entries.sort();
167 return Ok(ToolResult {
168 call_id: String::new(),
169 output: format!(
170 "[NOTE: {} is a directory, not a file. Here are its contents:]\n{}",
171 parsed.file_path,
172 entries.join("\n")
173 ),
174 success: true,
175 });
176 }
177
178 if !path_ref.exists() {
188 let filename = path_ref
198 .file_name()
199 .map(|n| n.to_string_lossy().to_string())
200 .unwrap_or_default();
201 let mut matches: Vec<String> = Vec::new();
202 if !filename.is_empty() {
203 fn find_file(
204 dir: &std::path::Path,
205 target: &str,
206 depth: usize,
207 max_depth: usize,
208 results: &mut Vec<String>,
209 ) {
210 if depth > max_depth || results.len() >= 20 {
211 return;
212 }
213 if let Ok(entries) = std::fs::read_dir(dir) {
214 for entry in entries.flatten() {
215 let name = entry.file_name().to_string_lossy().to_string();
216 if name.starts_with('.')
217 || name == "node_modules"
218 || name == "target"
219 || name == ".git"
220 {
221 continue;
222 }
223 let p = entry.path();
224 if p.is_dir() {
225 find_file(&p, target, depth + 1, max_depth, results);
226 } else if name == target {
227 results.push(p.to_string_lossy().to_string());
228 }
229 }
230 }
231 }
232 find_file(&working_dir, &filename, 0, 7, &mut matches);
233 matches.sort_by_key(|m| {
237 std::cmp::Reverse(super::shared_prefix_len(&parsed.file_path, m))
238 });
239 }
240
241 let mut output = format!(
245 "Error: No such file: {} (resolved to {})",
246 parsed.file_path,
247 path_ref.display()
248 );
249 if !matches.is_empty() {
250 let shown: Vec<String> =
251 matches.iter().take(5).map(|m| format!(" {}", m)).collect();
252 output.push_str("\n\nDid you mean:\n");
253 output.push_str(&shown.join("\n"));
254 }
255 if !std::path::Path::new(&parsed.file_path).is_absolute()
260 && !parsed.file_path.starts_with('~')
261 {
262 output.push_str(&format!(
263 "\n\nHint: file_path was relative and resolved against working dir {}. \
264 If the user mentioned a different location (e.g. ~/some/path), retry \
265 with the absolute path.",
266 working_dir.display()
267 ));
268 }
269 return Ok(ToolResult {
270 call_id: String::new(),
271 output,
272 success: false,
273 });
274 }
275
276 let store_hit: Option<String> = if let Some(mtime) = disk_mtime {
287 let store = ctx.file_store.read().await;
288 store
289 .store_id_for_path(&path)
290 .map(|s| s.to_string())
291 .and_then(|id| store.get(&id).cloned())
292 .filter(|entry| entry.mtime == mtime)
293 .map(|entry| entry.content)
294 } else {
295 None
296 };
297 let served_from_store = store_hit.is_some();
298
299 let content = if let Some(c) = store_hit {
300 c
305 } else {
306 let bytes = tokio::fs::read(&path)
307 .await
308 .with_context(|| format!("Failed to read {}", path.display()))?;
309
310 match String::from_utf8(bytes.clone()) {
314 Ok(s) => s,
315 Err(_) => match decode_non_utf8_text(path_ref, &bytes) {
316 Some(s) => s,
317 None => {
318 let output = format!(
319 "Binary file ({} bytes), cannot display as text.{}",
320 bytes.len(),
321 binary_recovery_hint(path_ref, &parsed.file_path),
322 );
323 if let Some(mtime) = disk_mtime {
324 ctx.read_cache
325 .write()
326 .await
327 .insert(cache_key.clone(), (mtime, output.clone(), 1));
328 }
329 return Ok(ToolResult {
330 call_id: String::new(),
331 output,
332 success: true,
333 });
334 }
335 },
336 }
337 };
338
339 if !served_from_store {
348 if let Some(mtime) = disk_mtime {
349 ctx.file_store
350 .write()
351 .await
352 .insert(path.clone(), content.clone(), mtime);
353 }
354 }
355
356 let lines: Vec<&str> = content.lines().collect();
357 let total_lines = lines.len();
358
359 let auto_skeleton = total_lines > SKELETON_LINE_THRESHOLD
366 && parsed.offset.is_none()
367 && parsed.limit.is_none();
368
369 if auto_skeleton {
370 let mut searcher = ctx.semantic.lock().await;
371 let skeleton = if let Some(symbols) = searcher.list_symbols(path_ref) {
372 let fname = path_ref
373 .file_name()
374 .map(|n| n.to_string_lossy())
375 .unwrap_or_default();
376 let mut skel = format!("[File skeleton: {} ({} lines). Each symbol line ends with the exact offset/limit to read it — copy those into read_file, don't recompute.]\n\n",
377 fname, total_lines);
378 let interest_keywords = [
383 "handle", "process", "route", "search", "query", "fetch", "execute",
384 "dispatch", "run", "main", "serve",
385 ];
386 let mut scored: Vec<(usize, &crate::semantic::Symbol)> = symbols
387 .iter()
388 .map(|s| {
389 let name_lower = s.name.to_lowercase();
390 let body_lines = s.end_line.saturating_sub(s.start_line) + 1;
391 let keyword_score =
392 if interest_keywords.iter().any(|k| name_lower.contains(k)) {
393 100
394 } else {
395 0
396 };
397 (keyword_score + body_lines, s)
398 })
399 .collect();
400 scored.sort_by(|a, b| b.0.cmp(&a.0));
401
402 let expand_candidates: Vec<&crate::semantic::Symbol> = scored
404 .iter()
405 .filter(|(_, s)| {
406 let body = s.end_line.saturating_sub(s.start_line) + 1;
407 body >= 5 && body <= 50
408 })
409 .take(2)
410 .map(|(_, s)| *s)
411 .collect();
412
413 for s in &symbols {
414 let sig = lines
415 .get(s.start_line.saturating_sub(1))
416 .map(|l| l.trim())
417 .unwrap_or(&s.name);
418 let sig_short = if sig.chars().count() > 70 {
419 format!("{}...", sig.chars().take(67).collect::<String>())
420 } else {
421 sig.to_string()
422 };
423
424 let body_len = s.end_line.saturating_sub(s.start_line) + 1;
425 if expand_candidates
426 .iter()
427 .any(|c| c.start_line == s.start_line && c.name == s.name)
428 {
429 skel.push_str(&format!(
431 "{:>4}| {} (L{}-{}) [auto-expanded]\n",
432 s.start_line, sig_short, s.start_line, s.end_line
433 ));
434 let start = s.start_line.saturating_sub(1);
435 let end = s.end_line.min(total_lines);
436 for i in (start + 1)..end {
437 if let Some(line) = lines.get(i) {
438 skel.push_str(&format!("{:>4}| {}\n", i + 1, line));
439 }
440 }
441 } else {
442 skel.push_str(&format!(
443 "{:>4}| {} (L{}-{}, read offset={} limit={})\n",
444 s.start_line,
445 sig_short,
446 s.start_line,
447 s.end_line,
448 s.start_line,
449 body_len
450 ));
451 }
452 }
453 skel
454 } else {
455 let fname = path
458 .file_name()
459 .map(|n| n.to_string_lossy())
460 .unwrap_or_default();
461 format!("[File skeleton: {} ({} lines) — use grep to find relevant lines, then read with offset/limit.]\n",
462 fname, total_lines)
463 };
464 if let Some(mtime) = disk_mtime {
471 ctx.read_cache.write().await.insert(
472 cache_key.clone(),
473 (mtime, skeleton.clone(), 1),
474 );
475 }
476 return Ok(ToolResult {
477 call_id: String::new(),
478 output: skeleton,
479 success: true,
480 });
481 }
482
483 let offset = parsed.offset.unwrap_or(1).max(1) - 1;
484
485 let limit = match (parsed.offset, parsed.limit) {
491 (None, Some(_)) => total_lines, (Some(_), Some(l)) => l, _ => total_lines, };
495
496 let offset = if offset > 0 && limit >= total_lines {
498 0
499 } else {
500 offset
501 };
502 let offset = offset.min(total_lines);
505
506 let end = (offset.saturating_add(limit)).min(total_lines);
507
508 let returned_all = offset == 0 && end >= total_lines;
511
512 let mut output: String = lines[offset..end]
513 .iter()
514 .enumerate()
515 .map(|(i, line)| format!("{:>4}| {}", offset + i + 1, line))
516 .collect::<Vec<_>>()
517 .join("\n");
518
519 if !returned_all {
520 let mut searcher = ctx.semantic.lock().await;
524 let skeleton = if let Some(symbols) = searcher.list_symbols(path_ref) {
525 let unseen: Vec<String> = symbols
526 .iter()
527 .filter(|s| s.start_line < offset + 1 || s.start_line > end)
528 .map(|s| {
529 let sig = lines
530 .get(s.start_line.saturating_sub(1))
531 .map(|l| l.trim())
532 .unwrap_or(&s.name);
533 let sig_short: String = sig.chars().take(70).collect();
534 let body_len = s.end_line.saturating_sub(s.start_line) + 1;
535 format!(
536 "{:>4}| {} (L{}-{}, read offset={} limit={})",
537 s.start_line,
538 sig_short,
539 s.start_line,
540 s.end_line,
541 s.start_line,
542 body_len
543 )
544 })
545 .collect();
546 if !unseen.is_empty() {
547 format!("\n{}", unseen.join("\n"))
548 } else {
549 String::new()
550 }
551 } else {
552 String::new()
553 };
554
555 output.push_str(&format!(
556 "\n\n[Showing lines {}-{} of {} total. Unseen structure:]{}",
557 offset + 1,
558 end,
559 total_lines,
560 skeleton
561 ));
562 }
563
564 if let Some(mtime) = disk_mtime {
572 ctx.read_cache
573 .write()
574 .await
575 .insert(cache_key, (mtime, output.clone(), 1));
576 }
577 Ok(ToolResult {
578 call_id: String::new(),
579 output,
580 success: true,
581 })
582 }
583}
584
/// Lowercase file extensions (without the dot) for which a non-UTF-8 file is still
/// considered a candidate for text decoding.
const GBK_CANDIDATE_EXTENSIONS: &[&str] = &[
    "txt", "md", "markdown", "csv", "tsv", "log", "sql", "ini", "conf", "cfg", "toml", "yaml",
    "yml", "html", "htm", "xml", "json", "js", "ts", "css", "py", "rb", "go", "rs", "c", "h",
    "cpp", "hpp", "java", "kt", "sh", "bat", "ps1",
];

/// Reports whether `path` has an extension listed in `GBK_CANDIDATE_EXTENSIONS`,
/// compared without regard to ASCII case.
///
/// A path with no extension, or with one that is not valid UTF-8, yields `false`.
fn has_text_extension(path: &std::path::Path) -> bool {
    match path.extension().and_then(std::ffi::OsStr::to_str) {
        Some(ext) => GBK_CANDIDATE_EXTENSIONS
            .iter()
            .any(|candidate| candidate.eq_ignore_ascii_case(ext)),
        None => false,
    }
}
605
606fn decode_non_utf8_text(path: &std::path::Path, bytes: &[u8]) -> Option<String> {
611 if !has_text_extension(path) {
612 return None;
613 }
614 let (decoded, _, had_errors) = encoding_rs::GB18030.decode(bytes);
615 if had_errors {
616 return None;
617 }
618 Some(decoded.into_owned())
619}
620
621fn binary_recovery_hint(path: &std::path::Path, full_path_str: &str) -> String {
626 let ext = path
627 .extension()
628 .and_then(|e| e.to_str())
629 .map(|e| e.to_ascii_lowercase())
630 .unwrap_or_default();
631 let quoted = shell_quote(full_path_str);
632 match ext.as_str() {
633 "doc" => format!(
634 "\n\n[Recovery] This is a legacy Word (.doc) binary. Run one of:\n\
635 - bash: `antiword {q}`\n\
636 - bash: `pandoc {q} -t plain`\n\
637 - bash: `catdoc {q}`",
638 q = quoted,
639 ),
640 "docx" => format!(
641 "\n\n[Recovery] This is a modern Word (.docx) — a zip containing XML. Run:\n\
642 - bash: `unzip -p {q} word/document.xml | sed 's/<[^>]*>//g'`\n\
643 - or: `pandoc {q} -t plain`",
644 q = quoted,
645 ),
646 "xls" => format!(
647 "\n\n[Recovery] Legacy Excel (.xls). Run:\n\
648 - bash: `libreoffice --headless --convert-to csv --outdir /tmp {q} && cat /tmp/*.csv`",
649 q = quoted,
650 ),
651 "xlsx" => format!(
652 "\n\n[Recovery] Modern Excel (.xlsx). Run:\n\
653 - bash: `libreoffice --headless --convert-to csv --outdir /tmp {q} && cat /tmp/*.csv`\n\
654 - or: `unzip -p {q} xl/sharedStrings.xml` (raw string table)",
655 q = quoted,
656 ),
657 "ppt" | "pptx" => format!(
658 "\n\n[Recovery] PowerPoint. Run:\n\
659 - bash: `pandoc {q} -t plain`",
660 q = quoted,
661 ),
662 "pdf" => format!(
663 "\n\n[Recovery] PDF. Run:\n\
664 - bash: `pdftotext {q} -` (poppler)\n\
665 - or: `mutool draw -F txt {q}`",
666 q = quoted,
667 ),
668 "rtf" => format!(
669 "\n\n[Recovery] RTF. Run:\n\
670 - bash: `pandoc {q} -t plain`\n\
671 - or: `unrtf --text {q}`",
672 q = quoted,
673 ),
674 _ => format!(
675 "\n\n[Hint] The file is not UTF-8 and not a recognised text extension. \
676 If it's text in another encoding, ask the user; if it's a packaged format \
677 (archive, installer, media), there is no point reading it as text.",
678 ),
679 }
680}
681
/// Wraps `s` in single quotes for use as one POSIX shell word.
///
/// Each embedded single quote is written as `'\''`: close the quoted span, emit an
/// escaped quote, reopen the span.
fn shell_quote(s: &str) -> String {
    format!("'{}'", s.replace('\'', r"'\''"))
}
697
/// Integration-style tests for `ReadFileTool`, run against real files in a
/// temporary directory with a fresh `ToolContext` per test.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Reading the same small file twice returns its content both times.
    #[tokio::test]
    async fn read_cache_hits_returns_full_content() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("a.rs");
        std::fs::write(&path, "fn main() {}\n").unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r1 = tool.execute(&args, &ctx).await.unwrap();
        assert!(r1.success);
        assert!(
            r1.output.contains("fn main"),
            "first read should return content"
        );

        let r2 = tool.execute(&args, &ctx).await.unwrap();
        assert!(r2.success);
        assert!(
            r2.output.contains("fn main"),
            "cache hit should return same content"
        );
    }

    /// Repeated reads succeed and the second and third outputs carry no
    /// "times this session" commentary.
    #[tokio::test]
    async fn read_cache_hits_replay_silently() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("a.rs");
        std::fs::write(&path, "fn main() {}\n").unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r1 = tool.execute(&args, &ctx).await.unwrap();
        let r2 = tool.execute(&args, &ctx).await.unwrap();
        let r3 = tool.execute(&args, &ctx).await.unwrap();
        assert!(r1.success && r2.success && r3.success);
        for r in [&r2, &r3] {
            assert!(
                !r.output.contains("times this session"),
                "no meta-commentary on cache hits; got:\n{}",
                r.output
            );
        }
    }

    /// After the file is rewritten, the next read returns the new content rather
    /// than the earlier output.
    #[tokio::test]
    async fn read_cache_misses_when_mtime_changes() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("b.rs");
        std::fs::write(&path, "fn main() {}\n").unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r1 = tool.execute(&args, &ctx).await.unwrap();
        let out1 = r1.output.clone();

        // Short pause before rewriting so the second write can receive a later
        // modification time.
        std::thread::sleep(std::time::Duration::from_millis(10));
        std::fs::write(&path, "fn main() { println!(\"hi\"); }\n").unwrap();

        let r2 = tool.execute(&args, &ctx).await.unwrap();
        assert_ne!(
            r2.output, out1,
            "2nd read must re-read from disk when mtime changed"
        );
        assert!(r2.output.contains("println"));
    }

    /// Read, edit through `EditFileTool`, read again: the file store must not keep
    /// pre-edit content, the read cache must hold no entry for the edited path,
    /// and the second read must show only the edited text.
    #[tokio::test]
    async fn d3_edit_invalidates_caches_for_subsequent_read() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("payload.rs");
        std::fs::write(&path, "fn before() {}\n").unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let read_tool = ReadFileTool;
        let edit_tool = crate::tool::edit::EditFileTool;
        let read_args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r1 = read_tool.execute(&read_args, &ctx).await.unwrap();
        assert!(r1.output.contains("fn before"));
        assert_eq!(
            ctx.file_store.read().await.len(),
            1,
            "FileStore should have 1 entry after read"
        );
        assert_eq!(
            ctx.read_cache.read().await.len(),
            1,
            "read_cache should have 1 entry after read"
        );

        let edit_args = format!(
            r#"{{"file_path":"{}","old_string":"fn before() {{}}","new_string":"fn after() {{ /* edited */ }}"}}"#,
            path.display()
        );
        let e = edit_tool.execute(&edit_args, &ctx).await.unwrap();
        assert!(e.success, "edit should succeed; got: {}", e.output);

        let on_disk = std::fs::read_to_string(&path).unwrap();
        assert!(
            on_disk.contains("fn after"),
            "disk content not updated: {}",
            on_disk
        );

        // The store may either drop the entry or hold the new content; only a
        // retained pre-edit copy is a failure.
        let fs_state_after_edit = {
            let store = ctx.file_store.read().await;
            store
                .store_id_for_path(&path)
                .and_then(|id| store.get(id).cloned())
                .map(|e| e.content)
        };
        if let Some(content) = &fs_state_after_edit {
            assert!(
                content.contains("fn after"),
                "FileStore retained pre-edit content: {}",
                content
            );
        }
        let read_cache_post_edit = ctx.read_cache.read().await.clone();
        let stale_cache_for_path = read_cache_post_edit
            .keys()
            .filter(|(p, _, _)| p == &path)
            .count();
        assert_eq!(
            stale_cache_for_path, 0,
            "read_cache must be purged for edited path; lingering entries \
             would let coarse-mtime FS serve stale content"
        );

        let r2 = read_tool.execute(&read_args, &ctx).await.unwrap();
        assert!(
            r2.output.contains("fn after"),
            "POST-EDIT READ SERVED STALE CONTENT: {}",
            r2.output
        );
        assert!(
            !r2.output.contains("fn before"),
            "post-edit read still mentions pre-edit symbol: {}",
            r2.output
        );
    }

    /// A `.txt` file whose bytes are not valid UTF-8 is decoded to the expected
    /// Chinese text instead of being reported as binary.
    #[tokio::test]
    async fn read_decodes_gbk_text_file() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("notes.txt");
        let gbk_bytes: Vec<u8> = vec![0xC4, 0xE3, 0xBA, 0xC3, 0xCA, 0xC0, 0xBD, 0xE7, 0x0A];
        std::fs::write(&path, &gbk_bytes).unwrap();
        // Guard: the fixture must really be invalid UTF-8, otherwise the fallback
        // decoder would never run.
        assert!(std::str::from_utf8(&gbk_bytes).is_err());

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r = tool.execute(&args, &ctx).await.unwrap();
        assert!(r.success, "GBK text should decode, got: {}", r.output);
        assert!(
            r.output.contains("你好世界"),
            "expected decoded text, got: {}",
            r.output
        );
        assert!(!r.output.contains("Binary file"));
    }

    /// A `.docx` file with non-text bytes yields the "Binary file" message with a
    /// recovery hint that mentions `unzip` or `pandoc`.
    #[tokio::test]
    async fn read_docx_returns_recovery_hint_not_garbage() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("spec.docx");
        // Zip magic bytes followed by synthetic high-bit bytes.
        let docx_bytes: Vec<u8> = [b'P', b'K', 0x03, 0x04]
            .iter()
            .copied()
            .chain((0..200).map(|i| (i as u8).wrapping_mul(31).wrapping_add(0x80)))
            .collect();
        let mut docx_bytes = docx_bytes;
        docx_bytes.extend_from_slice(&[0xFE, 0xFF, 0xC0]);
        std::fs::write(&path, &docx_bytes).unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r = tool.execute(&args, &ctx).await.unwrap();
        assert!(r.output.contains("Binary file"));
        assert!(
            r.output.contains("Recovery"),
            "should give recovery hint: {}",
            r.output
        );
        assert!(r.output.contains("unzip") || r.output.contains("pandoc"));
    }

    /// A `.pdf` file with non-text bytes yields the "Binary file" message with a
    /// `pdftotext` suggestion.
    #[tokio::test]
    async fn read_pdf_returns_pdftotext_hint() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("doc.pdf");
        let mut bytes: Vec<u8> = b"%PDF-1.4\n".to_vec();
        bytes.extend_from_slice(&[0xFF, 0xFE, 0xC0, 0x80, 0xFE]);
        std::fs::write(&path, &bytes).unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r = tool.execute(&args, &ctx).await.unwrap();
        assert!(r.output.contains("Binary file"));
        assert!(
            r.output.contains("pdftotext"),
            "should suggest pdftotext: {}",
            r.output
        );
    }

    /// `shell_quote` wraps in single quotes and escapes embedded single quotes.
    #[test]
    fn shell_quote_escapes_single_quote() {
        assert_eq!(shell_quote("abc"), "'abc'");
        assert_eq!(shell_quote("a'b"), r"'a'\''b'");
        assert_eq!(
            shell_quote("/tmp/file with spaces.doc"),
            "'/tmp/file with spaces.doc'"
        );
    }

    /// A file longer than the skeleton threshold returns a skeleton whose symbol
    /// lines include a ready-to-use `read offset=… limit=…` hint.
    #[tokio::test]
    async fn skeleton_includes_read_offset_limit_hints() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("big.rs");

        // One function longer than 50 lines starting at line 1, then filler
        // comments to push the file over the threshold.
        let mut content = String::new();
        content.push_str("pub fn save_session(id: &str) -> Result<()> {\n");
        for i in 0..80 {
            content.push_str(&format!("    let _x{} = {};\n", i, i));
        }
        content.push_str("    Ok(())\n");
        content.push_str("}\n");
        for i in 0..(SKELETON_LINE_THRESHOLD + 20) {
            content.push_str(&format!("// filler {}\n", i));
        }
        std::fs::write(&path, &content).unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        let r = tool.execute(&args, &ctx).await.unwrap();
        assert!(r.success);
        assert!(
            r.output.contains("[File skeleton:"),
            "expected skeleton output, got:\n{}",
            r.output
        );
        assert!(
            r.output.contains("read offset=1 limit="),
            "skeleton should expose offset=1 limit=<body_len> for save_session\nGot:\n{}",
            r.output
        );
    }

    /// For a missing file, same-named candidates are listed with the one sharing
    /// the longer path prefix with the request first.
    #[tokio::test]
    async fn read_404_ranks_by_shared_path_prefix() {
        let dir = TempDir::new().unwrap();
        std::fs::create_dir_all(dir.path().join("proj-wanted").join("presentation")).unwrap();
        std::fs::create_dir_all(dir.path().join("proj-other")).unwrap();
        std::fs::write(
            dir.path().join("proj-wanted/presentation/index.html"),
            "<html></html>",
        )
        .unwrap();
        std::fs::write(dir.path().join("proj-other/index.html"), "<html></html>").unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        // The requested path does not exist, but a same-named file exists one
        // level deeper under the same project directory.
        let asked = dir.path().join("proj-wanted/index.html");
        let args = format!(r#"{{"file_path":"{}"}}"#, asked.display());

        let r = tool.execute(&args, &ctx).await.unwrap();
        assert!(!r.success);
        assert!(r.output.contains("Did you mean"));
        let wanted_pos = r
            .output
            .find("proj-wanted/presentation/index.html")
            .unwrap();
        let other_pos = r.output.find("proj-other/index.html").unwrap();
        assert!(
            wanted_pos < other_pos,
            "proj-wanted match must rank above proj-other. output:\n{}",
            r.output
        );
    }

    /// A missing relative path reports the raw input, the resolved path and the
    /// absolute-path hint, and does not include an "os error" string.
    #[tokio::test]
    async fn read_404_relative_path_includes_resolved_path_and_absolute_hint() {
        let dir = TempDir::new().unwrap();
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let args = r#"{"file_path":"MEMORY.md"}"#;

        let r = tool.execute(args, &ctx).await.unwrap();
        assert!(!r.success);
        assert!(
            r.output.contains("No such file: MEMORY.md"),
            "must surface the raw input. output:\n{}",
            r.output
        );
        assert!(
            r.output.contains("resolved to"),
            "must surface the resolved absolute path so the agent sees \
             what was actually attempted. output:\n{}",
            r.output
        );
        assert!(
            r.output.contains("absolute path"),
            "relative-input path must include the absolute-path hint. output:\n{}",
            r.output
        );
        assert!(
            !r.output.contains("os error"),
            "must not leak the raw OS error string. output:\n{}",
            r.output
        );
    }

    /// A missing absolute path reports "No such file" without the relative-path hint.
    #[tokio::test]
    async fn read_404_absolute_path_omits_relative_hint() {
        let dir = TempDir::new().unwrap();
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let tool = ReadFileTool;
        let asked = dir.path().join("MEMORY.md");
        let args = format!(r#"{{"file_path":"{}"}}"#, asked.display());

        let r = tool.execute(&args, &ctx).await.unwrap();
        assert!(!r.success);
        assert!(r.output.contains("No such file"));
        assert!(
            !r.output.contains("absolute path"),
            "absolute-input path must NOT show the relative-path hint. output:\n{}",
            r.output
        );
    }

    /// Writes `name` into `dir` with `n_lines` lines of the form `line <i>`
    /// (1-based) and returns its path.
    fn write_n_line_file(dir: &TempDir, name: &str, n_lines: usize) -> std::path::PathBuf {
        let path = dir.path().join(name);
        let body: String = (1..=n_lines).map(|i| format!("line {}\n", i)).collect();
        std::fs::write(&path, body).unwrap();
        path
    }

    /// A full read of a 200-line file returns all content inline, mentions neither
    /// `store_id=` nor `peek_file`, and leaves one file-store entry.
    #[tokio::test]
    async fn d3_full_read_pushes_to_store_returns_inline_content() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "big.rs", 200);
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let r = ReadFileTool.execute(&args, &ctx).await.unwrap();
        assert!(r.success);
        assert!(
            !r.output.contains("store_id="),
            "store_id must NOT leak into model output:\n{}",
            r.output
        );
        assert!(
            !r.output.contains("peek_file"),
            "peek_file no longer exists, must not be referenced:\n{}",
            r.output
        );
        assert!(r.output.contains("line 1"));
        assert!(r.output.contains("line 100"));
        assert!(r.output.contains("line 200"));
        assert_eq!(ctx.file_store.read().await.len(), 1);
    }

    /// Reading a 10-line file also leaves one file-store entry.
    #[tokio::test]
    async fn d3_small_file_pushes_to_store_after_merge() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "small.rs", 10);
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let r = ReadFileTool.execute(&args, &ctx).await.unwrap();
        assert!(r.success);
        assert_eq!(
            ctx.file_store.read().await.len(),
            1,
            "fresh disk read must populate store regardless of file size"
        );
    }

    /// A range read following a full read returns the requested lines, does not
    /// mention `FileStore`, and the store still holds exactly one entry.
    #[tokio::test]
    async fn d3_range_read_after_full_read_silently_serves_from_store() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "big.rs", 200);
        let ctx = ToolContext::new(dir.path().to_path_buf());

        let full_args = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let _ = ReadFileTool.execute(&full_args, &ctx).await.unwrap();

        let range_args = format!(
            r#"{{"file_path":"{}","offset":100,"limit":5}}"#,
            path.display()
        );
        let r = ReadFileTool.execute(&range_args, &ctx).await.unwrap();
        assert!(r.success);
        assert!(r.output.contains("line 100"));
        assert!(
            !r.output.contains("FileStore"),
            "store-served read must NOT leak any FileStore preamble:\n{}",
            r.output
        );
        assert_eq!(
            ctx.file_store.read().await.len(),
            1,
            "FileStore must retain the entry across both reads"
        );
    }

    /// An edit empties the file store; the following range read succeeds, does not
    /// mention `FileStore cache`, and repopulates the store with one entry.
    #[tokio::test]
    async fn d3_edit_invalidates_cache_next_read_hits_disk() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "big.rs", 200);
        let ctx = ToolContext::new(dir.path().to_path_buf());

        let read_args = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let _ = ReadFileTool.execute(&read_args, &ctx).await.unwrap();
        assert_eq!(ctx.file_store.read().await.len(), 1);

        let edit_args = format!(
            r#"{{"file_path":"{}","old_string":"line 1\n","new_string":"LINE 1\n"}}"#,
            path.display()
        );
        let e = crate::tool::edit::EditFileTool
            .execute(&edit_args, &ctx)
            .await
            .unwrap();
        assert!(e.success, "edit must succeed:\n{}", e.output);
        assert_eq!(
            ctx.file_store.read().await.len(),
            0,
            "edit must invalidate the store entry"
        );

        let range_args = format!(
            r#"{{"file_path":"{}","offset":1,"limit":3}}"#,
            path.display()
        );
        let r = ReadFileTool.execute(&range_args, &ctx).await.unwrap();
        assert!(r.success);
        assert!(
            !r.output.contains("FileStore cache"),
            "post-edit read must come from disk, not stale cache:\n{}",
            r.output
        );
        assert_eq!(ctx.file_store.read().await.len(), 1);
    }

    /// Reading an unchanged file twice leaves exactly one file-store entry.
    #[tokio::test]
    async fn d3_reread_unchanged_file_keeps_one_entry() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "big.rs", 200);
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let _ = ReadFileTool.execute(&args, &ctx).await.unwrap();
        let _ = ReadFileTool.execute(&args, &ctx).await.unwrap();
        assert_eq!(ctx.file_store.read().await.len(), 1);
    }

    /// A 350-line file read without a range returns a skeleton, does not mention
    /// `store_id=`, and still leaves one file-store entry.
    #[tokio::test]
    async fn d3_skeleton_path_pushes_to_store() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "huge.rs", 350);
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let r = ReadFileTool.execute(&args, &ctx).await.unwrap();
        assert!(r.success);
        assert!(
            r.output.contains("File skeleton:"),
            "huge file should still get skeleton:\n{}",
            r.output
        );
        assert!(
            !r.output.contains("store_id="),
            "merged design hides store_id from model:\n{}",
            r.output
        );
        assert_eq!(
            ctx.file_store.read().await.len(),
            1,
            "auto_skeleton path must populate FileStore"
        );
    }

    /// A full read followed by two range reads: none of the three outputs contains
    /// the "read of `" or "FileStore cache" preamble markers.
    #[tokio::test]
    async fn d3_subsequent_reads_have_no_framework_preamble() {
        let dir = TempDir::new().unwrap();
        let path = write_n_line_file(&dir, "big.rs", 200);
        let ctx = ToolContext::new(dir.path().to_path_buf());
        let args1 = format!(r#"{{"file_path":"{}"}}"#, path.display());
        let args2 = format!(r#"{{"file_path":"{}","offset":50,"limit":10}}"#, path.display());
        let args3 = format!(r#"{{"file_path":"{}","offset":100,"limit":10}}"#, path.display());
        let r1 = ReadFileTool.execute(&args1, &ctx).await.unwrap();
        let r2 = ReadFileTool.execute(&args2, &ctx).await.unwrap();
        let r3 = ReadFileTool.execute(&args3, &ctx).await.unwrap();
        assert!(r1.success && r2.success && r3.success);
        for (i, r) in [&r1, &r2, &r3].iter().enumerate() {
            assert!(
                !r.output.contains("read of `") && !r.output.contains("FileStore cache"),
                "read #{} must not carry framework metadata; got:\n{}",
                i + 1,
                r.output
            );
        }
    }
}