atomcode_core/tool/
read.rs

1use anyhow::{Context, Result};
2use async_trait::async_trait;
3use serde::Deserialize;
4use serde_json::json;
5
6use super::{ApprovalRequirement, Tool, ToolContext, ToolDef, ToolResult};
7
/// Files with more lines than this return a skeleton (structure overview)
/// instead of full content when read without offset/limit. GLM-5 gets lost
/// in the middle at ~685 lines — 300 is the safe full-content ceiling.
/// Shared with `agent::tool_dispatch` so its first-read heuristic stays aligned.
pub(crate) const SKELETON_LINE_THRESHOLD: usize = 300;

/// The `read_file` tool. A stateless unit type: all per-call state (working
/// directory, caches, semantic searcher) is taken from the `ToolContext`
/// passed to each `Tool` method.
pub struct ReadFileTool;
15
16/// Deserialize a number that may arrive as a float string (weak models often send "50.0" instead of 50).
17fn deserialize_lenient_usize<'de, D>(
18    deserializer: D,
19) -> std::result::Result<Option<usize>, D::Error>
20where
21    D: serde::Deserializer<'de>,
22{
23    use serde::de;
24    struct V;
25    impl<'de> de::Visitor<'de> for V {
26        type Value = Option<usize>;
27        fn expecting(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
28            f.write_str("usize or string")
29        }
30        fn visit_none<E: de::Error>(self) -> std::result::Result<Self::Value, E> {
31            Ok(None)
32        }
33        fn visit_unit<E: de::Error>(self) -> std::result::Result<Self::Value, E> {
34            Ok(None)
35        }
36        fn visit_u64<E: de::Error>(self, v: u64) -> std::result::Result<Self::Value, E> {
37            Ok(Some(v as usize))
38        }
39        fn visit_i64<E: de::Error>(self, v: i64) -> std::result::Result<Self::Value, E> {
40            if v >= 0 {
41                Ok(Some(v as usize))
42            } else {
43                Ok(None)
44            }
45        }
46        fn visit_f64<E: de::Error>(self, v: f64) -> std::result::Result<Self::Value, E> {
47            Ok(Some(v as usize))
48        }
49        fn visit_str<E: de::Error>(self, v: &str) -> std::result::Result<Self::Value, E> {
50            // Handle "50.0" → 50
51            if let Ok(n) = v.trim().parse::<usize>() {
52                return Ok(Some(n));
53            }
54            if let Ok(f) = v.trim().parse::<f64>() {
55                return Ok(Some(f as usize));
56            }
57            Ok(None)
58        }
59    }
60    deserializer.deserialize_any(V)
61}
62
/// Arguments of a `read_file` call, deserialized from the JSON argument
/// string passed to the tool.
#[derive(Deserialize)]
struct ReadFileArgs {
    /// Path of the file to read, exactly as supplied by the caller.
    file_path: String,
    /// Start line (1-based). `None` when the field is absent; parsed through
    /// `deserialize_lenient_usize`, so numeric strings and floats are accepted.
    #[serde(default, deserialize_with = "deserialize_lenient_usize")]
    offset: Option<usize>,
    /// Maximum number of lines to return. `None` when the field is absent;
    /// parsed through `deserialize_lenient_usize` like `offset`.
    #[serde(default, deserialize_with = "deserialize_lenient_usize")]
    limit: Option<usize>,
}
71
72#[async_trait]
73impl Tool for ReadFileTool {
74    fn definition(&self) -> ToolDef {
75        ToolDef {
76            name: "read_file",
77            description: "Read a file. Returns full content with line numbers.\n\
78                Large files return a skeleton (structure overview) — use offset/limit to read sections.\n\
79                NEVER use bash (cat/head/tail) to read files.".to_string(),
80            parameters: json!({
81                "type": "object",
82                "properties": {
83                    "file_path": { "type": "string", "description": "Absolute path to the file to read" },
84                    "offset": { "type": "integer", "description": "Start line (1-based). Omit to read from beginning." },
85                    "limit": { "type": "integer", "description": "Max lines to read. Defaults to full file." }
86                },
87                "required": ["file_path"]
88            }),
89        }
90    }
91
92    fn approval(&self, _args: &str) -> ApprovalRequirement {
93        ApprovalRequirement::AutoApprove
94    }
95
96    fn approval_with_context(&self, args: &str, ctx: &ToolContext) -> ApprovalRequirement {
97        let parsed = match serde_json::from_str::<ReadFileArgs>(args) {
98            Ok(parsed) => parsed,
99            Err(_) => return self.approval(args),
100        };
101        let working_dir = match ctx.working_dir.try_read() {
102            Ok(wd) => wd.clone(),
103            Err(_) => return self.approval(args),
104        };
105        match super::approval_for_path(
106            &parsed.file_path,
107            &working_dir,
108            super::ExternalPathAction::Read,
109        ) {
110            Ok(approval) => approval,
111            Err(_) => self.approval(args),
112        }
113    }
114
115    async fn execute(&self, args: &str, ctx: &ToolContext) -> Result<ToolResult> {
116        let parsed: ReadFileArgs = serde_json::from_str(args)?;
117        let working_dir = ctx.working_dir.read().await.clone();
118        let path = match super::inspect_path_access(&parsed.file_path, &working_dir) {
119            Ok(access) => access.path,
120            Err(err) => {
121                return Ok(ToolResult {
122                    call_id: String::new(),
123                    output: err.to_string(),
124                    success: false,
125                });
126            }
127        };
128        let path_ref = path.as_path();
129
130        // ── Read cache: pure performance optimization ──
131        // Cache stores (mtime, rendered_output). If mtime matches the
132        // current disk state, return the cached output directly —
133        // saves UTF-8 decode + tree-sitter cost on identical re-reads.
134        // No model-visible meta-commentary on cache hits: the cached
135        // bytes are returned silently, same way Claude Code's Read
136        // tool replays content. Aligns with the "framework doesn't
137        // educate the model about its own behaviour" principle.
138        let cache_key: crate::tool::ReadCacheKey = (path.clone(), parsed.offset, parsed.limit);
139        let disk_mtime = tokio::fs::metadata(&path)
140            .await
141            .ok()
142            .and_then(|m| m.modified().ok());
143        if let Some(mtime) = disk_mtime {
144            let cached = ctx.read_cache.read().await.get(&cache_key).cloned();
145            if let Some((cached_mtime, cached_output, _)) = cached {
146                if cached_mtime == mtime {
147                    return Ok(ToolResult {
148                        call_id: String::new(),
149                        output: cached_output,
150                        success: true,
151                    });
152                }
153            }
154        }
155
156        // Auto-recover: if the path is a directory, return a listing instead of an error.
157        if path_ref.is_dir() {
158            let mut entries: Vec<String> = Vec::new();
159            if let Ok(mut rd) = tokio::fs::read_dir(path_ref).await {
160                while let Ok(Some(entry)) = rd.next_entry().await {
161                    let name = entry.file_name().to_string_lossy().to_string();
162                    let is_dir = entry.file_type().await.map(|t| t.is_dir()).unwrap_or(false);
163                    entries.push(if is_dir { format!("{}/", name) } else { name });
164                }
165            }
166            entries.sort();
167            return Ok(ToolResult {
168                call_id: String::new(),
169                output: format!(
170                    "[NOTE: {} is a directory, not a file. Here are its contents:]\n{}",
171                    parsed.file_path,
172                    entries.join("\n")
173                ),
174                success: true,
175            });
176        }
177
178        // If file doesn't exist, auto-find similar filenames and suggest.
179        // Saves 2-3 turns of path guessing (7% of sessions hit this).
180        //
181        // 2026-04-22: collect up to 20 candidates then rank by path-prefix
182        // similarity to what the agent asked for, show top 5. Without the
183        // prefix ranking, a random match in an unrelated subtree (e.g. the
184        // first `index.html` the walk hit) could outrank the correct one in
185        // the requested project — agent ignored the suggestion and started
186        // manual `ls` (see 426-atom 2026-04-21 session).
187        if !path_ref.exists() {
188            // Always return a clean NotFound message (with resolved path
189            // surfaced) — never fall through to `tokio::fs::read` on a
190            // missing file. Falling through used to leak a bare
191            // `"No such file or directory (os error 2)"` from the OS,
192            // which (a) didn't say WHICH path was tried, and (b) was
193            // indistinguishable from EACCES leaks. The agent often
194            // misread it as a permission issue and looped on read_file
195            // hitting the call-loop cap (see runner.rs detect_call_loop)
196            // instead of correcting the path.
197            let filename = path_ref
198                .file_name()
199                .map(|n| n.to_string_lossy().to_string())
200                .unwrap_or_default();
201            let mut matches: Vec<String> = Vec::new();
202            if !filename.is_empty() {
203                fn find_file(
204                    dir: &std::path::Path,
205                    target: &str,
206                    depth: usize,
207                    max_depth: usize,
208                    results: &mut Vec<String>,
209                ) {
210                    if depth > max_depth || results.len() >= 20 {
211                        return;
212                    }
213                    if let Ok(entries) = std::fs::read_dir(dir) {
214                        for entry in entries.flatten() {
215                            let name = entry.file_name().to_string_lossy().to_string();
216                            if name.starts_with('.')
217                                || name == "node_modules"
218                                || name == "target"
219                                || name == ".git"
220                            {
221                                continue;
222                            }
223                            let p = entry.path();
224                            if p.is_dir() {
225                                find_file(&p, target, depth + 1, max_depth, results);
226                            } else if name == target {
227                                results.push(p.to_string_lossy().to_string());
228                            }
229                        }
230                    }
231                }
232                find_file(&working_dir, &filename, 0, 7, &mut matches);
233                // Rank by shared-path-prefix length with the requested
234                // path. The correct match almost always shares the most
235                // segments with what the agent asked for.
236                matches.sort_by_key(|m| {
237                    std::cmp::Reverse(super::shared_prefix_len(&parsed.file_path, m))
238                });
239            }
240
241            // Build the message. Always include the resolved path so the
242            // agent sees what was actually attempted (raw input might be
243            // relative — the resolved path is what hit the filesystem).
244            let mut output = format!(
245                "Error: No such file: {} (resolved to {})",
246                parsed.file_path,
247                path_ref.display()
248            );
249            if !matches.is_empty() {
250                let shown: Vec<String> =
251                    matches.iter().take(5).map(|m| format!("  {}", m)).collect();
252                output.push_str("\n\nDid you mean:\n");
253                output.push_str(&shown.join("\n"));
254            }
255            // Nudge the agent toward absolute paths when it passed a
256            // relative one. The most common cause of this branch is the
257            // agent ignoring an absolute path the user mentioned in
258            // their message and passing a bare basename instead.
259            if !std::path::Path::new(&parsed.file_path).is_absolute()
260                && !parsed.file_path.starts_with('~')
261            {
262                output.push_str(&format!(
263                    "\n\nHint: file_path was relative and resolved against working dir {}. \
264                     If the user mentioned a different location (e.g. ~/some/path), retry \
265                     with the absolute path.",
266                    working_dir.display()
267                ));
268            }
269            return Ok(ToolResult {
270                call_id: String::new(),
271                output,
272                success: false,
273            });
274        }
275
276        // D3 (merged): consult FileStore before reading disk. If we've
277        // read this path before AND mtime hasn't moved, every range
278        // read of any subsequent offset/limit can be served from
279        // memory. The previous design exposed this as a separate
280        // `peek_file` tool, but the model often defaulted to
281        // read_file anyway (datalog 2026-05-06_15-33-23: 13 peeks vs
282        // 59 reads — 18% adoption). Routing the cache hit through
283        // read_file's own path makes the optimisation transparent and
284        // tool-surface-neutral: the model has one tool, the framework
285        // decides disk vs cache.
286        let store_hit: Option<String> = if let Some(mtime) = disk_mtime {
287            let store = ctx.file_store.read().await;
288            store
289                .store_id_for_path(&path)
290                .map(|s| s.to_string())
291                .and_then(|id| store.get(&id).cloned())
292                .filter(|entry| entry.mtime == mtime)
293                .map(|entry| entry.content)
294        } else {
295            None
296        };
297        let served_from_store = store_hit.is_some();
298
299        let content = if let Some(c) = store_hit {
300            // Store entries only ever hold text (we never push binary
301            // bytes), so we can short-circuit the UTF-8 / GBK decode
302            // dance. mtime check above guarantees the content matches
303            // what's currently on disk.
304            c
305        } else {
306            let bytes = tokio::fs::read(&path)
307                .await
308                .with_context(|| format!("Failed to read {}", path.display()))?;
309
310            // Decode: UTF-8 first (the vast majority of text files), then GBK
311            // fallback for plain-text extensions (Chinese Windows legacy files
312            // that fail UTF-8 validation), then declare binary.
313            match String::from_utf8(bytes.clone()) {
314                Ok(s) => s,
315                Err(_) => match decode_non_utf8_text(path_ref, &bytes) {
316                    Some(s) => s,
317                    None => {
318                        let output = format!(
319                            "Binary file ({} bytes), cannot display as text.{}",
320                            bytes.len(),
321                            binary_recovery_hint(path_ref, &parsed.file_path),
322                        );
323                        if let Some(mtime) = disk_mtime {
324                            ctx.read_cache
325                                .write()
326                                .await
327                                .insert(cache_key.clone(), (mtime, output.clone(), 1));
328                        }
329                        return Ok(ToolResult {
330                            call_id: String::new(),
331                            output,
332                            success: true,
333                        });
334                    }
335                },
336            }
337        };
338
339        // Push fresh disk content into the FileStore exactly once,
340        // upstream of every output-shaping branch (skeleton / D3a /
341        // range-slice). Subsequent reads of the same path at any range
342        // hit the store path above (`store_hit`) and skip disk
343        // entirely. Idempotent: re-reading after an edit pushes the
344        // new content under the same path key, replacing the prior
345        // entry. Skipped when we just served from store — content is
346        // already there.
347        if !served_from_store {
348            if let Some(mtime) = disk_mtime {
349                ctx.file_store
350                    .write()
351                    .await
352                    .insert(path.clone(), content.clone(), mtime);
353            }
354        }
355
356        let lines: Vec<&str> = content.lines().collect();
357        let total_lines = lines.len();
358
359        // ── Layer A: full content default, skeleton for large files ──
360        // Skeleton is the FALLBACK, not the default. Files at or below the
361        // threshold return full content so the model can grep→old_string→edit
362        // in 2 steps. Above the threshold we return a skeleton (GLM-5 gets
363        // lost in the middle at ~685 lines).
364        // With offset/limit: always return exact content (model chose a range).
365        let auto_skeleton = total_lines > SKELETON_LINE_THRESHOLD
366            && parsed.offset.is_none()
367            && parsed.limit.is_none();
368
369        if auto_skeleton {
370            let mut searcher = ctx.semantic.lock().await;
371            let skeleton = if let Some(symbols) = searcher.list_symbols(path_ref) {
372                let fname = path_ref
373                    .file_name()
374                    .map(|n| n.to_string_lossy())
375                    .unwrap_or_default();
376                let mut skel = format!("[File skeleton: {} ({} lines). Each symbol line ends with the exact offset/limit to read it — copy those into read_file, don't recompute.]\n\n",
377                    fname, total_lines);
378                // Skeleton is fully driven by semantic layer's list_symbols().
379                // For Vue/Svelte, list_symbols already includes <template>/<style> sections
380                // as pseudo-symbols alongside script functions.
381                // Score symbols for auto-expansion: high-interest names get priority
382                let interest_keywords = [
383                    "handle", "process", "route", "search", "query", "fetch", "execute",
384                    "dispatch", "run", "main", "serve",
385                ];
386                let mut scored: Vec<(usize, &crate::semantic::Symbol)> = symbols
387                    .iter()
388                    .map(|s| {
389                        let name_lower = s.name.to_lowercase();
390                        let body_lines = s.end_line.saturating_sub(s.start_line) + 1;
391                        let keyword_score =
392                            if interest_keywords.iter().any(|k| name_lower.contains(k)) {
393                                100
394                            } else {
395                                0
396                            };
397                        (keyword_score + body_lines, s)
398                    })
399                    .collect();
400                scored.sort_by(|a, b| b.0.cmp(&a.0));
401
402                // Pick top 2 functions to auto-expand (5-50 lines each)
403                let expand_candidates: Vec<&crate::semantic::Symbol> = scored
404                    .iter()
405                    .filter(|(_, s)| {
406                        let body = s.end_line.saturating_sub(s.start_line) + 1;
407                        body >= 5 && body <= 50
408                    })
409                    .take(2)
410                    .map(|(_, s)| *s)
411                    .collect();
412
413                for s in &symbols {
414                    let sig = lines
415                        .get(s.start_line.saturating_sub(1))
416                        .map(|l| l.trim())
417                        .unwrap_or(&s.name);
418                    let sig_short = if sig.chars().count() > 70 {
419                        format!("{}...", sig.chars().take(67).collect::<String>())
420                    } else {
421                        sig.to_string()
422                    };
423
424                    let body_len = s.end_line.saturating_sub(s.start_line) + 1;
425                    if expand_candidates
426                        .iter()
427                        .any(|c| c.start_line == s.start_line && c.name == s.name)
428                    {
429                        // Auto-expand: show full body (no read-params needed — already visible)
430                        skel.push_str(&format!(
431                            "{:>4}| {}  (L{}-{}) [auto-expanded]\n",
432                            s.start_line, sig_short, s.start_line, s.end_line
433                        ));
434                        let start = s.start_line.saturating_sub(1);
435                        let end = s.end_line.min(total_lines);
436                        for i in (start + 1)..end {
437                            if let Some(line) = lines.get(i) {
438                                skel.push_str(&format!("{:>4}| {}\n", i + 1, line));
439                            }
440                        }
441                    } else {
442                        skel.push_str(&format!(
443                            "{:>4}| {}  (L{}-{}, read offset={} limit={})\n",
444                            s.start_line,
445                            sig_short,
446                            s.start_line,
447                            s.end_line,
448                            s.start_line,
449                            body_len
450                        ));
451                    }
452                }
453                skel
454            } else {
455                // Unreachable: list_symbols always returns Some via indent fallback.
456                // Kept as safety net — produces minimal skeleton.
457                let fname = path
458                    .file_name()
459                    .map(|n| n.to_string_lossy())
460                    .unwrap_or_default();
461                format!("[File skeleton: {} ({} lines) — use grep to find relevant lines, then read with offset/limit.]\n",
462                    fname, total_lines)
463            };
464            // The upstream `served_from_store ? skip : push` block
465            // already populated FileStore with the raw content; this
466            // skeleton path does NOT need its own push. Subsequent
467            // range reads of this file hit FileStore transparently
468            // via the upstream `store_hit` branch — no model-visible
469            // metadata in the result.
470            if let Some(mtime) = disk_mtime {
471                ctx.read_cache.write().await.insert(
472                    cache_key.clone(),
473                    (mtime, skeleton.clone(), 1),
474                );
475            }
476            return Ok(ToolResult {
477                call_id: String::new(),
478                output: skeleton,
479                success: true,
480            });
481        }
482
483        let offset = parsed.offset.unwrap_or(1).max(1) - 1;
484
485        // No hardcoded line limit — Layer A (auto_skeleton) is the only gate.
486        // If auto_skeleton didn't fire, the file fits in budget → return all lines.
487        // Ignore model-supplied limit when reading from start (offset=0): if the
488        // file passed Layer A, the model is just creating fragments by passing
489        // limit=100. GLM-5 does this despite "do NOT use offset/limit" instruction.
490        let limit = match (parsed.offset, parsed.limit) {
491            (None, Some(_)) => total_lines, // offset=0 + limit → ignore limit, give full
492            (Some(_), Some(l)) => l,        // explicit range → respect it
493            _ => total_lines,               // no limit → full
494        };
495
496        // If offset > 0 but auto-expand would give the whole file, reset offset to 0
497        let offset = if offset > 0 && limit >= total_lines {
498            0
499        } else {
500            offset
501        };
502        // Clamp offset to file size — caller may pass an offset past EOF
503        // (e.g. cached line count stale, or model hallucinates a line number).
504        let offset = offset.min(total_lines);
505
506        let end = (offset.saturating_add(limit)).min(total_lines);
507
508        // char_limit branch DELETED — Layer A (auto_skeleton) is the only gate.
509        // If we reach here, the file passed the budget check → return full content.
510        let returned_all = offset == 0 && end >= total_lines;
511
512        let mut output: String = lines[offset..end]
513            .iter()
514            .enumerate()
515            .map(|(i, line)| format!("{:>4}| {}", offset + i + 1, line))
516            .collect::<Vec<_>>()
517            .join("\n");
518
519        if !returned_all {
520            // Append tree-sitter skeleton of the UNSEEN portions.
521            // Model reads 51 lines but file has 600 — skeleton shows
522            // what functions exist in the other 549 lines with line numbers.
523            let mut searcher = ctx.semantic.lock().await;
524            let skeleton = if let Some(symbols) = searcher.list_symbols(path_ref) {
525                let unseen: Vec<String> = symbols
526                    .iter()
527                    .filter(|s| s.start_line < offset + 1 || s.start_line > end)
528                    .map(|s| {
529                        let sig = lines
530                            .get(s.start_line.saturating_sub(1))
531                            .map(|l| l.trim())
532                            .unwrap_or(&s.name);
533                        let sig_short: String = sig.chars().take(70).collect();
534                        let body_len = s.end_line.saturating_sub(s.start_line) + 1;
535                        format!(
536                            "{:>4}| {}  (L{}-{}, read offset={} limit={})",
537                            s.start_line,
538                            sig_short,
539                            s.start_line,
540                            s.end_line,
541                            s.start_line,
542                            body_len
543                        )
544                    })
545                    .collect();
546                if !unseen.is_empty() {
547                    format!("\n{}", unseen.join("\n"))
548                } else {
549                    String::new()
550                }
551            } else {
552                String::new()
553            };
554
555            output.push_str(&format!(
556                "\n\n[Showing lines {}-{} of {} total. Unseen structure:]{}",
557                offset + 1,
558                end,
559                total_lines,
560                skeleton
561            ));
562        }
563
564        // After the merge of peek_file → read_file, the previous
565        // "pointer + preview" branch (LARGE_FILE_LINE_THRESHOLD) is
566        // gone. Range reads are served from FileStore transparently
567        // via the upstream `store_hit` check, so there's no separate
568        // store-id pointer the model needs to track. The renderer
569        // just emits full inline content (skeleton already handled
570        // very-large files above).
571        if let Some(mtime) = disk_mtime {
572            ctx.read_cache
573                .write()
574                .await
575                .insert(cache_key, (mtime, output.clone(), 1));
576        }
577        Ok(ToolResult {
578            call_id: String::new(),
579            output,
580            success: true,
581        })
582    }
583}
584
/// File extensions (lowercase, without the dot) that are plain text in
/// practice yet frequently show up encoded as GBK / GB18030 on Chinese
/// Windows machines. The GBK fallback is attempted *only* for these: for real
/// binary formats (.doc/.pdf/etc) the decode would succeed by luck (GBK
/// accepts most byte sequences) and flood the model's context with random
/// ideographs.
const GBK_CANDIDATE_EXTENSIONS: &[&str] = &[
    "txt", "md", "markdown", "csv", "tsv", "log", "sql", "ini", "conf", "cfg", "toml", "yaml",
    "yml", "html", "htm", "xml", "json", "js", "ts", "css", "py", "rb", "go", "rs", "c", "h",
    "cpp", "hpp", "java", "kt", "sh", "bat", "ps1",
];

/// Returns `true` when `path` has an extension that matches one of
/// [`GBK_CANDIDATE_EXTENSIONS`], ignoring ASCII case. Paths without an
/// extension, or whose extension is not valid UTF-8, yield `false`.
fn has_text_extension(path: &std::path::Path) -> bool {
    match path.extension().and_then(|raw| raw.to_str()) {
        // Every table entry is lowercase ASCII, so a case-insensitive compare
        // is equivalent to lowercasing the extension first.
        Some(ext) => GBK_CANDIDATE_EXTENSIONS
            .iter()
            .any(|candidate| candidate.eq_ignore_ascii_case(ext)),
        None => false,
    }
}
605
606/// Attempt to decode a file that failed UTF-8 validation. Today this tries
607/// GB18030 (superset of GBK/GB2312) only, and only for text-ish extensions —
608/// that's ~100% of the real-world miss we've seen on Chinese Windows `.txt`.
609/// Returns `None` for binary files so the caller can emit the recovery hint.
610fn decode_non_utf8_text(path: &std::path::Path, bytes: &[u8]) -> Option<String> {
611    if !has_text_extension(path) {
612        return None;
613    }
614    let (decoded, _, had_errors) = encoding_rs::GB18030.decode(bytes);
615    if had_errors {
616        return None;
617    }
618    Some(decoded.into_owned())
619}
620
621/// Build a recovery hint for a file that couldn't be decoded as text. Lets
622/// the model pivot to an external converter (pandoc / pdftotext / unzip
623/// for .docx) on the first failure instead of cycling through offset/limit
624/// values for 30 turns.
625fn binary_recovery_hint(path: &std::path::Path, full_path_str: &str) -> String {
626    let ext = path
627        .extension()
628        .and_then(|e| e.to_str())
629        .map(|e| e.to_ascii_lowercase())
630        .unwrap_or_default();
631    let quoted = shell_quote(full_path_str);
632    match ext.as_str() {
633        "doc" => format!(
634            "\n\n[Recovery] This is a legacy Word (.doc) binary. Run one of:\n\
635             - bash: `antiword {q}`\n\
636             - bash: `pandoc {q} -t plain`\n\
637             - bash: `catdoc {q}`",
638            q = quoted,
639        ),
640        "docx" => format!(
641            "\n\n[Recovery] This is a modern Word (.docx) — a zip containing XML. Run:\n\
642             - bash: `unzip -p {q} word/document.xml | sed 's/<[^>]*>//g'`\n\
643             - or: `pandoc {q} -t plain`",
644            q = quoted,
645        ),
646        "xls" => format!(
647            "\n\n[Recovery] Legacy Excel (.xls). Run:\n\
648             - bash: `libreoffice --headless --convert-to csv --outdir /tmp {q} && cat /tmp/*.csv`",
649            q = quoted,
650        ),
651        "xlsx" => format!(
652            "\n\n[Recovery] Modern Excel (.xlsx). Run:\n\
653             - bash: `libreoffice --headless --convert-to csv --outdir /tmp {q} && cat /tmp/*.csv`\n\
654             - or: `unzip -p {q} xl/sharedStrings.xml` (raw string table)",
655            q = quoted,
656        ),
657        "ppt" | "pptx" => format!(
658            "\n\n[Recovery] PowerPoint. Run:\n\
659             - bash: `pandoc {q} -t plain`",
660            q = quoted,
661        ),
662        "pdf" => format!(
663            "\n\n[Recovery] PDF. Run:\n\
664             - bash: `pdftotext {q} -` (poppler)\n\
665             - or: `mutool draw -F txt {q}`",
666            q = quoted,
667        ),
668        "rtf" => format!(
669            "\n\n[Recovery] RTF. Run:\n\
670             - bash: `pandoc {q} -t plain`\n\
671             - or: `unrtf --text {q}`",
672            q = quoted,
673        ),
674        _ => format!(
675            "\n\n[Hint] The file is not UTF-8 and not a recognised text extension. \
676             If it's text in another encoding, ask the user; if it's a packaged format \
677             (archive, installer, media), there is no point reading it as text.",
678        ),
679    }
680}
681
/// Quote `s` for embedding in a suggested bash command.
///
/// Uses the POSIX single-quote form: the whole string is wrapped in `'`, and
/// each `'` inside it is rewritten as `'\''` (close the quote, emit an escaped
/// quote, reopen). An empty input yields `''`.
fn shell_quote(s: &str) -> String {
    format!("'{}'", s.replace('\'', r"'\''"))
}
697
698#[cfg(test)]
699mod tests {
700    use super::*;
701    use tempfile::TempDir;
702
703    /// Cache hit returns full content (performance cache, not STUB).
704    #[tokio::test]
705    async fn read_cache_hits_returns_full_content() {
706        let dir = TempDir::new().unwrap();
707        let path = dir.path().join("a.rs");
708        std::fs::write(&path, "fn main() {}\n").unwrap();
709
710        let ctx = ToolContext::new(dir.path().to_path_buf());
711        let tool = ReadFileTool;
712        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
713
714        let r1 = tool.execute(&args, &ctx).await.unwrap();
715        assert!(r1.success);
716        assert!(
717            r1.output.contains("fn main"),
718            "first read should return content"
719        );
720
721        let r2 = tool.execute(&args, &ctx).await.unwrap();
722        assert!(r2.success);
723        assert!(
724            r2.output.contains("fn main"),
725            "cache hit should return same content"
726        );
727    }
728
729    /// 2nd+ identical read returns the cached output silently — no
730    /// model-visible meta-commentary. Aligns with Claude Code's Read
731    /// tool behaviour: cache is a performance optimisation, not a
732    /// teaching tool. The "you've read this N times" preamble that
733    /// the previous version prepended has been removed.
734    #[tokio::test]
735    async fn read_cache_hits_replay_silently() {
736        let dir = TempDir::new().unwrap();
737        let path = dir.path().join("a.rs");
738        std::fs::write(&path, "fn main() {}\n").unwrap();
739
740        let ctx = ToolContext::new(dir.path().to_path_buf());
741        let tool = ReadFileTool;
742        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
743
744        let r1 = tool.execute(&args, &ctx).await.unwrap();
745        let r2 = tool.execute(&args, &ctx).await.unwrap();
746        let r3 = tool.execute(&args, &ctx).await.unwrap();
747        assert!(r1.success && r2.success && r3.success);
748        // No "you've read N times" preamble on any replay.
749        for r in [&r2, &r3] {
750            assert!(
751                !r.output.contains("times this session"),
752                "no meta-commentary on cache hits; got:\n{}",
753                r.output
754            );
755        }
756    }
757
758    /// Cache miss after file content changes — mtime shifts, cached entry is ignored.
759    #[tokio::test]
760    async fn read_cache_misses_when_mtime_changes() {
761        let dir = TempDir::new().unwrap();
762        let path = dir.path().join("b.rs");
763        std::fs::write(&path, "fn main() {}\n").unwrap();
764
765        let ctx = ToolContext::new(dir.path().to_path_buf());
766        let tool = ReadFileTool;
767        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
768
769        let r1 = tool.execute(&args, &ctx).await.unwrap();
770        let out1 = r1.output.clone();
771
772        // Touch the file with new content + force a visible mtime change.
773        std::thread::sleep(std::time::Duration::from_millis(10));
774        std::fs::write(&path, "fn main() { println!(\"hi\"); }\n").unwrap();
775
776        let r2 = tool.execute(&args, &ctx).await.unwrap();
777        assert_ne!(
778            r2.output, out1,
779            "2nd read must re-read from disk when mtime changed"
780        );
781        assert!(r2.output.contains("println"));
782    }
783
    /// D3 SMOKE TEST: after `edit_file` runs, neither read_cache nor
    /// FileStore may serve pre-edit content for the edited path. This is
    /// the load-bearing assumption for Task 1 of
    /// plans/2026-05-07-readfile-skip-and-edit-verify.md — if this test
    /// fails, weak models will read stale post-edit content and the
    /// read_file-skips-microcompact strategy collapses.
    ///
    /// Sequence: write A → read (populates caches) → edit A→B → probe both
    /// caches → read again → must observe B, not cached A.
    #[tokio::test]
    async fn d3_edit_invalidates_caches_for_subsequent_read() {
        let dir = TempDir::new().unwrap();
        let path = dir.path().join("payload.rs");
        std::fs::write(&path, "fn before() {}\n").unwrap();

        let ctx = ToolContext::new(dir.path().to_path_buf());
        let read_tool = ReadFileTool;
        let edit_tool = crate::tool::edit::EditFileTool;
        let read_args = format!(r#"{{"file_path":"{}"}}"#, path.display());

        // Step 1: initial read populates read_cache and FileStore (one
        // entry each, asserted below).
        let r1 = read_tool.execute(&read_args, &ctx).await.unwrap();
        assert!(r1.output.contains("fn before"));
        assert_eq!(
            ctx.file_store.read().await.len(),
            1,
            "FileStore should have 1 entry after read"
        );
        assert_eq!(
            ctx.read_cache.read().await.len(),
            1,
            "read_cache should have 1 entry after read"
        );

        // NO SLEEP: deliberately worst-case. On filesystems with coarse mtime
        // granularity (ext4 sec-precision), the post-edit mtime may equal the
        // pre-edit mtime, defeating the read_cache mtime gate. Then the only
        // line of defense is the explicit `invalidate(canon_path)` in edit.rs.
        // If this test passes without sleeping, both layers are working.

        // Step 2: edit_file replaces "before" with "after".
        let edit_args = format!(
            r#"{{"file_path":"{}","old_string":"fn before() {{}}","new_string":"fn after() {{ /* edited */ }}"}}"#,
            path.display()
        );
        let e = edit_tool.execute(&edit_args, &ctx).await.unwrap();
        assert!(e.success, "edit should succeed; got: {}", e.output);

        // Sanity: disk now holds B.
        let on_disk = std::fs::read_to_string(&path).unwrap();
        assert!(
            on_disk.contains("fn after"),
            "disk content not updated: {}",
            on_disk
        );

        // FileStore should be invalidated for this path. (Either entry gone,
        // or replaced with new content. Both are correct outcomes.)
        // The read guard lives only inside this block, so it is released
        // before any later tool call.
        let fs_state_after_edit = {
            let store = ctx.file_store.read().await;
            store
                .store_id_for_path(&path)
                .and_then(|id| store.get(id).cloned())
                .map(|e| e.content)
        };
        if let Some(content) = &fs_state_after_edit {
            assert!(
                content.contains("fn after"),
                "FileStore retained pre-edit content: {}",
                content
            );
        }
        // (If None, that's even better — fully invalidated.)

        // Defense-layer probe (BEFORE the second read): both caches are
        // now explicitly purged by edit.rs.
        //
        // FileStore: explicitly invalidated by edit.rs — entry gone OR
        //   overwritten with new content (already asserted above).
        // read_cache: explicitly purged by edit.rs (defense-in-depth for
        //   FS with coarse mtime granularity where the mtime gate alone
        //   could fail). Map should hold no entries for this path.
        //
        // NOTE(review): the filter compares cache keys against the
        // un-canonicalised `path`. If keys are stored canonicalised (e.g.
        // symlinked temp dirs), this count would be 0 regardless — confirm.
        let read_cache_post_edit = ctx.read_cache.read().await.clone();
        let stale_cache_for_path = read_cache_post_edit
            .keys()
            .filter(|(p, _, _)| p == &path)
            .count();
        assert_eq!(
            stale_cache_for_path, 0,
            "read_cache must be purged for edited path; lingering entries \
             would let coarse-mtime FS serve stale content"
        );

        // Step 3: re-read must surface B, NOT cached A.
        let r2 = read_tool.execute(&read_args, &ctx).await.unwrap();
        assert!(
            r2.output.contains("fn after"),
            "POST-EDIT READ SERVED STALE CONTENT: {}",
            r2.output
        );
        assert!(
            !r2.output.contains("fn before"),
            "post-edit read still mentions pre-edit symbol: {}",
            r2.output
        );
    }
889
890    /// GBK-encoded .txt should decode via the fallback path, not be reported
891    /// as binary. This is the hot path for Chinese Windows legacy text files.
892    #[tokio::test]
893    async fn read_decodes_gbk_text_file() {
894        let dir = TempDir::new().unwrap();
895        let path = dir.path().join("notes.txt");
896        // "你好世界" in GB18030 (hex: C4 E3 BA C3 CA C0 BD E7). Using Vec
897        // defeats the compile-time invalid-UTF-8 literal lint.
898        let gbk_bytes: Vec<u8> = vec![0xC4, 0xE3, 0xBA, 0xC3, 0xCA, 0xC0, 0xBD, 0xE7, 0x0A];
899        std::fs::write(&path, &gbk_bytes).unwrap();
900        // Sanity: these bytes must not be valid UTF-8, otherwise the test
901        // wouldn't exercise the fallback.
902        assert!(std::str::from_utf8(&gbk_bytes).is_err());
903
904        let ctx = ToolContext::new(dir.path().to_path_buf());
905        let tool = ReadFileTool;
906        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
907
908        let r = tool.execute(&args, &ctx).await.unwrap();
909        assert!(r.success, "GBK text should decode, got: {}", r.output);
910        assert!(
911            r.output.contains("你好世界"),
912            "expected decoded text, got: {}",
913            r.output
914        );
915        assert!(!r.output.contains("Binary file"));
916    }
917
918    /// Binary formats (Office, PDF) should NOT trigger GBK decode (that would
919    /// dump random ideographs into context). Instead the hint path fires.
920    #[tokio::test]
921    async fn read_docx_returns_recovery_hint_not_garbage() {
922        let dir = TempDir::new().unwrap();
923        let path = dir.path().join("spec.docx");
924        // Docx is a zip — "PK\x03\x04" + random bytes that aren't valid UTF-8.
925        let docx_bytes: Vec<u8> = [b'P', b'K', 0x03, 0x04]
926            .iter()
927            .copied()
928            .chain((0..200).map(|i| (i as u8).wrapping_mul(31).wrapping_add(0x80)))
929            .collect();
930        // Ensure non-UTF-8 (our mul trick usually produces invalid sequences,
931        // but belt-and-braces: append a clearly invalid byte).
932        let mut docx_bytes = docx_bytes;
933        docx_bytes.extend_from_slice(&[0xFE, 0xFF, 0xC0]);
934        std::fs::write(&path, &docx_bytes).unwrap();
935
936        let ctx = ToolContext::new(dir.path().to_path_buf());
937        let tool = ReadFileTool;
938        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
939
940        let r = tool.execute(&args, &ctx).await.unwrap();
941        assert!(r.output.contains("Binary file"));
942        assert!(
943            r.output.contains("Recovery"),
944            "should give recovery hint: {}",
945            r.output
946        );
947        assert!(r.output.contains("unzip") || r.output.contains("pandoc"));
948    }
949
950    #[tokio::test]
951    async fn read_pdf_returns_pdftotext_hint() {
952        let dir = TempDir::new().unwrap();
953        let path = dir.path().join("doc.pdf");
954        // %PDF-1.4 header + junk that fails UTF-8.
955        let mut bytes: Vec<u8> = b"%PDF-1.4\n".to_vec();
956        bytes.extend_from_slice(&[0xFF, 0xFE, 0xC0, 0x80, 0xFE]);
957        std::fs::write(&path, &bytes).unwrap();
958
959        let ctx = ToolContext::new(dir.path().to_path_buf());
960        let tool = ReadFileTool;
961        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
962
963        let r = tool.execute(&args, &ctx).await.unwrap();
964        assert!(r.output.contains("Binary file"));
965        assert!(
966            r.output.contains("pdftotext"),
967            "should suggest pdftotext: {}",
968            r.output
969        );
970    }
971
972    #[test]
973    fn shell_quote_escapes_single_quote() {
974        assert_eq!(shell_quote("abc"), "'abc'");
975        assert_eq!(shell_quote("a'b"), r"'a'\''b'");
976        assert_eq!(
977            shell_quote("/tmp/file with spaces.doc"),
978            "'/tmp/file with spaces.doc'"
979        );
980    }
981
982    /// Skeleton symbol lines carry ready-to-copy offset/limit values so the
983    /// model doesn't have to compute body length from the L{start}-{end} span.
984    #[tokio::test]
985    async fn skeleton_includes_read_offset_limit_hints() {
986        let dir = TempDir::new().unwrap();
987        let path = dir.path().join("big.rs");
988
989        // Build >SKELETON_LINE_THRESHOLD lines of Rust with one recognizable
990        // fn that is long enough to survive the auto-expand filter (>50 body
991        // lines → stays collapsed → should get the read-params hint).
992        let mut content = String::new();
993        content.push_str("pub fn save_session(id: &str) -> Result<()> {\n");
994        for i in 0..80 {
995            content.push_str(&format!("    let _x{} = {};\n", i, i));
996        }
997        content.push_str("    Ok(())\n");
998        content.push_str("}\n");
999        for i in 0..(SKELETON_LINE_THRESHOLD + 20) {
1000            content.push_str(&format!("// filler {}\n", i));
1001        }
1002        std::fs::write(&path, &content).unwrap();
1003
1004        let ctx = ToolContext::new(dir.path().to_path_buf());
1005        let tool = ReadFileTool;
1006        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1007
1008        let r = tool.execute(&args, &ctx).await.unwrap();
1009        assert!(r.success);
1010        assert!(
1011            r.output.contains("[File skeleton:"),
1012            "expected skeleton output, got:\n{}",
1013            r.output
1014        );
1015        // A collapsed symbol line must carry the pre-computed read params.
1016        assert!(
1017            r.output.contains("read offset=1 limit="),
1018            "skeleton should expose offset=1 limit=<body_len> for save_session\nGot:\n{}",
1019            r.output
1020        );
1021    }
1022
1023    /// P0 #4: when a 404 recovery has multiple candidates, the one sharing
1024    /// the most path prefix with the requested path must come first.
1025    /// Regression for 426-atom 2026-04-21 session where agent asked for
1026    /// `/proj/A/index.html` and a wrong-project `index.html` outranked the
1027    /// correct one.
1028    #[tokio::test]
1029    async fn read_404_ranks_by_shared_path_prefix() {
1030        let dir = TempDir::new().unwrap();
1031        // Two projects with a same-named file. The one sharing more of the
1032        // requested path must be listed first.
1033        std::fs::create_dir_all(dir.path().join("proj-wanted").join("presentation")).unwrap();
1034        std::fs::create_dir_all(dir.path().join("proj-other")).unwrap();
1035        std::fs::write(
1036            dir.path().join("proj-wanted/presentation/index.html"),
1037            "<html></html>",
1038        )
1039        .unwrap();
1040        std::fs::write(dir.path().join("proj-other/index.html"), "<html></html>").unwrap();
1041
1042        let ctx = ToolContext::new(dir.path().to_path_buf());
1043        let tool = ReadFileTool;
1044        // Ask for a wrong path in proj-wanted — 404, both candidates found.
1045        let asked = dir.path().join("proj-wanted/index.html");
1046        let args = format!(r#"{{"file_path":"{}"}}"#, asked.display());
1047
1048        let r = tool.execute(&args, &ctx).await.unwrap();
1049        assert!(!r.success);
1050        assert!(r.output.contains("Did you mean"));
1051        // The correct candidate (inside proj-wanted/) must appear before the
1052        // cross-project noise (inside proj-other/).
1053        let wanted_pos = r
1054            .output
1055            .find("proj-wanted/presentation/index.html")
1056            .unwrap();
1057        let other_pos = r.output.find("proj-other/index.html").unwrap();
1058        assert!(
1059            wanted_pos < other_pos,
1060            "proj-wanted match must rank above proj-other. output:\n{}",
1061            r.output
1062        );
1063    }
1064
1065    /// The key UX case behind option B in the OAuth-fix follow-up:
1066    /// agent passes a relative basename for a file that doesn't exist
1067    /// in the working dir AND no fuzzy match turns up. Pre-fix this
1068    /// fell through to `tokio::fs::read?` and the agent saw a bare
1069    /// `"No such file or directory (os error 2)"` (or, when a parent
1070    /// directory's perms tripped the kernel, a misleading
1071    /// `"Permission denied (os error 13)"`). The fix:
1072    ///   1. Always early-return a clean `Error: No such file: <input>
1073    ///      (resolved to <abs path>)` so the agent sees what was tried
1074    ///   2. Add the absolute-path hint when input was relative —
1075    ///      pushing the agent to use the path the user actually
1076    ///      mentioned (e.g. `~/.atomcode/MEMORY.md`) on the next call
1077    ///      instead of looping.
1078    #[tokio::test]
1079    async fn read_404_relative_path_includes_resolved_path_and_absolute_hint() {
1080        let dir = TempDir::new().unwrap();
1081        // Working dir has no MEMORY.md and no fuzzy match — so the
1082        // suggestion list must come back empty and we exercise the
1083        // "no candidates" branch that previously fell through.
1084        let ctx = ToolContext::new(dir.path().to_path_buf());
1085        let tool = ReadFileTool;
1086        let args = r#"{"file_path":"MEMORY.md"}"#;
1087
1088        let r = tool.execute(args, &ctx).await.unwrap();
1089        assert!(!r.success);
1090        assert!(
1091            r.output.contains("No such file: MEMORY.md"),
1092            "must surface the raw input. output:\n{}",
1093            r.output
1094        );
1095        assert!(
1096            r.output.contains("resolved to"),
1097            "must surface the resolved absolute path so the agent sees \
1098             what was actually attempted. output:\n{}",
1099            r.output
1100        );
1101        assert!(
1102            r.output.contains("absolute path"),
1103            "relative-input path must include the absolute-path hint. output:\n{}",
1104            r.output
1105        );
1106        // We expect this branch to NEVER leak a bare OS error.
1107        assert!(
1108            !r.output.contains("os error"),
1109            "must not leak the raw OS error string. output:\n{}",
1110            r.output
1111        );
1112    }
1113
1114    /// Mirror of the relative-path test for absolute input: the
1115    /// resolved-path line is still useful (shows canonicalisation),
1116    /// but the absolute-path hint must be suppressed — the agent
1117    /// already gave us an absolute path.
1118    #[tokio::test]
1119    async fn read_404_absolute_path_omits_relative_hint() {
1120        let dir = TempDir::new().unwrap();
1121        let ctx = ToolContext::new(dir.path().to_path_buf());
1122        let tool = ReadFileTool;
1123        let asked = dir.path().join("MEMORY.md");
1124        let args = format!(r#"{{"file_path":"{}"}}"#, asked.display());
1125
1126        let r = tool.execute(&args, &ctx).await.unwrap();
1127        assert!(!r.success);
1128        assert!(r.output.contains("No such file"));
1129        assert!(
1130            !r.output.contains("absolute path"),
1131            "absolute-input path must NOT show the relative-path hint. output:\n{}",
1132            r.output
1133        );
1134    }
1135
1136    // ── D3 FileStore integration ────────────────────────────────────
1137
1138    /// Helper: write a file with `n_lines` lines (each `line N`) and
1139    /// return its absolute path. Use file sizes large enough to trip
1140    /// the FileStore threshold (50 lines).
1141    fn write_n_line_file(dir: &TempDir, name: &str, n_lines: usize) -> std::path::PathBuf {
1142        let path = dir.path().join(name);
1143        let body: String = (1..=n_lines).map(|i| format!("line {}\n", i)).collect();
1144        std::fs::write(&path, body).unwrap();
1145        path
1146    }
1147
1148    /// Every fresh disk read pushes its content into FileStore so
1149    /// subsequent reads of any range hit the in-memory snapshot
1150    /// instead of touching disk again. After the peek_file → read_file
1151    /// merge, the model never sees a store_id — the cache is purely
1152    /// internal.
1153    #[tokio::test]
1154    async fn d3_full_read_pushes_to_store_returns_inline_content() {
1155        let dir = TempDir::new().unwrap();
1156        let path = write_n_line_file(&dir, "big.rs", 200);
1157        let ctx = ToolContext::new(dir.path().to_path_buf());
1158        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1159        let r = ReadFileTool.execute(&args, &ctx).await.unwrap();
1160        assert!(r.success);
1161        // No more pointer/preview formatting — model gets the content
1162        // directly. store_id is internal-only after the merge.
1163        assert!(
1164            !r.output.contains("store_id="),
1165            "store_id must NOT leak into model output:\n{}",
1166            r.output
1167        );
1168        assert!(
1169            !r.output.contains("peek_file"),
1170            "peek_file no longer exists, must not be referenced:\n{}",
1171            r.output
1172        );
1173        // Full content is inline.
1174        assert!(r.output.contains("line 1"));
1175        assert!(r.output.contains("line 100"));
1176        assert!(r.output.contains("line 200"));
1177        // Store populated for future range reads.
1178        assert_eq!(ctx.file_store.read().await.len(), 1);
1179    }
1180
1181    /// Small files also populate the store — uniform behaviour means
1182    /// the "Nth read" hint can fire on any file, and a future range
1183    /// read of a small file (rare but possible) hits cache too.
1184    #[tokio::test]
1185    async fn d3_small_file_pushes_to_store_after_merge() {
1186        let dir = TempDir::new().unwrap();
1187        let path = write_n_line_file(&dir, "small.rs", 10);
1188        let ctx = ToolContext::new(dir.path().to_path_buf());
1189        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1190        let r = ReadFileTool.execute(&args, &ctx).await.unwrap();
1191        assert!(r.success);
1192        assert_eq!(
1193            ctx.file_store.read().await.len(),
1194            1,
1195            "fresh disk read must populate store regardless of file size"
1196        );
1197    }
1198
1199    /// THE merge's core promise: a range read after a full read of
1200    /// the same path is served from FileStore (no disk hit). After
1201    /// the CC-alignment cleanup the store-served path is silent —
1202    /// the model gets the requested range with no model-visible
1203    /// metadata about cache origin. Test pins behaviour by checking
1204    /// (a) the requested lines are returned, (b) no leaked
1205    /// "FileStore" / "cache" preamble appears, and (c) the store
1206    /// still has the entry (so we know the cache was actually used).
1207    #[tokio::test]
1208    async fn d3_range_read_after_full_read_silently_serves_from_store() {
1209        let dir = TempDir::new().unwrap();
1210        let path = write_n_line_file(&dir, "big.rs", 200);
1211        let ctx = ToolContext::new(dir.path().to_path_buf());
1212
1213        let full_args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1214        let _ = ReadFileTool.execute(&full_args, &ctx).await.unwrap();
1215
1216        let range_args = format!(
1217            r#"{{"file_path":"{}","offset":100,"limit":5}}"#,
1218            path.display()
1219        );
1220        let r = ReadFileTool.execute(&range_args, &ctx).await.unwrap();
1221        assert!(r.success);
1222        assert!(r.output.contains("line 100"));
1223        assert!(
1224            !r.output.contains("FileStore"),
1225            "store-served read must NOT leak any FileStore preamble:\n{}",
1226            r.output
1227        );
1228        assert_eq!(
1229            ctx.file_store.read().await.len(),
1230            1,
1231            "FileStore must retain the entry across both reads"
1232        );
1233    }
1234
1235    /// Edit invalidates the cache so the next read sees fresh disk
1236    /// content, not a stale snapshot. Without this, the model would
1237    /// reason against bytes that no longer match what's on disk after
1238    /// its own edit.
1239    #[tokio::test]
1240    async fn d3_edit_invalidates_cache_next_read_hits_disk() {
1241        let dir = TempDir::new().unwrap();
1242        let path = write_n_line_file(&dir, "big.rs", 200);
1243        let ctx = ToolContext::new(dir.path().to_path_buf());
1244
1245        let read_args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1246        let _ = ReadFileTool.execute(&read_args, &ctx).await.unwrap();
1247        assert_eq!(ctx.file_store.read().await.len(), 1);
1248
1249        let edit_args = format!(
1250            r#"{{"file_path":"{}","old_string":"line 1\n","new_string":"LINE 1\n"}}"#,
1251            path.display()
1252        );
1253        let e = crate::tool::edit::EditFileTool
1254            .execute(&edit_args, &ctx)
1255            .await
1256            .unwrap();
1257        assert!(e.success, "edit must succeed:\n{}", e.output);
1258        assert_eq!(
1259            ctx.file_store.read().await.len(),
1260            0,
1261            "edit must invalidate the store entry"
1262        );
1263
1264        // Range read after edit: store was invalidated, so this is a
1265        // fresh disk read. Output must NOT carry the cache notice and
1266        // store gets repopulated.
1267        let range_args = format!(
1268            r#"{{"file_path":"{}","offset":1,"limit":3}}"#,
1269            path.display()
1270        );
1271        let r = ReadFileTool.execute(&range_args, &ctx).await.unwrap();
1272        assert!(r.success);
1273        assert!(
1274            !r.output.contains("FileStore cache"),
1275            "post-edit read must come from disk, not stale cache:\n{}",
1276            r.output
1277        );
1278        assert_eq!(ctx.file_store.read().await.len(), 1);
1279    }
1280
1281    /// Re-reading the same path with the same args keeps store size
1282    /// at 1 — the entry is replaced, not duplicated. Guards against
1283    /// a regression where every call grew the store unboundedly.
1284    #[tokio::test]
1285    async fn d3_reread_unchanged_file_keeps_one_entry() {
1286        let dir = TempDir::new().unwrap();
1287        let path = write_n_line_file(&dir, "big.rs", 200);
1288        let ctx = ToolContext::new(dir.path().to_path_buf());
1289        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1290        let _ = ReadFileTool.execute(&args, &ctx).await.unwrap();
1291        let _ = ReadFileTool.execute(&args, &ctx).await.unwrap();
1292        assert_eq!(ctx.file_store.read().await.len(), 1);
1293    }
1294
1295    /// Auto-skeleton path (>300 lines) populates the store too — fix
1296    /// for the early-return bug that left huge files completely
1297    /// outside the cache (datalog 2026-05-06_14-29-08: 19 reads of a
1298    /// single 753-line file, zero cache hits, before this guard).
1299    #[tokio::test]
1300    async fn d3_skeleton_path_pushes_to_store() {
1301        let dir = TempDir::new().unwrap();
1302        let path = write_n_line_file(&dir, "huge.rs", 350);
1303        let ctx = ToolContext::new(dir.path().to_path_buf());
1304        let args = format!(r#"{{"file_path":"{}"}}"#, path.display());
1305        let r = ReadFileTool.execute(&args, &ctx).await.unwrap();
1306        assert!(r.success);
1307        assert!(
1308            r.output.contains("File skeleton:"),
1309            "huge file should still get skeleton:\n{}",
1310            r.output
1311        );
1312        // Skeleton path used to expose store_id; merge removed that.
1313        // The store is populated invisibly so future range reads can
1314        // hit cache.
1315        assert!(
1316            !r.output.contains("store_id="),
1317            "merged design hides store_id from model:\n{}",
1318            r.output
1319        );
1320        assert_eq!(
1321            ctx.file_store.read().await.len(),
1322            1,
1323            "auto_skeleton path must populate FileStore"
1324        );
1325    }
1326
1327    /// After CC alignment: subsequent reads of the same path do NOT
1328    /// surface any framework-side "Nth read of X" preamble. The
1329    /// model gets the same shape of output every time. Pins the
1330    /// removal of the R2 hint that earlier datalogs (2026-05-06)
1331    /// showed glm-5.1 ignoring anyway — keeping it was both
1332    /// hardcoded metadata-injection and ineffective.
1333    #[tokio::test]
1334    async fn d3_subsequent_reads_have_no_framework_preamble() {
1335        let dir = TempDir::new().unwrap();
1336        let path = write_n_line_file(&dir, "big.rs", 200);
1337        let ctx = ToolContext::new(dir.path().to_path_buf());
1338        let args1 = format!(r#"{{"file_path":"{}"}}"#, path.display());
1339        let args2 = format!(r#"{{"file_path":"{}","offset":50,"limit":10}}"#, path.display());
1340        let args3 = format!(r#"{{"file_path":"{}","offset":100,"limit":10}}"#, path.display());
1341        let r1 = ReadFileTool.execute(&args1, &ctx).await.unwrap();
1342        let r2 = ReadFileTool.execute(&args2, &ctx).await.unwrap();
1343        let r3 = ReadFileTool.execute(&args3, &ctx).await.unwrap();
1344        assert!(r1.success && r2.success && r3.success);
1345        for (i, r) in [&r1, &r2, &r3].iter().enumerate() {
1346            assert!(
1347                !r.output.contains("read of `") && !r.output.contains("FileStore cache"),
1348                "read #{} must not carry framework metadata; got:\n{}",
1349                i + 1,
1350                r.output
1351            );
1352        }
1353    }
1354}
atomcode_core/tool/read.rs

atomcode_core/tool/
read.rs