Skip to main content

atomcode_core/ctx/
file_store.rs

//! In-memory file content store — D3 step 1.
//!
//! Why this exists: `read_file`'s content used to live in conversation
//! tool_result messages. Each turn the LLM saw it again at full token
//! cost; compaction stripped it; the model then re-read the same file
//! and burned another full-content roundtrip. Across a 120-turn
//! atomgr session that pattern produced 47 reads on 10 unique files
//! plus three compactions destroying file content (datalog
//! 2026-05-06_10-22-35).
//!
//! `FileStore` decouples file content from conversation history. The
//! model's `read_file` ToolResult carries a tiny pointer (`store_id`
//! + preview); the actual bytes live here. `peek_file` is a separate
//! tool that fetches regions from this store without re-reading the
//! file from disk. Compaction touches only pointers; content survives.
//!
//! Lifecycle: process-local (no persistence yet — that's D3b). Path
//! invalidation fires on `edit_file` / `write_file` success so a
//! stale `store_id` cannot serve outdated bytes.
20
21use std::collections::HashMap;
22use std::path::PathBuf;
23use std::time::SystemTime;
24
/// A snapshot of a single file, captured at the moment it was read.
#[derive(Clone, Debug)]
pub struct FileEntry {
    /// Path the snapshot was taken from.
    pub path: PathBuf,
    /// Full text of the file as captured.
    pub content: String,
    /// Modification time recorded when the snapshot was inserted.
    /// [`FileStore::is_stale`] compares it with a freshly stat'd mtime;
    /// any difference means the file on disk has changed (or was
    /// rewritten by our own write tools) and the caller should re-read.
    pub mtime: SystemTime,
    /// Length of `content` in bytes, as recorded by [`FileStore::insert`].
    pub size_bytes: usize,
    /// Number of lines in `content` (counted with `str::lines`), as
    /// recorded by [`FileStore::insert`].
    pub line_count: usize,
}
37
38
39/// Process-local file content store.
40///
41/// `Default` constructs an empty store. Wrap in `Arc<RwLock<>>` for the
42/// shared `ToolContext.file_store` field; the lock is taken briefly per
43/// call (insert is one allocation + hash, peek is a HashMap lookup +
44/// substring slice).
45#[derive(Debug, Default)]
46pub struct FileStore {
47    entries: HashMap<String, FileEntry>,
48    /// path → most recent store_id, so callers that only know the path
49    /// (e.g. invalidate) can find what to drop. A path can have at
50    /// most one live entry; subsequent reads of the same path reuse
51    /// the slot.
52    by_path: HashMap<PathBuf, String>,
53}
54
55impl FileStore {
56    pub fn new() -> Self {
57        Self::default()
58    }
59
60    /// Push a file snapshot into the store. Returns the assigned
61    /// `store_id`. Any prior entry for the same `path` is replaced —
62    /// re-reading a file overwrites its slot rather than accumulating
63    /// stale copies.
64    ///
65    /// `store_id` shape: `fs_<8-hex-of-content-hash>`. Hash carries
66    /// content+path so unrelated files can't collide; the prefix
67    /// disambiguates from other id namespaces in logs.
68    pub fn insert(&mut self, path: PathBuf, content: String, mtime: SystemTime) -> String {
69        let store_id = derive_id(&path, &content);
70        let line_count = content.lines().count();
71        let size_bytes = content.len();
72        let entry = FileEntry {
73            path: path.clone(),
74            content,
75            mtime,
76            size_bytes,
77            line_count,
78        };
79        // Drop any prior entry for this path before reinserting.
80        if let Some(old_id) = self.by_path.insert(path, store_id.clone()) {
81            if old_id != store_id {
82                self.entries.remove(&old_id);
83            }
84        }
85        self.entries.insert(store_id.clone(), entry);
86        store_id
87    }
88
89    /// Look up an entry by store_id. Returns `None` if invalidated or
90    /// never inserted.
91    pub fn get(&self, store_id: &str) -> Option<&FileEntry> {
92        self.entries.get(store_id)
93    }
94
95    /// Look up the live store_id for a path (if any). Used by
96    /// invalidate-on-edit and by `read_file` to detect "we already
97    /// have this; reuse".
98    pub fn store_id_for_path(&self, path: &std::path::Path) -> Option<&str> {
99        self.by_path.get(path).map(String::as_str)
100    }
101
102    /// Compare the entry's recorded mtime to a freshly-stat'd one.
103    /// Returns true when the disk has moved on and the entry should
104    /// not serve. Caller (typically peek_file) returns a recovery
105    /// hint pointing at re-read.
106    pub fn is_stale(&self, store_id: &str, current_mtime: SystemTime) -> bool {
107        match self.entries.get(store_id) {
108            Some(e) => e.mtime != current_mtime,
109            // Unknown id behaves as "stale" so callers route through the
110            // same recovery path uniformly.
111            None => true,
112        }
113    }
114
115    /// Drop the entry (if any) for a path. Called by edit_file /
116    /// write_file on success. No-op when the path was never in the
117    /// store. Idempotent — calling twice with the same path is fine.
118    pub fn invalidate(&mut self, path: &std::path::Path) {
119        if let Some(store_id) = self.by_path.remove(path) {
120            self.entries.remove(&store_id);
121        }
122    }
123
124    /// Extract a 1-indexed inclusive line range. `[1, 1]` returns the
125    /// first line; out-of-range tails are clamped. Returns `None`
126    /// only if the store_id is unknown — empty regions return `Some("")`
127    /// so callers can distinguish "no such entry" from "valid request,
128    /// nothing in that range".
129    pub fn peek_lines(&self, store_id: &str, start: usize, end: usize) -> Option<String> {
130        let entry = self.entries.get(store_id)?;
131        if start == 0 || start > entry.line_count {
132            return Some(String::new());
133        }
134        let s = start.saturating_sub(1);
135        let e = end.min(entry.line_count);
136        if e < start {
137            return Some(String::new());
138        }
139        let lines: Vec<&str> = entry.content.lines().collect();
140        Some(lines[s..e].join("\n"))
141    }
142
143    /// Number of live entries — used by tests and the `/context`
144    /// rich snapshot.
145    pub fn len(&self) -> usize {
146        self.entries.len()
147    }
148
149    pub fn is_empty(&self) -> bool {
150        self.entries.is_empty()
151    }
152
153}
154
/// Builds the `store_id` for a snapshot from its path and content.
///
/// The same content at the same path always maps to the same id, which
/// lets repeated reads of an unchanged file de-duplicate. Changing the
/// content (e.g. after an edit) yields a different id for the same
/// path — the signal used to spot a model working from a stale snapshot.
fn derive_id(path: &std::path::Path, content: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    let mut hasher = DefaultHasher::new();
    path.hash(&mut hasher);
    content.hash(&mut hasher);

    // Deliberate truncation: keep only the low 32 bits so the id always
    // renders as exactly eight hex digits.
    let low32 = hasher.finish() as u32;
    format!("fs_{:08x}", low32)
}
167
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use std::time::Duration;

    /// Path used by most single-file tests.
    const X_RS: &str = "/x.rs";

    /// Timestamp `secs` seconds after the Unix epoch.
    fn t(secs: u64) -> SystemTime {
        SystemTime::UNIX_EPOCH + Duration::from_secs(secs)
    }

    /// Three-line fixture with a trailing newline.
    fn three_lines() -> String {
        String::from("alpha\nbeta\ngamma\n")
    }

    /// Store pre-loaded with the three-line fixture at `X_RS`, plus its id.
    fn store_with_three_lines() -> (FileStore, String) {
        let mut store = FileStore::new();
        let id = store.insert(PathBuf::from(X_RS), three_lines(), t(0));
        (store, id)
    }

    #[test]
    fn insert_and_get_round_trip() {
        let mut store = FileStore::new();
        let id = store.insert(PathBuf::from(X_RS), three_lines(), t(100));
        let entry = store.get(&id).expect("entry was just inserted");
        assert_eq!(entry.line_count, 3);
        assert_eq!(entry.content, "alpha\nbeta\ngamma\n");
        assert_eq!(entry.mtime, t(100));
    }

    #[test]
    fn store_id_lookup_by_path_returns_latest() {
        let mut store = FileStore::new();
        let first = store.insert(PathBuf::from(X_RS), "v1".into(), t(100));
        let second = store.insert(PathBuf::from(X_RS), "v2".into(), t(200));
        assert_ne!(first, second);
        // The newer snapshot displaces the older one — only the latest survives.
        assert!(store.get(&first).is_none());
        assert!(store.get(&second).is_some());
        assert_eq!(store.store_id_for_path(Path::new(X_RS)), Some(second.as_str()));
    }

    #[test]
    fn peek_lines_extracts_inclusive_range() {
        let (store, id) = store_with_three_lines();
        let cases = [
            (1, 1, "alpha"),
            (1, 2, "alpha\nbeta"),
            (2, 3, "beta\ngamma"),
            (1, 99, "alpha\nbeta\ngamma"),
        ];
        for (start, end, expected) in cases {
            assert_eq!(
                store.peek_lines(&id, start, end).as_deref(),
                Some(expected),
                "range {}..={}",
                start,
                end
            );
        }
    }

    #[test]
    fn peek_lines_handles_zero_and_oob() {
        let (store, id) = store_with_three_lines();
        // Zero start, past-the-end and inverted ranges all yield Some("") —
        // callers can format a friendly "out of range" without an extra branch.
        for (start, end) in [(0, 1), (50, 99), (5, 2)] {
            assert_eq!(
                store.peek_lines(&id, start, end).as_deref(),
                Some(""),
                "range {}..={}",
                start,
                end
            );
        }
    }

    #[test]
    fn peek_lines_unknown_id_returns_none() {
        let store = FileStore::new();
        assert_eq!(store.peek_lines("fs_00000000", 1, 1), None);
    }

    #[test]
    fn is_stale_detects_mtime_change() {
        let mut store = FileStore::new();
        let id = store.insert(PathBuf::from(X_RS), "x".into(), t(100));
        assert!(!store.is_stale(&id, t(100)));
        assert!(store.is_stale(&id, t(101)));
    }

    #[test]
    fn is_stale_unknown_id_treated_as_stale() {
        let store = FileStore::new();
        // An unknown id takes the same "stale" route so callers don't need
        // a separate branch for it.
        assert!(store.is_stale("fs_deadbeef", t(0)));
    }

    #[test]
    fn invalidate_drops_entry_for_path() {
        let mut store = FileStore::new();
        let id = store.insert(PathBuf::from(X_RS), "x".into(), t(100));
        assert!(store.get(&id).is_some());
        store.invalidate(Path::new(X_RS));
        assert!(store.get(&id).is_none());
        assert!(store.store_id_for_path(Path::new(X_RS)).is_none());
    }

    #[test]
    fn invalidate_unknown_path_is_noop() {
        let mut store = FileStore::new();
        store.invalidate(Path::new("/nonexistent")); // must not panic
        assert!(store.is_empty());
    }

    #[test]
    fn invalidate_only_affects_named_path() {
        let mut store = FileStore::new();
        let id_a = store.insert(PathBuf::from("/a.rs"), "a".into(), t(0));
        let id_b = store.insert(PathBuf::from("/b.rs"), "b".into(), t(0));
        store.invalidate(Path::new("/a.rs"));
        assert!(store.get(&id_a).is_none());
        assert!(store.get(&id_b).is_some());
    }

    #[test]
    fn derive_id_stable_for_same_input() {
        let path = Path::new(X_RS);
        let first = derive_id(path, "hello");
        let second = derive_id(path, "hello");
        assert_eq!(first, second);
    }

    #[test]
    fn derive_id_changes_with_content() {
        let path = Path::new(X_RS);
        let hello = derive_id(path, "hello");
        let world = derive_id(path, "world");
        assert_ne!(hello, world);
    }

    #[test]
    fn derive_id_changes_with_path() {
        let at_a = derive_id(Path::new("/a"), "x");
        let at_b = derive_id(Path::new("/b"), "x");
        assert_ne!(at_a, at_b);
    }
}