code_moniker_cli/
cache.rs1use std::fs;
2use std::hash::{Hash, Hasher};
3use std::io::{self, Write};
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicU64, Ordering};
6use std::time::UNIX_EPOCH;
7
8use code_moniker_core::core::code_graph::CodeGraph;
9use code_moniker_core::core::code_graph::encoding::{self, LAYOUT_VERSION};
10use rustc_hash::FxHasher;
11
12use crate::extract;
13use code_moniker_core::lang::Lang;
14
/// Magic number written as the first four bytes of every cache file.
const CACHE_MAGIC: u32 = 0xC0DE_2106;
/// Version of the header layout described below. It is stored in the header
/// and is also part of the cache directory name.
const CACHE_FORMAT_VERSION: u32 = 1;

// Byte offsets of the fixed-size header fields. Every field is little-endian
// and each offset is the previous offset plus the previous field's width:
//
//   magic: u32 | format: u32 | mtime: u64 | size: u64 | anchor: u64 | path_len: u32
//
// The header is followed by `path_len` bytes of source path and then the
// encoded graph.
const OFF_MAGIC: usize = 0;
const OFF_FORMAT: usize = OFF_MAGIC + std::mem::size_of::<u32>();
const OFF_MTIME: usize = OFF_FORMAT + std::mem::size_of::<u32>();
const OFF_SIZE: usize = OFF_MTIME + std::mem::size_of::<u64>();
const OFF_ANCHOR: usize = OFF_SIZE + std::mem::size_of::<u64>();
const OFF_PATH_LEN: usize = OFF_ANCHOR + std::mem::size_of::<u64>();
/// Length in bytes of the fixed part of the header (everything before the path).
const HEADER_FIXED: usize = OFF_PATH_LEN + std::mem::size_of::<u32>();

/// Process-wide counter used to give every temporary cache file a distinct name.
static TMP_NONCE: AtomicU64 = AtomicU64::new(0);
26
/// Identity of one source file as far as the cache is concerned.
///
/// A cached entry is only reused when every field recorded in its header
/// matches the key it is looked up with.
#[derive(Debug, Clone)]
pub struct CacheKey {
    /// Path of the source file; `from_path` stores the canonicalized path here.
    pub abs_path: PathBuf,
    /// Modification time; `from_path` stores nanoseconds since the Unix epoch
    /// (0 when the timestamp is before the epoch).
    pub mtime: u64,
    /// Length of the source file in bytes.
    pub size: u64,
    /// Hash of the anchor path the graph was extracted against.
    pub anchor_hash: u64,
}
34
35impl CacheKey {
36 pub fn from_path(path: &Path, anchor: &Path) -> io::Result<Self> {
37 let abs_path = path.canonicalize()?;
38 let meta = fs::metadata(&abs_path)?;
39 let mtime = meta
40 .modified()?
41 .duration_since(UNIX_EPOCH)
42 .map(|d| d.as_nanos() as u64)
43 .unwrap_or(0);
44 Ok(Self {
45 abs_path,
46 mtime,
47 size: meta.len(),
48 anchor_hash: hash_path(anchor),
49 })
50 }
51
52 fn path_hash(&self) -> u64 {
53 hash_path(&self.abs_path)
54 }
55
56 fn shard(&self) -> String {
57 format!("{:02x}", (self.path_hash() & 0xff) as u8)
58 }
59
60 fn filename(&self) -> String {
61 format!("{:016x}_{:016x}.bin", self.path_hash(), self.anchor_hash)
62 }
63
64 fn full_path(&self, root: &Path) -> PathBuf {
65 root.join(format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}"))
66 .join(self.shard())
67 .join(self.filename())
68 }
69
70 fn abs_path_bytes(&self) -> &[u8] {
71 path_bytes(&self.abs_path)
72 }
73}
74
75pub fn load(cache_dir: &Path, key: &CacheKey) -> Option<CodeGraph> {
76 let path = key.full_path(cache_dir);
77 let bytes = fs::read(&path).ok()?;
78 let body = validate_header(&bytes, key)?;
79 match encoding::decode(body) {
80 Ok(g) => Some(g),
81 Err(e) => {
82 eprintln!(
83 "code-moniker: cache decode failed at {} ({e}); ignoring",
84 path.display(),
85 );
86 None
87 }
88 }
89}
90
91pub fn store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) {
92 let _ = try_store(cache_dir, key, graph);
93}
94
95fn try_store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) -> io::Result<()> {
96 let path = key.full_path(cache_dir);
97 if let Some(parent) = path.parent() {
98 fs::create_dir_all(parent)?;
99 }
100 let body = encoding::encode(graph)
101 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
102 let path_bytes = key.abs_path_bytes();
103 let mut buf = Vec::with_capacity(HEADER_FIXED + path_bytes.len() + body.len());
104 buf.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
105 buf.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
106 buf.extend_from_slice(&key.mtime.to_le_bytes());
107 buf.extend_from_slice(&key.size.to_le_bytes());
108 buf.extend_from_slice(&key.anchor_hash.to_le_bytes());
109 buf.extend_from_slice(&(path_bytes.len() as u32).to_le_bytes());
110 buf.extend_from_slice(path_bytes);
111 buf.extend_from_slice(&body);
112
113 let nonce = TMP_NONCE.fetch_add(1, Ordering::Relaxed);
114 let tmp = path.with_extension(format!("tmp.{}.{nonce}", std::process::id()));
115 let write_result = (|| -> io::Result<()> {
116 let mut f = fs::File::create(&tmp)?;
117 f.write_all(&buf)?;
118 f.sync_data()?;
119 Ok(())
120 })();
121 if let Err(e) = write_result {
122 let _ = fs::remove_file(&tmp);
123 return Err(e);
124 }
125 fs::rename(&tmp, &path)
126}
127
128pub fn load_or_extract(
129 path: &Path,
130 anchor: &Path,
131 lang: Lang,
132 cache_dir: Option<&Path>,
133) -> Option<(CodeGraph, Option<String>)> {
134 if let Some(dir) = cache_dir
135 && let Ok(key) = CacheKey::from_path(path, anchor)
136 {
137 if let Some(g) = load(dir, &key) {
138 return Some((g, None));
139 }
140 let source = fs::read_to_string(path).ok()?;
141 let graph = extract::extract(lang, &source, anchor);
142 store(dir, &key, &graph);
143 return Some((graph, Some(source)));
144 }
145 let source = fs::read_to_string(path).ok()?;
146 let graph = extract::extract(lang, &source, anchor);
147 Some((graph, Some(source)))
148}
149
150fn validate_header<'a>(bytes: &'a [u8], key: &CacheKey) -> Option<&'a [u8]> {
151 if bytes.len() < HEADER_FIXED {
152 return None;
153 }
154 let magic = u32::from_le_bytes(bytes[OFF_MAGIC..OFF_FORMAT].try_into().ok()?);
155 if magic != CACHE_MAGIC {
156 return None;
157 }
158 let version = u32::from_le_bytes(bytes[OFF_FORMAT..OFF_MTIME].try_into().ok()?);
159 if version != CACHE_FORMAT_VERSION {
160 return None;
161 }
162 let mtime = u64::from_le_bytes(bytes[OFF_MTIME..OFF_SIZE].try_into().ok()?);
163 let size = u64::from_le_bytes(bytes[OFF_SIZE..OFF_ANCHOR].try_into().ok()?);
164 let anchor_hash = u64::from_le_bytes(bytes[OFF_ANCHOR..OFF_PATH_LEN].try_into().ok()?);
165 if mtime != key.mtime || size != key.size || anchor_hash != key.anchor_hash {
166 return None;
167 }
168 let path_len = u32::from_le_bytes(bytes[OFF_PATH_LEN..HEADER_FIXED].try_into().ok()?) as usize;
169 if HEADER_FIXED + path_len > bytes.len() {
170 return None;
171 }
172 let stored_path = &bytes[HEADER_FIXED..HEADER_FIXED + path_len];
173 if stored_path != key.abs_path_bytes() {
174 return None;
175 }
176 Some(&bytes[HEADER_FIXED + path_len..])
177}
178
/// Raw bytes of `p`, used for hashing and for the exact path comparison in
/// the cache header. On Unix these are the path's bytes as the OS sees them.
#[cfg(unix)]
fn path_bytes(p: &Path) -> &[u8] {
    use std::os::unix::ffi::OsStrExt;
    p.as_os_str().as_bytes()
}

/// Raw bytes of `p`, used for hashing and for the exact path comparison in
/// the cache header.
///
/// Uses the platform's encoded form of the path, which equals the UTF-8 bytes
/// for valid-Unicode paths. Paths that are not valid Unicode still yield
/// their own distinct bytes; mapping them all to an empty slice would make
/// unrelated files share a hash and pass the header's path check.
#[cfg(not(unix))]
fn path_bytes(p: &Path) -> &[u8] {
    p.as_os_str().as_encoded_bytes()
}
190
191fn hash_path(p: &Path) -> u64 {
192 let mut h = FxHasher::default();
193 path_bytes(p).hash(&mut h);
194 h.finish()
195}
196
#[cfg(test)]
mod tests {
    use super::*;
    use code_moniker_core::core::moniker::MonikerBuilder;

    /// Creates a temporary directory holding `src.ts` with `contents`.
    ///
    /// The directory handle is returned so the caller keeps it alive; the
    /// tests also use the directory as the cache root.
    fn scratch_with_source(contents: &[u8]) -> (tempfile::TempDir, PathBuf) {
        let dir = tempfile::tempdir().unwrap();
        let src = dir.path().join("src.ts");
        std::fs::write(&src, contents).unwrap();
        (dir, src)
    }

    /// A minimal graph: a module root with a single class definition under it.
    fn graph_with_one_def() -> CodeGraph {
        let module = MonikerBuilder::new()
            .project(b"app")
            .segment(b"path", b"root")
            .build();
        let class_foo = MonikerBuilder::new()
            .project(b"app")
            .segment(b"path", b"root")
            .segment(b"class", b"Foo")
            .build();
        let mut graph = CodeGraph::new(module.clone(), b"module");
        graph
            .add_def(class_foo, b"class", &module, Some((0, 10)))
            .unwrap();
        graph
    }

    #[test]
    fn store_then_load_roundtrips() {
        let (tmp, src) = scratch_with_source(b"export class Foo {}\n");
        let anchor = tmp.path().join("anchor");
        let key = CacheKey::from_path(&src, &anchor).unwrap();
        let original = graph_with_one_def();

        store(tmp.path(), &key, &original);

        let restored = load(tmp.path(), &key).expect("should hit");
        assert_eq!(restored.def_count(), original.def_count());
    }

    #[test]
    fn load_misses_when_mtime_changes() {
        let (tmp, src) = scratch_with_source(b"a");
        let anchor = tmp.path().join("anchor");
        let before = CacheKey::from_path(&src, &anchor).unwrap();
        store(tmp.path(), &before, &graph_with_one_def());

        // Give the filesystem clock a chance to advance before rewriting.
        std::thread::sleep(std::time::Duration::from_millis(10));
        std::fs::write(&src, b"ab").unwrap();

        let after = CacheKey::from_path(&src, &anchor).unwrap();
        assert!(after.mtime != before.mtime || after.size != before.size);
        assert!(load(tmp.path(), &after).is_none());
    }

    #[test]
    fn load_misses_when_anchor_changes() {
        let (tmp, src) = scratch_with_source(b"a");
        let first_anchor = tmp.path().join("anchor1");
        let second_anchor = tmp.path().join("anchor2");
        let stored_key = CacheKey::from_path(&src, &first_anchor).unwrap();
        let other_key = CacheKey::from_path(&src, &second_anchor).unwrap();

        store(tmp.path(), &stored_key, &graph_with_one_def());

        assert!(load(tmp.path(), &stored_key).is_some());
        assert!(load(tmp.path(), &other_key).is_none());
    }

    #[test]
    fn load_returns_none_on_empty_dir() {
        let (tmp, src) = scratch_with_source(b"a");
        let key = CacheKey::from_path(&src, tmp.path()).unwrap();
        assert!(load(tmp.path(), &key).is_none());
    }

    #[test]
    fn cache_path_is_versioned_and_sharded() {
        let (tmp, src) = scratch_with_source(b"a");
        let key = CacheKey::from_path(&src, tmp.path()).unwrap();
        let full = key.full_path(tmp.path());

        let rendered = full.to_string_lossy();
        assert!(rendered.contains(&format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}")));

        // The entry's parent directory is the two-character shard.
        let shard_dir = full.parent().unwrap().file_name().unwrap();
        assert_eq!(shard_dir.len(), 2);
    }
}