1use std::fs;
2use std::hash::{Hash, Hasher};
3use std::io::{self, Write};
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicU64, Ordering};
6use std::time::UNIX_EPOCH;
7
8use code_moniker_core::core::code_graph::CodeGraph;
9use code_moniker_core::core::code_graph::encoding::{self, LAYOUT_VERSION};
10use rustc_hash::FxHasher;
11
12use crate::extract;
13use code_moniker_core::lang::Lang;
14
/// Magic number stored in the first four bytes of a cache entry.
const CACHE_MAGIC: u32 = 0xC0DE_2106;
/// Version of the entry header layout described by the offsets below.
const CACHE_FORMAT_VERSION: u32 = 2;

// Byte offsets of the fixed-width header fields. Each offset is the previous
// field's offset plus that field's width, so the layout reads top to bottom:
// magic (u32), format (u32), mtime (u64), size (u64), anchor hash (u64),
// path length (u32).
const OFF_MAGIC: usize = 0;
const OFF_FORMAT: usize = OFF_MAGIC + std::mem::size_of::<u32>();
const OFF_MTIME: usize = OFF_FORMAT + std::mem::size_of::<u32>();
const OFF_SIZE: usize = OFF_MTIME + std::mem::size_of::<u64>();
const OFF_ANCHOR: usize = OFF_SIZE + std::mem::size_of::<u64>();
const OFF_PATH_LEN: usize = OFF_ANCHOR + std::mem::size_of::<u64>();
/// Size of the fixed-width part of the header, up to and including the `u32` path length.
const HEADER_FIXED: usize = OFF_PATH_LEN + std::mem::size_of::<u32>();

/// Counter bumped once per temporary file name so names do not repeat within this process.
static TMP_NONCE: AtomicU64 = AtomicU64::new(0);
26
27#[derive(Clone, Debug)]
28pub struct CacheKey {
29 pub abs_path: PathBuf,
30 pub mtime: u64,
31 pub size: u64,
32 pub anchor_hash: u64,
33}
34
35impl CacheKey {
36 pub fn from_path(path: &Path, anchor: &Path) -> io::Result<Self> {
37 let abs_path = path.canonicalize()?;
38 let meta = fs::metadata(&abs_path)?;
39 let mtime = meta
40 .modified()?
41 .duration_since(UNIX_EPOCH)
42 .map(|d| d.as_nanos() as u64)
43 .unwrap_or(0);
44 Ok(Self {
45 abs_path,
46 mtime,
47 size: meta.len(),
48 anchor_hash: hash_path(anchor),
49 })
50 }
51
52 fn path_hash(&self) -> u64 {
53 hash_path(&self.abs_path)
54 }
55
56 fn shard(&self) -> String {
57 format!("{:02x}", (self.path_hash() & 0xff) as u8)
58 }
59
60 fn filename(&self) -> String {
61 format!("{:016x}_{:016x}.bin", self.path_hash(), self.anchor_hash)
62 }
63
64 fn full_path(&self, root: &Path) -> PathBuf {
65 root.join(format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}"))
66 .join(self.shard())
67 .join(self.filename())
68 }
69
70 fn abs_path_bytes(&self) -> &[u8] {
71 path_bytes(&self.abs_path)
72 }
73}
74
75pub fn load(cache_dir: &Path, key: &CacheKey) -> Option<CodeGraph> {
76 let path = key.full_path(cache_dir);
77 let bytes = fs::read(&path).ok()?;
78 let body = validate_header(&bytes, key)?;
79 match encoding::decode(body) {
80 Ok(g) => Some(g),
81 Err(e) => {
82 eprintln!(
83 "code-moniker: cache decode failed at {} ({e}); ignoring",
84 path.display(),
85 );
86 None
87 }
88 }
89}
90
91pub fn store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) {
92 let _ = try_store(cache_dir, key, graph);
93}
94
95fn try_store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) -> io::Result<()> {
96 let path = key.full_path(cache_dir);
97 if let Some(parent) = path.parent() {
98 fs::create_dir_all(parent)?;
99 }
100 let body = encoding::encode(graph)
101 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
102 let path_bytes = key.abs_path_bytes();
103 let mut buf = Vec::with_capacity(HEADER_FIXED + path_bytes.len() + body.len());
104 buf.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
105 buf.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
106 buf.extend_from_slice(&key.mtime.to_le_bytes());
107 buf.extend_from_slice(&key.size.to_le_bytes());
108 buf.extend_from_slice(&key.anchor_hash.to_le_bytes());
109 buf.extend_from_slice(&(path_bytes.len() as u32).to_le_bytes());
110 buf.extend_from_slice(path_bytes);
111 buf.extend_from_slice(&body);
112
113 let nonce = TMP_NONCE.fetch_add(1, Ordering::Relaxed);
114 let tmp = path.with_extension(format!("tmp.{}.{nonce}", std::process::id()));
115 let write_result = (|| -> io::Result<()> {
116 let mut f = fs::File::create(&tmp)?;
117 f.write_all(&buf)?;
118 f.sync_data()?;
119 Ok(())
120 })();
121 if let Err(e) = write_result {
122 let _ = fs::remove_file(&tmp);
123 return Err(e);
124 }
125 fs::rename(&tmp, &path)
126}
127
128pub fn load_or_extract(
129 path: &Path,
130 anchor: &Path,
131 lang: Lang,
132 cache_dir: Option<&Path>,
133 ctx: &extract::Context,
134) -> Option<(CodeGraph, Option<String>)> {
135 if let Some(dir) = cache_dir
136 && let Ok(key) = CacheKey::from_path(path, anchor)
137 {
138 if let Some(g) = load(dir, &key) {
139 return Some((g, None));
140 }
141 let source = fs::read_to_string(path).ok()?;
142 let graph = extract::extract_with(lang, &source, anchor, ctx);
143 store(dir, &key, &graph);
144 return Some((graph, Some(source)));
145 }
146 let source = fs::read_to_string(path).ok()?;
147 let graph = extract::extract_with(lang, &source, anchor, ctx);
148 Some((graph, Some(source)))
149}
150
151fn validate_header<'a>(bytes: &'a [u8], key: &CacheKey) -> Option<&'a [u8]> {
152 if bytes.len() < HEADER_FIXED {
153 return None;
154 }
155 let magic = u32::from_le_bytes(bytes[OFF_MAGIC..OFF_FORMAT].try_into().ok()?);
156 if magic != CACHE_MAGIC {
157 return None;
158 }
159 let version = u32::from_le_bytes(bytes[OFF_FORMAT..OFF_MTIME].try_into().ok()?);
160 if version != CACHE_FORMAT_VERSION {
161 return None;
162 }
163 let mtime = u64::from_le_bytes(bytes[OFF_MTIME..OFF_SIZE].try_into().ok()?);
164 let size = u64::from_le_bytes(bytes[OFF_SIZE..OFF_ANCHOR].try_into().ok()?);
165 let anchor_hash = u64::from_le_bytes(bytes[OFF_ANCHOR..OFF_PATH_LEN].try_into().ok()?);
166 if mtime != key.mtime || size != key.size || anchor_hash != key.anchor_hash {
167 return None;
168 }
169 let path_len = u32::from_le_bytes(bytes[OFF_PATH_LEN..HEADER_FIXED].try_into().ok()?) as usize;
170 if HEADER_FIXED + path_len > bytes.len() {
171 return None;
172 }
173 let stored_path = &bytes[HEADER_FIXED..HEADER_FIXED + path_len];
174 if stored_path != key.abs_path_bytes() {
175 return None;
176 }
177 Some(&bytes[HEADER_FIXED + path_len..])
178}
179
/// Returns the raw bytes of `p` as the operating system stores them (Unix).
#[cfg(unix)]
fn path_bytes(p: &Path) -> &[u8] {
    std::os::unix::ffi::OsStrExt::as_bytes(p.as_os_str())
}
185
/// Returns a byte representation of `p` on non-Unix targets.
///
/// Uses the OS string's encoded bytes, so distinct paths always yield distinct
/// byte strings, including paths that are not valid Unicode. For paths that are
/// valid Unicode the result is their UTF-8 encoding.
///
/// The encoding of non-Unicode portions is unspecified and only comparable
/// within the same Rust version and target, so treat those bytes as opaque.
#[cfg(not(unix))]
fn path_bytes(p: &Path) -> &[u8] {
    p.as_os_str().as_encoded_bytes()
}
191
192fn hash_path(p: &Path) -> u64 {
193 let mut h = FxHasher::default();
194 path_bytes(p).hash(&mut h);
195 h.finish()
196}
197
#[cfg(test)]
mod tests {
    use super::*;
    use code_moniker_core::core::moniker::MonikerBuilder;

    /// Writes `contents` to `src.ts` inside `dir` and returns the file's path.
    fn write_source(dir: &std::path::Path, contents: &[u8]) -> std::path::PathBuf {
        let file = dir.join("src.ts");
        std::fs::write(&file, contents).unwrap();
        file
    }

    /// Builds a graph with a `module` root and a single `class` definition `Foo` under it.
    fn graph_with_one_def() -> CodeGraph {
        let root = MonikerBuilder::new()
            .project(b"app")
            .segment(b"path", b"root")
            .build();
        let foo = MonikerBuilder::new()
            .project(b"app")
            .segment(b"path", b"root")
            .segment(b"class", b"Foo")
            .build();
        let mut graph = CodeGraph::new(root.clone(), b"module");
        graph.add_def(foo, b"class", &root, Some((0, 10))).unwrap();
        graph
    }

    // A stored graph is found again under the same key.
    #[test]
    fn store_then_load_roundtrips() {
        let dir = tempfile::tempdir().unwrap();
        let src = write_source(dir.path(), b"export class Foo {}\n");
        let anchor = dir.path().join("anchor");
        let key = CacheKey::from_path(&src, &anchor).unwrap();
        let original = graph_with_one_def();

        store(dir.path(), &key, &original);

        let restored = load(dir.path(), &key).expect("should hit");
        assert_eq!(restored.def_count(), original.def_count());
    }

    // Rewriting the source yields a different key, which must not hit the old entry.
    #[test]
    fn load_misses_when_mtime_changes() {
        let dir = tempfile::tempdir().unwrap();
        let src = write_source(dir.path(), b"a");
        let anchor = dir.path().join("anchor");
        let before = CacheKey::from_path(&src, &anchor).unwrap();
        store(dir.path(), &before, &graph_with_one_def());

        std::thread::sleep(std::time::Duration::from_millis(10));
        std::fs::write(&src, b"ab").unwrap();

        let after = CacheKey::from_path(&src, &anchor).unwrap();
        assert_ne!((after.mtime, after.size), (before.mtime, before.size));
        assert!(load(dir.path(), &after).is_none());
    }

    // The same file keyed against another anchor does not see the first anchor's entry.
    #[test]
    fn load_misses_when_anchor_changes() {
        let dir = tempfile::tempdir().unwrap();
        let src = write_source(dir.path(), b"a");
        let first_anchor = dir.path().join("anchor1");
        let second_anchor = dir.path().join("anchor2");
        let first_key = CacheKey::from_path(&src, &first_anchor).unwrap();
        let second_key = CacheKey::from_path(&src, &second_anchor).unwrap();

        store(dir.path(), &first_key, &graph_with_one_def());

        assert!(load(dir.path(), &first_key).is_some());
        assert!(load(dir.path(), &second_key).is_none());
    }

    // Nothing was stored, so the lookup is a plain miss.
    #[test]
    fn load_returns_none_on_empty_dir() {
        let dir = tempfile::tempdir().unwrap();
        let src = write_source(dir.path(), b"a");
        let key = CacheKey::from_path(&src, dir.path()).unwrap();
        assert!(load(dir.path(), &key).is_none());
    }

    // The entry path contains the version directory and sits in a two-character shard.
    #[test]
    fn cache_path_is_versioned_and_sharded() {
        let dir = tempfile::tempdir().unwrap();
        let src = write_source(dir.path(), b"a");
        let key = CacheKey::from_path(&src, dir.path()).unwrap();

        let entry = key.full_path(dir.path());
        let version_dir = format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}");
        assert!(entry.to_string_lossy().contains(&version_dir));

        let shard = entry.parent().unwrap().file_name().unwrap();
        assert!(shard.len() == 2);
    }
}