Skip to main content

code_moniker_cli/
cache.rs

1use std::fs;
2use std::hash::{Hash, Hasher};
3use std::io::{self, Write};
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicU64, Ordering};
6use std::time::UNIX_EPOCH;
7
8use code_moniker_core::core::code_graph::CodeGraph;
9use code_moniker_core::core::code_graph::encoding::{self, LAYOUT_VERSION};
10use rustc_hash::FxHasher;
11
12use crate::extract;
13use code_moniker_core::lang::Lang;
14
/// Magic number written as the first four bytes of every cache file.
const CACHE_MAGIC: u32 = 0xC0DE_2106;
/// Version of the header layout described below; it is also part of the
/// versioned directory name that cache entries are stored under.
const CACHE_FORMAT_VERSION: u32 = 2;

// Byte offsets of the fixed-size header fields. Every field is little-endian
// and each offset is derived from the width of the field that precedes it:
//   magic: u32 | format: u32 | mtime: u64 | size: u64 | anchor hash: u64 | path len: u32
const OFF_MAGIC: usize = 0;
const OFF_FORMAT: usize = OFF_MAGIC + std::mem::size_of::<u32>();
const OFF_MTIME: usize = OFF_FORMAT + std::mem::size_of::<u32>();
const OFF_SIZE: usize = OFF_MTIME + std::mem::size_of::<u64>();
const OFF_ANCHOR: usize = OFF_SIZE + std::mem::size_of::<u64>();
const OFF_PATH_LEN: usize = OFF_ANCHOR + std::mem::size_of::<u64>();
/// Size of the fixed part of the header; the variable-length path bytes start here.
const HEADER_FIXED: usize = OFF_PATH_LEN + std::mem::size_of::<u32>();

/// Process-wide counter that gives every temporary cache file a distinct name.
static TMP_NONCE: AtomicU64 = AtomicU64::new(0);
26
27#[derive(Clone, Debug)]
28pub struct CacheKey {
29	pub abs_path: PathBuf,
30	pub mtime: u64,
31	pub size: u64,
32	pub anchor_hash: u64,
33}
34
35impl CacheKey {
36	pub fn from_path(path: &Path, anchor: &Path) -> io::Result<Self> {
37		let abs_path = path.canonicalize()?;
38		let meta = fs::metadata(&abs_path)?;
39		let mtime = meta
40			.modified()?
41			.duration_since(UNIX_EPOCH)
42			.map(|d| d.as_nanos() as u64)
43			.unwrap_or(0);
44		Ok(Self {
45			abs_path,
46			mtime,
47			size: meta.len(),
48			anchor_hash: hash_path(anchor),
49		})
50	}
51
52	fn path_hash(&self) -> u64 {
53		hash_path(&self.abs_path)
54	}
55
56	fn shard(&self) -> String {
57		format!("{:02x}", (self.path_hash() & 0xff) as u8)
58	}
59
60	fn filename(&self) -> String {
61		format!("{:016x}_{:016x}.bin", self.path_hash(), self.anchor_hash)
62	}
63
64	fn full_path(&self, root: &Path) -> PathBuf {
65		root.join(format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}"))
66			.join(self.shard())
67			.join(self.filename())
68	}
69
70	fn abs_path_bytes(&self) -> &[u8] {
71		path_bytes(&self.abs_path)
72	}
73}
74
75pub fn load(cache_dir: &Path, key: &CacheKey) -> Option<CodeGraph> {
76	let path = key.full_path(cache_dir);
77	let bytes = fs::read(&path).ok()?;
78	let body = validate_header(&bytes, key)?;
79	match encoding::decode(body) {
80		Ok(g) => Some(g),
81		Err(e) => {
82			eprintln!(
83				"code-moniker: cache decode failed at {} ({e}); ignoring",
84				path.display(),
85			);
86			None
87		}
88	}
89}
90
91pub fn store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) {
92	let _ = try_store(cache_dir, key, graph);
93}
94
95fn try_store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) -> io::Result<()> {
96	let path = key.full_path(cache_dir);
97	if let Some(parent) = path.parent() {
98		fs::create_dir_all(parent)?;
99	}
100	let body = encoding::encode(graph)
101		.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
102	let path_bytes = key.abs_path_bytes();
103	let mut buf = Vec::with_capacity(HEADER_FIXED + path_bytes.len() + body.len());
104	buf.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
105	buf.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
106	buf.extend_from_slice(&key.mtime.to_le_bytes());
107	buf.extend_from_slice(&key.size.to_le_bytes());
108	buf.extend_from_slice(&key.anchor_hash.to_le_bytes());
109	buf.extend_from_slice(&(path_bytes.len() as u32).to_le_bytes());
110	buf.extend_from_slice(path_bytes);
111	buf.extend_from_slice(&body);
112
113	let nonce = TMP_NONCE.fetch_add(1, Ordering::Relaxed);
114	let tmp = path.with_extension(format!("tmp.{}.{nonce}", std::process::id()));
115	let write_result = (|| -> io::Result<()> {
116		let mut f = fs::File::create(&tmp)?;
117		f.write_all(&buf)?;
118		f.sync_data()?;
119		Ok(())
120	})();
121	if let Err(e) = write_result {
122		let _ = fs::remove_file(&tmp);
123		return Err(e);
124	}
125	fs::rename(&tmp, &path)
126}
127
128pub fn load_or_extract(
129	path: &Path,
130	anchor: &Path,
131	lang: Lang,
132	cache_dir: Option<&Path>,
133	ctx: &extract::Context,
134) -> Option<(CodeGraph, Option<String>)> {
135	if let Some(dir) = cache_dir
136		&& let Ok(key) = CacheKey::from_path(path, anchor)
137	{
138		if let Some(g) = load(dir, &key) {
139			return Some((g, None));
140		}
141		let source = fs::read_to_string(path).ok()?;
142		let graph = extract::extract_with(lang, &source, anchor, ctx);
143		store(dir, &key, &graph);
144		return Some((graph, Some(source)));
145	}
146	let source = fs::read_to_string(path).ok()?;
147	let graph = extract::extract_with(lang, &source, anchor, ctx);
148	Some((graph, Some(source)))
149}
150
151fn validate_header<'a>(bytes: &'a [u8], key: &CacheKey) -> Option<&'a [u8]> {
152	if bytes.len() < HEADER_FIXED {
153		return None;
154	}
155	let magic = u32::from_le_bytes(bytes[OFF_MAGIC..OFF_FORMAT].try_into().ok()?);
156	if magic != CACHE_MAGIC {
157		return None;
158	}
159	let version = u32::from_le_bytes(bytes[OFF_FORMAT..OFF_MTIME].try_into().ok()?);
160	if version != CACHE_FORMAT_VERSION {
161		return None;
162	}
163	let mtime = u64::from_le_bytes(bytes[OFF_MTIME..OFF_SIZE].try_into().ok()?);
164	let size = u64::from_le_bytes(bytes[OFF_SIZE..OFF_ANCHOR].try_into().ok()?);
165	let anchor_hash = u64::from_le_bytes(bytes[OFF_ANCHOR..OFF_PATH_LEN].try_into().ok()?);
166	if mtime != key.mtime || size != key.size || anchor_hash != key.anchor_hash {
167		return None;
168	}
169	let path_len = u32::from_le_bytes(bytes[OFF_PATH_LEN..HEADER_FIXED].try_into().ok()?) as usize;
170	if HEADER_FIXED + path_len > bytes.len() {
171		return None;
172	}
173	let stored_path = &bytes[HEADER_FIXED..HEADER_FIXED + path_len];
174	if stored_path != key.abs_path_bytes() {
175		return None;
176	}
177	Some(&bytes[HEADER_FIXED + path_len..])
178}
179
/// Bytes of `p`, used as hash input and as the path recorded in cache headers.
///
/// On unix this is the path's raw byte representation.
#[cfg(unix)]
fn path_bytes(p: &Path) -> &[u8] {
	use std::os::unix::ffi::OsStrExt;
	p.as_os_str().as_bytes()
}

/// Bytes of `p`, used as hash input and as the path recorded in cache headers.
///
/// Non-unix fallback. The platform's encoded form of the path is used so that
/// distinct paths that are not valid UTF-8 still produce distinct bytes
/// instead of all collapsing to an empty slice. For valid UTF-8 paths these
/// are the UTF-8 bytes of the path.
#[cfg(not(unix))]
fn path_bytes(p: &Path) -> &[u8] {
	p.as_os_str().as_encoded_bytes()
}
191
192fn hash_path(p: &Path) -> u64 {
193	let mut h = FxHasher::default();
194	path_bytes(p).hash(&mut h);
195	h.finish()
196}
197
#[cfg(test)]
mod tests {
	use super::*;
	use code_moniker_core::core::moniker::MonikerBuilder;

	/// Writes `contents` to `src.ts` inside `dir` and returns the file's path.
	fn write_source(dir: &Path, contents: &[u8]) -> PathBuf {
		let src = dir.join("src.ts");
		std::fs::write(&src, contents).unwrap();
		src
	}

	/// Graph with a module root and a single `Foo` class definition under it.
	fn graph_with_one_def() -> CodeGraph {
		let root = MonikerBuilder::new()
			.project(b"app")
			.segment(b"path", b"root")
			.build();
		let class_foo = MonikerBuilder::new()
			.project(b"app")
			.segment(b"path", b"root")
			.segment(b"class", b"Foo")
			.build();
		let mut graph = CodeGraph::new(root.clone(), b"module");
		graph
			.add_def(class_foo, b"class", &root, Some((0, 10)))
			.unwrap();
		graph
	}

	#[test]
	fn store_then_load_roundtrips() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"export class Foo {}\n");
		let anchor = dir.path().join("anchor");
		let key = CacheKey::from_path(&src, &anchor).unwrap();
		let original = graph_with_one_def();

		store(dir.path(), &key, &original);
		let restored = load(dir.path(), &key).expect("should hit");
		assert_eq!(restored.def_count(), original.def_count());
	}

	#[test]
	fn load_misses_when_mtime_changes() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let anchor = dir.path().join("anchor");
		let before = CacheKey::from_path(&src, &anchor).unwrap();
		store(dir.path(), &before, &graph_with_one_def());

		// Rewrite the file with different contents after a short pause.
		std::thread::sleep(std::time::Duration::from_millis(10));
		std::fs::write(&src, b"ab").unwrap();
		let after = CacheKey::from_path(&src, &anchor).unwrap();
		assert!(after.mtime != before.mtime || after.size != before.size);
		assert!(load(dir.path(), &after).is_none());
	}

	#[test]
	fn load_misses_when_anchor_changes() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let first_anchor = dir.path().join("anchor1");
		let second_anchor = dir.path().join("anchor2");
		let first_key = CacheKey::from_path(&src, &first_anchor).unwrap();
		let second_key = CacheKey::from_path(&src, &second_anchor).unwrap();

		store(dir.path(), &first_key, &graph_with_one_def());
		assert!(load(dir.path(), &first_key).is_some());
		assert!(load(dir.path(), &second_key).is_none());
	}

	#[test]
	fn load_returns_none_on_empty_dir() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let key = CacheKey::from_path(&src, dir.path()).unwrap();
		assert!(load(dir.path(), &key).is_none());
	}

	#[test]
	fn cache_path_is_versioned_and_sharded() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let key = CacheKey::from_path(&src, dir.path()).unwrap();
		let entry = key.full_path(dir.path());

		let version_dir = format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}");
		assert!(entry.to_string_lossy().contains(&version_dir));
		let shard = entry.parent().unwrap().file_name().unwrap();
		assert_eq!(shard.len(), 2);
	}
}