Skip to main content

code_moniker_cli/
cache.rs

1use std::fs;
2use std::hash::{Hash, Hasher};
3use std::io::{self, Write};
4use std::path::{Path, PathBuf};
5use std::sync::atomic::{AtomicU64, Ordering};
6use std::time::UNIX_EPOCH;
7
8use code_moniker_core::core::code_graph::CodeGraph;
9use code_moniker_core::core::code_graph::encoding::{self, LAYOUT_VERSION};
10use rustc_hash::FxHasher;
11
12use crate::extract;
13use code_moniker_core::lang::Lang;
14
/// Magic number written as the first four bytes of every cache entry.
const CACHE_MAGIC: u32 = 0xC0DE_2106;
/// Version of the entry header; also part of the versioned cache directory name.
const CACHE_FORMAT_VERSION: u32 = 1;

// Byte offsets of the fixed-size header fields. Each offset is the previous
// offset plus the width of the previous field, so the layout reads top-down:
// magic (u32), format version (u32), mtime (u64), size (u64),
// anchor hash (u64), path length (u32).
const OFF_MAGIC: usize = 0;
const OFF_FORMAT: usize = OFF_MAGIC + std::mem::size_of::<u32>();
const OFF_MTIME: usize = OFF_FORMAT + std::mem::size_of::<u32>();
const OFF_SIZE: usize = OFF_MTIME + std::mem::size_of::<u64>();
const OFF_ANCHOR: usize = OFF_SIZE + std::mem::size_of::<u64>();
const OFF_PATH_LEN: usize = OFF_ANCHOR + std::mem::size_of::<u64>();
/// Total length of the fixed header; the variable-length path bytes start here.
const HEADER_FIXED: usize = OFF_PATH_LEN + std::mem::size_of::<u32>();

/// Process-wide counter mixed into temporary file names so that concurrent
/// stores from the same process never pick the same temp path.
static TMP_NONCE: AtomicU64 = AtomicU64::new(0);
26
27#[derive(Clone, Debug)]
28pub struct CacheKey {
29	pub abs_path: PathBuf,
30	pub mtime: u64,
31	pub size: u64,
32	pub anchor_hash: u64,
33}
34
35impl CacheKey {
36	pub fn from_path(path: &Path, anchor: &Path) -> io::Result<Self> {
37		let abs_path = path.canonicalize()?;
38		let meta = fs::metadata(&abs_path)?;
39		let mtime = meta
40			.modified()?
41			.duration_since(UNIX_EPOCH)
42			.map(|d| d.as_nanos() as u64)
43			.unwrap_or(0);
44		Ok(Self {
45			abs_path,
46			mtime,
47			size: meta.len(),
48			anchor_hash: hash_path(anchor),
49		})
50	}
51
52	fn path_hash(&self) -> u64 {
53		hash_path(&self.abs_path)
54	}
55
56	fn shard(&self) -> String {
57		format!("{:02x}", (self.path_hash() & 0xff) as u8)
58	}
59
60	fn filename(&self) -> String {
61		format!("{:016x}_{:016x}.bin", self.path_hash(), self.anchor_hash)
62	}
63
64	fn full_path(&self, root: &Path) -> PathBuf {
65		root.join(format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}"))
66			.join(self.shard())
67			.join(self.filename())
68	}
69
70	fn abs_path_bytes(&self) -> &[u8] {
71		path_bytes(&self.abs_path)
72	}
73}
74
75pub fn load(cache_dir: &Path, key: &CacheKey) -> Option<CodeGraph> {
76	let path = key.full_path(cache_dir);
77	let bytes = fs::read(&path).ok()?;
78	let body = validate_header(&bytes, key)?;
79	match encoding::decode(body) {
80		Ok(g) => Some(g),
81		Err(e) => {
82			eprintln!(
83				"code-moniker: cache decode failed at {} ({e}); ignoring",
84				path.display(),
85			);
86			None
87		}
88	}
89}
90
91pub fn store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) {
92	let _ = try_store(cache_dir, key, graph);
93}
94
95fn try_store(cache_dir: &Path, key: &CacheKey, graph: &CodeGraph) -> io::Result<()> {
96	let path = key.full_path(cache_dir);
97	if let Some(parent) = path.parent() {
98		fs::create_dir_all(parent)?;
99	}
100	let body = encoding::encode(graph)
101		.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e.to_string()))?;
102	let path_bytes = key.abs_path_bytes();
103	let mut buf = Vec::with_capacity(HEADER_FIXED + path_bytes.len() + body.len());
104	buf.extend_from_slice(&CACHE_MAGIC.to_le_bytes());
105	buf.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
106	buf.extend_from_slice(&key.mtime.to_le_bytes());
107	buf.extend_from_slice(&key.size.to_le_bytes());
108	buf.extend_from_slice(&key.anchor_hash.to_le_bytes());
109	buf.extend_from_slice(&(path_bytes.len() as u32).to_le_bytes());
110	buf.extend_from_slice(path_bytes);
111	buf.extend_from_slice(&body);
112
113	let nonce = TMP_NONCE.fetch_add(1, Ordering::Relaxed);
114	let tmp = path.with_extension(format!("tmp.{}.{nonce}", std::process::id()));
115	let write_result = (|| -> io::Result<()> {
116		let mut f = fs::File::create(&tmp)?;
117		f.write_all(&buf)?;
118		f.sync_data()?;
119		Ok(())
120	})();
121	if let Err(e) = write_result {
122		let _ = fs::remove_file(&tmp);
123		return Err(e);
124	}
125	fs::rename(&tmp, &path)
126}
127
128pub fn load_or_extract(
129	path: &Path,
130	anchor: &Path,
131	lang: Lang,
132	cache_dir: Option<&Path>,
133) -> Option<(CodeGraph, Option<String>)> {
134	if let Some(dir) = cache_dir
135		&& let Ok(key) = CacheKey::from_path(path, anchor)
136	{
137		if let Some(g) = load(dir, &key) {
138			return Some((g, None));
139		}
140		let source = fs::read_to_string(path).ok()?;
141		let graph = extract::extract(lang, &source, anchor);
142		store(dir, &key, &graph);
143		return Some((graph, Some(source)));
144	}
145	let source = fs::read_to_string(path).ok()?;
146	let graph = extract::extract(lang, &source, anchor);
147	Some((graph, Some(source)))
148}
149
150fn validate_header<'a>(bytes: &'a [u8], key: &CacheKey) -> Option<&'a [u8]> {
151	if bytes.len() < HEADER_FIXED {
152		return None;
153	}
154	let magic = u32::from_le_bytes(bytes[OFF_MAGIC..OFF_FORMAT].try_into().ok()?);
155	if magic != CACHE_MAGIC {
156		return None;
157	}
158	let version = u32::from_le_bytes(bytes[OFF_FORMAT..OFF_MTIME].try_into().ok()?);
159	if version != CACHE_FORMAT_VERSION {
160		return None;
161	}
162	let mtime = u64::from_le_bytes(bytes[OFF_MTIME..OFF_SIZE].try_into().ok()?);
163	let size = u64::from_le_bytes(bytes[OFF_SIZE..OFF_ANCHOR].try_into().ok()?);
164	let anchor_hash = u64::from_le_bytes(bytes[OFF_ANCHOR..OFF_PATH_LEN].try_into().ok()?);
165	if mtime != key.mtime || size != key.size || anchor_hash != key.anchor_hash {
166		return None;
167	}
168	let path_len = u32::from_le_bytes(bytes[OFF_PATH_LEN..HEADER_FIXED].try_into().ok()?) as usize;
169	if HEADER_FIXED + path_len > bytes.len() {
170		return None;
171	}
172	let stored_path = &bytes[HEADER_FIXED..HEADER_FIXED + path_len];
173	if stored_path != key.abs_path_bytes() {
174		return None;
175	}
176	Some(&bytes[HEADER_FIXED + path_len..])
177}
178
/// Byte form of `p`, used for hashing paths and for the path recorded in the
/// entry header. On Unix this is the OS string's bytes, without any conversion.
#[cfg(unix)]
fn path_bytes(p: &Path) -> &[u8] {
	use std::os::unix::ffi::OsStrExt;
	p.as_os_str().as_bytes()
}

/// Byte form of `p` on non-Unix hosts, used for hashing paths and for the
/// path recorded in the entry header.
#[cfg(not(unix))]
fn path_bytes(p: &Path) -> &[u8] {
	// `as_encoded_bytes` is lossless and yields plain UTF-8 for paths that are
	// valid Unicode. Going through `to_str` instead would map every
	// non-Unicode path to the same empty slice, so distinct files would share
	// one hash and one stored path.
	p.as_os_str().as_encoded_bytes()
}
190
191fn hash_path(p: &Path) -> u64 {
192	let mut h = FxHasher::default();
193	path_bytes(p).hash(&mut h);
194	h.finish()
195}
196
#[cfg(test)]
mod tests {
	use super::*;
	use code_moniker_core::core::moniker::MonikerBuilder;

	/// Writes `contents` to `src.ts` inside `dir` and returns the file's path.
	fn write_source(dir: &Path, contents: &[u8]) -> PathBuf {
		let src = dir.join("src.ts");
		fs::write(&src, contents).unwrap();
		src
	}

	/// A graph rooted at `app/path:root` holding a single `class` definition `Foo`.
	fn graph_with_one_def() -> CodeGraph {
		let root_moniker = MonikerBuilder::new()
			.project(b"app")
			.segment(b"path", b"root")
			.build();
		let mut graph = CodeGraph::new(root_moniker.clone(), b"module");
		let class_moniker = MonikerBuilder::new()
			.project(b"app")
			.segment(b"path", b"root")
			.segment(b"class", b"Foo")
			.build();
		graph
			.add_def(class_moniker, b"class", &root_moniker, Some((0, 10)))
			.unwrap();
		graph
	}

	#[test]
	fn store_then_load_roundtrips() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"export class Foo {}\n");
		let anchor = dir.path().join("anchor");
		let key = CacheKey::from_path(&src, &anchor).unwrap();
		let original = graph_with_one_def();

		store(dir.path(), &key, &original);
		let restored = load(dir.path(), &key).expect("should hit");
		assert_eq!(restored.def_count(), original.def_count());
	}

	#[test]
	fn load_misses_when_mtime_changes() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let anchor = dir.path().join("anchor");
		let before = CacheKey::from_path(&src, &anchor).unwrap();
		store(dir.path(), &before, &graph_with_one_def());

		// Rewrite the file with different contents after a short pause, then
		// rebuild the key from the new metadata.
		std::thread::sleep(std::time::Duration::from_millis(10));
		fs::write(&src, b"ab").unwrap();
		let after = CacheKey::from_path(&src, &anchor).unwrap();
		assert!(after.mtime != before.mtime || after.size != before.size);
		assert!(load(dir.path(), &after).is_none());
	}

	#[test]
	fn load_misses_when_anchor_changes() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let first_anchor = dir.path().join("anchor1");
		let second_anchor = dir.path().join("anchor2");
		let stored_key = CacheKey::from_path(&src, &first_anchor).unwrap();
		let other_key = CacheKey::from_path(&src, &second_anchor).unwrap();

		store(dir.path(), &stored_key, &graph_with_one_def());
		assert!(load(dir.path(), &stored_key).is_some());
		assert!(load(dir.path(), &other_key).is_none());
	}

	#[test]
	fn load_returns_none_on_empty_dir() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let key = CacheKey::from_path(&src, dir.path()).unwrap();
		assert!(load(dir.path(), &key).is_none());
	}

	#[test]
	fn cache_path_is_versioned_and_sharded() {
		let dir = tempfile::tempdir().unwrap();
		let src = write_source(dir.path(), b"a");
		let key = CacheKey::from_path(&src, dir.path()).unwrap();

		let entry = key.full_path(dir.path());
		let rendered = entry.to_string_lossy();
		assert!(rendered.contains(&format!("v{LAYOUT_VERSION}_{CACHE_FORMAT_VERSION}")));

		// The directory directly above the entry is the two-character shard.
		let shard_dir = entry.parent().unwrap().file_name().unwrap();
		assert_eq!(shard_dir.len(), 2);
	}
}
281}