Skip to main content

ferridriver_script/
bytecode_cache.rs

1//! Cross-process disk cache for compiled QuickJS bytecode.
2//!
3//! Compiling a rolldown bundle to bytecode (`bundle_and_compile`) or a
4//! plugin file (`compile_and_extract_plugins`) costs ~15 ms cold per
5//! process. The in-memory cache only helps within one process; a fresh
6//! `ferridriver bdd` / MCP server start pays it again. This persists the
7//! bytecode (plus its source map / manifests) to disk so an unchanged
8//! source tree skips BOTH rolldown and the QuickJS compile entirely.
9//!
10//! ## Soundness
11//!
12//! `Module::load` on bytecode is `unsafe`: it trusts the input was
13//! produced by an identical QuickJS build with native endianness. A disk
14//! cache crosses process (and machine) boundaries, so every entry lives
15//! under an `abi_tag`-named directory folding the QuickJS version
16//! (which tracks the on-disk `BC_VERSION`), target arch, endianness, and
17//! pointer width. Bytecode is only ever loaded from the directory
18//! matching the running toolchain — a mismatched build simply misses and
19//! recompiles. Bumping rquickjs changes `JS_GetVersion()` and thus the
20//! directory, so stale bytecode is never loaded.
21//!
22//! ## Freshness
23//!
24//! A bundle inlines its whole import graph, so the entry file's hash is
25//! not enough — an edited (but still-imported) helper must invalidate.
26//! Each entry records the content hash of every transitive input (the
27//! source map's `sources`); a load re-hashes them all and misses on any
28//! change, addition, or deletion.
29
30use std::hash::{Hash, Hasher};
31use std::path::{Path, PathBuf};
32use std::sync::OnceLock;
33
34use serde::{Deserialize, Serialize};
35
/// One cached compile: the bytecode plus the auxiliary data each caller
/// needs to reconstruct its result without re-running rolldown.
///
/// Implements the common value traits so callers can log, duplicate and
/// compare entries (e.g. in tests asserting a store/load round trip).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CacheEntry {
  /// Compiled QuickJS bytecode, exactly as read back from the cache.
  pub bytecode: Vec<u8>,
  /// Source-map JSON (BDD bundle) — `None` when the bundle had no map.
  pub source_map_json: Option<String>,
  /// Caller-specific sidecar (plugin manifests JSON) — `None` for BDD.
  pub aux: Option<String>,
}
45
46#[derive(Serialize, Deserialize)]
47struct Manifest {
48  module_name: String,
49  source_map_json: Option<String>,
50  aux: Option<String>,
51  /// `(absolute path, content hash)` for every transitive input.
52  inputs: Vec<(String, u64)>,
53}
54
/// Whether the disk cache is switched off for this process.
///
/// Presence-based: any value of `FERRIDRIVER_NO_BYTECODE_CACHE`, even an
/// empty one, disables both `load` and `store`.
fn disabled() -> bool {
  const OPT_OUT: &str = "FERRIDRIVER_NO_BYTECODE_CACHE";
  let flag = std::env::var_os(OPT_OUT);
  flag.is_some()
}
58
59/// Toolchain fingerprint. Bytecode under one tag is safe to
60/// `Module::load` only by an identical toolchain. `fdbc<N>` is our own
61/// format version — bump it on any change to the manifest shape.
62fn abi_tag() -> &'static str {
63  static TAG: OnceLock<String> = OnceLock::new();
64  TAG.get_or_init(|| {
65    // SAFETY: returns a static C string owned by the linked QuickJS.
66    #[allow(unsafe_code)]
67    let qjs = unsafe { std::ffi::CStr::from_ptr(rquickjs::qjs::JS_GetVersion()) }
68      .to_str()
69      .unwrap_or("unknown");
70    let endian = if cfg!(target_endian = "big") { "be" } else { "le" };
71    format!(
72      "fdbc1-qjs{qjs}-{}-{endian}-p{}",
73      std::env::consts::ARCH,
74      std::mem::size_of::<usize>() * 8,
75    )
76  })
77}
78
79/// `<cache>/ferridriver/bytecode/<abi_tag>/`, created on demand. Honors
80/// `FERRIDRIVER_CACHE_DIR`, else the platform user cache dir, else the
81/// system temp dir. Returns `None` if no writable base exists.
82fn cache_dir() -> Option<&'static Path> {
83  static DIR: OnceLock<Option<PathBuf>> = OnceLock::new();
84  DIR
85    .get_or_init(|| {
86      let base = std::env::var_os("FERRIDRIVER_CACHE_DIR")
87        .map(PathBuf::from)
88        .or_else(user_cache_base)
89        .unwrap_or_else(std::env::temp_dir);
90      let dir = base.join("ferridriver").join("bytecode").join(abi_tag());
91      match std::fs::create_dir_all(&dir) {
92        Ok(()) => Some(dir),
93        Err(_) => None,
94      }
95    })
96    .as_deref()
97}
98
/// Per-user cache root: `$XDG_CACHE_HOME` when set, otherwise
/// `$HOME/Library/Caches` on macOS and `$HOME/.cache` elsewhere.
///
/// Empty or relative values are ignored (the XDG Base Directory
/// specification requires these variables to hold absolute paths), so a
/// stray `XDG_CACHE_HOME=` never roots the cache in the current working
/// directory. Returns `None` when neither variable yields a usable path.
fn user_cache_base() -> Option<PathBuf> {
  // An environment variable only counts if it names an absolute path;
  // the empty string is not absolute and is therefore rejected too.
  let absolute_env = |name: &str| {
    std::env::var_os(name)
      .map(PathBuf::from)
      .filter(|p| p.is_absolute())
  };
  if let Some(xdg) = absolute_env("XDG_CACHE_HOME") {
    return Some(xdg);
  }
  let home = absolute_env("HOME")?;
  Some(if cfg!(target_os = "macos") {
    home.join("Library").join("Caches")
  } else {
    home.join(".cache")
  })
}
109
/// 64-bit FNV-1a content hash used for the freshness check.
///
/// These hashes are persisted in the on-disk manifest and compared by
/// later processes, so the algorithm must be fixed. The standard
/// library's `DefaultHasher` explicitly does not guarantee a stable
/// algorithm across Rust releases, which would make every entry miss
/// after a toolchain upgrade. Manifests written with a different hash
/// fail validation in `load` and are replaced by the next `store`.
///
/// Not cryptographic: it detects edits, it does not resist an attacker.
fn hash_bytes(bytes: &[u8]) -> u64 {
  const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
  const PRIME: u64 = 0x0000_0100_0000_01b3;
  bytes
    .iter()
    .fold(OFFSET_BASIS, |acc, &byte| (acc ^ u64::from(byte)).wrapping_mul(PRIME))
}
115
116/// A stable key for a set of entry paths (canonicalized, order-independent).
117/// The transitive content check on load is what actually guards freshness;
118/// this only needs to be collision-free across distinct bundle requests.
119#[must_use]
120pub fn entry_key(entry_paths: &[PathBuf]) -> u64 {
121  let mut canon: Vec<String> = entry_paths
122    .iter()
123    .map(|p| {
124      std::fs::canonicalize(p)
125        .unwrap_or_else(|_| p.clone())
126        .to_string_lossy()
127        .into_owned()
128    })
129    .collect();
130  canon.sort();
131  let mut h = std::collections::hash_map::DefaultHasher::new();
132  abi_tag().hash(&mut h);
133  canon.hash(&mut h);
134  h.finish()
135}
136
/// Resolve a source-map `sources` entry against `cwd`.
///
/// Absolute entries are returned unchanged; relative ones are appended to
/// `cwd`. `Path::join` implements exactly that rule: joining an absolute
/// path replaces the base instead of appending to it.
fn resolve_source(src: &str, cwd: &Path) -> PathBuf {
  cwd.join(src)
}
142
143/// Collect the transitive input set for a bundle: the entry files plus
144/// every `sources` entry in the source map, canonicalized and deduped.
145#[must_use]
146pub fn collect_inputs(entry_paths: &[PathBuf], source_map_json: Option<&str>, cwd: &Path) -> Vec<PathBuf> {
147  let mut out: Vec<PathBuf> = Vec::new();
148  let mut push = |p: PathBuf| {
149    let c = std::fs::canonicalize(&p).unwrap_or(p);
150    if !out.contains(&c) {
151      out.push(c);
152    }
153  };
154  for e in entry_paths {
155    push(e.clone());
156  }
157  if let Some(json) = source_map_json {
158    if let Ok(sm) = sourcemap::SourceMap::from_slice(json.as_bytes()) {
159      for src in sm.sources() {
160        // rolldown emits synthetic sources (e.g. `\0` virtual modules)
161        // that don't map to real files — skip anything missing.
162        let path = resolve_source(src, cwd);
163        if path.is_file() {
164          push(path);
165        }
166      }
167    }
168  }
169  out
170}
171
172fn paths(key: u64) -> Option<(PathBuf, PathBuf)> {
173  let dir = cache_dir()?;
174  let hex = format!("{key:016x}");
175  Some((dir.join(format!("{hex}.bin")), dir.join(format!("{hex}.json"))))
176}
177
178/// Load a cached compile for `key`, validating that every recorded input
179/// still hashes identically. Returns `None` on any miss, mismatch, or IO
180/// error (the caller then compiles and [`store`]s).
181#[must_use]
182pub fn load(key: u64) -> Option<CacheEntry> {
183  if disabled() {
184    return None;
185  }
186  let (bin_path, json_path) = paths(key)?;
187  let manifest: Manifest = serde_json::from_slice(&std::fs::read(json_path).ok()?).ok()?;
188  for (path, want) in &manifest.inputs {
189    let bytes = std::fs::read(path).ok()?;
190    if hash_bytes(&bytes) != *want {
191      return None;
192    }
193  }
194  let bytecode = std::fs::read(bin_path).ok()?;
195  Some(CacheEntry {
196    bytecode,
197    source_map_json: manifest.source_map_json,
198    aux: manifest.aux,
199  })
200}
201
202/// Persist a freshly compiled `key` -> bytecode entry. Best-effort: any
203/// IO failure is swallowed (the cache is an optimization, never a
204/// correctness dependency). Writes are atomic via temp-file + rename so a
205/// concurrent or crashed writer never exposes a torn manifest.
206pub fn store(
207  key: u64,
208  bytecode: &[u8],
209  module_name: &str,
210  source_map_json: Option<&str>,
211  aux: Option<&str>,
212  inputs: &[PathBuf],
213) {
214  if disabled() {
215    return;
216  }
217  let Some((bin_path, json_path)) = paths(key) else {
218    return;
219  };
220  let input_hashes: Vec<(String, u64)> = inputs
221    .iter()
222    .filter_map(|p| {
223      let bytes = std::fs::read(p).ok()?;
224      Some((p.to_string_lossy().into_owned(), hash_bytes(&bytes)))
225    })
226    .collect();
227  let manifest = Manifest {
228    module_name: module_name.to_string(),
229    source_map_json: source_map_json.map(str::to_string),
230    aux: aux.map(str::to_string),
231    inputs: input_hashes,
232  };
233  let Ok(json) = serde_json::to_vec(&manifest) else {
234    return;
235  };
236  let _ = atomic_write(&bin_path, bytecode);
237  let _ = atomic_write(&json_path, &json);
238}
239
/// Write `bytes` to `path` by writing a sibling temp file and renaming it
/// over the target, so readers see either the old or the new content.
///
/// The temp name is `<file name>.tmp.<pid>.<n>`: it keeps the target's
/// full file name (so `<k>.bin` and `<k>.json` never share a scratch
/// file, which replacing the extension would cause) and adds a
/// per-process counter so concurrent threads don't collide either. The
/// temp file is removed again if the write or the rename fails.
///
/// # Errors
///
/// Returns the underlying IO error, or `InvalidInput` if `path` has no
/// file-name component.
fn atomic_write(path: &Path, bytes: &[u8]) -> std::io::Result<()> {
  use std::sync::atomic::{AtomicUsize, Ordering};
  // Only uniqueness matters here, so relaxed ordering is sufficient.
  static SEQ: AtomicUsize = AtomicUsize::new(0);

  let mut tmp_name = path
    .file_name()
    .ok_or_else(|| std::io::Error::new(std::io::ErrorKind::InvalidInput, "cache path has no file name"))?
    .to_os_string();
  let seq = SEQ.fetch_add(1, Ordering::Relaxed);
  tmp_name.push(format!(".tmp.{}.{seq}", std::process::id()));
  let tmp = path.with_file_name(tmp_name);

  let result = std::fs::write(&tmp, bytes).and_then(|()| std::fs::rename(&tmp, path));
  if result.is_err() {
    // Best-effort cleanup; the original error is what the caller sees.
    let _ = std::fs::remove_file(&tmp);
  }
  result
}