Skip to main content

ferridriver_script/
bundle.rs

1//! Step-file front-end: rolldown bundle + tree-shake + TypeScript ->
2//! one ESM module -> compiled to `QuickJS` bytecode once.
3//!
4//! rolldown (built on oxc) resolves the whole import graph including
5//! `node_modules`, transpiles `.ts`/`.tsx`, tree-shakes, and emits a
6//! single ESM chunk. That chunk is compiled to bytecode a single time;
7//! every per-worker session links the bytecode (one `Module::load`, no
8//! parse, no resolver). A hidden source map is kept so a JS error in
9//! the bundled output is reported at the original `.ts`/`.js` location.
10
11use std::path::{Path, PathBuf};
12use std::sync::Arc;
13
14use rolldown::{Bundler, BundlerOptions, InputItem, OutputFormat, Platform, SourceMapType};
15use rolldown_common::Output;
16use rquickjs::{AsyncContext, AsyncRuntime, CatchResultExt, Module, WriteOptions, WriteOptionsEndianness, async_with};
17
18use crate::engine::caught_to_script_error;
19use crate::error::ScriptError;
20
/// One bundled+tree-shaken step graph compiled to `QuickJS` bytecode,
/// plus the source map to translate bundled positions back to source.
pub struct CompiledBundle {
  /// Name the module was declared under. `eval_bundle` also passes it
  /// as the label when converting load/eval errors.
  pub module_name: String,
  /// Serialised module bytecode. Held in an `Arc` so each session can
  /// take its own handle with a reference-count bump instead of a copy.
  pub bytecode: Arc<[u8]>,
  /// Parsed source map backing `remap`/`source_files`. `None` when no
  /// map JSON was available or the JSON failed to parse.
  source_map: Option<sourcemap::SourceMap>,
}
28
29/// rolldown-bundle + tree-shake + transpile the step entry files (and
30/// their `node_modules`/shared imports) into a single ESM module.
31/// Returns the bundled code and the (hidden) source map JSON. Exposed
32/// for diagnostics/tests; production uses [`bundle_and_compile`].
33pub async fn bundle_source(entry_paths: &[PathBuf], cwd: &Path) -> Result<(String, Option<String>), ScriptError> {
34  if entry_paths.is_empty() {
35    return Err(ScriptError::internal("no step entry files".to_string()));
36  }
37
38  let input: Vec<InputItem> = entry_paths
39    .iter()
40    .map(|p| InputItem {
41      name: None,
42      import: p.to_string_lossy().into_owned(),
43    })
44    .collect();
45
46  let options = BundlerOptions {
47    input: Some(input),
48    cwd: Some(cwd.to_path_buf()),
49    // Neutral: no Node builtins are injected (QuickJS has none); pure
50    // ESM/CJS node_modules still resolve and bundle.
51    platform: Some(Platform::Neutral),
52    format: Some(OutputFormat::Esm),
53    // Hidden: emit the map but no `//# sourceMappingURL` trailer in the
54    // code we feed to QuickJS.
55    sourcemap: Some(SourceMapType::Hidden),
56    ..Default::default()
57  };
58
59  let mut bundler = Bundler::new(options).map_err(|e| ScriptError::internal(format!("rolldown init: {e:?}")))?;
60  // rolldown's generate future is large; box it so it doesn't bloat the
61  // enclosing future.
62  let out = Box::pin(bundler.generate())
63    .await
64    .map_err(|e| ScriptError::internal(format!("rolldown bundle: {e:?}")))?;
65
66  for asset in &out.assets {
67    if let Output::Chunk(chunk) = asset {
68      if chunk.is_entry {
69        let code = chunk.code.clone();
70        return Ok(match &chunk.map {
71          Some(m) => (code, Some(m.to_json_string())),
72          None => (code, None),
73        });
74      }
75    }
76  }
77  Err(ScriptError::internal("rolldown produced no entry chunk".to_string()))
78}
79
80/// Bundle the step entry files (TypeScript ok; `node_modules` and
81/// shared utils resolved + tree-shaken) into one ESM module and compile
82/// it to bytecode. Done once, before workers spawn.
83pub async fn bundle_and_compile(entry_paths: &[PathBuf], cwd: &Path) -> Result<CompiledBundle, ScriptError> {
84  let module_name = "ferridriver-bdd-steps.js".to_string();
85
86  // Disk cache: an unchanged source tree skips rolldown AND the QuickJS
87  // compile. Validated against every transitive input's content hash.
88  let cache_key = crate::bytecode_cache::entry_key(entry_paths);
89  if let Some(hit) = crate::bytecode_cache::load(cache_key) {
90    let source_map = hit
91      .source_map_json
92      .and_then(|j| sourcemap::SourceMap::from_slice(j.as_bytes()).ok());
93    return Ok(CompiledBundle {
94      module_name,
95      bytecode: Arc::from(hit.bytecode.into_boxed_slice()),
96      source_map,
97    });
98  }
99
100  let (code, map_json) = Box::pin(bundle_source(entry_paths, cwd)).await?;
101
102  let name = module_name.clone();
103  let runtime = AsyncRuntime::new().map_err(|e| ScriptError::internal(format!("bytecode runtime: {e}")))?;
104  let ctx = AsyncContext::full(&runtime)
105    .await
106    .map_err(|e| ScriptError::internal(format!("bytecode context: {e}")))?;
107  let bytecode: Vec<u8> = async_with!(ctx => |ctx| {
108    // Bundled module has no remaining imports — `declare` (parse only,
109    // no execution) + `write` needs no resolver.
110    let module = Module::declare(ctx.clone(), name.into_bytes(), code.into_bytes())
111      .catch(&ctx)
112      .map_err(|e| caught_to_script_error(e, ""))?;
113    module
114      .write(WriteOptions {
115        endianness: WriteOptionsEndianness::Native,
116        ..Default::default()
117      })
118      .map_err(|e| ScriptError::internal(format!("module write: {e}")))
119  })
120  .await?;
121
122  let inputs = crate::bytecode_cache::collect_inputs(entry_paths, map_json.as_deref(), cwd);
123  crate::bytecode_cache::store(cache_key, &bytecode, &module_name, map_json.as_deref(), None, &inputs);
124
125  let source_map = map_json.and_then(|j| sourcemap::SourceMap::from_slice(j.as_bytes()).ok());
126  Ok(CompiledBundle {
127    module_name,
128    bytecode: Arc::from(bytecode.into_boxed_slice()),
129    source_map,
130  })
131}
132
133/// Link + evaluate the bundled step module from precompiled bytecode in
134/// the given session. Top-level `Given`/`When`/`Then` run here.
135pub async fn eval_bundle(actx: &AsyncContext, bundle: &CompiledBundle) -> Result<(), ScriptError> {
136  let bytecode = Arc::clone(&bundle.bytecode);
137  let label = bundle.module_name.clone();
138  async_with!(actx => |ctx| {
139    // SAFETY: produced by `Module::write` in THIS process and
140    // rquickjs/QuickJS build with native endianness, never persisted —
141    // the precondition `Module::load` documents.
142    #[allow(unsafe_code)]
143    let module = match (unsafe { Module::load(ctx.clone(), &bytecode) }).catch(&ctx) {
144      Ok(m) => m,
145      Err(e) => return Err(caught_to_script_error(e, &label)),
146    };
147    let promise = match module.eval().catch(&ctx) {
148      Ok((_evaluated, p)) => p,
149      Err(e) => return Err(caught_to_script_error(e, &label)),
150    };
151    match promise.into_future::<()>().await.catch(&ctx) {
152      Ok(()) => Ok(()),
153      Err(e) => Err(caught_to_script_error(e, &label)),
154    }
155  })
156  .await
157}
158
159impl CompiledBundle {
160  /// Map a bundled-output `line:col` (1-based, as QuickJS reports) back
161  /// to the original `.ts`/`.js` source location.
162  #[must_use]
163  pub fn remap(&self, line: u32, col: u32) -> Option<(String, u32, u32)> {
164    let sm = self.source_map.as_ref()?;
165    let token = sm.lookup_token(line.saturating_sub(1), col.saturating_sub(1))?;
166    let src = token.get_source().unwrap_or("<unknown>").to_string();
167    Some((src, token.get_src_line() + 1, token.get_src_col() + 1))
168  }
169
170  /// Every source file that went into this bundle (entry + transitive
171  /// imports), resolved to absolute paths against `cwd`. Read from the
172  /// source map's `sources`; synthetic (non-file) sources are skipped.
173  ///
174  /// Callers running untrusted bundles use this to enforce a sandbox
175  /// jail (every input must live under an allowed root).
176  #[must_use]
177  pub fn source_files(&self, cwd: &Path) -> Vec<PathBuf> {
178    let Some(sm) = self.source_map.as_ref() else {
179      return Vec::new();
180    };
181    sm.sources()
182      .map(|src| {
183        let p = Path::new(src);
184        if p.is_absolute() { p.to_path_buf() } else { cwd.join(p) }
185      })
186      .collect()
187  }
188}
189
/// Reports whether `path` has a TypeScript extension (`ts`, `tsx`,
/// `mts` or `cts`), meaning it has to be transpiled by the bundler.
/// The comparison is case-sensitive; paths with no extension or a
/// non-UTF-8 extension are not TypeScript.
#[must_use]
pub fn is_typescript_path(path: &Path) -> bool {
  const TS_EXTENSIONS: [&str; 4] = ["ts", "tsx", "mts", "cts"];
  path
    .extension()
    .and_then(std::ffi::OsStr::to_str)
    .is_some_and(|ext| TS_EXTENSIONS.contains(&ext))
}
199
/// Heuristic: some line of `source` begins (after leading whitespace)
/// with a static `import` or `export`, so the source has to run as an
/// ES module (bundled).
///
/// A keyword only counts when the byte right after it can start a
/// static declaration: space, tab, `{` or `*` for both keywords, plus a
/// quote for `import` (side-effect import). That makes the whitespace-
/// free namespace form `import*as ns from "x"` count, matching how
/// `export*` is treated. Dynamic `import(...)` and `import.meta` are
/// deliberately not matched — they are valid in a plain script, which
/// must keep its top-level `return`.
///
/// The check is purely line-based and does not parse the source.
#[must_use]
pub fn source_is_es_module(source: &str) -> bool {
  source.lines().any(|line| {
    let t = line.trim_start();
    let static_import = t.strip_prefix("import").is_some_and(|rest| {
      matches!(
        rest.as_bytes().first(),
        Some(b' ' | b'\t' | b'{' | b'*' | b'\'' | b'"')
      )
    });
    let static_export = t
      .strip_prefix("export")
      .is_some_and(|rest| matches!(rest.as_bytes().first(), Some(b' ' | b'\t' | b'{' | b'*')));
    static_import || static_export
  })
}
219
/// One plugin file: rolldown-bundled (TypeScript, plugin-local imports,
/// tree-shaking) and compiled to `QuickJS` bytecode, with its manifests
/// extracted straight from the compiled module — no separate throwaway
/// runtime per file.
///
/// The bytecode is pure rolldown output — no appended epilogue, no
/// transfer global. Evaluating it runs the file's top-level
/// `defineTool(...)` calls, registering into the Rust
/// `ExtensionRegistry`. `manifests_json` is read straight off that
/// registry — no JS extraction expression. `index` is the file's
/// position in the returned (file-order, contiguous over successes) vec.
pub struct CompiledPlugin {
  /// The plugin file this entry was built from, as given by the caller.
  pub path: PathBuf,
  /// Position of this entry among the successfully compiled plugins
  /// (failed files are skipped when numbering).
  pub index: usize,
  /// Serialised module bytecode, either freshly compiled or taken from
  /// one of the cache tiers.
  pub bytecode: Arc<[u8]>,
  /// JSON array (one object per tool, source order, `handler` stripped).
  /// Deserialises into `Vec<PluginManifest>` on the MCP side without
  /// ever re-running the plugin.
  pub manifests_json: String,
}
240
/// Process-scoped content-hash cache: `hash(canonical path + bytes)` ->
/// (bytecode, manifests JSON). A plugin file whose content+path is
/// unchanged skips rolldown + compile entirely on any later
/// `compile_and_extract_plugins` call (reload, the same file discovered
/// under two roots, repeated `box-craft setup`). Bounded by the number
/// of distinct plugin files a process ever loads (tiny) so no eviction
/// is needed.
///
/// The map itself lives only in memory, but its entries are not purely
/// process-local: `compile_and_extract_plugins` promotes bytecode loaded
/// from `crate::bytecode_cache` (the disk tier) into it, and persists
/// freshly compiled bytecode to that same disk tier.
/// NOTE(review): the `unsafe Module::load` SAFETY comments in this file
/// assume bytecode that was never persisted — reconcile them (and
/// `docs/plugin-architecture.md`) with the disk tier.
type PluginCache = std::sync::Mutex<rustc_hash::FxHashMap<u64, (Arc<[u8]>, String)>>;
// Lazily initialised on first use through `plugin_cache()`.
static PLUGIN_BYTECODE_CACHE: std::sync::OnceLock<PluginCache> = std::sync::OnceLock::new();
255
256fn plugin_cache() -> &'static PluginCache {
257  PLUGIN_BYTECODE_CACHE.get_or_init(|| std::sync::Mutex::new(rustc_hash::FxHashMap::default()))
258}
259
/// Derive the in-memory cache key for a plugin file by hashing its
/// path followed by its byte content with the std default hasher.
///
/// The path is canonicalised first; if that fails (for example the
/// file does not exist) the path is hashed exactly as given.
fn cache_key(path: &Path, bytes: &[u8]) -> u64 {
  use std::collections::hash_map::DefaultHasher;
  use std::hash::{Hash, Hasher};

  let resolved = match std::fs::canonicalize(path) {
    Ok(canonical) => canonical,
    Err(_) => path.to_path_buf(),
  };

  let mut hasher = DefaultHasher::new();
  resolved.hash(&mut hasher);
  bytes.hash(&mut hasher);
  hasher.finish()
}
272
273/// Bundle + compile + extract every plugin file. The expensive
274/// per-file rolldown bundles run concurrently; bytecode compile +
275/// extraction share ONE throwaway runtime for the whole batch (the
276/// pre-migration path spun one full engine per file for extraction
277/// *and* one per file for bytecode). Unchanged files are served from
278/// the process content-hash cache with no bundle and no compile.
279///
280/// Per-file failures (bundle, compile, or extraction) are returned
281/// rather than aborting the batch. Output preserves input file order;
282/// surviving `CompiledPlugin`s carry contiguous `index` values.
283pub async fn compile_and_extract_plugins(files: &[PathBuf]) -> (Vec<CompiledPlugin>, Vec<(PathBuf, ScriptError)>) {
284  // Per original position: a cache hit (bytecode + manifests), or a
285  // cache miss we must bundle, or an early failure. A miss carries both
286  // the in-memory content key and the disk-cache key so the compile step
287  // can populate both tiers.
288  enum Slot {
289    Hit(Arc<[u8]>, String),
290    Miss { inmem_key: u64, disk_key: u64 },
291    Failed(ScriptError),
292  }
293
294  let mut bytes: Vec<Vec<u8>> = Vec::with_capacity(files.len());
295  let mut slots: Vec<Slot> = Vec::with_capacity(files.len());
296  for path in files {
297    match std::fs::read(path) {
298      Ok(b) => {
299        let inmem_key = cache_key(path, &b);
300        let cached = plugin_cache().lock().ok().and_then(|c| c.get(&inmem_key).cloned());
301        let disk_key = crate::bytecode_cache::entry_key(std::slice::from_ref(path));
302        match cached {
303          // 1. In-memory (same process).
304          Some((bc, mj)) => slots.push(Slot::Hit(bc, mj)),
305          // 2. Disk (cross-process), transitively validated. Promote into
306          //    the in-memory tier so later same-process loads stay hot.
307          None => match crate::bytecode_cache::load(disk_key) {
308            Some(entry) => {
309              let bc: Arc<[u8]> = Arc::from(entry.bytecode.into_boxed_slice());
310              let mj = entry.aux.unwrap_or_else(|| "[]".to_string());
311              if let Ok(mut cache) = plugin_cache().lock() {
312                cache.insert(inmem_key, (bc.clone(), mj.clone()));
313              }
314              slots.push(Slot::Hit(bc, mj));
315            },
316            // 3. Cold: bundle + compile below.
317            None => slots.push(Slot::Miss { inmem_key, disk_key }),
318          },
319        }
320        bytes.push(b);
321      },
322      Err(e) => {
323        slots.push(Slot::Failed(ScriptError::internal(format!(
324          "read {}: {e}",
325          path.display()
326        ))));
327        bytes.push(Vec::new());
328      },
329    }
330  }
331
332  // Bundle every cache-miss file concurrently (independent rolldown
333  // graphs; this is the dominant cold-start cost).
334  let miss_idx: Vec<usize> = slots
335    .iter()
336    .enumerate()
337    .filter_map(|(i, s)| matches!(s, Slot::Miss { .. }).then_some(i))
338    .collect();
339  let bundles = futures::future::join_all(miss_idx.iter().map(|&i| {
340    let path = files[i].clone();
341    async move {
342      let cwd = path.parent().unwrap_or_else(|| Path::new(".")).to_path_buf();
343      (i, Box::pin(bundle_source(std::slice::from_ref(&path), &cwd)).await)
344    }
345  }))
346  .await;
347
348  // Compiled code (+ source map, for the disk cache's transitive input
349  // set) per missed position. None = bundle failed.
350  let mut bundled_code: rustc_hash::FxHashMap<usize, String> = rustc_hash::FxHashMap::default();
351  let mut bundled_map: rustc_hash::FxHashMap<usize, Option<String>> = rustc_hash::FxHashMap::default();
352  for (i, res) in bundles {
353    match res {
354      Ok((code, map)) => {
355        bundled_code.insert(i, code);
356        bundled_map.insert(i, map);
357      },
358      Err(e) => slots[i] = Slot::Failed(e),
359    }
360  }
361
362  // One throwaway runtime/context compiles + extracts every missed file.
363  let runtime_ctx = match AsyncRuntime::new() {
364    Ok(r) => match AsyncContext::full(&r).await {
365      Ok(c) => Some((r, c)),
366      Err(e) => {
367        let err = ScriptError::internal(format!("plugin bytecode context: {e}"));
368        for s in &mut slots {
369          if matches!(s, Slot::Miss { .. }) {
370            *s = Slot::Failed(err.clone());
371          }
372        }
373        None
374      },
375    },
376    Err(e) => {
377      let err = ScriptError::internal(format!("plugin bytecode runtime: {e}"));
378      for s in &mut slots {
379        if matches!(s, Slot::Miss { .. }) {
380          *s = Slot::Failed(err.clone());
381        }
382      }
383      None
384    },
385  };
386
387  if let Some((_runtime, actx)) = runtime_ctx {
388    for i in &miss_idx {
389      let i = *i;
390      let Slot::Miss { inmem_key, disk_key } = slots[i] else {
391        continue;
392      };
393      let Some(code) = bundled_code.get(&i) else { continue };
394      let module_name = format!("ferri_plugin_{i}.js");
395      match compile_extract_one(&actx, &module_name, code).await {
396        Ok((bc, mj)) => {
397          let bc: Arc<[u8]> = Arc::from(bc.into_boxed_slice());
398          if let Ok(mut cache) = plugin_cache().lock() {
399            cache.insert(inmem_key, (bc.clone(), mj.clone()));
400          }
401          // Persist for the next process. Inputs = this plugin file plus
402          // its transitive imports (from the source map), so an edited
403          // helper invalidates the entry on the next load.
404          let cwd = files[i].parent().unwrap_or_else(|| Path::new(".")).to_path_buf();
405          let map = bundled_map.get(&i).cloned().flatten();
406          let inputs = crate::bytecode_cache::collect_inputs(std::slice::from_ref(&files[i]), map.as_deref(), &cwd);
407          crate::bytecode_cache::store(disk_key, &bc, &module_name, map.as_deref(), Some(&mj), &inputs);
408          slots[i] = Slot::Hit(bc, mj);
409        },
410        Err(e) => slots[i] = Slot::Failed(e),
411      }
412    }
413  }
414
415  let mut survivors: Vec<CompiledPlugin> = Vec::new();
416  let mut failures: Vec<(PathBuf, ScriptError)> = Vec::new();
417  for (i, slot) in slots.into_iter().enumerate() {
418    match slot {
419      Slot::Hit(bytecode, manifests_json) => survivors.push(CompiledPlugin {
420        path: files[i].clone(),
421        index: survivors.len(),
422        bytecode,
423        manifests_json,
424      }),
425      Slot::Failed(e) => failures.push((files[i].clone(), e)),
426      // A Miss with no compiled output never reached Hit/Failed only if
427      // its bundle was dropped — already recorded as Failed above; this
428      // arm is unreachable but keeps the match total without a panic.
429      Slot::Miss { .. } => failures.push((
430        files[i].clone(),
431        ScriptError::internal("plugin compile produced no output".to_string()),
432      )),
433    }
434  }
435  (survivors, failures)
436}
437
438/// Declare the bundled module, serialise it to bytecode, then load +
439/// evaluate that exact bytecode in the shared context (which has the
440/// extension registry installed) so the manifest is read straight off
441/// the Rust registry — the very bytes, and the very registration path,
442/// a session will run. Returns the file's bytecode and the JSON of just
443/// the tools THIS file registered (registry slice `[before, after)`).
444async fn compile_extract_one(
445  actx: &AsyncContext,
446  module_name: &str,
447  code: &str,
448) -> Result<(Vec<u8>, String), ScriptError> {
449  let name = module_name.to_string();
450  let code = code.to_string();
451  let label = module_name.to_string();
452  async_with!(actx => |ctx| {
453    // Registry + native `defineTool`/cucumber surface. Idempotent — the
454    // shared extraction context installs it once for the whole batch.
455    crate::bindings::install_bdd(&ctx)
456      .map_err(|e| ScriptError::internal(format!("install extension registry: {e}")))?;
457    // Manifest extraction is the MCP tool path: expose
458    // `ferridriver.host = 'mcp'` so an extension's host-gated
459    // `defineTool` runs and its manifest is captured (mirrors what the
460    // mcp session does).
461    {
462      let fd = rquickjs::Object::new(ctx.clone())
463        .map_err(|e| ScriptError::internal(format!("ferridriver global: {e}")))?;
464      fd.set("host", "mcp")
465        .map_err(|e| ScriptError::internal(format!("ferridriver.host: {e}")))?;
466      ctx
467        .globals()
468        .set("ferridriver", fd)
469        .map_err(|e| ScriptError::internal(format!("install ferridriver global: {e}")))?;
470    }
471
472    // Bundled module has no remaining imports — `declare` (parse only)
473    // needs no resolver; mirrors `bundle_and_compile`.
474    let module = Module::declare(ctx.clone(), name.clone().into_bytes(), code.into_bytes())
475      .catch(&ctx)
476      .map_err(|e| caught_to_script_error(e, &label))?;
477    let bytecode = module
478      .write(WriteOptions {
479        // Same process + interpreter that will `load` it.
480        endianness: WriteOptionsEndianness::Native,
481        ..Default::default()
482      })
483      .map_err(|e| ScriptError::internal(format!("plugin module write: {e}")))?;
484
485    let before = crate::bindings::tools_len(&ctx)?;
486
487    // SAFETY: `bytecode` was just produced by `Module::write` in THIS
488    // process and rquickjs/QuickJS build with native endianness and is
489    // not persisted — the precondition `Module::load` documents. This is
490    // the same contract `eval_bundle` and `install_plugins` rely on.
491    #[allow(unsafe_code)]
492    let loaded = (unsafe { Module::load(ctx.clone(), &bytecode) })
493      .catch(&ctx)
494      .map_err(|e| caught_to_script_error(e, &label))?;
495    let promise = loaded
496      .eval()
497      .catch(&ctx)
498      .map_err(|e| caught_to_script_error(e, &label))?
499      .1;
500    promise
501      .into_future::<()>()
502      .await
503      .catch(&ctx)
504      .map_err(|e| caught_to_script_error(e, &label))?;
505
506    // Tools registered via the file's top-level `defineTool(...)` calls
507    // during eval — slice off the ones THIS file added.
508    let all = crate::bindings::tools_snapshot(&ctx)?;
509    let slice = all.get(before..).unwrap_or(&[]);
510    let manifests_json =
511      serde_json::to_string(slice).map_err(|e| ScriptError::internal(format!("serialise manifests: {e}")))?;
512    Ok((bytecode, manifests_json))
513  })
514  .await
515}