plg_runtime/machine.rs
1//! The Machine: the single runtime context every compiled predicate
2//! receives (`%M` in generated IR). Owns the term heap, trail,
3//! choice-point stack, argument registers, the current success
4//! continuation, step accounting, and the runtime atom table.
5
6use crate::cell::{self, Word};
7use plg_shared::StringInterner;
8
9/// Uniform signature shared by every compiled predicate, continuation,
10/// and retry function — and by runtime-provided continuations. The
11/// uniform C-ABI prototype is what makes `musttail` transfers valid.
12/// Returns 0 = fail/exhausted (driver backtracks), 1 = stop (limit hit
13/// or final success).
14pub type ContFn = unsafe extern "C" fn(*mut Machine, u64) -> i32;
15
/// Size of the argument-register file, and therefore the largest predicate
/// arity that can be passed through registers. v1 had no practical limit;
/// if this ceiling is ever hit, raise it alongside a frame-passing scheme.
pub const MAX_ARGS: usize = 16;
19
20/// Registry row emitted by codegen as `{ i32, i32, ptr }`, sorted by
21/// (functor, arity) for binary search.
22#[repr(C)]
23#[derive(Clone, Copy)]
24pub struct RegistryEntry {
25 pub functor: u32,
26 pub arity: u32,
27 pub f: ContFn,
28}
29
/// A single row of the `@plg_srcmap` side-table that codegen emits
/// (SPANS.md Layer 3). A raising call site passes the row's index as its
/// `site_id`; the error path turns that index back into `file:line:col`.
/// The layout mirrors the IR's `{i32, i32, i32}`.
#[repr(C)]
#[derive(Clone, Copy)]
pub struct SrcLoc {
    /// Index into the machine's filename table.
    pub file: u32,
    /// Line component of the location.
    pub line: u32,
    /// Column component of the location.
    pub col: u32,
}
41
/// The `site_id` value that stands for "no source location". It is used by
/// runtime-internal raises (query-side undefined goals) and by any binary
/// built without provenance; such errors get no `at file:line:col` suffix.
///
/// ABI contract: this MUST equal `plg_compiler::codegen::NO_SITE`, because
/// codegen emits that value as the `site_id` argument. The two constants
/// live in separate crates, so each one pins `== u32::MAX` in a unit test
/// to flag a one-sided renumber.
pub const NO_SITE: u32 = u32::MAX;
50
51/// RAII guard for the in-flight raise's site (SPANS.md Layer 3). A raising
52/// compiled builtin creates one at its ABI boundary; `Drop` **restores the
53/// previous value** (not `NO_SITE`), so the set/clear can't be forgotten and
54/// nested raises — an outer builtin whose work reaches an inner raising
55/// builtin — each see their own site without a save/restore stack. Use this
56/// instead of hand-writing `m.error_site = site; ...; m.error_site = NO_SITE`.
57///
58/// FOOTGUN: bind to a **named** variable — `let _site = ...;`. `let _ = ...`
59/// drops the guard immediately, so the body runs with the site already
60/// restored (no provenance, or the caller's site). `#[must_use]` does not
61/// catch the `let _` form.
62#[must_use = "binding to `let _` drops the guard immediately; use `let _site = ...`"]
63pub(crate) struct ErrorSiteGuard {
64 m: *mut Machine,
65 saved: u32,
66}
67
68impl ErrorSiteGuard {
69 pub(crate) fn enter(m: *mut Machine, site_id: u32) -> Self {
70 // SAFETY: `m` is the valid Machine pointer the builtin was called
71 // with. We touch only `error_site`, and the `Drop` write runs after
72 // the builtin's `&mut Machine` borrows have ended.
73 let saved = unsafe { (*m).error_site };
74 unsafe { (*m).error_site = site_id };
75 ErrorSiteGuard { m, saved }
76 }
77}
78
79impl Drop for ErrorSiteGuard {
80 fn drop(&mut self) {
81 unsafe { (*self.m).error_site = self.saved };
82 }
83}
84
85/// RAII bound on `metacall_depth` (#23): increments on `enter`, decrements on
86/// `Drop`, so the count unwinds on *every* exit path of `call_goal` — early
87/// return on the limit, normal return, or a panic. Same discipline as
88/// `ErrorSiteGuard`; matters once a Machine outlives a single query (the
89/// embedding designs), where a leaked `+1` would tighten the ceiling on reuse.
90#[must_use = "binding to `let _` drops the guard immediately; use `let _g = ...`"]
91pub(crate) struct MetacallDepthGuard {
92 m: *mut Machine,
93}
94
95impl MetacallDepthGuard {
96 pub(crate) fn enter(m: *mut Machine) -> Self {
97 // SAFETY: `m` is the live Machine pointer `call_goal` was entered with;
98 // the `Drop` write runs after its `&mut Machine` borrows have ended.
99 unsafe { (*m).metacall_depth += 1 };
100 MetacallDepthGuard { m }
101 }
102}
103
104impl Drop for MetacallDepthGuard {
105 fn drop(&mut self) {
106 unsafe { (*self.m).metacall_depth -= 1 };
107 }
108}
109
/// Destination for the bytes produced by `write/1`, `writeln/1` and `nl/0`.
/// The CLI/WASI shell streams to process stdout (the v1 contract). The
/// Tier-2 reactor runs in a V8 isolate with no stdout, so it captures the
/// output losslessly into a buffer that the result JSON carries back to the
/// host (docs/design/done/WASM_TIER2_PLAN.md D4).
pub enum OutputSink {
    /// Write straight to process stdout (native CLI / WASI).
    Stdout,
    /// Accumulate into the contained buffer (reactor); the buffer is what
    /// gets returned as the result's `output`.
    Capture(String),
}
120
/// Flavour of a choice point. `Catch` frames take part in error unwinding
/// (`drive()` in solve.rs) and also stop cut truncation (v1 rule: catch is
/// opaque to cut).
#[derive(Clone, Copy, PartialEq)]
pub enum CpKind {
    /// Ordinary backtracking alternative.
    Normal,
    /// Frame pushed via `Machine::push_catch_cp`.
    Catch,
}
128
129pub struct ChoicePoint {
130 pub trail_mark: usize,
131 pub heap_mark: usize,
132 pub retry: ContFn,
133 pub env: u64,
134 pub kind: CpKind,
135}
136
137/// A runtime error in flight. The ball is a relocatable copy (it must
138/// survive heap rewinding on the way to a catch frame); the message is
139/// its v1-format rendering for top-level output (exit code 3).
140pub struct RtError {
141 pub ball: crate::copyterm::TermBuf,
142 pub message: String,
143 pub uncatchable: bool,
144}
145
146pub struct Machine {
147 pub heap: Vec<Word>,
148 pub trail: Vec<u64>, // heap indices of bound REF cells
149 pub cps: Vec<ChoicePoint>,
150 pub areg: [Word; MAX_ARGS],
151 /// Build registers for `plg_rt_put_struct` (separate from areg so
152 /// argument setup and term construction never clobber each other).
153 pub breg: [Word; MAX_ARGS],
154 pub k_fn: ContFn,
155 pub k_env: u64,
156 pub steps: u64,
157 pub step_limit: u64,
158 /// Live nesting depth of the runtime goal-walker (`call_goal`). Compiled
159 /// predicate-to-predicate transfers `musttail` and never touch this; only
160 /// runtime-walked goals (queries, `call/N`, `findall/3`, `catch/3`
161 /// recovery) recurse the C stack here. Bounded by `metacall_depth_limit`
162 /// so a deep non-trampolined metacall fails gracefully instead of
163 /// overflowing the native stack (#23).
164 pub metacall_depth: usize,
165 pub metacall_depth_limit: usize,
166 pub error: Option<RtError>,
167 pub atoms: StringInterner,
168 pub registry: Vec<RegistryEntry>,
169 /// Source-location side-table (SPANS.md Layer 3), handed over by codegen
170 /// at init. Empty for binaries built without provenance.
171 pub srcmap: Vec<SrcLoc>,
172 /// `file_id` → filename, parallel to `srcmap`'s `file` field.
173 pub files: Vec<String>,
174 /// `site_id` of the raise currently in flight (SPANS.md Layer 3).
175 /// `set_formal` appends ` at file:line:col` from it. `NO_SITE` (the
176 /// default, and the value for runtime-internal/query-side raises) means no
177 /// suffix — keeping those messages byte-identical to v1.
178 ///
179 /// INVARIANT: a raising compiled builtin must set this to its site only
180 /// around its own work and restore it after — always via `ErrorSiteGuard`,
181 /// which makes the set/restore impossible to forget and keeps nested
182 /// raises correct (each restores the caller's site, not `NO_SITE`).
183 pub error_site: u32,
184 /// Query variables in source order: (name, heap index of the cell).
185 pub query_vars: Vec<(String, usize)>,
186 /// findall/3 collector stack (a stack because findall can nest):
187 /// each level accumulates relocatable copies of template instances.
188 pub findall_stack: Vec<Vec<crate::copyterm::TermBuf>>,
189 /// Cut barrier for `!` in RUNTIME-WALKED goals (queries, metacalls).
190 /// Call-like constructs set it for their inner goal; every walker
191 /// continuation frame snapshots and restores it (the runtime mirror
192 /// of the compiled cut_slot). Compiled `!` never reads this.
193 pub qbarrier: usize,
194 /// Solutions captured by the print continuation, already rendered.
195 pub solutions: Vec<crate::render::RenderedSolution>,
196 pub solution_limit: Option<usize>,
197 /// Sink for `write/1`/`writeln/1`/`nl/0`. Defaults to `Stdout`; the
198 /// reactor swaps in `Capture` so output survives an isolate with no stdout.
199 pub output: OutputSink,
200}
201
202unsafe extern "C" fn no_continuation(_m: *mut Machine, _env: u64) -> i32 {
203 // Reaching the continuation with none installed is a codegen bug.
204 debug_assert!(false, "no continuation installed");
205 0
206}
207
208impl Machine {
209 pub fn new(atoms: StringInterner, registry: Vec<RegistryEntry>) -> Box<Machine> {
210 Box::new(Machine {
211 heap: Vec::with_capacity(4096),
212 trail: Vec::with_capacity(256),
213 cps: Vec::with_capacity(64),
214 areg: [0; MAX_ARGS],
215 breg: [0; MAX_ARGS],
216 k_fn: no_continuation,
217 k_env: 0,
218 steps: 0,
219 step_limit: 10_000, // v1 default
220 metacall_depth: 0,
221 // Conservative: well below the native C-stack capacity (~5-6k
222 // walker frames overflow an 8MB stack). The trampoline keeps the
223 // common `call(pred)` tail recursion off this path entirely, so
224 // this only bounds rare control-construct / findall recursion.
225 metacall_depth_limit: 1000,
226 error: None,
227 atoms,
228 registry,
229 srcmap: Vec::new(),
230 files: Vec::new(),
231 error_site: NO_SITE,
232 query_vars: Vec::new(),
233 findall_stack: Vec::new(),
234 qbarrier: 0,
235 solutions: Vec::new(),
236 solution_limit: None,
237 output: OutputSink::Stdout,
238 })
239 }
240
241 /// Reset per-query accumulated state, preserving the program (atoms,
242 /// registry, provenance) and the caller-set per-query limits
243 /// (`step_limit`, `metacall_depth_limit`, `solution_limit` — the caller
244 /// sets those each query). Lives next to the field declarations on
245 /// purpose: adding a field without resetting it here is then a *local*
246 /// review question. The leak class this guards only surfaces under
247 /// sustained reuse (the reactor serving many queries on one Machine),
248 /// never in a single-query CLI run (WASM_TIER2_PLAN.md A2 / finding #3).
249 pub fn reset_per_query(&mut self) {
250 self.heap.clear();
251 self.trail.clear();
252 self.cps.clear();
253 self.areg = [0; MAX_ARGS];
254 self.breg = [0; MAX_ARGS];
255 self.k_fn = no_continuation;
256 self.k_env = 0;
257 self.steps = 0;
258 self.metacall_depth = 0;
259 self.error = None;
260 self.error_site = NO_SITE;
261 self.query_vars.clear();
262 self.findall_stack.clear();
263 self.qbarrier = 0;
264 self.solutions.clear();
265 if let OutputSink::Capture(buf) = &mut self.output {
266 buf.clear();
267 }
268 }
269
270 /// Append `s` to the current output sink: stream to stdout (CLI/WASI) or
271 /// accumulate in the capture buffer (reactor). The single seam every
272 /// `write`-family builtin goes through.
273 pub fn write_out(&mut self, s: &str) {
274 match &mut self.output {
275 OutputSink::Stdout => {
276 use std::io::Write;
277 print!("{s}");
278 let _ = std::io::stdout().flush();
279 }
280 OutputSink::Capture(buf) => buf.push_str(s),
281 }
282 }
283
284 /// The captured output for this query, or `None` when streaming to stdout
285 /// (the CLI never carries output in its JSON — it already went to stdout).
286 pub fn captured_output(&self) -> Option<&str> {
287 match &self.output {
288 OutputSink::Capture(s) => Some(s.as_str()),
289 OutputSink::Stdout => None,
290 }
291 }
292
293 /// Allocate a fresh unbound variable cell; returns its REF word.
294 pub fn new_var(&mut self) -> Word {
295 let idx = self.heap.len();
296 self.heap.push(cell::make_ref(idx)); // unbound = self-reference
297 cell::make_ref(idx)
298 }
299
300 /// Allocate `n` raw cells (a frame); returns the base index.
301 /// Frames hold continuation state — they are not terms and must
302 /// never be unified.
303 pub fn frame_alloc(&mut self, n: usize) -> usize {
304 let idx = self.heap.len();
305 self.heap.resize(idx + n, 0);
306 idx
307 }
308
309 /// Bind an unbound REF cell to a value, recording it on the trail.
310 pub fn bind(&mut self, ref_idx: usize, value: Word) {
311 debug_assert_eq!(
312 self.heap[ref_idx],
313 cell::make_ref(ref_idx),
314 "bind target must be unbound"
315 );
316 self.heap[ref_idx] = value;
317 self.trail.push(ref_idx as u64);
318 }
319
320 /// Follow REF chains to the representative word. Bound chains are
321 /// acyclic (we only ever bind unbound cells), so this terminates.
322 pub fn deref(&self, mut w: Word) -> Word {
323 while cell::tag_of(w) == cell::TAG_REF {
324 let idx = cell::payload(w) as usize;
325 let c = self.heap[idx];
326 if c == w {
327 return w; // unbound
328 }
329 w = c;
330 }
331 w
332 }
333
334 pub fn push_cp(&mut self, retry: ContFn, env: u64) {
335 self.cps.push(ChoicePoint {
336 trail_mark: self.trail.len(),
337 heap_mark: self.heap.len(),
338 retry,
339 env,
340 kind: CpKind::Normal,
341 });
342 }
343
344 /// Push a choice point whose backtrack restore-point is an EXPLICIT mark,
345 /// captured before the current alternative bound anything — rather than
346 /// the live heap/trail top. Lets a nondeterministic builtin bind a
347 /// solution and still record the pre-binding state for the next
348 /// alternative, without a rewind-then-rebind.
349 ///
350 /// **CALLER MUST GUARANTEE** `trail_mark <= self.trail.len()` and
351 /// `heap_mark <= self.heap.len()`. An out-of-range mark is caught only by
352 /// the debug assertion below; in release it silently no-ops the backtrack
353 /// truncation (`Vec::truncate(n)` with `n > len` does nothing), leaking
354 /// bindings from a supposedly-undone alternative into the next.
355 pub fn push_cp_at(&mut self, retry: ContFn, env: u64, trail_mark: usize, heap_mark: usize) {
356 debug_assert!(trail_mark <= self.trail.len() && heap_mark <= self.heap.len());
357 self.cps.push(ChoicePoint {
358 trail_mark,
359 heap_mark,
360 retry,
361 env,
362 kind: CpKind::Normal,
363 });
364 }
365
366 pub fn push_catch_cp(&mut self, retry: ContFn, env: u64) {
367 self.cps.push(ChoicePoint {
368 trail_mark: self.trail.len(),
369 heap_mark: self.heap.len(),
370 retry,
371 env,
372 kind: CpKind::Catch,
373 });
374 }
375
376 /// Cut: truncate the CP stack to `height`, but stop at a catch
377 /// frame (v1 rule: catch/3 is opaque to cut — `!` inside catch's
378 /// goal cannot prune the catch frame or anything below it).
379 pub fn cut_to(&mut self, height: usize) {
380 while self.cps.len() > height {
381 if self.cps.last().is_some_and(|cp| cp.kind == CpKind::Catch) {
382 break;
383 }
384 self.cps.pop();
385 }
386 }
387
388 /// Rewind bindings and heap to a popped choice point's marks.
389 pub fn rewind_to(&mut self, trail_mark: usize, heap_mark: usize) {
390 while self.trail.len() > trail_mark {
391 let idx = self.trail.pop().unwrap() as usize;
392 self.heap[idx] = cell::make_ref(idx);
393 }
394 self.heap.truncate(heap_mark);
395 }
396
397 /// Bump the step counter; on exceeding the limit set the uncatchable
398 /// resource error (v1: step limit cannot be trapped by catch/3).
399 /// v1 wording, byte-for-byte (solver.rs step_limit_thrown).
400 pub fn step(&mut self) -> bool {
401 self.steps += 1;
402 if self.steps > self.step_limit {
403 let context = format!("Maximum step limit exceeded ({})", self.step_limit);
404 crate::errors::resource(self, "steps", &context, true);
405 return false;
406 }
407 true
408 }
409
410 pub fn registry_lookup(&self, functor: u32, arity: u32) -> Option<ContFn> {
411 self.registry
412 .binary_search_by_key(&(functor, arity), |e| (e.functor, e.arity))
413 .ok()
414 .map(|i| self.registry[i].f)
415 }
416
417 /// Install the codegen-emitted source-location side-table (SPANS.md
418 /// Layer 3). Called once from `plg_rt_init`.
419 pub fn set_provenance(&mut self, srcmap: Vec<SrcLoc>, files: Vec<String>) {
420 self.srcmap = srcmap;
421 self.files = files;
422 }
423
424 /// Resolve a `site_id` to `(filename, line, col)`, or `None` for the
425 /// `NO_SITE` sentinel / a binary built without provenance. Owned so the
426 /// (cold) error path can mutate `self.error` without a borrow conflict.
427 pub fn site_location(&self, site_id: u32) -> Option<(String, u32, u32)> {
428 if site_id == NO_SITE {
429 return None;
430 }
431 let loc = self.srcmap.get(site_id as usize)?;
432 let file = self.files.get(loc.file as usize)?;
433 Some((file.clone(), loc.line, loc.col))
434 }
435}
436
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cell::*;

    /// Fresh machine with an empty atom table and an empty registry.
    fn machine() -> Box<Machine> {
        Machine::new(StringInterner::new(), Vec::new())
    }

    #[test]
    fn new_var_is_unbound_self_ref() {
        let mut m = machine();
        let fresh = m.new_var();
        assert_eq!(tag_of(fresh), TAG_REF);
        // An unbound variable dereferences to itself.
        assert_eq!(m.deref(fresh), fresh);
    }

    #[test]
    fn no_site_sentinel_value_is_pinned() {
        // ABI contract with plg_compiler::codegen::NO_SITE (see its docs).
        assert_eq!(NO_SITE, u32::MAX);
    }

    #[test]
    fn bind_and_rewind() {
        let mut m = machine();
        let var = m.new_var();
        // Marks are taken after allocation, so rewinding keeps the cell.
        let (tmark, hmark) = (m.trail.len(), m.heap.len());

        m.bind(payload(var) as usize, make_atom(7));
        assert_eq!(m.deref(var), make_atom(7));

        m.rewind_to(tmark, hmark);
        assert_eq!(m.deref(var), var, "binding undone");
    }

    #[test]
    fn deref_follows_chains() {
        let mut m = machine();
        let first = m.new_var();
        let second = m.new_var();
        // first -> second -> -5
        m.bind(payload(first) as usize, second);
        m.bind(payload(second) as usize, make_int(-5));
        assert_eq!(int_value(m.deref(first)), -5);
    }

    #[test]
    fn step_limit_sets_uncatchable_error() {
        let mut m = machine();
        m.step_limit = 2;
        // Two steps fit inside the limit; the third exceeds it.
        assert!(m.step());
        assert!(m.step());
        assert!(!m.step());
        assert!(m.error.as_ref().unwrap().uncatchable);
    }
}
491}