Skip to main content

plg_runtime/
machine.rs

1//! The Machine: the single runtime context every compiled predicate
2//! receives (`%M` in generated IR). Owns the term heap, trail,
3//! choice-point stack, argument registers, the current success
4//! continuation, step accounting, and the runtime atom table.
5
6use crate::cell::{self, Word};
7use plg_shared::StringInterner;
8
9/// Uniform signature shared by every compiled predicate, continuation,
10/// and retry function — and by runtime-provided continuations. The
11/// uniform C-ABI prototype is what makes `musttail` transfers valid.
12/// Returns 0 = fail/exhausted (driver backtracks), 1 = stop (limit hit
13/// or final success).
14pub type ContFn = unsafe extern "C" fn(*mut Machine, u64) -> i32;
15
/// Upper bound on predicate arity that can be passed through the argument
/// registers; it sizes both `areg` and `breg`. v1 had no practical limit —
/// if this is ever hit, raise it together with a frame-passing scheme.
pub const MAX_ARGS: usize = 16;
19
20/// Registry row emitted by codegen as `{ i32, i32, ptr }`, sorted by
21/// (functor, arity) for binary search.
22#[repr(C)]
23#[derive(Clone, Copy)]
24pub struct RegistryEntry {
25    pub functor: u32,
26    pub arity: u32,
27    pub f: ContFn,
28}
29
/// One source-location row of the codegen-emitted `@plg_srcmap` side-table
/// (SPANS.md Layer 3). A raising call site passes its index (`site_id`) and
/// the error path resolves it to `file:line:col`. Layout mirrors the IR's
/// `{i32, i32, i32}`.
///
/// Plain data: `Debug`, `PartialEq` and `Eq` are derived so rows can be
/// compared and printed in tests and diagnostics.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct SrcLoc {
    /// Index into the machine's filename table.
    pub file: u32,
    /// Line component of the location.
    pub line: u32,
    /// Column component of the location.
    pub col: u32,
}
41
/// Sentinel `site_id` for "there is no source location". Used by
/// runtime-internal raises (query-side undefined goals) and by any binary
/// built without provenance; the resulting error message carries no
/// ` at file:line:col` suffix.
///
/// ABI contract: this MUST stay equal to `plg_compiler::codegen::NO_SITE`,
/// the value codegen emits as the `site_id` argument. The two constants live
/// in separate crates, so each side pins `== u32::MAX` in a unit test to
/// catch a renumber made on only one side.
pub const NO_SITE: u32 = u32::MAX;
50
51/// RAII guard for the in-flight raise's site (SPANS.md Layer 3). A raising
52/// compiled builtin creates one at its ABI boundary; `Drop` **restores the
53/// previous value** (not `NO_SITE`), so the set/clear can't be forgotten and
54/// nested raises — an outer builtin whose work reaches an inner raising
55/// builtin — each see their own site without a save/restore stack. Use this
56/// instead of hand-writing `m.error_site = site; ...; m.error_site = NO_SITE`.
57///
58/// FOOTGUN: bind to a **named** variable — `let _site = ...;`. `let _ = ...`
59/// drops the guard immediately, so the body runs with the site already
60/// restored (no provenance, or the caller's site). `#[must_use]` does not
61/// catch the `let _` form.
62#[must_use = "binding to `let _` drops the guard immediately; use `let _site = ...`"]
63pub(crate) struct ErrorSiteGuard {
64    m: *mut Machine,
65    saved: u32,
66}
67
68impl ErrorSiteGuard {
69    pub(crate) fn enter(m: *mut Machine, site_id: u32) -> Self {
70        // SAFETY: `m` is the valid Machine pointer the builtin was called
71        // with. We touch only `error_site`, and the `Drop` write runs after
72        // the builtin's `&mut Machine` borrows have ended.
73        let saved = unsafe { (*m).error_site };
74        unsafe { (*m).error_site = site_id };
75        ErrorSiteGuard { m, saved }
76    }
77}
78
79impl Drop for ErrorSiteGuard {
80    fn drop(&mut self) {
81        unsafe { (*self.m).error_site = self.saved };
82    }
83}
84
85/// RAII bound on `metacall_depth` (#23): increments on `enter`, decrements on
86/// `Drop`, so the count unwinds on *every* exit path of `call_goal` — early
87/// return on the limit, normal return, or a panic. Same discipline as
88/// `ErrorSiteGuard`; matters once a Machine outlives a single query (the
89/// embedding designs), where a leaked `+1` would tighten the ceiling on reuse.
90#[must_use = "binding to `let _` drops the guard immediately; use `let _g = ...`"]
91pub(crate) struct MetacallDepthGuard {
92    m: *mut Machine,
93}
94
95impl MetacallDepthGuard {
96    pub(crate) fn enter(m: *mut Machine) -> Self {
97        // SAFETY: `m` is the live Machine pointer `call_goal` was entered with;
98        // the `Drop` write runs after its `&mut Machine` borrows have ended.
99        unsafe { (*m).metacall_depth += 1 };
100        MetacallDepthGuard { m }
101    }
102}
103
104impl Drop for MetacallDepthGuard {
105    fn drop(&mut self) {
106        unsafe { (*self.m).metacall_depth -= 1 };
107    }
108}
109
/// Where `write/1`/`writeln/1`/`nl/0` send their bytes. The CLI/WASI shell
/// streams to process stdout (the v1 contract); the Tier-2 reactor has no
/// stdout in a V8 isolate, so it captures losslessly into a buffer that the
/// result JSON carries back to the host (docs/design/done/WASM_TIER2_PLAN.md D4).
///
/// `Default` yields `Stdout`, the sink a freshly built machine starts with;
/// `Debug` is derived so the sink (and any captured text) can be inspected.
#[derive(Debug, Default)]
pub enum OutputSink {
    /// Stream to process stdout immediately (native CLI / WASI).
    #[default]
    Stdout,
    /// Capture into a buffer (reactor) — returned as the result's `output`.
    Capture(String),
}
120
/// Kind tag carried by every choice point.
///
/// Catch frames participate in error unwinding (drive() in solve.rs)
/// and stop cut truncation (v1 rule: catch is opaque to cut).
///
/// `Debug` and `Eq` are derived alongside `PartialEq` so kinds can be used
/// directly in `assert_eq!` and in diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CpKind {
    /// Ordinary backtracking alternative.
    Normal,
    /// Frame pushed for `catch/3`.
    Catch,
}
128
129pub struct ChoicePoint {
130    pub trail_mark: usize,
131    pub heap_mark: usize,
132    pub retry: ContFn,
133    pub env: u64,
134    pub kind: CpKind,
135}
136
137/// A runtime error in flight. The ball is a relocatable copy (it must
138/// survive heap rewinding on the way to a catch frame); the message is
139/// its v1-format rendering for top-level output (exit code 3).
140pub struct RtError {
141    pub ball: crate::copyterm::TermBuf,
142    pub message: String,
143    pub uncatchable: bool,
144}
145
146pub struct Machine {
147    pub heap: Vec<Word>,
148    pub trail: Vec<u64>, // heap indices of bound REF cells
149    pub cps: Vec<ChoicePoint>,
150    pub areg: [Word; MAX_ARGS],
151    /// Build registers for `plg_rt_put_struct` (separate from areg so
152    /// argument setup and term construction never clobber each other).
153    pub breg: [Word; MAX_ARGS],
154    pub k_fn: ContFn,
155    pub k_env: u64,
156    pub steps: u64,
157    pub step_limit: u64,
158    /// Live nesting depth of the runtime goal-walker (`call_goal`). Compiled
159    /// predicate-to-predicate transfers `musttail` and never touch this; only
160    /// runtime-walked goals (queries, `call/N`, `findall/3`, `catch/3`
161    /// recovery) recurse the C stack here. Bounded by `metacall_depth_limit`
162    /// so a deep non-trampolined metacall fails gracefully instead of
163    /// overflowing the native stack (#23).
164    pub metacall_depth: usize,
165    pub metacall_depth_limit: usize,
166    pub error: Option<RtError>,
167    pub atoms: StringInterner,
168    pub registry: Vec<RegistryEntry>,
169    /// Source-location side-table (SPANS.md Layer 3), handed over by codegen
170    /// at init. Empty for binaries built without provenance.
171    pub srcmap: Vec<SrcLoc>,
172    /// `file_id` → filename, parallel to `srcmap`'s `file` field.
173    pub files: Vec<String>,
174    /// `site_id` of the raise currently in flight (SPANS.md Layer 3).
175    /// `set_formal` appends ` at file:line:col` from it. `NO_SITE` (the
176    /// default, and the value for runtime-internal/query-side raises) means no
177    /// suffix — keeping those messages byte-identical to v1.
178    ///
179    /// INVARIANT: a raising compiled builtin must set this to its site only
180    /// around its own work and restore it after — always via `ErrorSiteGuard`,
181    /// which makes the set/restore impossible to forget and keeps nested
182    /// raises correct (each restores the caller's site, not `NO_SITE`).
183    pub error_site: u32,
184    /// Query variables in source order: (name, heap index of the cell).
185    pub query_vars: Vec<(String, usize)>,
186    /// findall/3 collector stack (a stack because findall can nest):
187    /// each level accumulates relocatable copies of template instances.
188    pub findall_stack: Vec<Vec<crate::copyterm::TermBuf>>,
189    /// Cut barrier for `!` in RUNTIME-WALKED goals (queries, metacalls).
190    /// Call-like constructs set it for their inner goal; every walker
191    /// continuation frame snapshots and restores it (the runtime mirror
192    /// of the compiled cut_slot). Compiled `!` never reads this.
193    pub qbarrier: usize,
194    /// Solutions captured by the print continuation, already rendered.
195    pub solutions: Vec<crate::render::RenderedSolution>,
196    pub solution_limit: Option<usize>,
197    /// Sink for `write/1`/`writeln/1`/`nl/0`. Defaults to `Stdout`; the
198    /// reactor swaps in `Capture` so output survives an isolate with no stdout.
199    pub output: OutputSink,
200    /// Wire-encoding capability table: pointers to the `EncoderDesc` statics
201    /// codegen baked for the encodings this binary advertises (`io_format/1`,
202    /// default `[text]`). `entry.rs` resolves `--format` against it; encoders
203    /// not listed were dead-stripped and won't resolve. (docs/design/IO.md)
204    pub capabilities: Vec<*const crate::wire::EncoderDesc>,
205}
206
207unsafe extern "C" fn no_continuation(_m: *mut Machine, _env: u64) -> i32 {
208    // Reaching the continuation with none installed is a codegen bug.
209    debug_assert!(false, "no continuation installed");
210    0
211}
212
213impl Machine {
214    pub fn new(atoms: StringInterner, registry: Vec<RegistryEntry>) -> Box<Machine> {
215        Box::new(Machine {
216            heap: Vec::with_capacity(4096),
217            trail: Vec::with_capacity(256),
218            cps: Vec::with_capacity(64),
219            areg: [0; MAX_ARGS],
220            breg: [0; MAX_ARGS],
221            k_fn: no_continuation,
222            k_env: 0,
223            steps: 0,
224            step_limit: 10_000, // v1 default
225            metacall_depth: 0,
226            // Conservative: well below the native C-stack capacity (~5-6k
227            // walker frames overflow an 8MB stack). The trampoline keeps the
228            // common `call(pred)` tail recursion off this path entirely, so
229            // this only bounds rare control-construct / findall recursion.
230            metacall_depth_limit: 1000,
231            error: None,
232            atoms,
233            registry,
234            srcmap: Vec::new(),
235            files: Vec::new(),
236            error_site: NO_SITE,
237            query_vars: Vec::new(),
238            findall_stack: Vec::new(),
239            qbarrier: 0,
240            solutions: Vec::new(),
241            solution_limit: None,
242            output: OutputSink::Stdout,
243            capabilities: Vec::new(),
244        })
245    }
246
247    /// Reset per-query accumulated state, preserving the program (atoms,
248    /// registry, provenance) and the caller-set per-query limits
249    /// (`step_limit`, `metacall_depth_limit`, `solution_limit` — the caller
250    /// sets those each query). Lives next to the field declarations on
251    /// purpose: adding a field without resetting it here is then a *local*
252    /// review question. The leak class this guards only surfaces under
253    /// sustained reuse (the reactor serving many queries on one Machine),
254    /// never in a single-query CLI run (WASM_TIER2_PLAN.md A2 / finding #3).
255    pub fn reset_per_query(&mut self) {
256        self.heap.clear();
257        self.trail.clear();
258        self.cps.clear();
259        self.areg = [0; MAX_ARGS];
260        self.breg = [0; MAX_ARGS];
261        self.k_fn = no_continuation;
262        self.k_env = 0;
263        self.steps = 0;
264        self.metacall_depth = 0;
265        self.error = None;
266        self.error_site = NO_SITE;
267        self.query_vars.clear();
268        self.findall_stack.clear();
269        self.qbarrier = 0;
270        self.solutions.clear();
271        if let OutputSink::Capture(buf) = &mut self.output {
272            buf.clear();
273        }
274    }
275
276    /// Append `s` to the current output sink: stream to stdout (CLI/WASI) or
277    /// accumulate in the capture buffer (reactor). The single seam every
278    /// `write`-family builtin goes through.
279    pub fn write_out(&mut self, s: &str) {
280        match &mut self.output {
281            OutputSink::Stdout => {
282                use std::io::Write;
283                print!("{s}");
284                let _ = std::io::stdout().flush();
285            }
286            OutputSink::Capture(buf) => buf.push_str(s),
287        }
288    }
289
290    /// The captured output for this query, or `None` when streaming to stdout
291    /// (the CLI never carries output in its JSON — it already went to stdout).
292    pub fn captured_output(&self) -> Option<&str> {
293        match &self.output {
294            OutputSink::Capture(s) => Some(s.as_str()),
295            OutputSink::Stdout => None,
296        }
297    }
298
299    /// Allocate a fresh unbound variable cell; returns its REF word.
300    pub fn new_var(&mut self) -> Word {
301        let idx = self.heap.len();
302        self.heap.push(cell::make_ref(idx)); // unbound = self-reference
303        cell::make_ref(idx)
304    }
305
306    /// Allocate `n` raw cells (a frame); returns the base index.
307    /// Frames hold continuation state — they are not terms and must
308    /// never be unified.
309    pub fn frame_alloc(&mut self, n: usize) -> usize {
310        let idx = self.heap.len();
311        self.heap.resize(idx + n, 0);
312        idx
313    }
314
315    /// Bind an unbound REF cell to a value, recording it on the trail.
316    pub fn bind(&mut self, ref_idx: usize, value: Word) {
317        debug_assert_eq!(
318            self.heap[ref_idx],
319            cell::make_ref(ref_idx),
320            "bind target must be unbound"
321        );
322        self.heap[ref_idx] = value;
323        self.trail.push(ref_idx as u64);
324    }
325
326    /// Follow REF chains to the representative word. Bound chains are
327    /// acyclic (we only ever bind unbound cells), so this terminates.
328    pub fn deref(&self, mut w: Word) -> Word {
329        while cell::tag_of(w) == cell::TAG_REF {
330            let idx = cell::payload(w) as usize;
331            let c = self.heap[idx];
332            if c == w {
333                return w; // unbound
334            }
335            w = c;
336        }
337        w
338    }
339
340    pub fn push_cp(&mut self, retry: ContFn, env: u64) {
341        self.cps.push(ChoicePoint {
342            trail_mark: self.trail.len(),
343            heap_mark: self.heap.len(),
344            retry,
345            env,
346            kind: CpKind::Normal,
347        });
348    }
349
350    /// Push a choice point whose backtrack restore-point is an EXPLICIT mark,
351    /// captured before the current alternative bound anything — rather than
352    /// the live heap/trail top. Lets a nondeterministic builtin bind a
353    /// solution and still record the pre-binding state for the next
354    /// alternative, without a rewind-then-rebind.
355    ///
356    /// **CALLER MUST GUARANTEE** `trail_mark <= self.trail.len()` and
357    /// `heap_mark <= self.heap.len()`. An out-of-range mark is caught only by
358    /// the debug assertion below; in release it silently no-ops the backtrack
359    /// truncation (`Vec::truncate(n)` with `n > len` does nothing), leaking
360    /// bindings from a supposedly-undone alternative into the next.
361    pub fn push_cp_at(&mut self, retry: ContFn, env: u64, trail_mark: usize, heap_mark: usize) {
362        debug_assert!(trail_mark <= self.trail.len() && heap_mark <= self.heap.len());
363        self.cps.push(ChoicePoint {
364            trail_mark,
365            heap_mark,
366            retry,
367            env,
368            kind: CpKind::Normal,
369        });
370    }
371
372    pub fn push_catch_cp(&mut self, retry: ContFn, env: u64) {
373        self.cps.push(ChoicePoint {
374            trail_mark: self.trail.len(),
375            heap_mark: self.heap.len(),
376            retry,
377            env,
378            kind: CpKind::Catch,
379        });
380    }
381
382    /// Cut: truncate the CP stack to `height`, but stop at a catch
383    /// frame (v1 rule: catch/3 is opaque to cut — `!` inside catch's
384    /// goal cannot prune the catch frame or anything below it).
385    pub fn cut_to(&mut self, height: usize) {
386        while self.cps.len() > height {
387            if self.cps.last().is_some_and(|cp| cp.kind == CpKind::Catch) {
388                break;
389            }
390            self.cps.pop();
391        }
392    }
393
394    /// Rewind bindings and heap to a popped choice point's marks.
395    pub fn rewind_to(&mut self, trail_mark: usize, heap_mark: usize) {
396        while self.trail.len() > trail_mark {
397            let idx = self.trail.pop().unwrap() as usize;
398            self.heap[idx] = cell::make_ref(idx);
399        }
400        self.heap.truncate(heap_mark);
401    }
402
403    /// Bump the step counter; on exceeding the limit set the uncatchable
404    /// resource error (v1: step limit cannot be trapped by catch/3).
405    /// v1 wording, byte-for-byte (solver.rs step_limit_thrown).
406    pub fn step(&mut self) -> bool {
407        self.steps += 1;
408        if self.steps > self.step_limit {
409            let context = format!("Maximum step limit exceeded ({})", self.step_limit);
410            crate::errors::resource(self, "steps", &context, true);
411            return false;
412        }
413        true
414    }
415
416    pub fn registry_lookup(&self, functor: u32, arity: u32) -> Option<ContFn> {
417        self.registry
418            .binary_search_by_key(&(functor, arity), |e| (e.functor, e.arity))
419            .ok()
420            .map(|i| self.registry[i].f)
421    }
422
423    /// Install the codegen-emitted source-location side-table (SPANS.md
424    /// Layer 3). Called once from `plg_rt_init`.
425    pub fn set_provenance(&mut self, srcmap: Vec<SrcLoc>, files: Vec<String>) {
426        self.srcmap = srcmap;
427        self.files = files;
428    }
429
430    /// Resolve a `site_id` to `(filename, line, col)`, or `None` for the
431    /// `NO_SITE` sentinel / a binary built without provenance. Owned so the
432    /// (cold) error path can mutate `self.error` without a borrow conflict.
433    pub fn site_location(&self, site_id: u32) -> Option<(String, u32, u32)> {
434        if site_id == NO_SITE {
435            return None;
436        }
437        let loc = self.srcmap.get(site_id as usize)?;
438        let file = self.files.get(loc.file as usize)?;
439        Some((file.clone(), loc.line, loc.col))
440    }
441}
442
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cell::*;

    /// Fresh machine with an empty atom table and an empty registry.
    fn machine() -> Box<Machine> {
        let atoms = StringInterner::new();
        let registry = Vec::new();
        Machine::new(atoms, registry)
    }

    #[test]
    fn new_var_is_unbound_self_ref() {
        let mut m = machine();
        let var = m.new_var();
        assert_eq!(tag_of(var), TAG_REF);
        // Dereferencing an unbound variable yields the variable itself.
        assert_eq!(m.deref(var), var);
    }

    #[test]
    fn no_site_sentinel_value_is_pinned() {
        // ABI contract with plg_compiler::codegen::NO_SITE (see its docs).
        assert_eq!(NO_SITE, u32::MAX);
    }

    #[test]
    fn bind_and_rewind() {
        let mut m = machine();
        let var = m.new_var();
        // Marks are taken after allocation so the rewind keeps the cell.
        let (trail_mark, heap_mark) = (m.trail.len(), m.heap.len());
        m.bind(payload(var) as usize, make_atom(7));
        assert_eq!(m.deref(var), make_atom(7));
        m.rewind_to(trail_mark, heap_mark);
        assert_eq!(m.deref(var), var, "binding undone");
    }

    #[test]
    fn deref_follows_chains() {
        let mut m = machine();
        let first = m.new_var();
        let second = m.new_var();
        // first -> second -> -5
        m.bind(payload(first) as usize, second);
        m.bind(payload(second) as usize, make_int(-5));
        assert_eq!(int_value(m.deref(first)), -5);
    }

    #[test]
    fn step_limit_sets_uncatchable_error() {
        let mut m = machine();
        m.step_limit = 2;
        assert!(m.step());
        assert!(m.step());
        // The third step crosses the limit.
        assert!(!m.step());
        let err = m.error.as_ref().unwrap();
        assert!(err.uncatchable);
    }
}
497}