Skip to main content

plg_runtime/builtins/
facts.rs

1//! Fact-table lookup (FACT_TABLE.md, Stages A–C): the generic enumerator for a
2//! predicate compiled to a `.rodata` table instead of one function per clause.
3//! Same observable behavior as the per-clause facts — solution order = program
4//! order, choice-point backtracking — mirroring `between/3`'s shape.
5//!
6//! A table cell is one word. Immediate columns (atom / i61-int) are stored
7//! inline and unified directly. Non-immediate columns (compound, list, float,
8//! big-int — Stage C) are stored as a *blob reference*: a `STR`/`LST`/`FLT`/
9//! `BIG` word whose payload indexes a per-predicate `.rodata` serialized blob
10//! (the `copyterm`/`TermBuf` format). `fact_scan` restores such a cell onto the
11//! heap via `restore_cells` before unifying — so the cell tag (atom/int vs the
12//! pointer tags) tells the two cases apart with no extra metadata.
13//!
14//! Stage B's first-argument index applies only when column 0 is all-immediate;
15//! otherwise (or for an unbound/non-immediate query arg) the scan covers all
16//! rows. Either way matching rows are visited in program order.
17//!
18//! Delivery to the continuation is a `musttail` in the GENERATED entry/retry
19//! functions, not here: these helpers only find/bind a row and push the
20//! choice point, then RETURN. That return pops their frame before the
21//! generated code tail-calls the continuation, so recursion *through* a fact
22//! predicate (e.g. `edge` in a recursive `path/2`) keeps a constant C stack.
23
24use crate::cell::{TAG_ATOM, TAG_INT, Word, tag_of};
25use crate::copyterm::restore_cells;
26use crate::machine::{ContFn, Machine};
27use crate::unify::unify;
28
29// Control-frame layout (heap cells): a fixed prefix, then the arg snapshots.
30const TBL: usize = 0; // table pointer (ptrtoint of the .rodata global)
31const NROWS: usize = 1;
32const ARITY: usize = 2;
33const IDX: usize = 3; // first-arg index pointer, or 0 for a full scan (no
34// real `.rodata` global lives at address 0, so 0 is a safe sentinel — see
35// `fact_scan` for how a position maps to a row through this index, or to the
36// row directly when 0)
37const BLOB: usize = 4; // serialized-term blob pointer, or 0 if all-immediate
38const BLOBLEN: usize = 5; // blob length in words (for the slice bound)
39const CURSOR: usize = 6; // next position in [.., END) to try (see fact_scan
40// for the position→row mapping); mutated in place, untrailed
41const END: usize = 7; // exclusive upper bound of the cursor's range
42const RETRY: usize = 8; // the predicate's generated `@..._ftr` (a ContFn)
43const KFN: usize = 9;
44const KENV: usize = 10;
45const QBAR: usize = 11;
46const ARGS: usize = 12; // arg snapshots start here
47
48/// Read a `ContFn` from a frame cell that the generated IR wrote via
49/// `ptrtoint` — the retry pointer (`@..._ftr`) or the saved continuation
50/// (`k_fn`). Centralizes the one invariant both sites share: the cell holds a
51/// function pointer to an `i32 (ptr, i64)` we ourselves emitted.
52///
53/// # Safety
54/// `word` must be such a `ptrtoint`-encoded function pointer; nothing else is
55/// ever stored in these cells.
56unsafe fn read_contfn(word: u64) -> ContFn {
57    unsafe { std::mem::transmute::<usize, ContFn>(word as usize) }
58}
59
60/// Borrow a `.rodata` array of `len` words.
61///
62/// # Safety
63/// `ptr` is a generated `.rodata` global of exactly that length (FACT_TABLE.md)
64/// — the same kind of codegen-emitted, read-only table the runtime already
65/// reads for the atom table and predicate registry. Returns an empty slice for
66/// a null pointer (a predicate with no blob).
67unsafe fn rodata_slice<'a>(ptr: u64, len: usize) -> &'a [Word] {
68    if ptr == 0 {
69        return &[];
70    }
71    unsafe { std::slice::from_raw_parts(ptr as usize as *const Word, len) }
72}
73
74/// Compiled entry: snapshot the args + continuation into a control frame, pick
75/// the row range to scan (binary search on the index when the first arg is a
76/// bound atom/int, else all rows), then find the first matching row. Returns 1
77/// if a solution was set up (the generated entry then musttails the
78/// continuation), 0 if no row matches.
79///
80/// # Safety
81/// Called from generated code. `table_ptr` addresses a `.rodata` array of
82/// `nrows * arity` words; `idx_ptr` is 0 or a `.rodata` array of `nrows` row
83/// indices sorted by column 0; `blob_ptr` is 0 or a `.rodata` array of
84/// `blob_len` serialized-term words; `retry_ptr` is the predicate's `@..._ftr`.
85#[unsafe(no_mangle)]
86#[allow(clippy::too_many_arguments)]
87pub unsafe extern "C" fn plg_rt_fact_first(
88    m: *mut Machine,
89    table_ptr: i64,
90    idx_ptr: i64,
91    blob_ptr: i64,
92    blob_len: i64,
93    nrows: i64,
94    arity: i64,
95    retry_ptr: i64,
96) -> i32 {
97    let m = unsafe { &mut *m };
98    let nrows = nrows as usize;
99    let arity = arity as usize;
100
101    // Choose the iteration space. The index is consulted only when the first
102    // argument is a bound atom/int — the exact domain of an indexed column 0
103    // (the index is emitted only when column 0 is all-immediate) — so that
104    // Word-equality (what the binary search tests) coincides with what `unify`
105    // would accept. Unbound or non-immediate first args fall back to a full
106    // scan.
107    let (idx_for_frame, cursor0, end) = if arity == 0 {
108        (0u64, 0usize, nrows)
109    } else {
110        let a0 = m.deref(m.areg[0]);
111        if idx_ptr != 0 && matches!(tag_of(a0), TAG_ATOM | TAG_INT) {
112            let table = unsafe { rodata_slice(table_ptr as u64, nrows * arity) };
113            let idx = unsafe { rodata_slice(idx_ptr as u64, nrows) };
114            let (lo, hi) = equal_range(idx, table, arity, a0);
115            (idx_ptr as u64, lo, hi)
116        } else {
117            (0u64, 0usize, nrows)
118        }
119    };
120
121    let frame = m.frame_alloc(ARGS + arity);
122    m.heap[frame + TBL] = table_ptr as u64;
123    m.heap[frame + NROWS] = nrows as u64;
124    m.heap[frame + ARITY] = arity as u64;
125    m.heap[frame + IDX] = idx_for_frame;
126    m.heap[frame + BLOB] = blob_ptr as u64;
127    m.heap[frame + BLOBLEN] = blob_len as u64;
128    m.heap[frame + CURSOR] = cursor0 as u64;
129    m.heap[frame + END] = end as u64;
130    m.heap[frame + RETRY] = retry_ptr as u64;
131    m.heap[frame + KFN] = m.k_fn as usize as u64;
132    m.heap[frame + KENV] = m.k_env;
133    m.heap[frame + QBAR] = m.qbarrier as u64;
134    for c in 0..arity {
135        m.heap[frame + ARGS + c] = m.areg[c];
136    }
137    // `m.k_fn`/`k_env` are unchanged (the caller's continuation), so the
138    // generated `deliver:` reads the right continuation — no restore needed
139    // here (unlike `plg_rt_fact_next`, which runs after the driver may have
140    // overwritten them).
141    fact_scan(m, frame)
142}
143
144/// Choice-point retry: restore the saved continuation (the driver may have
145/// overwritten `m.k_fn`), then resume the scan from the frame's cursor.
146///
147/// # Safety
148/// Called by the solve driver with a frame built by `plg_rt_fact_first`.
149#[unsafe(no_mangle)]
150pub unsafe extern "C" fn plg_rt_fact_next(m: *mut Machine, frame: u64) -> i32 {
151    let m = unsafe { &mut *m };
152    let frame = frame as usize;
153    m.k_fn = unsafe { read_contfn(m.heap[frame + KFN]) };
154    m.k_env = m.heap[frame + KENV];
155    m.qbarrier = m.heap[frame + QBAR] as usize;
156    fact_scan(m, frame)
157}
158
159/// `[lower, upper)` positions in `idx` whose column-0 word equals `key`. `idx`
160/// is sorted by `table[idx[k] * arity]` as a `u64`; equality search is correct
161/// on any consistent total order, so the raw `u64` comparator suffices (we
162/// never order-compare across keys). Requires `arity >= 1`.
163fn equal_range(idx: &[Word], table: &[Word], arity: usize, key: Word) -> (usize, usize) {
164    let col0 = |k: usize| table[idx[k] as usize * arity];
165    // lower bound: first k with col0(k) >= key
166    let (mut lo, mut hi) = (0usize, idx.len());
167    while lo < hi {
168        let mid = lo + (hi - lo) / 2;
169        if col0(mid) < key {
170            lo = mid + 1;
171        } else {
172            hi = mid;
173        }
174    }
175    let lower = lo;
176    // upper bound: first k with col0(k) > key (search the [lower, len) tail)
177    hi = idx.len();
178    let mut lo2 = lower;
179    while lo2 < hi {
180        let mid = lo2 + (hi - lo2) / 2;
181        if col0(mid) <= key {
182            lo2 = mid + 1;
183        } else {
184            hi = mid;
185        }
186    }
187    (lower, lo2)
188}
189
190/// Scan positions `[cursor, END)` for the first row that unifies with the
191/// snapshot args. A row's actual index is `IDX[cursor]` when an index is set,
192/// else `cursor` itself. Each column unifies inline when it's an immediate
193/// (atom/int) word, or is first restored from the blob (compound/list/float/
194/// big-int — Stage C). On a match: advance the cursor, push a choice point for
195/// the rest of the range whose restore-point is the *pre-binding* state (so
196/// backtracking undoes exactly this row, including any restored cells), and
197/// return 1 — binding the row only once. Returns 0 when no position matches.
198fn fact_scan(m: &mut Machine, frame: usize) -> i32 {
199    let nrows = m.heap[frame + NROWS] as usize;
200    let arity = m.heap[frame + ARITY] as usize;
201    let idx_ptr = m.heap[frame + IDX];
202    let end = m.heap[frame + END] as usize;
203    let retry = unsafe { read_contfn(m.heap[frame + RETRY]) };
204    let table = unsafe { rodata_slice(m.heap[frame + TBL], nrows * arity) };
205    let idx: Option<&[Word]> = (idx_ptr != 0).then(|| unsafe { rodata_slice(idx_ptr, nrows) });
206    let blob = unsafe { rodata_slice(m.heap[frame + BLOB], m.heap[frame + BLOBLEN] as usize) };
207
208    // Restore-point for the next alternative: the state before any row here
209    // binds (or restores blob cells). A failed row rewinds to it and retries;
210    // a matched row leaves its bindings in place and hands this mark to the
211    // choice point.
212    let clean_t = m.trail.len();
213    let clean_h = m.heap.len();
214    loop {
215        let pos = m.heap[frame + CURSOR] as usize;
216        if pos >= end {
217            return 0;
218        }
219        let row_idx = match idx {
220            Some(ix) => ix[pos] as usize,
221            None => pos,
222        };
223        let mut matched = true;
224        for c in 0..arity {
225            let col = table[row_idx * arity + c];
226            // Immediate columns unify directly; blob references (STR/LST/FLT/
227            // BIG payloads index the blob) are materialized onto the heap first.
228            let w = match tag_of(col) {
229                TAG_ATOM | TAG_INT => col,
230                _ => restore_cells(m, blob, col),
231            };
232            let a = m.heap[frame + ARGS + c];
233            if !unify(m, a, w) {
234                matched = false;
235                break;
236            }
237        }
238        // Advance both branches. The write is intentionally untrailed: the
239        // frame cell survives the choice point's heap truncation, so the
240        // cursor advances monotonically across a CP resume (the next solution
241        // continues from here, never re-tries `pos`).
242        m.heap[frame + CURSOR] = (pos + 1) as u64;
243        if matched {
244            if pos + 1 < end {
245                m.push_cp_at(retry, frame as u64, clean_t, clean_h);
246            }
247            return 1;
248        }
249        m.rewind_to(clean_t, clean_h);
250    }
251}