plg_runtime/
entry.rs

1//! Process entry: `plg_rt_init` + `plg_rt_main`, called from the thin
2//! generated `main`. Owns argv parsing (hand-rolled — no clap inside compiled
3//! binaries), output, and the exit-code contract:
4//!
5//!   0 = no solutions, 1 = solutions found,
6//!   2 = query parse / usage error, 3 = runtime error
7//!
8//! Wire encodings are `text` (readable, default) and `bson` (binary), gated by
9//! the codegen-baked capability table (`io_format/1`, default `[text]`). No
10//! JSON — a host wanting JSON derives it from bson. See docs/design/IO.md.
11
12use crate::core::{self, QueryResult};
13use crate::machine::{Machine, RegistryEntry, SrcLoc};
14use crate::wire::{EncoderDesc, Envelope, WireError};
15use plg_shared::StringInterner;
16use std::ffi::CStr;
17use std::io::{self, Read, Write};
18use std::os::raw::c_char;
19
20/// Build the Machine from the tables codegen baked into the binary.
21/// Re-interning the emitted atom names in id order reproduces the
22/// compiler's exact id space (the interner pre-seeds the same
23/// well-known atoms the compiler's did).
24///
25/// # Safety
26/// Called once from generated `main` with codegen-emitted tables.
27#[unsafe(no_mangle)]
28pub unsafe extern "C" fn plg_rt_init(
29    atom_strs: *const *const c_char,
30    atom_count: u32,
31    registry: *const RegistryEntry,
32    registry_len: u32,
33    srcmap: *const SrcLoc,
34    srcmap_len: u32,
35    files: *const *const c_char,
36    files_len: u32,
37    caps: *const *const crate::wire::EncoderDesc,
38    caps_len: u32,
39) -> *mut Machine {
40    let mut atoms = StringInterner::new();
41    for i in 0..atom_count as usize {
42        let s = unsafe { CStr::from_ptr(*atom_strs.add(i)) };
43        let expected = i as u32;
44        let id = atoms.intern(&s.to_string_lossy());
45        debug_assert_eq!(id, expected, "atom table out of sync with interner");
46    }
47    let registry: Vec<RegistryEntry> =
48        unsafe { std::slice::from_raw_parts(registry, registry_len as usize) }.to_vec();
49    debug_assert!(
50        registry.is_sorted_by_key(|e| (e.functor, e.arity)),
51        "registry must be sorted for binary search"
52    );
53    let srcmap: Vec<SrcLoc> = if srcmap_len == 0 {
54        Vec::new()
55    } else {
56        unsafe { std::slice::from_raw_parts(srcmap, srcmap_len as usize) }.to_vec()
57    };
58    let files: Vec<String> = (0..files_len as usize)
59        .map(|i| {
60            unsafe { CStr::from_ptr(*files.add(i)) }
61                .to_string_lossy()
62                .into_owned()
63        })
64        .collect();
65    let mut m = Machine::new(atoms, registry);
66    m.set_provenance(srcmap, files);
67    m.capabilities = (0..caps_len as usize)
68        .map(|i| unsafe { *caps.add(i) })
69        .collect();
70    Box::into_raw(m)
71}
72
/// Command-line options accepted by a compiled program binary.
struct Args {
    /// Goal text from `-q` / `--query`; `None` when the flag was not given.
    query: Option<String>,
    /// Solution cap from `-l` / `--limit`; `None` when the flag was not given.
    limit: Option<usize>,
    /// Output wire encoding name from `-f` / `--format` (default `"text"`).
    format: String,
    /// Request encoding name from `--input-format` (default `"text"`, i.e.
    /// the query comes from argv `--query`).
    input_format: String,
    /// Whether `--atoms` (bson self-describing mode) was given.
    atoms: bool,
}

/// Parse the process arguments (program name already stripped) into [`Args`].
///
/// Value-taking flags accept both `--flag value` and `--flag=value`. Only the
/// first `=` splits, so `--query=X = 1` keeps `X = 1` intact. When a flag is
/// repeated, the last occurrence wins.
///
/// # Errors
/// Returns the message to show the user when:
/// - a value-taking flag has no value,
/// - `--limit` is not a non-negative integer,
/// - the boolean `--atoms` flag is given an inline `=value`,
/// - an argument is not recognised,
/// - `-h` / `--help` is given (the "error" is the usage line).
fn parse_args(argv: Vec<String>) -> Result<Args, String> {
    /// Value for `flag`: the inline `=value` part if present, otherwise the
    /// next argument. The error text is only built on the failure path.
    fn take_value(
        flag: &str,
        inline: Option<&str>,
        rest: &mut std::vec::IntoIter<String>,
    ) -> Result<String, String> {
        inline
            .map(str::to_owned)
            .or_else(|| rest.next())
            .ok_or_else(|| format!("missing value for {flag}"))
    }

    let mut parsed = Args {
        query: None,
        limit: None,
        format: "text".to_string(),       // default wire encoding
        input_format: "text".to_string(), // default: argv `--query`
        atoms: false,
    };
    let mut rest = argv.into_iter();
    // `while let` rather than `for`: value-taking flags pull their value from
    // `rest` inside the loop body.
    while let Some(arg) = rest.next() {
        let (flag, inline) = match arg.split_once('=') {
            Some((name, value)) => (name, Some(value)),
            None => (arg.as_str(), None),
        };
        match flag {
            "-q" | "--query" => parsed.query = Some(take_value(flag, inline, &mut rest)?),
            "-l" | "--limit" => {
                let raw = take_value(flag, inline, &mut rest)?;
                let n = raw
                    .parse::<usize>()
                    .map_err(|_| "invalid --limit value".to_string())?;
                parsed.limit = Some(n);
            }
            "-f" | "--format" => parsed.format = take_value(flag, inline, &mut rest)?,
            "--input-format" => parsed.input_format = take_value(flag, inline, &mut rest)?,
            "--atoms" => {
                // A boolean flag takes no value; dropping `--atoms=...`
                // silently would hide a caller mistake.
                if inline.is_some() {
                    return Err(format!("unexpected value for {flag}"));
                }
                parsed.atoms = true;
            }
            "-h" | "--help" => {
                return Err(
                    "usage: --query <goal> [--limit N] [--format text|bson] [--input-format text|bson] [--atoms]"
                        .to_string(),
                );
            }
            // Report the argument exactly as given, including any `=value`.
            _ => return Err(format!("unexpected argument: {arg}")),
        }
    }
    Ok(parsed)
}
128
129/// Emit solutions for a solved query through the chosen encoder. `can_stream()`
130/// drives the line discipline: streamed encodings (text) get a trailing flush
131/// via the line buffer; non-streamable encodings (bson) get an explicit flush
132/// (Rust's stdout is a `LineWriter`, and bson's no-newline bytes would never
133/// flush on exit without it).
134fn output_solutions(enc: &EncoderDesc, m: &Machine, exhausted: bool, atoms: bool) {
135    let mut out = io::stdout().lock();
136    let mut env = Envelope::from_machine(m, exhausted);
137    // `--atoms`: bson self-describing mode — materialize the post-query atom
138    // map (program + query atoms) so the host can decode term values from this
139    // document. No-op for text (can_stream renders names directly).
140    let atom_names: Vec<String> = if atoms && !(enc.can_stream)() {
141        (0..m.atoms.len())
142            .map(|i| {
143                m.atoms
144                    .try_resolve(i as u32)
145                    .unwrap_or_default()
146                    .to_string()
147            })
148            .collect()
149    } else {
150        Vec::new()
151    };
152    if !atom_names.is_empty() {
153        env.atoms = Some(&atom_names);
154    }
155    let _ = (enc.write_envelope)(&mut out, m, &env);
156    if !(enc.can_stream)() {
157        let _ = out.flush();
158    }
159}
160
161/// Emit an error through the chosen encoder. The caller constructs the
162/// `WireError` with the correct class (`Parse` vs `Runtime`) so the distinction
163/// is honest on the wire.
164fn output_result(enc: Option<&EncoderDesc>, err: WireError) {
165    let message = match &err {
166        WireError::Parse(m) | WireError::Runtime(m) => m.as_str(),
167    };
168    match enc {
169        Some(e) => {
170            let mut out = io::stdout().lock();
171            let _ = (e.write_error)(&mut out, &err);
172            if !(e.can_stream)() {
173                let _ = out.flush();
174            }
175        }
176        None => eprintln!("Error: {message}"),
177    }
178}
179
180/// Run the query named in argv (or a bson request on stdin) against the
181/// compiled program. Returns the process exit code.
182///
183/// # Safety
184/// Called once from generated `main` with the process argc/argv.
185#[unsafe(no_mangle)]
186pub unsafe extern "C" fn plg_rt_main(
187    m: *mut Machine,
188    argc: i32,
189    argv: *const *const c_char,
190) -> i32 {
191    let m = unsafe { &mut *m };
192    let raw_args: Vec<String> = (1..argc as usize)
193        .map(|i| {
194            unsafe { CStr::from_ptr(*argv.add(i)) }
195                .to_string_lossy()
196                .into_owned()
197        })
198        .collect();
199
200    let args = match parse_args(raw_args) {
201        Ok(a) => a,
202        Err(e) => {
203            eprintln!("{e}");
204            return 2;
205        }
206    };
207    // Resolve the output encoding against the capability table (the encoders
208    // this binary advertises via `io_format/1`, default `[text]`). An unknown
209    // or undeclared wire format is a usage error (exit 2); there is no valid
210    // encoder to serialize the error, so it goes to stderr.
211    let enc: Option<&'static EncoderDesc> = match unsafe {
212        EncoderDesc::find(m.capabilities.as_ptr(), m.capabilities.len(), &args.format)
213    } {
214        Some(e) => Some(e),
215        None => {
216            eprintln!("Unknown or undeclared format: {}", args.format);
217            return 2;
218        }
219    };
220    // Standalone `--atoms` (no query): emit the program atom map and exit.
221    // No query runs, so the map is program-atoms-only (the boundary: a query
222    // that introduces atoms needs the with-query `--atoms` form instead).
223    if args.atoms && args.query.is_none() && args.input_format == "text" {
224        let mut out = io::stdout().lock();
225        let e = enc.unwrap();
226        if (e.can_stream)() {
227            let _ = crate::wire::write_atom_map_text(&mut out, m);
228        } else {
229            let _ = crate::wire::write_atom_map_bson(&mut out, m);
230            let _ = out.flush();
231        }
232        return 0;
233    }
234    // Resolve the query string and limit by input mode. Default (`text`) takes
235    // the query from argv `--query` (required). `--input-format bson` reads a
236    // one-field request document `{query, limit?}` from stdin; it requires the
237    // binary to advertise `bson` (capability gates both directions). argv
238    // `--limit`, when present, overrides any limit from the bson document.
239    let (query, argv_limit) = match args.input_format.as_str() {
240        "text" => {
241            let q = match args.query {
242                Some(q) => q,
243                None => {
244                    eprintln!("missing required argument: --query <goal>");
245                    return 2;
246                }
247            };
248            (q, args.limit)
249        }
250        "bson" => {
251            if unsafe {
252                EncoderDesc::find(m.capabilities.as_ptr(), m.capabilities.len(), "bson").is_none()
253            } {
254                eprintln!("Unknown or undeclared input format: bson");
255                return 2;
256            }
257            let mut stdin_buf = Vec::new();
258            if let Err(e) = std::io::stdin().read_to_end(&mut stdin_buf) {
259                output_result(
260                    enc,
261                    WireError::Parse(format!("bson request read error: {e}")),
262                );
263                return 2;
264            }
265            match crate::wire::parse_bson_request(&stdin_buf) {
266                Ok(req) => (req.query, args.limit.or(req.limit)),
267                Err(e) => {
268                    // A malformed bson request is a parse error at the framing
269                    // layer; encode it like any other parse error rather than
270                    // breaking the "I only speak bson" contract.
271                    output_result(
272                        enc,
273                        WireError::Parse(format!("bson request parse error: {e}")),
274                    );
275                    return 2;
276                }
277            }
278        }
279        other => {
280            eprintln!("Unknown --input-format: {other} (expected text|bson)");
281            return 2;
282        }
283    };
284    // A non-streamable encoding (binary) can't coexist with raw `write/1` text
285    // bytes on stdout, so run in capture mode: `write/1` bytes land in the
286    // envelope's `output` field. (IO.md — the encoding dictates the sink.)
287    if let Some(e) = enc
288        && !(e.can_stream)()
289    {
290        m.output = crate::machine::OutputSink::Capture(String::new());
291    }
292    m.solution_limit = argv_limit;
293    if let Ok(s) = std::env::var("PLG_MAX_STEPS")
294        && let Ok(n) = s.parse::<u64>()
295    {
296        m.step_limit = n;
297    }
298    if let Ok(s) = std::env::var("PLG_METACALL_DEPTH")
299        && let Ok(n) = s.parse::<usize>()
300    {
301        m.metacall_depth_limit = n;
302    }
303
304    match core::run_query(m, &query) {
305        QueryResult::ParseError(msg) => {
306            output_result(enc, WireError::Parse(msg));
307            2
308        }
309        QueryResult::RuntimeError(msg) => {
310            output_result(enc, WireError::Runtime(msg));
311            3
312        }
313        QueryResult::Solutions => {
314            let count = m.solutions.len();
315            let exhausted = core::exhausted(m);
316            output_solutions(enc.unwrap(), m, exhausted, args.atoms);
317            if count > 0 { 1 } else { 0 }
318        }
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// Run `parse_args` over borrowed string literals.
    fn args(v: &[&str]) -> Result<Args, String> {
        let owned: Vec<String> = v.iter().map(|piece| (*piece).to_owned()).collect();
        parse_args(owned)
    }

    /// Both `--flag value` and `--flag=value` spellings are accepted, and the
    /// two format options default to `text`.
    #[test]
    fn parses_flags_with_space_and_equals() {
        let spaced = args(&["--query", "p(X)", "--limit", "3", "--format", "bson"]).unwrap();
        assert_eq!(spaced.query.as_deref(), Some("p(X)"));
        assert_eq!(spaced.limit, Some(3));
        assert_eq!(spaced.format, "bson");
        assert_eq!(spaced.input_format, "text", "default input-format is text");

        let inline = args(&["--query=p(X)", "-l", "1"]).unwrap();
        assert_eq!(inline.query.as_deref(), Some("p(X)"));
        assert_eq!(inline.limit, Some(1));
        assert_eq!(inline.format, "text", "default format is text");
    }

    /// `--input-format` is recorded verbatim, with or without `--query`.
    #[test]
    fn parses_input_format_flag() {
        let with_query = args(&["--query", "p(X)", "--input-format", "bson"]).unwrap();
        assert_eq!(with_query.input_format, "bson");
        // --query is optional at the parse layer (required only for text-input
        // mode, enforced in plg_rt_main).
        let without_query = args(&["--input-format", "bson"]);
        assert!(without_query.is_ok());
    }

    /// A value-taking flag with nothing after it, and an unknown flag, both
    /// fail to parse.
    #[test]
    fn missing_value_flags_are_errors() {
        for bad in [&["--query"][..], &["--bogus", "x"][..], &["--input-format"][..]] {
            assert!(args(bad).is_err());
        }
    }
}
plg_runtime/entry.rs

plg_runtime/
entry.rs