Skip to main content

plg_runtime/
query.rs

1//! Minimal goal-only parser for runtime `--query` strings.
2//!
3//! Deliberately NOT the full plg-frontend parser (binary size): a query
4//! is one goal term. Supports atoms (plain and quoted), variables,
5//! integers, compounds, lists, and the standard operator set via the
6//! precedence climber in `query::ops` — the levels mirror
7//! plg-frontend/src/parser (the reference implementation); the
8//! differential test corpus guards against drift.
9//!
10//! Terms are built directly on the machine heap; query variables are
11//! recorded in `m.query_vars` (first-occurrence order, `_` excluded —
12//! the renderer sorts by name, matching v1).
13
14mod ops;
15
16use crate::cell::{self, Word};
17use crate::machine::Machine;
18use std::collections::HashMap;
19
20pub fn parse_query(m: &mut Machine, src: &str) -> Result<Word, String> {
21    let mut p = QueryParser {
22        chars: src.chars().collect(),
23        pos: 0,
24        vars: HashMap::new(),
25    };
26    let goal = p.parse_level(m, 1200)?;
27    p.skip_ws();
28    // Tolerate a trailing '.' like the v1 query parser.
29    if p.peek() == Some('.') {
30        p.pos += 1;
31        p.skip_ws();
32    }
33    if p.pos < p.chars.len() {
34        return Err(format!("unexpected input at column {}", p.pos + 1));
35    }
36    Ok(goal)
37}
38
39pub(crate) struct QueryParser {
40    pub(crate) chars: Vec<char>,
41    pub(crate) pos: usize,
42    vars: HashMap<String, Word>,
43}
44
45impl QueryParser {
46    pub(crate) fn peek(&self) -> Option<char> {
47        self.chars.get(self.pos).copied()
48    }
49
50    pub(crate) fn skip_ws(&mut self) {
51        while let Some(c) = self.peek() {
52            if c.is_whitespace() {
53                self.pos += 1;
54            } else if c == '%' {
55                while self.peek().is_some_and(|c| c != '\n') {
56                    self.pos += 1;
57                }
58            } else {
59                break;
60            }
61        }
62    }
63
64    fn expect(&mut self, c: char) -> Result<(), String> {
65        self.skip_ws();
66        if self.peek() == Some(c) {
67            self.pos += 1;
68            Ok(())
69        } else {
70            Err(format!("expected `{c}` at column {}", self.pos + 1))
71        }
72    }
73
74    pub(crate) fn make_binop(&self, m: &mut Machine, name: &str, a: Word, b: Word) -> Word {
75        let id = m.atoms.intern(name);
76        let idx = m.heap.len();
77        m.heap.push(cell::pack_functor(id, 2));
78        m.heap.push(a);
79        m.heap.push(b);
80        cell::make(cell::TAG_STR, idx as u64)
81    }
82
83    /// A primary term: constants, variables, compounds, lists, parens.
84    pub(crate) fn parse_primary(&mut self, m: &mut Machine) -> Result<Word, String> {
85        self.skip_ws();
86        match self.peek() {
87            None => Err("unexpected end of query".to_string()),
88            Some('(') => {
89                self.pos += 1;
90                let t = self.parse_level(m, 1200)?;
91                self.expect(')')?;
92                Ok(t)
93            }
94            Some('[') => self.parse_list(m),
95            Some('\'') => {
96                let name = self.read_quoted()?;
97                self.parse_atom_or_compound(m, name)
98            }
99            Some(c) if c.is_ascii_digit() => self.parse_number(m, false),
100            // Standalone operator atoms (`(-)`, `[+, *]`, `call(=, X, Y)`)
101            // and canonical compound forms (`=(X, 7)`): a symbol run
102            // followed by `(` is a compound, by a term-end is an atom.
103            Some(c) if is_symbol_atom_char(c) && self.symbol_run_is_standalone() => {
104                let name = self.read_symbol_run();
105                self.parse_atom_or_compound(m, name)
106            }
107            Some('-')
108                if self
109                    .chars
110                    .get(self.pos + 1)
111                    .is_some_and(|c| c.is_ascii_digit()) =>
112            {
113                self.pos += 1;
114                self.parse_number(m, true)
115            }
116            Some('-') => {
117                // Prefix minus over a non-literal: -(Term).
118                self.pos += 1;
119                let t = self.parse_primary(m)?;
120                self.make_prefix(m, "-", t)
121            }
122            Some('+') => {
123                // Prefix plus: folds into a positive numeric literal
124                // (v1 issue #19), otherwise builds '+'/1.
125                self.pos += 1;
126                self.skip_ws();
127                if self.peek().is_some_and(|c| c.is_ascii_digit()) {
128                    self.parse_number(m, false)
129                } else {
130                    let t = self.parse_primary(m)?;
131                    self.make_prefix(m, "+", t)
132                }
133            }
134            Some('\\') => {
135                // Prefix `\` (and `\+` reached here only as an atom —
136                // the level-900 parser handles real negation).
137                let name = self.read_symbol_run();
138                if self.peek() == Some('(') {
139                    self.parse_atom_or_compound(m, name)
140                } else {
141                    let t = self.parse_primary(m)?;
142                    self.make_prefix(m, &name, t)
143                }
144            }
145            Some('!') => {
146                self.pos += 1;
147                Ok(cell::make_atom(m.atoms.intern("!")))
148            }
149            Some(c) if c.is_uppercase() || c == '_' => {
150                let name = self.read_ident();
151                Ok(self.var_word(m, &name))
152            }
153            Some(c) if c.is_lowercase() => {
154                let name = self.read_ident();
155                self.parse_atom_or_compound(m, name)
156            }
157            Some(c) => Err(format!("unexpected `{c}` at column {}", self.pos + 1)),
158        }
159    }
160
161    fn parse_atom_or_compound(&mut self, m: &mut Machine, name: String) -> Result<Word, String> {
162        let id = m.atoms.intern(&name);
163        // No whitespace allowed between functor and `(` (ISO).
164        if self.peek() == Some('(') {
165            self.pos += 1;
166            // Arguments parse at priority 999 (no bare `,`).
167            let mut args = vec![self.parse_level(m, 999)?];
168            loop {
169                self.skip_ws();
170                match self.peek() {
171                    Some(',') => {
172                        self.pos += 1;
173                        args.push(self.parse_level(m, 999)?);
174                    }
175                    Some(')') => {
176                        self.pos += 1;
177                        break;
178                    }
179                    _ => return Err(format!("expected `,` or `)` at column {}", self.pos + 1)),
180                }
181            }
182            let idx = m.heap.len();
183            m.heap.push(cell::pack_functor(id, args.len() as u32));
184            m.heap.extend_from_slice(&args);
185            Ok(cell::make(cell::TAG_STR, idx as u64))
186        } else {
187            Ok(cell::make_atom(id))
188        }
189    }
190
191    fn parse_list(&mut self, m: &mut Machine) -> Result<Word, String> {
192        self.expect('[')?;
193        self.skip_ws();
194        if self.peek() == Some(']') {
195            self.pos += 1;
196            return Ok(cell::make_atom(plg_shared::atom::ATOM_NIL));
197        }
198        let mut elements = vec![self.parse_level(m, 999)?];
199        let mut tail = None;
200        loop {
201            self.skip_ws();
202            match self.peek() {
203                Some(',') => {
204                    self.pos += 1;
205                    elements.push(self.parse_level(m, 999)?);
206                }
207                Some('|') => {
208                    self.pos += 1;
209                    tail = Some(self.parse_level(m, 999)?);
210                    self.expect(']')?;
211                    break;
212                }
213                Some(']') => {
214                    self.pos += 1;
215                    break;
216                }
217                _ => {
218                    return Err(format!(
219                        "expected `,`, `|` or `]` at column {}",
220                        self.pos + 1
221                    ));
222                }
223            }
224        }
225        let mut w = tail.unwrap_or(cell::make_atom(plg_shared::atom::ATOM_NIL));
226        for e in elements.into_iter().rev() {
227            let idx = m.heap.len();
228            m.heap.push(e);
229            m.heap.push(w);
230            w = cell::make(cell::TAG_LST, idx as u64);
231        }
232        Ok(w)
233    }
234
235    /// Integer or float literal. A `.` continues a float only when a
236    /// digit follows (otherwise it's the goal terminator) — same
237    /// disambiguation as the frontend tokenizer.
238    fn parse_number(&mut self, m: &mut Machine, neg: bool) -> Result<Word, String> {
239        let start = self.pos;
240        while self.peek().is_some_and(|c| c.is_ascii_digit()) {
241            self.pos += 1;
242        }
243        let is_float = self.peek() == Some('.')
244            && self
245                .chars
246                .get(self.pos + 1)
247                .is_some_and(|c| c.is_ascii_digit());
248        if is_float {
249            self.pos += 1; // '.'
250            while self.peek().is_some_and(|c| c.is_ascii_digit()) {
251                self.pos += 1;
252            }
253            let text: String = self.chars[start..self.pos].iter().collect();
254            let f: f64 = text
255                .parse()
256                .map_err(|_| format!("invalid float `{text}`"))?;
257            let f = if neg { -f } else { f };
258            let idx = m.heap.len();
259            m.heap.push(f.to_bits());
260            return Ok(cell::make(cell::TAG_FLT, idx as u64));
261        }
262        let digits: String = self.chars[start..self.pos].iter().collect();
263        let n: i64 = digits
264            .parse()
265            .map_err(|_| format!("invalid integer `{digits}`"))?;
266        let n = if neg { -n } else { n };
267        if !(cell::INT_MIN..=cell::INT_MAX).contains(&n) {
268            // Box beyond-immediate integers (full i64 range, v1 parity).
269            let idx = m.heap.len();
270            m.heap.push(n as u64);
271            return Ok(cell::make(cell::TAG_BIG, idx as u64));
272        }
273        Ok(cell::make_int(n))
274    }
275
276    pub(crate) fn read_ident(&mut self) -> String {
277        let start = self.pos;
278        while self.peek().is_some_and(|c| c.is_alphanumeric() || c == '_') {
279            self.pos += 1;
280        }
281        self.chars[start..self.pos].iter().collect()
282    }
283
284    fn read_quoted(&mut self) -> Result<String, String> {
285        self.pos += 1; // opening '
286        let mut out = String::new();
287        loop {
288            match self.peek() {
289                None => return Err("unterminated quoted atom".to_string()),
290                Some('\'') => {
291                    self.pos += 1;
292                    if self.peek() == Some('\'') {
293                        out.push('\''); // '' escape
294                        self.pos += 1;
295                    } else {
296                        return Ok(out);
297                    }
298                }
299                Some(c) => {
300                    out.push(c);
301                    self.pos += 1;
302                }
303            }
304        }
305    }
306
307    fn read_symbol_run(&mut self) -> String {
308        let start = self.pos;
309        while self.peek().is_some_and(is_symbol_atom_char) {
310            self.pos += 1;
311        }
312        self.chars[start..self.pos].iter().collect()
313    }
314
315    /// Does the symbol run starting at the cursor stand alone as an
316    /// atom (followed by a term-end) or start a canonical compound
317    /// (immediately followed by `(`)?
318    fn symbol_run_is_standalone(&self) -> bool {
319        let mut p = self.pos;
320        while self.chars.get(p).copied().is_some_and(is_symbol_atom_char) {
321            p += 1;
322        }
323        if self.chars.get(p) == Some(&'(') {
324            return true; // canonical compound: =(X, 7)
325        }
326        let mut q = p;
327        while self.chars.get(q).is_some_and(|c| c.is_whitespace()) {
328            q += 1;
329        }
330        match self.chars.get(q) {
331            None => {
332                // End of input — but a trailing '.' inside the run is
333                // the goal terminator, not part of the atom; only the
334                // run itself ending the input counts.
335                true
336            }
337            Some(')' | ',' | ']' | '|') => true,
338            _ => false,
339        }
340    }
341
342    fn make_prefix(&self, m: &mut Machine, name: &str, t: Word) -> Result<Word, String> {
343        let id = m.atoms.intern(name);
344        let idx = m.heap.len();
345        m.heap.push(cell::pack_functor(id, 1));
346        m.heap.push(t);
347        Ok(cell::make(cell::TAG_STR, idx as u64))
348    }
349
350    /// `_` is always fresh and never recorded; named variables are
351    /// shared within the query and recorded for solution output.
352    fn var_word(&mut self, m: &mut Machine, name: &str) -> Word {
353        if name == "_" {
354            return m.new_var();
355        }
356        if let Some(&w) = self.vars.get(name) {
357            return w;
358        }
359        let w = m.new_var();
360        self.vars.insert(name.to_string(), w);
361        m.query_vars
362            .push((name.to_string(), cell::payload(w) as usize));
363        w
364    }
365}
366
367/// Characters that form symbolic atoms (Prolog "symbol char" class).
368fn is_symbol_atom_char(c: char) -> bool {
369    matches!(
370        c,
371        '+' | '-' | '*' | '/' | '\\' | '<' | '>' | '=' | ':' | '@' | '^' | '.' | '?' | '&' | '~'
372    )
373}
374
375#[cfg(test)]
376mod tests {
377    use super::*;
378    use crate::cell::*;
379    use plg_shared::StringInterner;
380
381    fn machine() -> Box<Machine> {
382        Machine::new(StringInterner::new(), Vec::new())
383    }
384
385    fn functor_name(m: &Machine, w: Word) -> String {
386        let idx = payload(w) as usize;
387        let (f, _) = unpack_functor(m.heap[idx]);
388        m.atoms.resolve(f).to_string()
389    }
390
391    #[test]
392    fn parses_compound_with_vars() {
393        let mut m = machine();
394        let w = parse_query(&mut m, "parent(tom, X)").unwrap();
395        assert_eq!(tag_of(w), TAG_STR);
396        assert_eq!(functor_name(&m, w), "parent");
397        assert_eq!(m.query_vars.len(), 1);
398        assert_eq!(m.query_vars[0].0, "X");
399    }
400
401    #[test]
402    fn conjunction_shares_variables() {
403        let mut m = machine();
404        let w = parse_query(&mut m, "p(X), q(X, Y)").unwrap();
405        assert_eq!(functor_name(&m, w), ",");
406        assert_eq!(m.query_vars.len(), 2, "X shared, Y new");
407    }
408
409    #[test]
410    fn operator_goals_parse() {
411        let mut m = machine();
412        let w = parse_query(&mut m, "X is 2 + 3 * 4").unwrap();
413        assert_eq!(functor_name(&m, w), "is");
414        let w = parse_query(&mut m, "1 < 2").unwrap();
415        assert_eq!(functor_name(&m, w), "<");
416        let w = parse_query(&mut m, "(a ; b)").unwrap();
417        assert_eq!(functor_name(&m, w), ";");
418        let w = parse_query(&mut m, "(a -> b ; c)").unwrap();
419        assert_eq!(functor_name(&m, w), ";");
420        let w = parse_query(&mut m, "\\+ p(X)").unwrap();
421        assert_eq!(functor_name(&m, w), "\\+");
422    }
423
424    #[test]
425    fn precedence_multiplication_binds_tighter() {
426        let mut m = machine();
427        // is(X, +(2, *(3, 4)))
428        let w = parse_query(&mut m, "X is 2 + 3 * 4").unwrap();
429        let idx = payload(w) as usize;
430        let rhs = m.deref(m.heap[idx + 2]);
431        assert_eq!(functor_name(&m, rhs), "+");
432        let plus_idx = payload(rhs) as usize;
433        let right = m.deref(m.heap[plus_idx + 2]);
434        assert_eq!(functor_name(&m, right), "*");
435    }
436
437    #[test]
438    fn args_parse_operators_but_not_bare_comma() {
439        let mut m = machine();
440        let w = parse_query(&mut m, "p(1 + 2, X)").unwrap();
441        let idx = payload(w) as usize;
442        let (f, n) = unpack_functor(m.heap[idx]);
443        assert_eq!(m.atoms.resolve(f), "p");
444        assert_eq!(n, 2, "1 + 2 is one arg, X the other");
445    }
446
447    #[test]
448    fn lists_and_quoted_atoms() {
449        let mut m = machine();
450        let w = parse_query(&mut m, "p([1, 2 | T], 'hello world')").unwrap();
451        assert_eq!(tag_of(w), TAG_STR);
452        assert!(m.atoms.lookup("hello world").is_some());
453        assert_eq!(m.query_vars[0].0, "T");
454    }
455
456    #[test]
457    fn underscore_never_recorded() {
458        let mut m = machine();
459        parse_query(&mut m, "p(_, _)").unwrap();
460        assert!(m.query_vars.is_empty());
461    }
462
463    #[test]
464    fn negative_integers_and_trailing_dot() {
465        let mut m = machine();
466        let w = parse_query(&mut m, "p(-42).").unwrap();
467        let idx = payload(w) as usize;
468        assert_eq!(int_value(m.deref(m.heap[idx + 1])), -42);
469    }
470
471    #[test]
472    fn rejects_trailing_garbage() {
473        let mut m = machine();
474        assert!(parse_query(&mut m, "p(a) q").is_err());
475        assert!(parse_query(&mut m, "p(").is_err());
476    }
477}