Skip to main content

patch_prolog_frontend/parser/
mod.rs

1//! Operator-precedence parser for ISO Prolog programs and queries.
2//!
3//! Split into focused submodules:
4//! - [`operators`]: the operator-name table DATA (token → atom name).
5//! - [`term`]: term / primary parsing and the precedence-climbing levels.
6//! - [`clause`]: clause parsing and `:- ...` directive handling.
7//! - [`query`]: program / query entry points and goal-list parsing.
8//!
9//! `Term`/`Clause`/`StringInterner`/`VarId`/`AtomId` are sourced from
10//! `plg_shared`.
11
12mod cg;
13mod clause;
14pub mod operators;
15mod query;
16mod term;
17
18pub use cg::CgClause;
19
20use crate::parse_error::ParseError;
21use crate::tokenizer::{Token, TokenKind};
22use plg_shared::{AtomId, Span, StringInterner, VarId};
23use std::collections::HashMap;
24
25/// Directives extracted from a program (`:- dynamic(f/1).` etc).
26///
27/// Currently `dynamic/1` and `io_format/1` are recognized. Future directives
28/// (e.g. `multifile`, `discontiguous`) extend this struct.
29#[derive(Debug, Default, Clone)]
30pub struct ProgramDirectives {
31    /// `(functor, arity)` pairs declared `:- dynamic(F/A).`.
32    /// A goal referencing a predicate in this set fails silently when no
33    /// clauses match, instead of throwing `existence_error`.
34    pub dynamic: Vec<(AtomId, usize)>,
35    /// Wire-encoding names the program declares via `:- io_format([...])`
36    /// (e.g. `[text, bson]`). Default `[text]`. The codegen-baked capability
37    /// table gates `--format`; encoders not listed are dead-stripped from the
38    /// binary.
39    pub io_format: Vec<String>,
40}
41
42/// A source occurrence of an atom-functor term (`name` or `name(...)`),
43/// captured in `parse_primary`. This is a broad over-approximation of "call
44/// sites": it records *every* such term regardless of position — goals, but
45/// also atoms as constants (`X = foo`), atoms inside data (`p(foo, bar)`),
46/// functors in operator specs (`dynamic(foo/1)` records `dynamic`, `foo`,
47/// and `/`), and `[]` as `[]/0`. It never matches text in comments (those
48/// aren't parsed). The LSP narrows this to real calls by intersecting with
49/// the lint's undefined `(name, arity)` set, which keeps the false-positive
50/// surface small in practice.
51#[derive(Debug, Clone)]
52pub struct CallSite {
53    pub functor: AtomId,
54    pub arity: usize,
55    pub span: Span,
56}
57
58/// Parser for Edinburgh Prolog syntax.
59/// Parses tokens into Terms and Clauses, with variable scoping per clause.
60pub struct Parser<'a> {
61    tokens: Vec<Token>,
62    pos: usize,
63    interner: &'a mut StringInterner,
64    var_map: HashMap<String, VarId>,
65    next_var: VarId,
66    /// Atom-functor term occurrences, accumulated across the whole program
67    /// (not reset per clause — the LSP wants every buffer occurrence).
68    call_sites: Vec<CallSite>,
69    /// File id stamped on spans produced for the codegen path (SPANS.md
70    /// Layer 3). Default `0`; set per source by `parse_program_cg`.
71    file_id: plg_shared::FileId,
72}
73
74impl<'a> Parser<'a> {
75    /// Build a parser over already-tokenized input.
76    fn from_tokens(tokens: Vec<Token>, interner: &'a mut StringInterner) -> Self {
77        Parser {
78            tokens,
79            pos: 0,
80            interner,
81            var_map: HashMap::new(),
82            next_var: 0,
83            call_sites: Vec::new(),
84            file_id: 0,
85        }
86    }
87
88    /// Record an atom-functor term occurrence (see [`CallSite`]).
89    fn record_call_site(&mut self, functor: AtomId, arity: usize, span: Span) {
90        self.call_sites.push(CallSite {
91            functor,
92            arity,
93            span,
94        });
95    }
96
97    fn reset_vars(&mut self) {
98        self.var_map.clear();
99        self.next_var = 0;
100    }
101
102    fn current(&self) -> Option<&Token> {
103        self.tokens.get(self.pos)
104    }
105
106    fn current_kind(&self) -> Option<&TokenKind> {
107        self.current().map(|t| &t.kind)
108    }
109
110    fn at_eof(&self) -> bool {
111        matches!(self.current_kind(), None | Some(TokenKind::Eof))
112    }
113
114    fn advance(&mut self) -> &Token {
115        let tok = &self.tokens[self.pos];
116        self.pos += 1;
117        tok
118    }
119
120    /// Span of the current token, or a point at end-of-input if exhausted.
121    /// All parser errors point "here" — the position the parser stalled at.
122    fn here_span(&self) -> Span {
123        match self.current() {
124            Some(t) => Span::new(0, t.lo, t.hi),
125            None => self.eof_span(),
126        }
127    }
128
129    /// A point span at end of input (the `Eof` token's offset).
130    fn eof_span(&self) -> Span {
131        let off = self.tokens.last().map(|t| t.hi).unwrap_or(0);
132        Span::point(0, off)
133    }
134
135    /// Build a `ParseError` pointing at the current token.
136    fn error_here(&self, message: impl Into<String>) -> ParseError {
137        ParseError::new(message, self.here_span())
138    }
139
140    fn expect(&mut self, kind: &TokenKind) -> Result<(), ParseError> {
141        match self.current() {
142            Some(tok) if &tok.kind == kind => {
143                self.advance();
144                Ok(())
145            }
146            Some(tok) => {
147                let msg = format!("expected {}, got {}", kind, tok.kind);
148                Err(self.error_here(msg))
149            }
150            None => Err(self.error_here(format!("expected {kind}, got end of input"))),
151        }
152    }
153
154    /// Get the variable name map (for extracting query variable names in results).
155    pub fn var_names(&self) -> &HashMap<String, VarId> {
156        &self.var_map
157    }
158}