patch_prolog_frontend/parser/mod.rs
1//! Operator-precedence parser for ISO Prolog programs and queries.
2//!
3//! Ported from patch-prolog's `parser.rs`, split into focused submodules:
4//! - [`operators`]: the operator-name table DATA (token → atom name).
5//! - [`term`]: term / primary parsing and the precedence-climbing levels.
6//! - [`clause`]: clause parsing and `:- ...` directive handling.
7//! - [`query`]: program / query entry points and goal-list parsing.
8//!
9//! Changes from v1: `fnv::FnvHashMap` → `std::collections::HashMap`, serde
10//! derives dropped, and `Term`/`Clause`/`StringInterner`/`VarId`/`AtomId`
11//! sourced from `plg_shared`.
12
13mod cg;
14mod clause;
15pub mod operators;
16mod query;
17mod term;
18
19pub use cg::CgClause;
20
21use crate::parse_error::ParseError;
22use crate::tokenizer::{Token, TokenKind};
23use plg_shared::{AtomId, Span, StringInterner, VarId};
24use std::collections::HashMap;
25
26/// Directives extracted from a program (`:- dynamic(f/1).` etc).
27///
28/// Currently only `dynamic/1` is recognized. Future directives (e.g.
29/// `multifile`, `discontiguous`) extend this struct.
30#[derive(Debug, Default, Clone)]
31pub struct ProgramDirectives {
32 /// `(functor, arity)` pairs declared `:- dynamic(F/A).`.
33 /// A goal referencing a predicate in this set fails silently when no
34 /// clauses match, instead of throwing `existence_error`.
35 pub dynamic: Vec<(AtomId, usize)>,
36}
37
38/// A source occurrence of an atom-functor term (`name` or `name(...)`),
39/// captured in `parse_primary`. This is a broad over-approximation of "call
40/// sites": it records *every* such term regardless of position — goals, but
41/// also atoms as constants (`X = foo`), atoms inside data (`p(foo, bar)`),
42/// functors in operator specs (`dynamic(foo/1)` records `dynamic`, `foo`,
43/// and `/`), and `[]` as `[]/0`. It never matches text in comments (those
44/// aren't parsed). The LSP narrows this to real calls by intersecting with
45/// the lint's undefined `(name, arity)` set, which keeps the false-positive
46/// surface small in practice.
47#[derive(Debug, Clone)]
48pub struct CallSite {
49 pub functor: AtomId,
50 pub arity: usize,
51 pub span: Span,
52}
53
54/// Parser for Edinburgh Prolog syntax.
55/// Parses tokens into Terms and Clauses, with variable scoping per clause.
56pub struct Parser<'a> {
57 tokens: Vec<Token>,
58 pos: usize,
59 interner: &'a mut StringInterner,
60 var_map: HashMap<String, VarId>,
61 next_var: VarId,
62 /// Atom-functor term occurrences, accumulated across the whole program
63 /// (not reset per clause — the LSP wants every buffer occurrence).
64 call_sites: Vec<CallSite>,
65 /// File id stamped on spans produced for the codegen path (SPANS.md
66 /// Layer 3). Default `0`; set per source by `parse_program_cg`.
67 file_id: plg_shared::FileId,
68}
69
70impl<'a> Parser<'a> {
71 /// Build a parser over already-tokenized input.
72 fn from_tokens(tokens: Vec<Token>, interner: &'a mut StringInterner) -> Self {
73 Parser {
74 tokens,
75 pos: 0,
76 interner,
77 var_map: HashMap::new(),
78 next_var: 0,
79 call_sites: Vec::new(),
80 file_id: 0,
81 }
82 }
83
84 /// Record an atom-functor term occurrence (see [`CallSite`]).
85 fn record_call_site(&mut self, functor: AtomId, arity: usize, span: Span) {
86 self.call_sites.push(CallSite {
87 functor,
88 arity,
89 span,
90 });
91 }
92
93 fn reset_vars(&mut self) {
94 self.var_map.clear();
95 self.next_var = 0;
96 }
97
98 fn current(&self) -> Option<&Token> {
99 self.tokens.get(self.pos)
100 }
101
102 fn current_kind(&self) -> Option<&TokenKind> {
103 self.current().map(|t| &t.kind)
104 }
105
106 fn at_eof(&self) -> bool {
107 matches!(self.current_kind(), None | Some(TokenKind::Eof))
108 }
109
110 fn advance(&mut self) -> &Token {
111 let tok = &self.tokens[self.pos];
112 self.pos += 1;
113 tok
114 }
115
116 /// Span of the current token, or a point at end-of-input if exhausted.
117 /// All parser errors point "here" — the position the parser stalled at.
118 fn here_span(&self) -> Span {
119 match self.current() {
120 Some(t) => Span::new(0, t.lo, t.hi),
121 None => self.eof_span(),
122 }
123 }
124
125 /// A point span at end of input (the `Eof` token's offset).
126 fn eof_span(&self) -> Span {
127 let off = self.tokens.last().map(|t| t.hi).unwrap_or(0);
128 Span::point(0, off)
129 }
130
131 /// Build a `ParseError` pointing at the current token.
132 fn error_here(&self, message: impl Into<String>) -> ParseError {
133 ParseError::new(message, self.here_span())
134 }
135
136 fn expect(&mut self, kind: &TokenKind) -> Result<(), ParseError> {
137 match self.current() {
138 Some(tok) if &tok.kind == kind => {
139 self.advance();
140 Ok(())
141 }
142 Some(tok) => {
143 let msg = format!("expected {}, got {}", kind, tok.kind);
144 Err(self.error_here(msg))
145 }
146 None => Err(self.error_here(format!("expected {kind}, got end of input"))),
147 }
148 }
149
150 /// Get the variable name map (for extracting query variable names in results).
151 pub fn var_names(&self) -> &HashMap<String, VarId> {
152 &self.var_map
153 }
154}