Skip to main content

patch_prolog_frontend/parser/
query.rs

1//! Program / query entry points and goal-list (conjunction / disjunction)
2//! parsing. Ported from patch-prolog's `parser.rs`.
3
4use super::{Parser, ProgramDirectives};
5use crate::parse_error::ParseError;
6use crate::tokenizer::{TokenKind, Tokenizer};
7use plg_shared::{Clause, StringInterner, Term, VarId};
8use std::collections::HashMap;
9
10impl<'a> Parser<'a> {
11    /// Parse a complete program (multiple clauses) from source text.
12    /// Directives (`:- ...`) are recognized and skipped — use
13    /// `parse_program_with_directives` to capture them.
14    pub fn parse_program(
15        input: &str,
16        interner: &mut StringInterner,
17    ) -> Result<Vec<Clause>, ParseError> {
18        let (clauses, _) = Self::parse_program_with_directives(input, interner)?;
19        Ok(clauses)
20    }
21
22    /// Parse a complete program, returning both clauses and any directives
23    /// (currently `:- dynamic(F/A).`). The compile pipeline uses this so the
24    /// directive information reaches the database.
25    pub fn parse_program_with_directives(
26        input: &str,
27        interner: &mut StringInterner,
28    ) -> Result<(Vec<Clause>, ProgramDirectives), ParseError> {
29        let tokens = Tokenizer::tokenize(input)?;
30        let mut parser = Parser::from_tokens(tokens, interner);
31        parser.parse_program_body()
32    }
33
34    /// Like `parse_program_with_directives`, but also returns the atom-functor
35    /// call-site occurrences (see [`super::CallSite`]) for the LSP to map
36    /// undefined-predicate warnings onto precise source ranges.
37    pub fn parse_program_with_spans(
38        input: &str,
39        interner: &mut StringInterner,
40    ) -> Result<(Vec<Clause>, ProgramDirectives, Vec<super::CallSite>), ParseError> {
41        let tokens = Tokenizer::tokenize(input)?;
42        let mut parser = Parser::from_tokens(tokens, interner);
43        let (clauses, directives) = parser.parse_program_body()?;
44        Ok((clauses, directives, parser.call_sites))
45    }
46
47    /// Shared program-parsing loop. Clauses are collected; `:- ...` directives
48    /// are interpreted into `directives`.
49    fn parse_program_body(&mut self) -> Result<(Vec<Clause>, ProgramDirectives), ParseError> {
50        let mut clauses = Vec::new();
51        let mut directives = ProgramDirectives::default();
52        while !self.at_eof() {
53            self.reset_vars();
54            if self.current_kind() == Some(&TokenKind::Neck) {
55                self.advance();
56                let body = self.parse_term()?;
57                self.expect(&TokenKind::Dot)?;
58                self.process_directive(body, &mut directives)?;
59            } else {
60                clauses.push(self.parse_clause()?);
61            }
62        }
63        Ok((clauses, directives))
64    }
65
66    /// Parse a single query (goal list) from source text, e.g. "parent(tom, X)".
67    /// Does NOT require a trailing dot.
68    pub fn parse_query(
69        input: &str,
70        interner: &mut StringInterner,
71    ) -> Result<Vec<Term>, ParseError> {
72        let tokens = Tokenizer::tokenize(input)?;
73        let mut parser = Parser::from_tokens(tokens, interner);
74        // Skip optional ?- prefix
75        if parser.current_kind() == Some(&TokenKind::QueryOp) {
76            parser.advance();
77        }
78        let goals = parser.parse_goal_list()?;
79        // Allow optional trailing dot
80        if parser.current_kind() == Some(&TokenKind::Dot) {
81            parser.advance();
82        }
83        // Issue #30: the whole input must be consumed. Without this, a query
84        // like `member(X,[1,2,3]) zzz` would silently drop the trailing tokens
85        // and report success. The "after query" phrasing distinguishes this
86        // from mid-expression parse errors.
87        if !parser.at_eof() {
88            let pos = parser.current().unwrap();
89            let msg = format!("unexpected {} after query", pos.kind);
90            return Err(parser.error_here(msg));
91        }
92        Ok(goals)
93    }
94
95    /// Parse a query and also return the variable name mapping.
96    pub fn parse_query_with_vars(
97        input: &str,
98        interner: &mut StringInterner,
99    ) -> Result<(Vec<Term>, HashMap<String, VarId>), ParseError> {
100        let tokens = Tokenizer::tokenize(input)?;
101        let mut parser = Parser::from_tokens(tokens, interner);
102        if parser.current_kind() == Some(&TokenKind::QueryOp) {
103            parser.advance();
104        }
105        let goals = parser.parse_goal_list()?;
106        if parser.current_kind() == Some(&TokenKind::Dot) {
107            parser.advance();
108        }
109        // Issue #30 — see `parse_query` for rationale.
110        if !parser.at_eof() {
111            let pos = parser.current().unwrap();
112            let msg = format!("unexpected {} after query", pos.kind);
113            return Err(parser.error_here(msg));
114        }
115        let vars = parser.var_map;
116        Ok((goals, vars))
117    }
118
119    pub(super) fn parse_goal_list(&mut self) -> Result<Vec<Term>, ParseError> {
120        // Parse the entire body as a conjunction/disjunction tree.
121        // The solver flattens ','(a, b) via BuiltinResult::Conjunction.
122        let body = self.parse_goal_disjunction()?;
123        Ok(vec![body])
124    }
125
126    /// Parse disjunction (;) — ISO precedence 1100, looser than comma (1000).
127    pub(super) fn parse_goal_disjunction(&mut self) -> Result<Term, ParseError> {
128        let left = self.parse_goal_conjunction()?;
129        if self.current_kind() == Some(&TokenKind::Semicolon) {
130            self.advance();
131            let right = self.parse_goal_disjunction()?;
132            let functor = self.interner.intern(";");
133            Ok(Term::Compound {
134                functor,
135                args: vec![left, right],
136            })
137        } else {
138            Ok(left)
139        }
140    }
141
142    /// Parse conjunction (,) — ISO precedence 1000, tighter than semicolon.
143    fn parse_goal_conjunction(&mut self) -> Result<Term, ParseError> {
144        let first = self.parse_term()?;
145        if self.current_kind() == Some(&TokenKind::Comma) {
146            let mut goals = vec![first];
147            while self.current_kind() == Some(&TokenKind::Comma) {
148                self.advance();
149                goals.push(self.parse_term()?);
150            }
151            // Build right-associative conjunction: a, b, c → ','(a, ','(b, c))
152            let comma = self.interner.intern(",");
153            let mut result = goals.pop().unwrap();
154            while let Some(g) = goals.pop() {
155                result = Term::Compound {
156                    functor: comma,
157                    args: vec![g, result],
158                };
159            }
160            Ok(result)
161        } else {
162            Ok(first)
163        }
164    }
165}
166
167#[cfg(test)]
168mod tests;