Skip to main content

patch_prolog_frontend/parser/
cg.rs

//! Codegen-path program parsing: like `parse_program_with_directives`, but
//! each clause body is split into its top-level conjuncts, each carrying a
//! source `Span` (SPANS.md Layer 3). Codegen threads those spans to raising
//! call sites so runtime errors can name `file:line:col`.
//!
//! Granularity: top-level comma-separated goals get precise spans; a goal
//! nested inside a top-level `;`/`->`/`\+` (or a parenthesized conjunction)
//! inherits its enclosing conjunct's span — coarser, but present. A
//! top-level `;` makes the whole body a single spanned goal.
10
11use super::{Parser, ProgramDirectives};
12use crate::parse_error::ParseError;
13use crate::tokenizer::{TokenKind, Tokenizer};
14use plg_shared::{Span, Spanned, StringInterner, Term};
15
16/// A clause as codegen consumes it: a plain head term plus body goals whose
17/// source spans survive lowering. (Provenance for the head is not needed —
18/// existence errors point at the call site, not the definition.)
19#[derive(Clone)]
20pub struct CgClause {
21    pub head: Term,
22    pub body: Vec<Spanned<Term>>,
23}
24
25impl<'a> Parser<'a> {
26    /// Parse a complete program for codegen, stamping `file_id` on every body
27    /// goal span (SPANS.md Layer 3). Mirrors `parse_program_with_directives`;
28    /// only the body representation differs.
29    pub fn parse_program_cg(
30        input: &str,
31        interner: &'a mut StringInterner,
32        file_id: plg_shared::FileId,
33    ) -> Result<(Vec<CgClause>, ProgramDirectives), ParseError> {
34        let tokens = Tokenizer::tokenize(input)?;
35        let mut parser = Parser::from_tokens(tokens, interner);
36        parser.file_id = file_id;
37        parser.parse_program_cg_body()
38    }
39}
40
41impl Parser<'_> {
42    fn parse_program_cg_body(&mut self) -> Result<(Vec<CgClause>, ProgramDirectives), ParseError> {
43        let mut clauses = Vec::new();
44        let mut directives = ProgramDirectives::default();
45        while !self.at_eof() {
46            self.reset_vars();
47            if self.current_kind() == Some(&TokenKind::Neck) {
48                self.advance();
49                let body = self.parse_term()?;
50                self.expect(&TokenKind::Dot)?;
51                self.process_directive(body, &mut directives)?;
52            } else {
53                clauses.push(self.parse_clause_cg()?);
54            }
55        }
56        Ok((clauses, directives))
57    }
58
59    fn parse_clause_cg(&mut self) -> Result<CgClause, ParseError> {
60        let head = self.parse_term()?;
61        match self.current_kind() {
62            Some(TokenKind::Dot) => {
63                self.advance();
64                Ok(CgClause { head, body: vec![] })
65            }
66            Some(TokenKind::Neck) => {
67                self.advance();
68                let body = self.parse_body_conjuncts()?;
69                self.expect(&TokenKind::Dot)?;
70                Ok(CgClause { head, body })
71            }
72            Some(tok) => {
73                let msg = format!("expected `.` or `:-`, got {tok}");
74                Err(self.error_here(msg))
75            }
76            None => Err(self.error_here("Unexpected end of input in clause")),
77        }
78    }
79
80    /// Body as top-level conjuncts with spans. A top-level `;` (looser than
81    /// `,`) collapses the whole body to one spanned goal.
82    fn parse_body_conjuncts(&mut self) -> Result<Vec<Spanned<Term>>, ParseError> {
83        let body_lo = self.here_lo();
84        let mut conjuncts = Vec::new();
85        self.push_conjunct(&mut conjuncts)?;
86        while self.current_kind() == Some(&TokenKind::Comma) {
87            self.advance();
88            self.push_conjunct(&mut conjuncts)?;
89        }
90        if self.current_kind() == Some(&TokenKind::Semicolon) {
91            // Top-level disjunction: rebuild what we parsed as the left arm,
92            // parse the rest, and return the whole `;` as a single goal.
93            let comma = self.interner.intern(",");
94            let left = rebuild_conjunction(conjuncts, comma);
95            self.advance();
96            let right = self.parse_goal_disjunction()?;
97            let semi = self.interner.intern(";");
98            let hi = self.prev_hi();
99            let whole = Term::Compound {
100                functor: semi,
101                args: vec![left, right],
102            };
103            return Ok(vec![Spanned::new(
104                whole,
105                Span::new(self.file_id, body_lo, hi),
106            )]);
107        }
108        Ok(conjuncts)
109    }
110
111    fn push_conjunct(&mut self, out: &mut Vec<Spanned<Term>>) -> Result<(), ParseError> {
112        let lo = self.here_lo();
113        let t = self.parse_term()?;
114        let hi = self.prev_hi();
115        out.push(Spanned::new(t, Span::new(self.file_id, lo, hi)));
116        Ok(())
117    }
118
119    /// Byte offset of the current token's start (or end-of-input).
120    fn here_lo(&self) -> u32 {
121        self.here_span().lo
122    }
123
124    /// Byte offset just past the most recently consumed token.
125    fn prev_hi(&self) -> u32 {
126        self.pos
127            .checked_sub(1)
128            .and_then(|i| self.tokens.get(i))
129            .map(|t| t.hi)
130            .unwrap_or(0)
131    }
132}
133
134/// Right-associate a goal list back into a `,`-tree: `[a, b, c]` →
135/// `','(a, ','(b, c))`. Inverse of the conjunct flatten.
136fn rebuild_conjunction(mut goals: Vec<Spanned<Term>>, comma: plg_shared::AtomId) -> Term {
137    let mut acc = goals.pop().expect("at least one conjunct").node;
138    while let Some(g) = goals.pop() {
139        acc = Term::Compound {
140            functor: comma,
141            args: vec![g.node, acc],
142        };
143    }
144    acc
145}