Skip to main content

pepl_parser/
parser.rs

//! Core parser infrastructure: token cursor, error reporting, helpers.
2
3use pepl_lexer::token::{Token, TokenKind};
4use pepl_types::{CompileErrors, ErrorCode, PeplError, SourceFile, Span};
5
6/// The PEPL parser.
7///
8/// Consumes a token stream produced by the lexer and builds an AST.
9/// Collects errors and attempts recovery when possible.
10pub struct Parser<'src> {
11    /// The token stream.
12    tokens: Vec<Token>,
13    /// Current index into `tokens`.
14    pos: usize,
15    /// Source file for error context.
16    source_file: &'src SourceFile,
17    /// File name for error messages.
18    file_name: String,
19    /// Collected errors.
20    errors: CompileErrors,
21    /// Current lambda nesting depth (max 3).
22    pub(crate) lambda_depth: u32,
23    /// Current record literal nesting depth (max 4).
24    pub(crate) record_depth: u32,
25    /// Current expression nesting depth (max 16).
26    pub(crate) expr_depth: u32,
27    /// Current for-loop nesting depth (max 3).
28    pub(crate) for_depth: u32,
29}
30
31/// Result of parsing.
32pub struct ParseResult {
33    pub program: Option<pepl_types::ast::Program>,
34    pub errors: CompileErrors,
35}
36
37impl<'src> Parser<'src> {
38    /// Create a new parser from a token stream and source file.
39    pub fn new(tokens: Vec<Token>, source_file: &'src SourceFile) -> Self {
40        Self {
41            tokens,
42            pos: 0,
43            file_name: source_file.name.clone(),
44            source_file,
45            errors: CompileErrors::empty(),
46            lambda_depth: 0,
47            record_depth: 0,
48            expr_depth: 0,
49            for_depth: 0,
50        }
51    }
52
53    // ── Token Cursor ──────────────────────────────────────────────────────────
54
55    /// Returns the current token without advancing.
56    pub(crate) fn peek(&self) -> &Token {
57        self.tokens.get(self.pos).unwrap_or_else(|| {
58            self.tokens
59                .last()
60                .expect("token stream should end with Eof")
61        })
62    }
63
64    /// Returns the kind of the current token.
65    pub(crate) fn peek_kind(&self) -> &TokenKind {
66        &self.peek().kind
67    }
68
69    /// Advance the cursor by one and return the consumed token.
70    pub(crate) fn advance(&mut self) -> Token {
71        let token = self.peek().clone();
72        if self.pos < self.tokens.len() {
73            self.pos += 1;
74        }
75        token
76    }
77
78    /// Returns the previously consumed token's span.
79    pub(crate) fn previous_span(&self) -> Span {
80        if self.pos > 0 {
81            self.tokens[self.pos - 1].span
82        } else {
83            Span::point(1, 1)
84        }
85    }
86
87    /// Returns the span of the current token.
88    pub(crate) fn current_span(&self) -> Span {
89        self.peek().span
90    }
91
92    /// Returns `true` if the current token is `Eof`.
93    pub(crate) fn at_end(&self) -> bool {
94        matches!(self.peek_kind(), TokenKind::Eof)
95    }
96
97    /// Check if the current token matches the given kind (by discriminant).
98    #[allow(dead_code)]
99    pub(crate) fn check(&self, kind: &TokenKind) -> bool {
100        std::mem::discriminant(self.peek_kind()) == std::mem::discriminant(kind)
101    }
102
103    /// Check if the current token matches the given kind exactly.
104    pub(crate) fn check_exact(&self, kind: &TokenKind) -> bool {
105        self.peek_kind() == kind
106    }
107
108    /// If the current token matches, advance and return `true`.
109    pub(crate) fn eat(&mut self, kind: &TokenKind) -> bool {
110        if self.check_exact(kind) {
111            self.advance();
112            true
113        } else {
114            false
115        }
116    }
117
118    /// Look ahead by `n` tokens from current position.
119    pub(crate) fn look_ahead(&self, n: usize) -> &TokenKind {
120        let idx = self.pos + n;
121        self.tokens
122            .get(idx)
123            .map(|t| &t.kind)
124            .unwrap_or(&TokenKind::Eof)
125    }
126
127    // ── Newline Handling ──────────────────────────────────────────────────────
128
129    /// Skip all consecutive newline tokens.
130    pub(crate) fn skip_newlines(&mut self) {
131        while self.check_exact(&TokenKind::Newline) {
132            self.advance();
133        }
134    }
135
136    /// Expect a newline or end of file. Reports error if neither.
137    pub(crate) fn expect_newline_or_eof(&mut self) {
138        if self.at_end() {
139            return;
140        }
141        if self.check_exact(&TokenKind::Newline) {
142            self.advance();
143            self.skip_newlines();
144        } else if !self.check_exact(&TokenKind::RBrace) {
145            // RBrace is acceptable — the closing brace ends the block
146            self.error_at_current(
147                ErrorCode::UNEXPECTED_TOKEN,
148                format!("expected newline, got '{}'", self.peek_kind()),
149            );
150        }
151    }
152
153    // ── Expect Helpers ────────────────────────────────────────────────────────
154
155    /// Expect a specific token kind. Returns the token if matched, or emits an error.
156    pub(crate) fn expect(&mut self, expected: &TokenKind) -> Option<Token> {
157        if self.check_exact(expected) {
158            Some(self.advance())
159        } else {
160            self.error_at_current(
161                ErrorCode::UNEXPECTED_TOKEN,
162                format!("expected '{}', got '{}'", expected, self.peek_kind()),
163            );
164            None
165        }
166    }
167
168    /// Expect an identifier token. Returns the name and span.
169    pub(crate) fn expect_identifier(&mut self) -> Option<pepl_types::ast::Ident> {
170        match self.peek_kind().clone() {
171            TokenKind::Identifier(name) => {
172                let span = self.advance().span;
173                Some(pepl_types::ast::Ident::new(name, span))
174            }
175            _ => {
176                self.error_at_current(
177                    ErrorCode::UNEXPECTED_TOKEN,
178                    format!("expected identifier, got '{}'", self.peek_kind()),
179                );
180                None
181            }
182        }
183    }
184
185    /// Expect an identifier OR any keyword used as a record field name.
186    ///
187    /// Keywords are contextually valid as field names in:
188    /// - Record type fields: `{ color: string }`
189    /// - Record literal fields: `{ color: "#ff0000" }`
190    /// - State/derived field declarations: `color: string = "#000"`
191    /// - `set` path segments after `.`: `set theme.color = "#fff"`
192    pub(crate) fn expect_field_name(&mut self) -> Option<pepl_types::ast::Ident> {
193        let kind = self.peek_kind().clone();
194        match &kind {
195            TokenKind::Identifier(name) => {
196                let name = name.clone();
197                let span = self.advance().span;
198                Some(pepl_types::ast::Ident::new(name, span))
199            }
200            _ if kind.is_keyword() => {
201                let name = kind.to_string();
202                let span = self.advance().span;
203                Some(pepl_types::ast::Ident::new(name, span))
204            }
205            _ => {
206                self.error_at_current(
207                    ErrorCode::UNEXPECTED_TOKEN,
208                    format!("expected field name, got '{}'", self.peek_kind()),
209                );
210                None
211            }
212        }
213    }
214
215    /// Expect an identifier OR a module/capability keyword used as an identifier.
216    /// This handles cases like `record.get(...)` where `record` is a keyword but
217    /// used as a module name in qualified calls.
218    pub(crate) fn expect_ident_or_module_name(&mut self) -> Option<pepl_types::ast::Ident> {
219        let kind = self.peek_kind().clone();
220        match &kind {
221            TokenKind::Identifier(name) => {
222                let name = name.clone();
223                let span = self.advance().span;
224                Some(pepl_types::ast::Ident::new(name, span))
225            }
226            // Module names used as identifiers in qualified calls
227            TokenKind::Core
228            | TokenKind::Math
229            | TokenKind::Record
230            | TokenKind::Time
231            | TokenKind::Convert
232            | TokenKind::Json
233            | TokenKind::Timer
234            | TokenKind::Http
235            | TokenKind::Storage
236            | TokenKind::Location
237            | TokenKind::Notifications
238            | TokenKind::Clipboard
239            | TokenKind::Share
240            // Type keywords that are also module prefixes
241            | TokenKind::KwString
242            | TokenKind::KwList
243            | TokenKind::KwColor => {
244                let name = kind.to_string();
245                let span = self.advance().span;
246                Some(pepl_types::ast::Ident::new(name, span))
247            }
248            _ => {
249                self.error_at_current(
250                    ErrorCode::UNEXPECTED_TOKEN,
251                    format!("expected identifier, got '{}'", self.peek_kind()),
252                );
253                None
254            }
255        }
256    }
257
258    /// Expect a string literal token. Returns the string value.
259    pub(crate) fn expect_string_literal(&mut self) -> Option<String> {
260        match self.peek_kind().clone() {
261            TokenKind::StringLiteral(s) => {
262                self.advance();
263                Some(s)
264            }
265            _ => {
266                self.error_at_current(
267                    ErrorCode::UNEXPECTED_TOKEN,
268                    format!("expected string literal, got '{}'", self.peek_kind()),
269                );
270                None
271            }
272        }
273    }
274
275    /// Expect an upper-case identifier (component name or type name).
276    pub(crate) fn expect_upper_identifier(&mut self) -> Option<pepl_types::ast::Ident> {
277        match self.peek_kind().clone() {
278            TokenKind::Identifier(ref name)
279                if name.starts_with(|c: char| c.is_ascii_uppercase()) =>
280            {
281                let name = name.clone();
282                let span = self.advance().span;
283                Some(pepl_types::ast::Ident::new(name, span))
284            }
285            _ => {
286                self.error_at_current(
287                    ErrorCode::UNEXPECTED_TOKEN,
288                    format!("expected PascalCase identifier, got '{}'", self.peek_kind()),
289                );
290                None
291            }
292        }
293    }
294
295    /// Eat an optional trailing comma.
296    pub(crate) fn eat_comma(&mut self) -> bool {
297        self.eat(&TokenKind::Comma)
298    }
299
300    /// Expect an identifier or a keyword that can serve as a field/function name
301    /// after `.` (e.g., `list.set(...)` where `set` is a keyword).
302    pub(crate) fn expect_member_name(&mut self) -> Option<pepl_types::ast::Ident> {
303        let kind = self.peek_kind().clone();
304        match &kind {
305            TokenKind::Identifier(name) => {
306                let name = name.clone();
307                let span = self.advance().span;
308                Some(pepl_types::ast::Ident::new(name, span))
309            }
310            // Keywords that can appear as function/field names after `.`
311            TokenKind::Set => {
312                let span = self.advance().span;
313                Some(pepl_types::ast::Ident::new("set", span))
314            }
315            TokenKind::Type => {
316                let span = self.advance().span;
317                Some(pepl_types::ast::Ident::new("type", span))
318            }
319            TokenKind::Match => {
320                let span = self.advance().span;
321                Some(pepl_types::ast::Ident::new("match", span))
322            }
323            TokenKind::Update => {
324                let span = self.advance().span;
325                Some(pepl_types::ast::Ident::new("update", span))
326            }
327            _ => {
328                self.error_at_current(
329                    ErrorCode::UNEXPECTED_TOKEN,
330                    format!("expected identifier, got '{}'", self.peek_kind()),
331                );
332                None
333            }
334        }
335    }
336
337    // ── Error Reporting ───────────────────────────────────────────────────────
338
339    /// Report an error at the current token position.
340    pub(crate) fn error_at_current(&mut self, code: ErrorCode, message: impl Into<String>) {
341        let span = self.current_span();
342        self.error_at(code, message, span);
343    }
344
345    /// Report an error at a specific span.
346    pub(crate) fn error_at(&mut self, code: ErrorCode, message: impl Into<String>, span: Span) {
347        let source_line = self
348            .source_file
349            .line(span.start_line)
350            .unwrap_or("")
351            .to_string();
352        let error = PeplError::new(&self.file_name, code, message, span, source_line);
353        self.errors.push_error(error);
354    }
355
356    /// Returns `true` if we've hit the error limit and should stop.
357    pub(crate) fn too_many_errors(&self) -> bool {
358        self.errors.has_errors() && self.errors.total_errors >= pepl_types::MAX_ERRORS
359    }
360
361    // ── Synchronization ───────────────────────────────────────────────────────
362
363    /// Skip tokens until we reach a synchronization point.
364    /// Used after an error to resume at a known-good position.
365    pub(crate) fn synchronize(&mut self) {
366        while !self.at_end() {
367            // Stop at newline — each statement starts on a new line
368            if self.check_exact(&TokenKind::Newline) {
369                self.advance();
370                self.skip_newlines();
371                return;
372            }
373            // Stop at block-level keywords
374            match self.peek_kind() {
375                TokenKind::Space
376                | TokenKind::State
377                | TokenKind::Action
378                | TokenKind::View
379                | TokenKind::Set
380                | TokenKind::Let
381                | TokenKind::If
382                | TokenKind::For
383                | TokenKind::Match
384                | TokenKind::Return
385                | TokenKind::Invariant
386                | TokenKind::Capabilities
387                | TokenKind::Credentials
388                | TokenKind::Derived
389                | TokenKind::Tests
390                | TokenKind::Test
391                | TokenKind::Assert
392                | TokenKind::Type
393                | TokenKind::Update
394                | TokenKind::HandleEvent
395                | TokenKind::RBrace => return,
396                _ => {
397                    self.advance();
398                }
399            }
400        }
401    }
402
403    // ── Public API ────────────────────────────────────────────────────────────
404
405    /// Parse the token stream into a `Program` AST.
406    pub fn parse(mut self) -> ParseResult {
407        self.skip_newlines();
408        let program = self.parse_program();
409        ParseResult {
410            program,
411            errors: self.errors,
412        }
413    }
414}