Skip to main content

elm_ast/parse/
mod.rs

1pub mod declaration;
2pub mod expr;
3pub mod module;
4pub mod pattern;
5pub mod type_annotation;
6
7use crate::comment::Comment;
8use crate::node::Spanned;
9use crate::span::{Position, Span};
10use crate::token::Token;
11
/// A parse error with source location.
///
/// Also used to carry lexer errors out of [`parse`] and
/// [`parse_recovering`], which convert each lexer error into this type.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Source span the error is reported at. `Display` prints the line and
    /// column of its start.
    pub span: Span,
}
18
19impl std::fmt::Display for ParseError {
20    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
21        write!(
22            f,
23            "{}:{}: {}",
24            self.span.start.line, self.span.start.column, self.message
25        )
26    }
27}
28
// No methods are overridden: the `Error` trait's defaults are used, with
// `Display`/`Debug` supplying the text.
impl std::error::Error for ParseError {}
30
/// Result alias used by the parsing routines: `Ok(T)` or a single
/// [`ParseError`].
pub type ParseResult<T> = Result<T, ParseError>;
32
/// Maximum expression nesting depth. Limits the size of the continuation
/// stack in the iterative (CPS/trampoline) expression parser to prevent
/// pathological input from consuming unbounded heap memory. Real Elm files
/// rarely exceed 10–15 levels. Set high because the iterative parser has
/// no stack-overflow risk — this is purely a resource-usage guard.
pub(crate) const MAX_EXPR_DEPTH: usize = 256;

/// The parser. A cursor over a stream of spanned tokens.
///
/// The parser follows elm/compiler's approach to indentation: it tracks
/// indentation context using token column positions rather than virtual
/// INDENT/DEDENT tokens.
pub struct Parser {
    /// The token stream being parsed. The cursor methods clamp to the last
    /// element, so the stream is expected to be non-empty (presumably it
    /// always ends with `Token::Eof` — confirm against the lexer).
    tokens: Vec<Spanned<Token>>,
    /// Index into `tokens` of the current (not yet consumed) token.
    pos: usize,
    /// Nesting depth of parentheses/brackets/braces. When > 0,
    /// indentation-sensitive layout rules are suspended (any column is valid).
    /// This matches the elm/compiler behavior.
    paren_depth: u32,
    /// When set, `application_loop` uses this column instead of the function's
    /// column for continuation checks. Set by list/record parsers (to the
    /// opening bracket's column) so that function arguments at any column past
    /// the bracket are collected. Cleared by case/let parsers before parsing
    /// branch/declaration bodies so that normal column checking resumes.
    pub(crate) app_context_col: Option<u32>,
    /// Comments collected as a side-channel during parsing.
    /// `skip_whitespace` saves comments here instead of silently discarding them,
    /// so that `parse_module` can include them in the final AST.
    collected_comments: Vec<Spanned<Comment>>,
}
63
64impl Parser {
65    /// Create a parser from a token stream (as produced by the lexer).
66    pub fn new(tokens: Vec<Spanned<Token>>) -> Self {
67        Self {
68            tokens,
69            pos: 0,
70            paren_depth: 0,
71            app_context_col: None,
72            collected_comments: Vec::new(),
73        }
74    }
75
76    /// Drain all comments collected so far by `skip_whitespace`.
77    pub fn drain_comments(&mut self) -> Vec<Spanned<Comment>> {
78        std::mem::take(&mut self.collected_comments)
79    }
80
81    /// Take all comments collected since the last take,
82    /// returning them for attachment to an AST node.
83    pub fn take_pending_comments(&mut self) -> Vec<Spanned<Comment>> {
84        std::mem::take(&mut self.collected_comments)
85    }
86
87    /// Snapshot the current number of pending comments. Pair with
88    /// `take_pending_comments_since` to take only comments collected
89    /// after the snapshot, preserving earlier comments.
90    pub fn pending_comments_snapshot(&self) -> usize {
91        self.collected_comments.len()
92    }
93
94    /// Take pending comments collected after the given snapshot, leaving
95    /// earlier comments in place so they remain available for later attachment.
96    pub fn take_pending_comments_since(&mut self, snapshot: usize) -> Vec<Spanned<Comment>> {
97        if snapshot >= self.collected_comments.len() {
98            return Vec::new();
99        }
100        self.collected_comments.split_off(snapshot)
101    }
102
103    /// Put comments back into the pending buffer so a later stage can claim
104    /// them. Callers use this to return comments that turned out not to
105    /// belong to the node they were attached to.
106    pub fn restore_pending_comments(&mut self, comments: Vec<Spanned<Comment>>) {
107        self.collected_comments.extend(comments);
108    }
109
110    /// Returns true if currently inside parens/brackets/braces.
111    /// When true, indentation-sensitive layout rules are suspended.
112    pub fn in_paren_context(&self) -> bool {
113        self.paren_depth > 0
114    }
115
116    // ── Position & peeking ───────────────────────────────────────────
117
118    /// The current token (without advancing).
119    pub fn current(&self) -> &Spanned<Token> {
120        &self.tokens[self.pos.min(self.tokens.len() - 1)]
121    }
122
123    /// Peek at the current token value.
124    pub fn peek(&self) -> &Token {
125        &self.current().value
126    }
127
128    /// Peek at the current token's span.
129    pub fn peek_span(&self) -> Span {
130        self.current().span
131    }
132
133    /// The current position in source.
134    pub fn current_pos(&self) -> Position {
135        self.current().span.start
136    }
137
138    /// The column of the current token (1-based).
139    pub fn current_column(&self) -> u32 {
140        self.current().span.start.column
141    }
142
143    /// Check if we've reached Eof.
144    pub fn is_eof(&self) -> bool {
145        matches!(self.peek(), Token::Eof)
146    }
147
148    /// End-offset of the most recently consumed token (or 0 if nothing has
149    /// been consumed). Useful as a lower boundary for claiming pending
150    /// comments that sit between a just-consumed separator (like `|` or
151    /// `{`) and the next token.
152    pub fn prev_token_end_offset(&self) -> usize {
153        if self.pos == 0 {
154            0
155        } else {
156            self.tokens[self.pos - 1].span.end.offset
157        }
158    }
159
160    /// Peek at the raw token immediately after the current position,
161    /// without skipping whitespace. Returns `Token::Eof` if past the end.
162    pub fn peek_raw_next(&self) -> &Spanned<Token> {
163        let i = self.pos + 1;
164        if i < self.tokens.len() {
165            &self.tokens[i]
166        } else {
167            &self.tokens[self.tokens.len() - 1]
168        }
169    }
170
171    // ── Advancing ────────────────────────────────────────────────────
172
173    /// Advance past the current token and return it.
174    /// Automatically tracks paren/bracket/brace nesting depth.
175    pub fn advance(&mut self) -> Spanned<Token> {
176        let tok = self.tokens[self.pos.min(self.tokens.len() - 1)].clone();
177        // Track paren depth for indentation-context suspension.
178        match &tok.value {
179            Token::LeftParen | Token::LeftBracket | Token::LeftBrace => {
180                self.paren_depth += 1;
181            }
182            Token::RightParen | Token::RightBracket | Token::RightBrace => {
183                self.paren_depth = self.paren_depth.saturating_sub(1);
184            }
185            _ => {}
186        }
187        if self.pos < self.tokens.len() - 1 {
188            self.pos += 1;
189        }
190        tok
191    }
192
193    /// Skip over newline, line comment, block comment, and doc comment tokens.
194    /// Line and block comments are saved to `collected_comments` for round-tripping.
195    pub fn skip_whitespace(&mut self) {
196        while matches!(
197            self.peek(),
198            Token::Newline | Token::LineComment(_) | Token::BlockComment(_)
199        ) {
200            let Spanned { span, value, .. } = self.advance();
201            match value {
202                Token::LineComment(text) => {
203                    self.collected_comments
204                        .push(Spanned::new(span, Comment::Line(text)));
205                }
206                Token::BlockComment(text) => {
207                    self.collected_comments
208                        .push(Spanned::new(span, Comment::Block(text)));
209                }
210                _ => {} // Newline
211            }
212        }
213    }
214
215    /// Skip whitespace and comments, saving line/block comments, but stop
216    /// before consuming a `DocComment` token. Used by `try_doc_comment` so
217    /// it can see the doc comment after skipping preceding whitespace.
218    pub fn skip_whitespace_before_doc(&mut self) {
219        while matches!(
220            self.peek(),
221            Token::Newline | Token::LineComment(_) | Token::BlockComment(_)
222        ) {
223            let Spanned { span, value, .. } = self.advance();
224            match value {
225                Token::LineComment(text) => {
226                    self.collected_comments
227                        .push(Spanned::new(span, Comment::Line(text)));
228                }
229                Token::BlockComment(text) => {
230                    self.collected_comments
231                        .push(Spanned::new(span, Comment::Block(text)));
232                }
233                _ => {} // Newline
234            }
235        }
236    }
237
238    /// Skip newlines only (preserve comments for doc comment attachment).
239    pub fn skip_newlines(&mut self) {
240        while matches!(self.peek(), Token::Newline) {
241            self.advance();
242        }
243    }
244
245    // ── Expecting specific tokens ────────────────────────────────────
246
247    /// Consume the current token if it matches, otherwise return an error.
248    pub fn expect(&mut self, expected: &Token) -> ParseResult<Spanned<Token>> {
249        self.skip_whitespace();
250        if self.peek() == expected {
251            Ok(self.advance())
252        } else {
253            Err(self.error(format!(
254                "expected {}, found {}",
255                describe(expected),
256                describe(self.peek())
257            )))
258        }
259    }
260
261    /// Consume a `LowerName` and return the string.
262    pub fn expect_lower_name(&mut self) -> ParseResult<Spanned<String>> {
263        self.skip_whitespace();
264        if matches!(self.peek(), Token::LowerName(_)) {
265            let Spanned { span, value, .. } = self.advance();
266            let Token::LowerName(name) = value else {
267                unreachable!("matched LowerName above")
268            };
269            Ok(Spanned::new(span, name))
270        } else {
271            Err(self.error(format!(
272                "expected lowercase name, found {}",
273                describe(self.peek())
274            )))
275        }
276    }
277
278    /// Consume an `UpperName` and return the string.
279    pub fn expect_upper_name(&mut self) -> ParseResult<Spanned<String>> {
280        self.skip_whitespace();
281        if matches!(self.peek(), Token::UpperName(_)) {
282            let Spanned { span, value, .. } = self.advance();
283            let Token::UpperName(name) = value else {
284                unreachable!("matched UpperName above")
285            };
286            Ok(Spanned::new(span, name))
287        } else {
288            Err(self.error(format!(
289                "expected uppercase name, found {}",
290                describe(self.peek())
291            )))
292        }
293    }
294
295    // ── Lookahead helpers ────────────────────────────────────────────
296
297    /// Check if the current token matches (after skipping whitespace),
298    /// without consuming it.
299    pub fn check(&mut self, expected: &Token) -> bool {
300        self.skip_whitespace();
301        self.peek() == expected
302    }
303
304    /// If the current token matches, consume it and return `true`.
305    pub fn eat(&mut self, expected: &Token) -> bool {
306        self.skip_whitespace();
307        if self.peek() == expected {
308            self.advance();
309            true
310        } else {
311            false
312        }
313    }
314
315    /// Offset of the next non-whitespace token (or the end of input if
316    /// nothing remains). Does not modify parser state.
317    pub fn peek_past_whitespace_offset(&self) -> usize {
318        let mut i = self.pos;
319        while i < self.tokens.len() {
320            match &self.tokens[i].value {
321                Token::Newline
322                | Token::LineComment(_)
323                | Token::BlockComment(_)
324                | Token::DocComment(_) => i += 1,
325                _ => return self.tokens[i].span.start.offset,
326            }
327        }
328        self.tokens.last().map(|t| t.span.end.offset).unwrap_or(0)
329    }
330
331    /// Peek ahead past whitespace, returning the next non-whitespace token
332    /// without consuming anything.
333    pub fn peek_past_whitespace(&self) -> &Token {
334        let mut i = self.pos;
335        while i < self.tokens.len() {
336            match &self.tokens[i].value {
337                Token::Newline
338                | Token::LineComment(_)
339                | Token::BlockComment(_)
340                | Token::DocComment(_) => i += 1,
341                tok => return tok,
342            }
343        }
344        &Token::Eof
345    }
346
347    /// Peek at the token N positions ahead of current (ignoring whitespace).
348    pub fn peek_nth_past_whitespace(&self, n: usize) -> &Token {
349        let mut i = self.pos;
350        let mut count = 0;
351        while i < self.tokens.len() {
352            match &self.tokens[i].value {
353                Token::Newline
354                | Token::LineComment(_)
355                | Token::BlockComment(_)
356                | Token::DocComment(_) => i += 1,
357                tok => {
358                    if count == n {
359                        return tok;
360                    }
361                    count += 1;
362                    i += 1;
363                }
364            }
365        }
366        &Token::Eof
367    }
368
369    // ── Indentation ──────────────────────────────────────────────────
370
371    /// Check if the current token is indented past `min_col`.
372    /// When inside parens/brackets, indentation is always satisfied.
373    pub fn is_indented_past(&mut self, min_col: u32) -> bool {
374        self.skip_newlines();
375        !self.is_eof() && (self.in_paren_context() || self.current_column() > min_col)
376    }
377
378    /// Check if the current token is at or past `min_col`.
379    /// When inside parens/brackets, indentation is always satisfied.
380    pub fn is_at_or_past(&mut self, min_col: u32) -> bool {
381        self.skip_newlines();
382        !self.is_eof() && (self.in_paren_context() || self.current_column() >= min_col)
383    }
384
385    // ── Collecting a doc comment ─────────────────────────────────────
386
387    /// If the current token is a doc comment, consume and return it.
388    pub fn try_doc_comment(&mut self) -> Option<Spanned<String>> {
389        self.skip_whitespace_before_doc();
390        if matches!(self.peek(), Token::DocComment(_)) {
391            let Spanned { span, value, .. } = self.advance();
392            let Token::DocComment(text) = value else {
393                unreachable!("matched DocComment above")
394            };
395            Some(Spanned::new(span, text))
396        } else {
397            None
398        }
399    }
400
401    // ── Error construction ───────────────────────────────────────────
402
403    pub fn error(&self, message: impl Into<String>) -> ParseError {
404        ParseError {
405            message: message.into(),
406            span: self.peek_span(),
407        }
408    }
409
410    pub fn error_at(&self, span: Span, message: impl Into<String>) -> ParseError {
411        ParseError {
412            message: message.into(),
413            span,
414        }
415    }
416
417    // ── Span helpers ─────────────────────────────────────────────────
418
419    /// Create a span from `start` to the end of the previously consumed
420    /// token, skipping back past any trailing whitespace or comment tokens.
421    ///
422    /// This keeps declaration/expression spans tight (ending at the last
423    /// meaningful token) even when the parser has peeked past trailing
424    /// newlines and comments while searching for a continuation.
425    pub fn span_from(&self, start: Position) -> Span {
426        let mut i = self.pos;
427        while i > 0
428            && matches!(
429                self.tokens[i - 1].value,
430                Token::Newline
431                    | Token::LineComment(_)
432                    | Token::BlockComment(_)
433                    | Token::DocComment(_)
434            )
435        {
436            i -= 1;
437        }
438        let end = if i > 0 {
439            self.tokens[i - 1].span.end
440        } else {
441            start
442        };
443        Span::new(start, end)
444    }
445
446    /// Wrap a value with a span from `start` to the last consumed token.
447    pub fn spanned_from<T>(&self, start: Position, value: T) -> Spanned<T> {
448        Spanned::new(self.span_from(start), value)
449    }
450
451    /// Skip tokens until we reach the start of a new top-level declaration
452    /// or end of file. Used for error recovery.
453    ///
454    /// A new declaration starts with a token at column 1 that could begin
455    /// a declaration: lowercase name, `type`, `port`, `infix`, or doc comment.
456    pub fn skip_to_next_declaration(&mut self) {
457        loop {
458            self.skip_whitespace();
459            if self.is_eof() {
460                break;
461            }
462            let col = self.current_column();
463            let tok = self.peek();
464            // A token at column 1 that can start a declaration.
465            if col == 1
466                && matches!(
467                    tok,
468                    Token::LowerName(_)
469                        | Token::Type
470                        | Token::Port
471                        | Token::Infix
472                        | Token::DocComment(_)
473                )
474            {
475                break;
476            }
477            self.advance();
478        }
479    }
480}
481
482/// Produce a human-readable description of a token for error messages.
483fn describe(tok: &Token) -> String {
484    match tok {
485        Token::Module => "`module`".into(),
486        Token::Where => "`where`".into(),
487        Token::Import => "`import`".into(),
488        Token::As => "`as`".into(),
489        Token::Exposing => "`exposing`".into(),
490        Token::Type => "`type`".into(),
491        Token::Alias => "`alias`".into(),
492        Token::Port => "`port`".into(),
493        Token::If => "`if`".into(),
494        Token::Then => "`then`".into(),
495        Token::Else => "`else`".into(),
496        Token::Case => "`case`".into(),
497        Token::Of => "`of`".into(),
498        Token::Let => "`let`".into(),
499        Token::In => "`in`".into(),
500        Token::Infix => "`infix`".into(),
501        Token::LeftParen => "`(`".into(),
502        Token::RightParen => "`)`".into(),
503        Token::LeftBracket => "`[`".into(),
504        Token::RightBracket => "`]`".into(),
505        Token::LeftBrace => "`{`".into(),
506        Token::RightBrace => "`}`".into(),
507        Token::Comma => "`,`".into(),
508        Token::Pipe => "`|`".into(),
509        Token::Equals => "`=`".into(),
510        Token::Colon => "`:`".into(),
511        Token::Dot => "`.`".into(),
512        Token::DotDot => "`..`".into(),
513        Token::Backslash => "`\\`".into(),
514        Token::Underscore => "`_`".into(),
515        Token::Arrow => "`->`".into(),
516        Token::Operator(op) => format!("`{op}`"),
517        Token::Minus => "`-`".into(),
518        Token::LowerName(n) => format!("identifier `{n}`"),
519        Token::UpperName(n) => format!("type `{n}`"),
520        Token::Literal(_) => "literal".into(),
521        Token::LineComment(_) => "comment".into(),
522        Token::BlockComment(_) => "comment".into(),
523        Token::DocComment(_) => "doc comment".into(),
524        Token::Glsl(_) => "GLSL block".into(),
525        Token::Newline => "newline".into(),
526        Token::Eof => "end of file".into(),
527    }
528}
529
530/// Parse an Elm source string into an `ElmModule`.
531///
532/// Returns `Err` if the module header or imports fail to parse.
533/// For declaration-level errors, use [`parse_recovering`] instead to
534/// get a partial AST along with the errors.
535pub fn parse(source: &str) -> Result<crate::file::ElmModule, Vec<ParseError>> {
536    let lexer = crate::lexer::Lexer::new(source);
537    let (tokens, lex_errors) = lexer.tokenize();
538
539    if !lex_errors.is_empty() {
540        return Err(lex_errors
541            .into_iter()
542            .map(|e| ParseError {
543                message: e.message,
544                span: e.span,
545            })
546            .collect());
547    }
548
549    let mut parser = Parser::new(tokens);
550    module::parse_module(&mut parser).map_err(|e| vec![e])
551}
552
553/// Parse an Elm source string with error recovery.
554///
555/// Unlike [`parse`], this always returns a (possibly partial) AST along
556/// with any errors encountered. Declarations that fail to parse are skipped,
557/// and parsing continues with the next declaration.
558pub fn parse_recovering(source: &str) -> (Option<crate::file::ElmModule>, Vec<ParseError>) {
559    let lexer = crate::lexer::Lexer::new(source);
560    let (tokens, lex_errors) = lexer.tokenize();
561
562    if !lex_errors.is_empty() {
563        return (
564            None,
565            lex_errors
566                .into_iter()
567                .map(|e| ParseError {
568                    message: e.message,
569                    span: e.span,
570                })
571                .collect(),
572        );
573    }
574
575    let mut parser = Parser::new(tokens);
576    module::parse_module_recovering(&mut parser)
577}