// elm-ast 0.1.4 - Docs.rs

pub mod declaration;
pub mod expr;
pub mod module;
pub mod pattern;
pub mod type_annotation;

use crate::comment::Comment;
use crate::node::Spanned;
use crate::span::{Position, Span};
use crate::token::Token;

/// A parse error with source location.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ParseError {
    /// Human-readable description of what went wrong.
    pub message: String,
    /// Source span the error is reported at. The `Display` impl prints the
    /// line and column of this span's start.
    pub span: Span,
}

impl std::fmt::Display for ParseError {
    /// Renders the error as `line:column: message`, where the line and
    /// column are those of the start of the error's span.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let origin = &self.span.start;
        write!(f, "{}:{}: {}", origin.line, origin.column, self.message)
    }
}

// `ParseError` already provides `Debug` (derived) and `Display`, so the
// default `Error` method implementations are used as-is.
impl std::error::Error for ParseError {}

/// Shorthand for a `Result` whose error type is [`ParseError`].
pub type ParseResult<T> = Result<T, ParseError>;

/// Maximum expression nesting depth. Limits the size of the continuation
/// stack in the iterative (CPS/trampoline) expression parser to prevent
/// pathological input from consuming unbounded heap memory. Real Elm files
/// rarely exceed 10–15 levels. Set high because the iterative parser has
/// no stack-overflow risk — this is purely a resource-usage guard.
pub(crate) const MAX_EXPR_DEPTH: usize = 256;

/// The parser. A cursor over a stream of spanned tokens.
///
/// The parser follows elm/compiler's approach to indentation: it tracks
/// indentation context using token column positions rather than virtual
/// INDENT/DEDENT tokens.
pub struct Parser {
    /// The token stream being parsed.
    tokens: Vec<Spanned<Token>>,
    /// Index into `tokens` of the current (not yet consumed) token.
    pos: usize,
    /// Nesting depth of parentheses/brackets/braces. When > 0,
    /// indentation-sensitive layout rules are suspended (any column is valid).
    /// This matches the elm/compiler behavior.
    paren_depth: u32,
    /// When set, `application_loop` uses this column instead of the function's
    /// column for continuation checks. Set by list/record parsers (to the
    /// opening bracket's column) so that function arguments at any column past
    /// the bracket are collected. Cleared by case/let parsers before parsing
    /// branch/declaration bodies so that normal column checking resumes.
    pub(crate) app_context_col: Option<u32>,
    /// Comments collected as a side-channel during parsing.
    /// `skip_whitespace` saves comments here instead of silently discarding them,
    /// so that `parse_module` can include them in the final AST.
    collected_comments: Vec<Spanned<Comment>>,
}

impl Parser {
    /// Create a parser from a token stream (as produced by the lexer).
    ///
    /// # Panics
    ///
    /// Construction itself never panics, but [`Parser::current`] and
    /// [`Parser::advance`] index the last token of the stream, so they panic
    /// if `tokens` is empty. Presumably the lexer always terminates the
    /// stream with `Token::Eof` — confirm against the lexer.
    pub fn new(tokens: Vec<Spanned<Token>>) -> Self {
        Self {
            tokens,
            pos: 0,
            paren_depth: 0,
            app_context_col: None,
            collected_comments: Vec::new(),
        }
    }

    /// Drain all comments collected so far by `skip_whitespace`.
    ///
    /// Equivalent to [`Parser::take_pending_comments`]; both leave the
    /// pending-comment buffer empty.
    pub fn drain_comments(&mut self) -> Vec<Spanned<Comment>> {
        self.take_pending_comments()
    }

    /// Take all comments collected since the last take,
    /// returning them for attachment to an AST node.
    pub fn take_pending_comments(&mut self) -> Vec<Spanned<Comment>> {
        std::mem::take(&mut self.collected_comments)
    }

    /// Snapshot the current number of pending comments. Pair with
    /// `take_pending_comments_since` to take only comments collected
    /// after the snapshot, preserving earlier comments.
    pub fn pending_comments_snapshot(&self) -> usize {
        self.collected_comments.len()
    }

    /// Take pending comments collected after the given snapshot, leaving
    /// earlier comments in place so they remain available for later attachment.
    ///
    /// Returns an empty vector when nothing was collected after the snapshot,
    /// including when the buffer has since shrunk below `snapshot`.
    pub fn take_pending_comments_since(&mut self, snapshot: usize) -> Vec<Spanned<Comment>> {
        // `split_off` panics when the index exceeds the length, so guard
        // against a snapshot taken before an intervening take.
        if snapshot >= self.collected_comments.len() {
            return Vec::new();
        }
        self.collected_comments.split_off(snapshot)
    }

    /// Returns true if currently inside parens/brackets/braces.
    /// When true, indentation-sensitive layout rules are suspended.
    pub fn in_paren_context(&self) -> bool {
        self.paren_depth > 0
    }

    // ── Position & peeking ───────────────────────────────────────────

    /// The current token (without advancing).
    ///
    /// The index is clamped to the last token, so once the cursor reaches
    /// the end this keeps returning the final token.
    ///
    /// # Panics
    ///
    /// Panics if the token stream is empty.
    pub fn current(&self) -> &Spanned<Token> {
        &self.tokens[self.pos.min(self.tokens.len() - 1)]
    }

    /// Peek at the current token value.
    pub fn peek(&self) -> &Token {
        &self.current().value
    }

    /// Peek at the current token's span.
    pub fn peek_span(&self) -> Span {
        self.current().span
    }

    /// The current position in source (start of the current token).
    pub fn current_pos(&self) -> Position {
        self.current().span.start
    }

    /// The column of the current token (1-based).
    pub fn current_column(&self) -> u32 {
        self.current().span.start.column
    }

    /// Check if we've reached Eof.
    pub fn is_eof(&self) -> bool {
        matches!(self.peek(), Token::Eof)
    }

    // ── Advancing ────────────────────────────────────────────────────

    /// Advance past the current token and return it.
    /// Automatically tracks paren/bracket/brace nesting depth.
    ///
    /// The cursor never moves past the last token, so calling this at the
    /// end of the stream returns the final token again.
    ///
    /// # Panics
    ///
    /// Panics if the token stream is empty.
    pub fn advance(&mut self) -> Spanned<Token> {
        let tok = self.current().clone();
        // Track paren depth for indentation-context suspension.
        match &tok.value {
            Token::LeftParen | Token::LeftBracket | Token::LeftBrace => {
                self.paren_depth += 1;
            }
            Token::RightParen | Token::RightBracket | Token::RightBrace => {
                // Saturate so a stray closing bracket cannot underflow.
                self.paren_depth = self.paren_depth.saturating_sub(1);
            }
            _ => {}
        }
        if self.pos + 1 < self.tokens.len() {
            self.pos += 1;
        }
        tok
    }

    /// Skip over newline, line comment, and block comment tokens.
    /// Line and block comments are saved to `collected_comments` for round-tripping.
    ///
    /// Doc comment tokens are *not* skipped: the cursor stops in front of
    /// them so that `try_doc_comment` and `skip_to_next_declaration` can
    /// still see them.
    pub fn skip_whitespace(&mut self) {
        while matches!(
            self.peek(),
            Token::Newline | Token::LineComment(_) | Token::BlockComment(_)
        ) {
            // `advance` already hands back an owned token, so the comment
            // text can be moved out of it without a second clone.
            let tok = self.advance();
            let span = tok.span;
            let comment = match tok.value {
                Token::LineComment(text) => Comment::Line(text),
                Token::BlockComment(text) => Comment::Block(text),
                _ => continue, // Newline
            };
            self.collected_comments.push(Spanned::new(span, comment));
        }
    }

    /// Skip whitespace and comments, saving line/block comments, but stop
    /// before consuming a `DocComment` token. Used by `try_doc_comment` so
    /// it can see the doc comment after skipping preceding whitespace.
    ///
    /// This is exactly the behavior of [`Parser::skip_whitespace`]; the
    /// separate name documents the intent at call sites.
    pub fn skip_whitespace_before_doc(&mut self) {
        self.skip_whitespace();
    }

    /// Skip newlines only (preserve comments for doc comment attachment).
    pub fn skip_newlines(&mut self) {
        while matches!(self.peek(), Token::Newline) {
            self.advance();
        }
    }

    // ── Expecting specific tokens ────────────────────────────────────

    /// Consume the current token if it matches, otherwise return an error.
    ///
    /// Leading newlines and line/block comments are skipped first. On a
    /// mismatch the offending token is left unconsumed and the error is
    /// reported at its span.
    pub fn expect(&mut self, expected: &Token) -> ParseResult<Spanned<Token>> {
        self.skip_whitespace();
        if self.peek() == expected {
            Ok(self.advance())
        } else {
            Err(self.error(format!(
                "expected {}, found {}",
                describe(expected),
                describe(self.peek())
            )))
        }
    }

    /// Consume a `LowerName` and return the string.
    ///
    /// Leading newlines and line/block comments are skipped first. Returns
    /// an error (without consuming) if the next token is not a `LowerName`.
    pub fn expect_lower_name(&mut self) -> ParseResult<Spanned<String>> {
        self.skip_whitespace();
        match self.peek() {
            Token::LowerName(name) => {
                let name = name.clone();
                let span = self.advance().span;
                Ok(Spanned::new(span, name))
            }
            other => Err(self.error(format!(
                "expected lowercase name, found {}",
                describe(other)
            ))),
        }
    }

    /// Consume an `UpperName` and return the string.
    ///
    /// Leading newlines and line/block comments are skipped first. Returns
    /// an error (without consuming) if the next token is not an `UpperName`.
    pub fn expect_upper_name(&mut self) -> ParseResult<Spanned<String>> {
        self.skip_whitespace();
        match self.peek() {
            Token::UpperName(name) => {
                let name = name.clone();
                let span = self.advance().span;
                Ok(Spanned::new(span, name))
            }
            other => Err(self.error(format!(
                "expected uppercase name, found {}",
                describe(other)
            ))),
        }
    }

    // ── Lookahead helpers ────────────────────────────────────────────

    /// Check if the current token matches (after skipping whitespace),
    /// without consuming it.
    ///
    /// Takes `&mut self` because the skipped whitespace and comments *are*
    /// consumed (comments are saved to the pending-comment buffer).
    pub fn check(&mut self, expected: &Token) -> bool {
        self.skip_whitespace();
        self.peek() == expected
    }

    /// If the current token matches (after skipping whitespace), consume it
    /// and return `true`; otherwise leave it in place and return `false`.
    pub fn eat(&mut self, expected: &Token) -> bool {
        self.skip_whitespace();
        if self.peek() == expected {
            self.advance();
            true
        } else {
            false
        }
    }

    /// Peek ahead past whitespace, returning the next non-whitespace token
    /// without consuming anything.
    ///
    /// Unlike `skip_whitespace`, this lookahead also steps over doc comments.
    /// Returns `Token::Eof` if no such token remains.
    pub fn peek_past_whitespace(&self) -> &Token {
        self.peek_nth_past_whitespace(0)
    }

    /// Peek at the token N positions ahead of current (ignoring whitespace).
    ///
    /// `n` is 0-based and counts only tokens that are not newlines or
    /// line/block/doc comments. Returns `Token::Eof` if fewer than `n + 1`
    /// such tokens remain.
    pub fn peek_nth_past_whitespace(&self, n: usize) -> &Token {
        let found = self
            .tokens
            .iter()
            .skip(self.pos)
            .filter(|tok| {
                !matches!(
                    tok.value,
                    Token::Newline
                        | Token::LineComment(_)
                        | Token::BlockComment(_)
                        | Token::DocComment(_)
                )
            })
            .nth(n);
        match found {
            Some(tok) => &tok.value,
            None => &Token::Eof,
        }
    }

    // ── Indentation ──────────────────────────────────────────────────

    /// Check if the current token is indented past `min_col`.
    /// When inside parens/brackets, indentation is always satisfied.
    ///
    /// Skips newlines (only) first; always `false` at end of file.
    pub fn is_indented_past(&mut self, min_col: u32) -> bool {
        self.skip_newlines();
        !self.is_eof() && (self.in_paren_context() || self.current_column() > min_col)
    }

    /// Check if the current token is at or past `min_col`.
    /// When inside parens/brackets, indentation is always satisfied.
    ///
    /// Skips newlines (only) first; always `false` at end of file.
    pub fn is_at_or_past(&mut self, min_col: u32) -> bool {
        self.skip_newlines();
        !self.is_eof() && (self.in_paren_context() || self.current_column() >= min_col)
    }

    // ── Collecting a doc comment ─────────────────────────────────────

    /// If the next token (after whitespace and ordinary comments) is a doc
    /// comment, consume and return it.
    pub fn try_doc_comment(&mut self) -> Option<Spanned<String>> {
        self.skip_whitespace_before_doc();
        match self.peek() {
            Token::DocComment(text) => {
                let text = text.clone();
                let span = self.advance().span;
                Some(Spanned::new(span, text))
            }
            _ => None,
        }
    }

    // ── Error construction ───────────────────────────────────────────

    /// Build a [`ParseError`] located at the current token's span.
    pub fn error(&self, message: impl Into<String>) -> ParseError {
        ParseError {
            message: message.into(),
            span: self.peek_span(),
        }
    }

    /// Build a [`ParseError`] located at an explicit span.
    pub fn error_at(&self, span: Span, message: impl Into<String>) -> ParseError {
        ParseError {
            message: message.into(),
            span,
        }
    }

    // ── Span helpers ─────────────────────────────────────────────────

    /// Create a span from `start` to the end of the previously consumed
    /// token, skipping back past any trailing whitespace or comment tokens.
    ///
    /// This keeps declaration/expression spans tight (ending at the last
    /// meaningful token) even when the parser has peeked past trailing
    /// newlines and comments while searching for a continuation.
    ///
    /// If no meaningful token precedes the cursor, the span is empty
    /// (`start..start`).
    pub fn span_from(&self, start: Position) -> Span {
        let end = self.tokens[..self.pos]
            .iter()
            .rev()
            .find(|tok| {
                !matches!(
                    tok.value,
                    Token::Newline
                        | Token::LineComment(_)
                        | Token::BlockComment(_)
                        | Token::DocComment(_)
                )
            })
            .map_or(start, |tok| tok.span.end);
        Span::new(start, end)
    }

    /// Wrap a value with a span from `start` to the last consumed token.
    pub fn spanned_from<T>(&self, start: Position, value: T) -> Spanned<T> {
        Spanned::new(self.span_from(start), value)
    }

    /// Skip tokens until we reach the start of a new top-level declaration
    /// or end of file. Used for error recovery.
    ///
    /// A new declaration starts with a token at column 1 that could begin
    /// a declaration: lowercase name, `type`, `port`, `infix`, or doc comment.
    ///
    /// If the cursor is already on such a token, nothing is consumed.
    /// On return the bracket-nesting depth and `app_context_col` are reset:
    /// `advance` keeps counting brackets while skipping, so an unbalanced
    /// broken declaration would otherwise leave layout rules suspended for
    /// every declaration that follows.
    pub fn skip_to_next_declaration(&mut self) {
        loop {
            self.skip_whitespace();
            if self.is_eof() {
                break;
            }
            // A token at column 1 that can start a declaration.
            let starts_declaration = self.current_column() == 1
                && matches!(
                    self.peek(),
                    Token::LowerName(_)
                        | Token::Type
                        | Token::Port
                        | Token::Infix
                        | Token::DocComment(_)
                );
            if starts_declaration {
                break;
            }
            self.advance();
        }
        // We are at a top-level boundary (or end of file): discard any
        // context left over from the declaration that failed to parse.
        self.paren_depth = 0;
        self.app_context_col = None;
    }
}

/// Produce a human-readable description of a token for error messages.
fn describe(tok: &Token) -> String {
    match tok {
        Token::Module => "`module`".into(),
        Token::Where => "`where`".into(),
        Token::Import => "`import`".into(),
        Token::As => "`as`".into(),
        Token::Exposing => "`exposing`".into(),
        Token::Type => "`type`".into(),
        Token::Alias => "`alias`".into(),
        Token::Port => "`port`".into(),
        Token::If => "`if`".into(),
        Token::Then => "`then`".into(),
        Token::Else => "`else`".into(),
        Token::Case => "`case`".into(),
        Token::Of => "`of`".into(),
        Token::Let => "`let`".into(),
        Token::In => "`in`".into(),
        Token::Infix => "`infix`".into(),
        Token::LeftParen => "`(`".into(),
        Token::RightParen => "`)`".into(),
        Token::LeftBracket => "`[`".into(),
        Token::RightBracket => "`]`".into(),
        Token::LeftBrace => "`{`".into(),
        Token::RightBrace => "`}`".into(),
        Token::Comma => "`,`".into(),
        Token::Pipe => "`|`".into(),
        Token::Equals => "`=`".into(),
        Token::Colon => "`:`".into(),
        Token::Dot => "`.`".into(),
        Token::DotDot => "`..`".into(),
        Token::Backslash => "`\\`".into(),
        Token::Underscore => "`_`".into(),
        Token::Arrow => "`->`".into(),
        Token::Operator(op) => format!("`{op}`"),
        Token::Minus => "`-`".into(),
        Token::LowerName(n) => format!("identifier `{n}`"),
        Token::UpperName(n) => format!("type `{n}`"),
        Token::Literal(_) => "literal".into(),
        Token::LineComment(_) => "comment".into(),
        Token::BlockComment(_) => "comment".into(),
        Token::DocComment(_) => "doc comment".into(),
        Token::Glsl(_) => "GLSL block".into(),
        Token::Newline => "newline".into(),
        Token::Eof => "end of file".into(),
    }
}

/// Parse an Elm source string into an `ElmModule`.
///
/// Lexing happens first: if the lexer reports any errors, they are all
/// returned (converted to [`ParseError`]) and no parsing is attempted.
/// Otherwise the module is parsed and a failure is returned as a
/// single-element error list.
///
/// For declaration-level errors, use [`parse_recovering`] instead to
/// get a partial AST along with the errors.
pub fn parse(source: &str) -> Result<crate::file::ElmModule, Vec<ParseError>> {
    let lexer = crate::lexer::Lexer::new(source);
    let (tokens, lex_errors) = lexer.tokenize();

    if lex_errors.is_empty() {
        let mut parser = Parser::new(tokens);
        return module::parse_module(&mut parser).map_err(|err| vec![err]);
    }

    // Lexing failed: surface every lex error as a parse error.
    let errors = lex_errors
        .into_iter()
        .map(|lex_error| ParseError {
            message: lex_error.message,
            span: lex_error.span,
        })
        .collect();
    Err(errors)
}

/// Parse an Elm source string with error recovery.
///
/// Returns whatever `module::parse_module_recovering` produces: an optional
/// (possibly partial) AST together with the errors encountered. Unlike
/// [`parse`], declaration-level failures do not abort the whole parse.
///
/// If the lexer reports any errors, no parsing is attempted: the result is
/// `None` plus the lex errors converted to [`ParseError`].
pub fn parse_recovering(source: &str) -> (Option<crate::file::ElmModule>, Vec<ParseError>) {
    let lexer = crate::lexer::Lexer::new(source);
    let (tokens, lex_errors) = lexer.tokenize();

    if lex_errors.is_empty() {
        let mut parser = Parser::new(tokens);
        return module::parse_module_recovering(&mut parser);
    }

    // Lexing failed: there is no token stream worth parsing.
    let errors = lex_errors
        .into_iter()
        .map(|lex_error| ParseError {
            message: lex_error.message,
            span: lex_error.span,
        })
        .collect();
    (None, errors)
}