Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3
4use super::error::ParserError;
5
/// Deepest nesting the parser accepts. Enforced by the token pre-scan in
/// `Parser::check_token_nesting_limit` and by `Parser::with_nesting`.
pub(crate) const MAX_NESTING_DEPTH: usize = 64;
7
8/// Recursive descent parser for Harn.
9pub struct Parser {
10    pub(super) tokens: Vec<Token>,
11    pub(super) pos: usize,
12    pub(super) errors: Vec<ParserError>,
13    nesting_depth: usize,
14}
15
16impl Parser {
17    pub fn new(tokens: Vec<Token>) -> Self {
18        Self {
19            tokens,
20            pos: 0,
21            errors: Vec::new(),
22            nesting_depth: 0,
23        }
24    }
25
26    pub(super) fn check_token_nesting_limit(&self) -> Result<(), ParserError> {
27        let mut depth = 0usize;
28        for token in &self.tokens {
29            match token.kind {
30                TokenKind::LBrace | TokenKind::LBracket | TokenKind::LParen => {
31                    depth += 1;
32                    if depth > MAX_NESTING_DEPTH {
33                        return Err(ParserError::Unexpected {
34                            got: "source nesting depth exceeded".to_string(),
35                            expected: format!(
36                                "parser nesting depth within {MAX_NESTING_DEPTH} levels"
37                            ),
38                            span: token.span,
39                        });
40                    }
41                }
42                TokenKind::RBrace | TokenKind::RBracket | TokenKind::RParen => {
43                    depth = depth.saturating_sub(1);
44                }
45                _ => {}
46            }
47        }
48        Ok(())
49    }
50
51    pub(super) fn current_span(&self) -> Span {
52        self.tokens
53            .get(self.pos)
54            .map(|t| t.span)
55            .unwrap_or(Span::dummy())
56    }
57
58    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
59        self.tokens.get(self.pos).map(|t| &t.kind)
60    }
61
62    pub(super) fn prev_span(&self) -> Span {
63        if self.pos > 0 {
64            self.tokens[self.pos - 1].span
65        } else {
66            Span::dummy()
67        }
68    }
69
70    /// Span of the most recently consumed *non-newline* token. Useful when
71    /// computing a node's end span after the parser has already consumed
72    /// trailing newlines (e.g. while looking ahead for an optional `else` /
73    /// `catch` / `finally` clause). Using `prev_span()` in that position
74    /// would report a newline token whose `end_line` is past the visual end
75    /// of the node, which downstream tools (notably the formatter) interpret
76    /// as belonging to the node.
77    pub(super) fn last_non_newline_span(&self) -> Span {
78        let mut i = self.pos;
79        while i > 0 {
80            i -= 1;
81            if self.tokens[i].kind != TokenKind::Newline {
82                return self.tokens[i].span;
83            }
84        }
85        Span::dummy()
86    }
87
88    /// Parse a complete .harn file. Reports multiple errors via recovery.
89    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
90        self.check_token_nesting_limit()?;
91
92        let mut nodes = Vec::new();
93        self.skip_newlines();
94
95        while !self.is_at_end() {
96            // Recovery may leave us pointing at a stray `}` at top level; skip it.
97            if self.check(&TokenKind::RBrace) {
98                self.advance();
99                self.skip_newlines();
100                continue;
101            }
102
103            let result = if self.check(&TokenKind::Import) {
104                self.parse_import()
105            } else if self.check(&TokenKind::At) {
106                self.parse_attributed_decl()
107            } else if self.check(&TokenKind::Pipeline) {
108                self.parse_pipeline()
109            } else if self.check(&TokenKind::EvalPack) {
110                self.parse_eval_pack_decl(false)
111            } else {
112                self.parse_statement()
113            };
114
115            match result {
116                Ok(node) => {
117                    let end_line = node.span.end_line;
118                    nodes.push(node);
119                    let consumed_sep = self.consume_statement_separator();
120                    if !consumed_sep && !self.is_at_end() {
121                        self.require_statement_separator(end_line, "top-level item")?;
122                    }
123                }
124                Err(err) => {
125                    self.errors.push(err);
126                    self.synchronize();
127                }
128            }
129        }
130
131        if let Some(first) = self.errors.first() {
132            return Err(first.clone());
133        }
134        Ok(nodes)
135    }
136
137    /// Return all accumulated parser errors (after `parse()` returns).
138    pub fn all_errors(&self) -> &[ParserError] {
139        &self.errors
140    }
141
142    /// Check if the current token is one that starts a statement.
143    pub(super) fn is_statement_start(&self) -> bool {
144        matches!(
145            self.current_kind(),
146            Some(
147                TokenKind::Let
148                    | TokenKind::Const
149                    | TokenKind::Var
150                    | TokenKind::If
151                    | TokenKind::For
152                    | TokenKind::While
153                    | TokenKind::Match
154                    | TokenKind::Retry
155                    | TokenKind::Return
156                    | TokenKind::Throw
157                    | TokenKind::Fn
158                    | TokenKind::Pub
159                    | TokenKind::Try
160                    | TokenKind::Select
161                    | TokenKind::Pipeline
162                    | TokenKind::Import
163                    | TokenKind::Parallel
164                    | TokenKind::Enum
165                    | TokenKind::EvalPack
166                    | TokenKind::Struct
167                    | TokenKind::Interface
168                    | TokenKind::Emit
169                    | TokenKind::Guard
170                    | TokenKind::Require
171                    | TokenKind::Deadline
172                    | TokenKind::Yield
173                    | TokenKind::Mutex
174                    | TokenKind::Defer
175                    | TokenKind::Break
176                    | TokenKind::Continue
177                    | TokenKind::Tool
178                    | TokenKind::Skill
179                    | TokenKind::Impl
180            )
181        )
182    }
183
184    /// Advance past tokens until we reach a likely statement boundary.
185    pub(super) fn synchronize(&mut self) {
186        while !self.is_at_end() {
187            if self.check(&TokenKind::Semicolon) {
188                self.advance();
189                self.skip_newlines();
190                return;
191            }
192            if self.check(&TokenKind::Newline) {
193                self.advance();
194                if self.is_at_end() || self.is_statement_start() {
195                    return;
196                }
197                continue;
198            }
199            if self.check(&TokenKind::RBrace) {
200                return;
201            }
202            self.advance();
203        }
204    }
205
206    pub(super) fn is_at_end(&self) -> bool {
207        self.pos >= self.tokens.len()
208            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
209    }
210
211    pub(super) fn current(&self) -> Option<&Token> {
212        self.tokens.get(self.pos)
213    }
214
215    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
216        self.tokens.get(self.pos + 1).map(|t| &t.kind)
217    }
218
219    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
220        self.tokens.get(self.pos + offset).map(|t| &t.kind)
221    }
222
223    pub(super) fn check(&self, kind: &TokenKind) -> bool {
224        self.current()
225            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
226            .unwrap_or(false)
227    }
228
229    /// Check for `kind`, skipping newlines first; used for binary operators
230    /// like `||` and `&&` that can span lines.
231    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
232        let saved = self.pos;
233        self.skip_newlines();
234        if self.check(kind) {
235            true
236        } else {
237            self.pos = saved;
238            false
239        }
240    }
241
242    /// Check if current token is an identifier with the given name (without consuming it).
243    pub(super) fn check_identifier(&self, name: &str) -> bool {
244        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
245    }
246
247    /// `gen` is contextual so existing identifiers named `gen` keep working.
248    /// It starts a stream declaration only when followed by `fn`.
249    pub(super) fn check_contextual_gen_fn(&self) -> bool {
250        if !self.check_identifier("gen") {
251            return false;
252        }
253        matches!(
254            self.tokens.get(self.pos + 1).map(|t| &t.kind),
255            Some(TokenKind::Fn)
256        )
257    }
258
259    /// `scope` is contextual so existing identifiers named `scope` (including
260    /// dict keys and property names) keep working. It starts a structured
261    /// concurrency nursery only when immediately followed by `{`.
262    pub(super) fn check_contextual_scope_block(&self) -> bool {
263        if !self.check_identifier("scope") {
264            return false;
265        }
266        matches!(
267            self.tokens.get(self.pos + 1).map(|t| &t.kind),
268            Some(TokenKind::LBrace)
269        )
270    }
271
272    pub(super) fn advance(&mut self) {
273        if self.pos < self.tokens.len() {
274            self.pos += 1;
275        }
276    }
277
278    pub(super) fn consume(
279        &mut self,
280        kind: &TokenKind,
281        expected: &str,
282    ) -> Result<Token, ParserError> {
283        self.skip_newlines();
284        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
285        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
286            return Err(self.make_error(expected));
287        }
288        let tok = tok.clone();
289        self.advance();
290        Ok(tok)
291    }
292
293    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
294        self.skip_newlines();
295        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
296        if let TokenKind::Identifier(name) = &tok.kind {
297            let name = name.clone();
298            self.advance();
299            Ok(name)
300        } else {
301            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
302            // a general unexpected token so the error is actionable.
303            let kw_name = harn_lexer::KEYWORDS
304                .iter()
305                .find(|&&kw| kw == tok.kind.to_string());
306            if let Some(kw) = kw_name {
307                Err(ParserError::Unexpected {
308                    got: format!("'{kw}' (reserved keyword)"),
309                    expected: expected.into(),
310                    span: tok.span,
311                })
312            } else {
313                Err(self.make_error(expected))
314            }
315        }
316    }
317
318    pub(super) fn consume_contextual_keyword(
319        &mut self,
320        name: &str,
321        expected: &str,
322    ) -> Result<Token, ParserError> {
323        self.skip_newlines();
324        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
325        if matches!(&tok.kind, TokenKind::Identifier(id) if id == name) {
326            let tok = tok.clone();
327            self.advance();
328            Ok(tok)
329        } else {
330            Err(self.make_error(expected))
331        }
332    }
333
334    /// Like `consume_identifier`, but also accepts keywords as identifiers.
335    /// Used for property access (e.g., `obj.type`) and dict keys where
336    /// keywords are valid member names.
337    pub(super) fn consume_identifier_or_keyword(
338        &mut self,
339        expected: &str,
340    ) -> Result<String, ParserError> {
341        self.skip_newlines();
342        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
343        if let TokenKind::Identifier(name) = &tok.kind {
344            let name = name.clone();
345            self.advance();
346            return Ok(name);
347        }
348        let name = match &tok.kind {
349            TokenKind::Pipeline => "pipeline",
350            TokenKind::Extends => "extends",
351            TokenKind::Override => "override",
352            TokenKind::Let => "let",
353            TokenKind::Const => "const",
354            TokenKind::Var => "var",
355            TokenKind::If => "if",
356            TokenKind::Else => "else",
357            TokenKind::For => "for",
358            TokenKind::In => "in",
359            TokenKind::Match => "match",
360            TokenKind::Retry => "retry",
361            TokenKind::Parallel => "parallel",
362            TokenKind::Return => "return",
363            TokenKind::Import => "import",
364            TokenKind::True => "true",
365            TokenKind::False => "false",
366            TokenKind::Nil => "nil",
367            TokenKind::Try => "try",
368            TokenKind::Catch => "catch",
369            TokenKind::Throw => "throw",
370            TokenKind::Finally => "finally",
371            TokenKind::Fn => "fn",
372            TokenKind::Spawn => "spawn",
373            TokenKind::While => "while",
374            TokenKind::TypeKw => "type",
375            TokenKind::Enum => "enum",
376            TokenKind::EvalPack => "eval_pack",
377            TokenKind::Struct => "struct",
378            TokenKind::Interface => "interface",
379            TokenKind::Emit => "emit",
380            TokenKind::Pub => "pub",
381            TokenKind::From => "from",
382            TokenKind::To => "to",
383            TokenKind::Tool => "tool",
384            TokenKind::Exclusive => "exclusive",
385            TokenKind::Guard => "guard",
386            TokenKind::Require => "require",
387            TokenKind::Deadline => "deadline",
388            TokenKind::Defer => "defer",
389            TokenKind::Yield => "yield",
390            TokenKind::Mutex => "mutex",
391            TokenKind::Break => "break",
392            TokenKind::Continue => "continue",
393            TokenKind::Select => "select",
394            TokenKind::Impl => "impl",
395            TokenKind::Skill => "skill",
396            TokenKind::RequestApproval => "request_approval",
397            TokenKind::DualControl => "dual_control",
398            TokenKind::AskUser => "ask_user",
399            TokenKind::EscalateTo => "escalate_to",
400            _ => return Err(self.make_error(expected)),
401        };
402        let name = name.to_string();
403        self.advance();
404        Ok(name)
405    }
406
407    pub(super) fn skip_newlines(&mut self) {
408        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
409            self.pos += 1;
410        }
411    }
412
413    /// Consume an optional semicolon statement separator followed by any
414    /// number of newlines, or one-or-more newlines on their own.
415    ///
416    /// This is intentionally narrower than `skip_newlines()`: semicolons are
417    /// only legal between already-parsed list items, not in arbitrary parse
418    /// positions.
419    pub(super) fn consume_statement_separator(&mut self) -> bool {
420        let mut consumed = false;
421        if self.check(&TokenKind::Semicolon) {
422            self.advance();
423            consumed = true;
424        }
425        let start = self.pos;
426        self.skip_newlines();
427        consumed || self.pos != start
428    }
429
430    pub(super) fn require_statement_separator(
431        &self,
432        prev_end_line: usize,
433        expected_item: &str,
434    ) -> Result<(), ParserError> {
435        let Some(tok) = self.current() else {
436            return Ok(());
437        };
438        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
439            return Ok(());
440        }
441        Err(ParserError::Unexpected {
442            got: tok.kind.to_string(),
443            expected: format!("{expected_item} separator (`;` or newline)"),
444            span: tok.span,
445        })
446    }
447
448    pub(super) fn make_error(&self, expected: &str) -> ParserError {
449        if let Some(tok) = self.tokens.get(self.pos) {
450            if tok.kind == TokenKind::Eof {
451                return ParserError::UnexpectedEof {
452                    expected: expected.into(),
453                    span: tok.span,
454                };
455            }
456            ParserError::Unexpected {
457                got: tok.kind.to_string(),
458                expected: expected.into(),
459                span: tok.span,
460            }
461        } else {
462            ParserError::UnexpectedEof {
463                expected: expected.into(),
464                span: self.prev_span(),
465            }
466        }
467    }
468
469    pub(super) fn error(&self, expected: &str) -> ParserError {
470        self.make_error(expected)
471    }
472
473    pub(super) fn with_nesting<T>(
474        &mut self,
475        context: &'static str,
476        f: impl FnOnce(&mut Self) -> Result<T, ParserError>,
477    ) -> Result<T, ParserError> {
478        if self.nesting_depth >= MAX_NESTING_DEPTH {
479            return Err(ParserError::Unexpected {
480                got: format!("{context} nesting depth exceeded"),
481                expected: format!("parser nesting depth within {MAX_NESTING_DEPTH} levels"),
482                span: self.current_span(),
483            });
484        }
485        self.nesting_depth += 1;
486        let result = f(self);
487        self.nesting_depth = self.nesting_depth.saturating_sub(1);
488        result
489    }
490}