Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3
4use super::error::ParserError;
5
/// Maximum nesting depth the parser accepts. It bounds both the raw
/// bracket depth checked by `Parser::check_token_nesting_limit` and the
/// number of simultaneously active `Parser::with_nesting` scopes.
pub(crate) const MAX_NESTING_DEPTH: usize = 64;
7
/// Recursive descent parser for Harn.
///
/// Holds the token stream, a cursor into it, the errors recorded so far,
/// and the depth counter maintained by `with_nesting`.
pub struct Parser {
    /// Tokens being parsed.
    pub(super) tokens: Vec<Token>,
    /// Index into `tokens` of the current (not yet consumed) token.
    pub(super) pos: usize,
    /// Errors recorded while parsing; exposed through `all_errors`.
    pub(super) errors: Vec<ParserError>,
    /// Number of `with_nesting` scopes that are currently active.
    nesting_depth: usize,
}
15
16impl Parser {
17    pub fn new(tokens: Vec<Token>) -> Self {
18        Self {
19            tokens,
20            pos: 0,
21            errors: Vec::new(),
22            nesting_depth: 0,
23        }
24    }
25
26    pub(super) fn check_token_nesting_limit(&self) -> Result<(), ParserError> {
27        let mut depth = 0usize;
28        for token in &self.tokens {
29            match token.kind {
30                TokenKind::LBrace | TokenKind::LBracket | TokenKind::LParen => {
31                    depth += 1;
32                    if depth > MAX_NESTING_DEPTH {
33                        return Err(ParserError::Unexpected {
34                            got: "source nesting depth exceeded".to_string(),
35                            expected: format!(
36                                "parser nesting depth within {MAX_NESTING_DEPTH} levels"
37                            ),
38                            span: token.span,
39                        });
40                    }
41                }
42                TokenKind::RBrace | TokenKind::RBracket | TokenKind::RParen => {
43                    depth = depth.saturating_sub(1);
44                }
45                _ => {}
46            }
47        }
48        Ok(())
49    }
50
51    pub(super) fn current_span(&self) -> Span {
52        self.tokens
53            .get(self.pos)
54            .map(|t| t.span)
55            .unwrap_or(Span::dummy())
56    }
57
58    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
59        self.tokens.get(self.pos).map(|t| &t.kind)
60    }
61
62    pub(super) fn prev_span(&self) -> Span {
63        if self.pos > 0 {
64            self.tokens[self.pos - 1].span
65        } else {
66            Span::dummy()
67        }
68    }
69
70    /// Span of the most recently consumed *non-newline* token. Useful when
71    /// computing a node's end span after the parser has already consumed
72    /// trailing newlines (e.g. while looking ahead for an optional `else` /
73    /// `catch` / `finally` clause). Using `prev_span()` in that position
74    /// would report a newline token whose `end_line` is past the visual end
75    /// of the node, which downstream tools (notably the formatter) interpret
76    /// as belonging to the node.
77    pub(super) fn last_non_newline_span(&self) -> Span {
78        let mut i = self.pos;
79        while i > 0 {
80            i -= 1;
81            if self.tokens[i].kind != TokenKind::Newline {
82                return self.tokens[i].span;
83            }
84        }
85        Span::dummy()
86    }
87
88    /// Parse a complete .harn file. Reports multiple errors via recovery.
89    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
90        self.check_token_nesting_limit()?;
91
92        let mut nodes = Vec::new();
93        self.skip_newlines();
94
95        while !self.is_at_end() {
96            // Recovery may leave us pointing at a stray `}` at top level; skip it.
97            if self.check(&TokenKind::RBrace) {
98                self.advance();
99                self.skip_newlines();
100                continue;
101            }
102
103            let result = if self.check(&TokenKind::Import) {
104                self.parse_import()
105            } else if self.check(&TokenKind::At) {
106                self.parse_attributed_decl()
107            } else if self.check(&TokenKind::Pipeline) {
108                self.parse_pipeline()
109            } else if self.check(&TokenKind::EvalPack) {
110                self.parse_eval_pack_decl(false)
111            } else {
112                self.parse_statement()
113            };
114
115            match result {
116                Ok(node) => {
117                    let end_line = node.span.end_line;
118                    nodes.push(node);
119                    let consumed_sep = self.consume_statement_separator();
120                    if !consumed_sep && !self.is_at_end() {
121                        self.require_statement_separator(end_line, "top-level item")?;
122                    }
123                }
124                Err(err) => {
125                    self.errors.push(err);
126                    self.synchronize();
127                }
128            }
129        }
130
131        if let Some(first) = self.errors.first() {
132            return Err(first.clone());
133        }
134        Ok(nodes)
135    }
136
137    /// Return all accumulated parser errors (after `parse()` returns).
138    pub fn all_errors(&self) -> &[ParserError] {
139        &self.errors
140    }
141
142    /// Check if the current token is one that starts a statement.
143    pub(super) fn is_statement_start(&self) -> bool {
144        matches!(
145            self.current_kind(),
146            Some(
147                TokenKind::Let
148                    | TokenKind::Const
149                    | TokenKind::Var
150                    | TokenKind::If
151                    | TokenKind::For
152                    | TokenKind::While
153                    | TokenKind::Match
154                    | TokenKind::Retry
155                    | TokenKind::Return
156                    | TokenKind::Throw
157                    | TokenKind::Fn
158                    | TokenKind::Pub
159                    | TokenKind::Try
160                    | TokenKind::Select
161                    | TokenKind::Pipeline
162                    | TokenKind::Import
163                    | TokenKind::Parallel
164                    | TokenKind::Enum
165                    | TokenKind::EvalPack
166                    | TokenKind::Struct
167                    | TokenKind::Interface
168                    | TokenKind::Emit
169                    | TokenKind::Guard
170                    | TokenKind::Require
171                    | TokenKind::Deadline
172                    | TokenKind::Yield
173                    | TokenKind::Mutex
174                    | TokenKind::Defer
175                    | TokenKind::Break
176                    | TokenKind::Continue
177                    | TokenKind::Tool
178                    | TokenKind::Skill
179                    | TokenKind::Impl
180            )
181        )
182    }
183
184    /// Advance past tokens until we reach a likely statement boundary.
185    pub(super) fn synchronize(&mut self) {
186        while !self.is_at_end() {
187            if self.check(&TokenKind::Semicolon) {
188                self.advance();
189                self.skip_newlines();
190                return;
191            }
192            if self.check(&TokenKind::Newline) {
193                self.advance();
194                if self.is_at_end() || self.is_statement_start() {
195                    return;
196                }
197                continue;
198            }
199            if self.check(&TokenKind::RBrace) {
200                return;
201            }
202            self.advance();
203        }
204    }
205
206    pub(super) fn is_at_end(&self) -> bool {
207        self.pos >= self.tokens.len()
208            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
209    }
210
211    pub(super) fn current(&self) -> Option<&Token> {
212        self.tokens.get(self.pos)
213    }
214
215    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
216        self.tokens.get(self.pos + 1).map(|t| &t.kind)
217    }
218
219    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
220        self.tokens.get(self.pos + offset).map(|t| &t.kind)
221    }
222
223    pub(super) fn check(&self, kind: &TokenKind) -> bool {
224        self.current()
225            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
226            .unwrap_or(false)
227    }
228
229    /// Check for `kind`, skipping newlines first; used for binary operators
230    /// like `||` and `&&` that can span lines.
231    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
232        let saved = self.pos;
233        self.skip_newlines();
234        if self.check(kind) {
235            true
236        } else {
237            self.pos = saved;
238            false
239        }
240    }
241
242    /// Check if current token is an identifier with the given name (without consuming it).
243    pub(super) fn check_identifier(&self, name: &str) -> bool {
244        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
245    }
246
247    /// `gen` is contextual so existing identifiers named `gen` keep working.
248    /// It starts a stream declaration only when followed by `fn`.
249    pub(super) fn check_contextual_gen_fn(&self) -> bool {
250        if !self.check_identifier("gen") {
251            return false;
252        }
253        matches!(
254            self.tokens.get(self.pos + 1).map(|t| &t.kind),
255            Some(TokenKind::Fn)
256        )
257    }
258
259    pub(super) fn advance(&mut self) {
260        if self.pos < self.tokens.len() {
261            self.pos += 1;
262        }
263    }
264
265    pub(super) fn consume(
266        &mut self,
267        kind: &TokenKind,
268        expected: &str,
269    ) -> Result<Token, ParserError> {
270        self.skip_newlines();
271        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
272        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
273            return Err(self.make_error(expected));
274        }
275        let tok = tok.clone();
276        self.advance();
277        Ok(tok)
278    }
279
280    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
281        self.skip_newlines();
282        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
283        if let TokenKind::Identifier(name) = &tok.kind {
284            let name = name.clone();
285            self.advance();
286            Ok(name)
287        } else {
288            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
289            // a general unexpected token so the error is actionable.
290            let kw_name = harn_lexer::KEYWORDS
291                .iter()
292                .find(|&&kw| kw == tok.kind.to_string());
293            if let Some(kw) = kw_name {
294                Err(ParserError::Unexpected {
295                    got: format!("'{kw}' (reserved keyword)"),
296                    expected: expected.into(),
297                    span: tok.span,
298                })
299            } else {
300                Err(self.make_error(expected))
301            }
302        }
303    }
304
305    pub(super) fn consume_contextual_keyword(
306        &mut self,
307        name: &str,
308        expected: &str,
309    ) -> Result<Token, ParserError> {
310        self.skip_newlines();
311        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
312        if matches!(&tok.kind, TokenKind::Identifier(id) if id == name) {
313            let tok = tok.clone();
314            self.advance();
315            Ok(tok)
316        } else {
317            Err(self.make_error(expected))
318        }
319    }
320
321    /// Like `consume_identifier`, but also accepts keywords as identifiers.
322    /// Used for property access (e.g., `obj.type`) and dict keys where
323    /// keywords are valid member names.
324    pub(super) fn consume_identifier_or_keyword(
325        &mut self,
326        expected: &str,
327    ) -> Result<String, ParserError> {
328        self.skip_newlines();
329        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
330        if let TokenKind::Identifier(name) = &tok.kind {
331            let name = name.clone();
332            self.advance();
333            return Ok(name);
334        }
335        let name = match &tok.kind {
336            TokenKind::Pipeline => "pipeline",
337            TokenKind::Extends => "extends",
338            TokenKind::Override => "override",
339            TokenKind::Let => "let",
340            TokenKind::Const => "const",
341            TokenKind::Var => "var",
342            TokenKind::If => "if",
343            TokenKind::Else => "else",
344            TokenKind::For => "for",
345            TokenKind::In => "in",
346            TokenKind::Match => "match",
347            TokenKind::Retry => "retry",
348            TokenKind::Parallel => "parallel",
349            TokenKind::Return => "return",
350            TokenKind::Import => "import",
351            TokenKind::True => "true",
352            TokenKind::False => "false",
353            TokenKind::Nil => "nil",
354            TokenKind::Try => "try",
355            TokenKind::Catch => "catch",
356            TokenKind::Throw => "throw",
357            TokenKind::Finally => "finally",
358            TokenKind::Fn => "fn",
359            TokenKind::Spawn => "spawn",
360            TokenKind::While => "while",
361            TokenKind::TypeKw => "type",
362            TokenKind::Enum => "enum",
363            TokenKind::EvalPack => "eval_pack",
364            TokenKind::Struct => "struct",
365            TokenKind::Interface => "interface",
366            TokenKind::Emit => "emit",
367            TokenKind::Pub => "pub",
368            TokenKind::From => "from",
369            TokenKind::To => "to",
370            TokenKind::Tool => "tool",
371            TokenKind::Exclusive => "exclusive",
372            TokenKind::Guard => "guard",
373            TokenKind::Require => "require",
374            TokenKind::Deadline => "deadline",
375            TokenKind::Defer => "defer",
376            TokenKind::Yield => "yield",
377            TokenKind::Mutex => "mutex",
378            TokenKind::Break => "break",
379            TokenKind::Continue => "continue",
380            TokenKind::Select => "select",
381            TokenKind::Impl => "impl",
382            TokenKind::Skill => "skill",
383            TokenKind::RequestApproval => "request_approval",
384            TokenKind::DualControl => "dual_control",
385            TokenKind::AskUser => "ask_user",
386            TokenKind::EscalateTo => "escalate_to",
387            _ => return Err(self.make_error(expected)),
388        };
389        let name = name.to_string();
390        self.advance();
391        Ok(name)
392    }
393
394    pub(super) fn skip_newlines(&mut self) {
395        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
396            self.pos += 1;
397        }
398    }
399
400    /// Consume an optional semicolon statement separator followed by any
401    /// number of newlines, or one-or-more newlines on their own.
402    ///
403    /// This is intentionally narrower than `skip_newlines()`: semicolons are
404    /// only legal between already-parsed list items, not in arbitrary parse
405    /// positions.
406    pub(super) fn consume_statement_separator(&mut self) -> bool {
407        let mut consumed = false;
408        if self.check(&TokenKind::Semicolon) {
409            self.advance();
410            consumed = true;
411        }
412        let start = self.pos;
413        self.skip_newlines();
414        consumed || self.pos != start
415    }
416
417    pub(super) fn require_statement_separator(
418        &self,
419        prev_end_line: usize,
420        expected_item: &str,
421    ) -> Result<(), ParserError> {
422        let Some(tok) = self.current() else {
423            return Ok(());
424        };
425        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
426            return Ok(());
427        }
428        Err(ParserError::Unexpected {
429            got: tok.kind.to_string(),
430            expected: format!("{expected_item} separator (`;` or newline)"),
431            span: tok.span,
432        })
433    }
434
435    pub(super) fn make_error(&self, expected: &str) -> ParserError {
436        if let Some(tok) = self.tokens.get(self.pos) {
437            if tok.kind == TokenKind::Eof {
438                return ParserError::UnexpectedEof {
439                    expected: expected.into(),
440                    span: tok.span,
441                };
442            }
443            ParserError::Unexpected {
444                got: tok.kind.to_string(),
445                expected: expected.into(),
446                span: tok.span,
447            }
448        } else {
449            ParserError::UnexpectedEof {
450                expected: expected.into(),
451                span: self.prev_span(),
452            }
453        }
454    }
455
456    pub(super) fn error(&self, expected: &str) -> ParserError {
457        self.make_error(expected)
458    }
459
460    pub(super) fn with_nesting<T>(
461        &mut self,
462        context: &'static str,
463        f: impl FnOnce(&mut Self) -> Result<T, ParserError>,
464    ) -> Result<T, ParserError> {
465        if self.nesting_depth >= MAX_NESTING_DEPTH {
466            return Err(ParserError::Unexpected {
467                got: format!("{context} nesting depth exceeded"),
468                expected: format!("parser nesting depth within {MAX_NESTING_DEPTH} levels"),
469                span: self.current_span(),
470            });
471        }
472        self.nesting_depth += 1;
473        let result = f(self);
474        self.nesting_depth = self.nesting_depth.saturating_sub(1);
475        result
476    }
477}