Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3
4use super::error::ParserError;
5
6/// Recursive descent parser for Harn.
7pub struct Parser {
8    pub(super) tokens: Vec<Token>,
9    pub(super) pos: usize,
10    pub(super) errors: Vec<ParserError>,
11}
12
13impl Parser {
14    pub fn new(tokens: Vec<Token>) -> Self {
15        Self {
16            tokens,
17            pos: 0,
18            errors: Vec::new(),
19        }
20    }
21
22    pub(super) fn current_span(&self) -> Span {
23        self.tokens
24            .get(self.pos)
25            .map(|t| t.span)
26            .unwrap_or(Span::dummy())
27    }
28
29    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
30        self.tokens.get(self.pos).map(|t| &t.kind)
31    }
32
33    pub(super) fn prev_span(&self) -> Span {
34        if self.pos > 0 {
35            self.tokens[self.pos - 1].span
36        } else {
37            Span::dummy()
38        }
39    }
40
41    /// Span of the most recently consumed *non-newline* token. Useful when
42    /// computing a node's end span after the parser has already consumed
43    /// trailing newlines (e.g. while looking ahead for an optional `else` /
44    /// `catch` / `finally` clause). Using `prev_span()` in that position
45    /// would report a newline token whose `end_line` is past the visual end
46    /// of the node, which downstream tools (notably the formatter) interpret
47    /// as belonging to the node.
48    pub(super) fn last_non_newline_span(&self) -> Span {
49        let mut i = self.pos;
50        while i > 0 {
51            i -= 1;
52            if self.tokens[i].kind != TokenKind::Newline {
53                return self.tokens[i].span;
54            }
55        }
56        Span::dummy()
57    }
58
59    /// Parse a complete .harn file. Reports multiple errors via recovery.
60    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
61        let mut nodes = Vec::new();
62        self.skip_newlines();
63
64        while !self.is_at_end() {
65            // Recovery may leave us pointing at a stray `}` at top level; skip it.
66            if self.check(&TokenKind::RBrace) {
67                self.advance();
68                self.skip_newlines();
69                continue;
70            }
71
72            let result = if self.check(&TokenKind::Import) {
73                self.parse_import()
74            } else if self.check(&TokenKind::At) {
75                self.parse_attributed_decl()
76            } else if self.check(&TokenKind::Pipeline) {
77                self.parse_pipeline()
78            } else if self.check(&TokenKind::EvalPack) {
79                self.parse_eval_pack_decl(false)
80            } else {
81                self.parse_statement()
82            };
83
84            match result {
85                Ok(node) => {
86                    let end_line = node.span.end_line;
87                    nodes.push(node);
88                    let consumed_sep = self.consume_statement_separator();
89                    if !consumed_sep && !self.is_at_end() {
90                        self.require_statement_separator(end_line, "top-level item")?;
91                    }
92                }
93                Err(err) => {
94                    self.errors.push(err);
95                    self.synchronize();
96                }
97            }
98        }
99
100        if let Some(first) = self.errors.first() {
101            return Err(first.clone());
102        }
103        Ok(nodes)
104    }
105
106    /// Return all accumulated parser errors (after `parse()` returns).
107    pub fn all_errors(&self) -> &[ParserError] {
108        &self.errors
109    }
110
111    /// Check if the current token is one that starts a statement.
112    pub(super) fn is_statement_start(&self) -> bool {
113        matches!(
114            self.current_kind(),
115            Some(
116                TokenKind::Let
117                    | TokenKind::Var
118                    | TokenKind::If
119                    | TokenKind::For
120                    | TokenKind::While
121                    | TokenKind::Match
122                    | TokenKind::Retry
123                    | TokenKind::Return
124                    | TokenKind::Throw
125                    | TokenKind::Fn
126                    | TokenKind::Pub
127                    | TokenKind::Try
128                    | TokenKind::Select
129                    | TokenKind::Pipeline
130                    | TokenKind::Import
131                    | TokenKind::Parallel
132                    | TokenKind::Enum
133                    | TokenKind::EvalPack
134                    | TokenKind::Struct
135                    | TokenKind::Interface
136                    | TokenKind::Emit
137                    | TokenKind::Guard
138                    | TokenKind::Require
139                    | TokenKind::Deadline
140                    | TokenKind::Yield
141                    | TokenKind::Mutex
142                    | TokenKind::Defer
143                    | TokenKind::Break
144                    | TokenKind::Continue
145                    | TokenKind::Tool
146                    | TokenKind::Skill
147                    | TokenKind::Impl
148            )
149        )
150    }
151
152    /// Advance past tokens until we reach a likely statement boundary.
153    pub(super) fn synchronize(&mut self) {
154        while !self.is_at_end() {
155            if self.check(&TokenKind::Semicolon) {
156                self.advance();
157                self.skip_newlines();
158                return;
159            }
160            if self.check(&TokenKind::Newline) {
161                self.advance();
162                if self.is_at_end() || self.is_statement_start() {
163                    return;
164                }
165                continue;
166            }
167            if self.check(&TokenKind::RBrace) {
168                return;
169            }
170            self.advance();
171        }
172    }
173
174    pub(super) fn is_at_end(&self) -> bool {
175        self.pos >= self.tokens.len()
176            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
177    }
178
179    pub(super) fn current(&self) -> Option<&Token> {
180        self.tokens.get(self.pos)
181    }
182
183    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
184        self.tokens.get(self.pos + 1).map(|t| &t.kind)
185    }
186
187    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
188        self.tokens.get(self.pos + offset).map(|t| &t.kind)
189    }
190
191    pub(super) fn check(&self, kind: &TokenKind) -> bool {
192        self.current()
193            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
194            .unwrap_or(false)
195    }
196
197    /// Check for `kind`, skipping newlines first; used for binary operators
198    /// like `||` and `&&` that can span lines.
199    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
200        let saved = self.pos;
201        self.skip_newlines();
202        if self.check(kind) {
203            true
204        } else {
205            self.pos = saved;
206            false
207        }
208    }
209
210    /// Check if current token is an identifier with the given name (without consuming it).
211    pub(super) fn check_identifier(&self, name: &str) -> bool {
212        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
213    }
214
215    /// `gen` is contextual so existing identifiers named `gen` keep working.
216    /// It starts a stream declaration only when followed by `fn`.
217    pub(super) fn check_contextual_gen_fn(&self) -> bool {
218        if !self.check_identifier("gen") {
219            return false;
220        }
221        matches!(
222            self.tokens.get(self.pos + 1).map(|t| &t.kind),
223            Some(TokenKind::Fn)
224        )
225    }
226
227    pub(super) fn advance(&mut self) {
228        if self.pos < self.tokens.len() {
229            self.pos += 1;
230        }
231    }
232
233    pub(super) fn consume(
234        &mut self,
235        kind: &TokenKind,
236        expected: &str,
237    ) -> Result<Token, ParserError> {
238        self.skip_newlines();
239        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
240        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
241            return Err(self.make_error(expected));
242        }
243        let tok = tok.clone();
244        self.advance();
245        Ok(tok)
246    }
247
248    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
249        self.skip_newlines();
250        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
251        if let TokenKind::Identifier(name) = &tok.kind {
252            let name = name.clone();
253            self.advance();
254            Ok(name)
255        } else {
256            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
257            // a general unexpected token so the error is actionable.
258            let kw_name = harn_lexer::KEYWORDS
259                .iter()
260                .find(|&&kw| kw == tok.kind.to_string());
261            if let Some(kw) = kw_name {
262                Err(ParserError::Unexpected {
263                    got: format!("'{kw}' (reserved keyword)"),
264                    expected: expected.into(),
265                    span: tok.span,
266                })
267            } else {
268                Err(self.make_error(expected))
269            }
270        }
271    }
272
273    pub(super) fn consume_contextual_keyword(
274        &mut self,
275        name: &str,
276        expected: &str,
277    ) -> Result<Token, ParserError> {
278        self.skip_newlines();
279        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
280        if matches!(&tok.kind, TokenKind::Identifier(id) if id == name) {
281            let tok = tok.clone();
282            self.advance();
283            Ok(tok)
284        } else {
285            Err(self.make_error(expected))
286        }
287    }
288
289    /// Like `consume_identifier`, but also accepts keywords as identifiers.
290    /// Used for property access (e.g., `obj.type`) and dict keys where
291    /// keywords are valid member names.
292    pub(super) fn consume_identifier_or_keyword(
293        &mut self,
294        expected: &str,
295    ) -> Result<String, ParserError> {
296        self.skip_newlines();
297        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
298        if let TokenKind::Identifier(name) = &tok.kind {
299            let name = name.clone();
300            self.advance();
301            return Ok(name);
302        }
303        let name = match &tok.kind {
304            TokenKind::Pipeline => "pipeline",
305            TokenKind::Extends => "extends",
306            TokenKind::Override => "override",
307            TokenKind::Let => "let",
308            TokenKind::Var => "var",
309            TokenKind::If => "if",
310            TokenKind::Else => "else",
311            TokenKind::For => "for",
312            TokenKind::In => "in",
313            TokenKind::Match => "match",
314            TokenKind::Retry => "retry",
315            TokenKind::Parallel => "parallel",
316            TokenKind::Return => "return",
317            TokenKind::Import => "import",
318            TokenKind::True => "true",
319            TokenKind::False => "false",
320            TokenKind::Nil => "nil",
321            TokenKind::Try => "try",
322            TokenKind::Catch => "catch",
323            TokenKind::Throw => "throw",
324            TokenKind::Finally => "finally",
325            TokenKind::Fn => "fn",
326            TokenKind::Spawn => "spawn",
327            TokenKind::While => "while",
328            TokenKind::TypeKw => "type",
329            TokenKind::Enum => "enum",
330            TokenKind::EvalPack => "eval_pack",
331            TokenKind::Struct => "struct",
332            TokenKind::Interface => "interface",
333            TokenKind::Emit => "emit",
334            TokenKind::Pub => "pub",
335            TokenKind::From => "from",
336            TokenKind::To => "to",
337            TokenKind::Tool => "tool",
338            TokenKind::Exclusive => "exclusive",
339            TokenKind::Guard => "guard",
340            TokenKind::Require => "require",
341            TokenKind::Deadline => "deadline",
342            TokenKind::Defer => "defer",
343            TokenKind::Yield => "yield",
344            TokenKind::Mutex => "mutex",
345            TokenKind::Break => "break",
346            TokenKind::Continue => "continue",
347            TokenKind::Select => "select",
348            TokenKind::Impl => "impl",
349            TokenKind::Skill => "skill",
350            TokenKind::RequestApproval => "request_approval",
351            TokenKind::DualControl => "dual_control",
352            TokenKind::AskUser => "ask_user",
353            TokenKind::EscalateTo => "escalate_to",
354            _ => return Err(self.make_error(expected)),
355        };
356        let name = name.to_string();
357        self.advance();
358        Ok(name)
359    }
360
361    pub(super) fn skip_newlines(&mut self) {
362        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
363            self.pos += 1;
364        }
365    }
366
367    /// Consume an optional semicolon statement separator followed by any
368    /// number of newlines, or one-or-more newlines on their own.
369    ///
370    /// This is intentionally narrower than `skip_newlines()`: semicolons are
371    /// only legal between already-parsed list items, not in arbitrary parse
372    /// positions.
373    pub(super) fn consume_statement_separator(&mut self) -> bool {
374        let mut consumed = false;
375        if self.check(&TokenKind::Semicolon) {
376            self.advance();
377            consumed = true;
378        }
379        let start = self.pos;
380        self.skip_newlines();
381        consumed || self.pos != start
382    }
383
384    pub(super) fn require_statement_separator(
385        &self,
386        prev_end_line: usize,
387        expected_item: &str,
388    ) -> Result<(), ParserError> {
389        let Some(tok) = self.current() else {
390            return Ok(());
391        };
392        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
393            return Ok(());
394        }
395        Err(ParserError::Unexpected {
396            got: tok.kind.to_string(),
397            expected: format!("{expected_item} separator (`;` or newline)"),
398            span: tok.span,
399        })
400    }
401
402    pub(super) fn make_error(&self, expected: &str) -> ParserError {
403        if let Some(tok) = self.tokens.get(self.pos) {
404            if tok.kind == TokenKind::Eof {
405                return ParserError::UnexpectedEof {
406                    expected: expected.into(),
407                    span: tok.span,
408                };
409            }
410            ParserError::Unexpected {
411                got: tok.kind.to_string(),
412                expected: expected.into(),
413                span: tok.span,
414            }
415        } else {
416            ParserError::UnexpectedEof {
417                expected: expected.into(),
418                span: self.prev_span(),
419            }
420        }
421    }
422
423    pub(super) fn error(&self, expected: &str) -> ParserError {
424        self.make_error(expected)
425    }
426}