Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3
4use super::error::ParserError;
5
6/// Recursive descent parser for Harn.
7pub struct Parser {
8    pub(super) tokens: Vec<Token>,
9    pub(super) pos: usize,
10    pub(super) errors: Vec<ParserError>,
11}
12
13impl Parser {
14    pub fn new(tokens: Vec<Token>) -> Self {
15        Self {
16            tokens,
17            pos: 0,
18            errors: Vec::new(),
19        }
20    }
21
22    pub(super) fn current_span(&self) -> Span {
23        self.tokens
24            .get(self.pos)
25            .map(|t| t.span)
26            .unwrap_or(Span::dummy())
27    }
28
29    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
30        self.tokens.get(self.pos).map(|t| &t.kind)
31    }
32
33    pub(super) fn prev_span(&self) -> Span {
34        if self.pos > 0 {
35            self.tokens[self.pos - 1].span
36        } else {
37            Span::dummy()
38        }
39    }
40
41    /// Parse a complete .harn file. Reports multiple errors via recovery.
42    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
43        let mut nodes = Vec::new();
44        self.skip_newlines();
45
46        while !self.is_at_end() {
47            // Recovery may leave us pointing at a stray `}` at top level; skip it.
48            if self.check(&TokenKind::RBrace) {
49                self.advance();
50                self.skip_newlines();
51                continue;
52            }
53
54            let result = if self.check(&TokenKind::Import) {
55                self.parse_import()
56            } else if self.check(&TokenKind::At) {
57                self.parse_attributed_decl()
58            } else if self.check(&TokenKind::Pipeline) {
59                self.parse_pipeline()
60            } else if self.check(&TokenKind::EvalPack) {
61                self.parse_eval_pack_decl(false)
62            } else {
63                self.parse_statement()
64            };
65
66            match result {
67                Ok(node) => {
68                    let end_line = node.span.end_line;
69                    nodes.push(node);
70                    let consumed_sep = self.consume_statement_separator();
71                    if !consumed_sep && !self.is_at_end() {
72                        self.require_statement_separator(end_line, "top-level item")?;
73                    }
74                }
75                Err(err) => {
76                    self.errors.push(err);
77                    self.synchronize();
78                }
79            }
80        }
81
82        if let Some(first) = self.errors.first() {
83            return Err(first.clone());
84        }
85        Ok(nodes)
86    }
87
88    /// Return all accumulated parser errors (after `parse()` returns).
89    pub fn all_errors(&self) -> &[ParserError] {
90        &self.errors
91    }
92
93    /// Check if the current token is one that starts a statement.
94    pub(super) fn is_statement_start(&self) -> bool {
95        matches!(
96            self.current_kind(),
97            Some(
98                TokenKind::Let
99                    | TokenKind::Var
100                    | TokenKind::If
101                    | TokenKind::For
102                    | TokenKind::While
103                    | TokenKind::Match
104                    | TokenKind::Retry
105                    | TokenKind::Return
106                    | TokenKind::Throw
107                    | TokenKind::Fn
108                    | TokenKind::Pub
109                    | TokenKind::Try
110                    | TokenKind::Select
111                    | TokenKind::Pipeline
112                    | TokenKind::Import
113                    | TokenKind::Parallel
114                    | TokenKind::Enum
115                    | TokenKind::EvalPack
116                    | TokenKind::Struct
117                    | TokenKind::Interface
118                    | TokenKind::Emit
119                    | TokenKind::Guard
120                    | TokenKind::Require
121                    | TokenKind::Deadline
122                    | TokenKind::Yield
123                    | TokenKind::Mutex
124                    | TokenKind::Defer
125                    | TokenKind::Break
126                    | TokenKind::Continue
127                    | TokenKind::Tool
128                    | TokenKind::Skill
129                    | TokenKind::Impl
130            )
131        )
132    }
133
134    /// Advance past tokens until we reach a likely statement boundary.
135    pub(super) fn synchronize(&mut self) {
136        while !self.is_at_end() {
137            if self.check(&TokenKind::Semicolon) {
138                self.advance();
139                self.skip_newlines();
140                return;
141            }
142            if self.check(&TokenKind::Newline) {
143                self.advance();
144                if self.is_at_end() || self.is_statement_start() {
145                    return;
146                }
147                continue;
148            }
149            if self.check(&TokenKind::RBrace) {
150                return;
151            }
152            self.advance();
153        }
154    }
155
156    pub(super) fn is_at_end(&self) -> bool {
157        self.pos >= self.tokens.len()
158            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
159    }
160
161    pub(super) fn current(&self) -> Option<&Token> {
162        self.tokens.get(self.pos)
163    }
164
165    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
166        self.tokens.get(self.pos + 1).map(|t| &t.kind)
167    }
168
169    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
170        self.tokens.get(self.pos + offset).map(|t| &t.kind)
171    }
172
173    pub(super) fn check(&self, kind: &TokenKind) -> bool {
174        self.current()
175            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
176            .unwrap_or(false)
177    }
178
179    /// Check for `kind`, skipping newlines first; used for binary operators
180    /// like `||` and `&&` that can span lines.
181    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
182        let saved = self.pos;
183        self.skip_newlines();
184        if self.check(kind) {
185            true
186        } else {
187            self.pos = saved;
188            false
189        }
190    }
191
192    /// Check if current token is an identifier with the given name (without consuming it).
193    pub(super) fn check_identifier(&self, name: &str) -> bool {
194        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
195    }
196
197    /// `gen` is contextual so existing identifiers named `gen` keep working.
198    /// It starts a stream declaration only when followed by `fn`.
199    pub(super) fn check_contextual_gen_fn(&self) -> bool {
200        if !self.check_identifier("gen") {
201            return false;
202        }
203        matches!(
204            self.tokens.get(self.pos + 1).map(|t| &t.kind),
205            Some(TokenKind::Fn)
206        )
207    }
208
209    pub(super) fn advance(&mut self) {
210        if self.pos < self.tokens.len() {
211            self.pos += 1;
212        }
213    }
214
215    pub(super) fn consume(
216        &mut self,
217        kind: &TokenKind,
218        expected: &str,
219    ) -> Result<Token, ParserError> {
220        self.skip_newlines();
221        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
222        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
223            return Err(self.make_error(expected));
224        }
225        let tok = tok.clone();
226        self.advance();
227        Ok(tok)
228    }
229
230    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
231        self.skip_newlines();
232        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
233        if let TokenKind::Identifier(name) = &tok.kind {
234            let name = name.clone();
235            self.advance();
236            Ok(name)
237        } else {
238            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
239            // a general unexpected token so the error is actionable.
240            let kw_name = harn_lexer::KEYWORDS
241                .iter()
242                .find(|&&kw| kw == tok.kind.to_string());
243            if let Some(kw) = kw_name {
244                Err(ParserError::Unexpected {
245                    got: format!("'{kw}' (reserved keyword)"),
246                    expected: expected.into(),
247                    span: tok.span,
248                })
249            } else {
250                Err(self.make_error(expected))
251            }
252        }
253    }
254
255    pub(super) fn consume_contextual_keyword(
256        &mut self,
257        name: &str,
258        expected: &str,
259    ) -> Result<Token, ParserError> {
260        self.skip_newlines();
261        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
262        if matches!(&tok.kind, TokenKind::Identifier(id) if id == name) {
263            let tok = tok.clone();
264            self.advance();
265            Ok(tok)
266        } else {
267            Err(self.make_error(expected))
268        }
269    }
270
271    /// Like `consume_identifier`, but also accepts keywords as identifiers.
272    /// Used for property access (e.g., `obj.type`) and dict keys where
273    /// keywords are valid member names.
274    pub(super) fn consume_identifier_or_keyword(
275        &mut self,
276        expected: &str,
277    ) -> Result<String, ParserError> {
278        self.skip_newlines();
279        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
280        if let TokenKind::Identifier(name) = &tok.kind {
281            let name = name.clone();
282            self.advance();
283            return Ok(name);
284        }
285        let name = match &tok.kind {
286            TokenKind::Pipeline => "pipeline",
287            TokenKind::Extends => "extends",
288            TokenKind::Override => "override",
289            TokenKind::Let => "let",
290            TokenKind::Var => "var",
291            TokenKind::If => "if",
292            TokenKind::Else => "else",
293            TokenKind::For => "for",
294            TokenKind::In => "in",
295            TokenKind::Match => "match",
296            TokenKind::Retry => "retry",
297            TokenKind::Parallel => "parallel",
298            TokenKind::Return => "return",
299            TokenKind::Import => "import",
300            TokenKind::True => "true",
301            TokenKind::False => "false",
302            TokenKind::Nil => "nil",
303            TokenKind::Try => "try",
304            TokenKind::Catch => "catch",
305            TokenKind::Throw => "throw",
306            TokenKind::Fn => "fn",
307            TokenKind::Spawn => "spawn",
308            TokenKind::While => "while",
309            TokenKind::TypeKw => "type",
310            TokenKind::Enum => "enum",
311            TokenKind::Struct => "struct",
312            TokenKind::Interface => "interface",
313            TokenKind::Emit => "emit",
314            TokenKind::Pub => "pub",
315            TokenKind::From => "from",
316            TokenKind::To => "to",
317            TokenKind::Tool => "tool",
318            TokenKind::Exclusive => "exclusive",
319            TokenKind::Guard => "guard",
320            TokenKind::Deadline => "deadline",
321            TokenKind::Defer => "defer",
322            TokenKind::Yield => "yield",
323            TokenKind::Mutex => "mutex",
324            TokenKind::Break => "break",
325            TokenKind::Continue => "continue",
326            TokenKind::Impl => "impl",
327            _ => return Err(self.make_error(expected)),
328        };
329        let name = name.to_string();
330        self.advance();
331        Ok(name)
332    }
333
334    pub(super) fn skip_newlines(&mut self) {
335        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
336            self.pos += 1;
337        }
338    }
339
340    /// Consume an optional semicolon statement separator followed by any
341    /// number of newlines, or one-or-more newlines on their own.
342    ///
343    /// This is intentionally narrower than `skip_newlines()`: semicolons are
344    /// only legal between already-parsed list items, not in arbitrary parse
345    /// positions.
346    pub(super) fn consume_statement_separator(&mut self) -> bool {
347        let mut consumed = false;
348        if self.check(&TokenKind::Semicolon) {
349            self.advance();
350            consumed = true;
351        }
352        let start = self.pos;
353        self.skip_newlines();
354        consumed || self.pos != start
355    }
356
357    pub(super) fn require_statement_separator(
358        &self,
359        prev_end_line: usize,
360        expected_item: &str,
361    ) -> Result<(), ParserError> {
362        let Some(tok) = self.current() else {
363            return Ok(());
364        };
365        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
366            return Ok(());
367        }
368        Err(ParserError::Unexpected {
369            got: tok.kind.to_string(),
370            expected: format!("{expected_item} separator (`;` or newline)"),
371            span: tok.span,
372        })
373    }
374
375    pub(super) fn make_error(&self, expected: &str) -> ParserError {
376        if let Some(tok) = self.tokens.get(self.pos) {
377            if tok.kind == TokenKind::Eof {
378                return ParserError::UnexpectedEof {
379                    expected: expected.into(),
380                    span: tok.span,
381                };
382            }
383            ParserError::Unexpected {
384                got: tok.kind.to_string(),
385                expected: expected.into(),
386                span: tok.span,
387            }
388        } else {
389            ParserError::UnexpectedEof {
390                expected: expected.into(),
391                span: self.prev_span(),
392            }
393        }
394    }
395
396    pub(super) fn error(&self, expected: &str) -> ParserError {
397        self.make_error(expected)
398    }
399}