Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3
4use super::error::ParserError;
5
6/// Recursive descent parser for Harn.
7pub struct Parser {
8    pub(super) tokens: Vec<Token>,
9    pub(super) pos: usize,
10    pub(super) errors: Vec<ParserError>,
11}
12
13impl Parser {
14    pub fn new(tokens: Vec<Token>) -> Self {
15        Self {
16            tokens,
17            pos: 0,
18            errors: Vec::new(),
19        }
20    }
21
22    pub(super) fn current_span(&self) -> Span {
23        self.tokens
24            .get(self.pos)
25            .map(|t| t.span)
26            .unwrap_or(Span::dummy())
27    }
28
29    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
30        self.tokens.get(self.pos).map(|t| &t.kind)
31    }
32
33    pub(super) fn prev_span(&self) -> Span {
34        if self.pos > 0 {
35            self.tokens[self.pos - 1].span
36        } else {
37            Span::dummy()
38        }
39    }
40
41    /// Parse a complete .harn file. Reports multiple errors via recovery.
42    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
43        let mut nodes = Vec::new();
44        self.skip_newlines();
45
46        while !self.is_at_end() {
47            // Recovery may leave us pointing at a stray `}` at top level; skip it.
48            if self.check(&TokenKind::RBrace) {
49                self.advance();
50                self.skip_newlines();
51                continue;
52            }
53
54            let result = if self.check(&TokenKind::Import) {
55                self.parse_import()
56            } else if self.check(&TokenKind::At) {
57                self.parse_attributed_decl()
58            } else if self.check(&TokenKind::Pipeline) {
59                self.parse_pipeline()
60            } else {
61                self.parse_statement()
62            };
63
64            match result {
65                Ok(node) => {
66                    let end_line = node.span.end_line;
67                    nodes.push(node);
68                    let consumed_sep = self.consume_statement_separator();
69                    if !consumed_sep && !self.is_at_end() {
70                        self.require_statement_separator(end_line, "top-level item")?;
71                    }
72                }
73                Err(err) => {
74                    self.errors.push(err);
75                    self.synchronize();
76                }
77            }
78        }
79
80        if let Some(first) = self.errors.first() {
81            return Err(first.clone());
82        }
83        Ok(nodes)
84    }
85
86    /// Return all accumulated parser errors (after `parse()` returns).
87    pub fn all_errors(&self) -> &[ParserError] {
88        &self.errors
89    }
90
91    /// Check if the current token is one that starts a statement.
92    pub(super) fn is_statement_start(&self) -> bool {
93        matches!(
94            self.current_kind(),
95            Some(
96                TokenKind::Let
97                    | TokenKind::Var
98                    | TokenKind::If
99                    | TokenKind::For
100                    | TokenKind::While
101                    | TokenKind::Match
102                    | TokenKind::Retry
103                    | TokenKind::Return
104                    | TokenKind::Throw
105                    | TokenKind::Fn
106                    | TokenKind::Pub
107                    | TokenKind::Try
108                    | TokenKind::Select
109                    | TokenKind::Pipeline
110                    | TokenKind::Import
111                    | TokenKind::Parallel
112                    | TokenKind::Enum
113                    | TokenKind::Struct
114                    | TokenKind::Interface
115                    | TokenKind::Emit
116                    | TokenKind::Guard
117                    | TokenKind::Require
118                    | TokenKind::Deadline
119                    | TokenKind::Yield
120                    | TokenKind::Mutex
121                    | TokenKind::Defer
122                    | TokenKind::Break
123                    | TokenKind::Continue
124                    | TokenKind::Tool
125                    | TokenKind::Skill
126                    | TokenKind::Impl
127            )
128        )
129    }
130
131    /// Advance past tokens until we reach a likely statement boundary.
132    pub(super) fn synchronize(&mut self) {
133        while !self.is_at_end() {
134            if self.check(&TokenKind::Semicolon) {
135                self.advance();
136                self.skip_newlines();
137                return;
138            }
139            if self.check(&TokenKind::Newline) {
140                self.advance();
141                if self.is_at_end() || self.is_statement_start() {
142                    return;
143                }
144                continue;
145            }
146            if self.check(&TokenKind::RBrace) {
147                return;
148            }
149            self.advance();
150        }
151    }
152
153    pub(super) fn is_at_end(&self) -> bool {
154        self.pos >= self.tokens.len()
155            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
156    }
157
158    pub(super) fn current(&self) -> Option<&Token> {
159        self.tokens.get(self.pos)
160    }
161
162    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
163        self.tokens.get(self.pos + 1).map(|t| &t.kind)
164    }
165
166    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
167        self.tokens.get(self.pos + offset).map(|t| &t.kind)
168    }
169
170    pub(super) fn check(&self, kind: &TokenKind) -> bool {
171        self.current()
172            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
173            .unwrap_or(false)
174    }
175
176    /// Check for `kind`, skipping newlines first; used for binary operators
177    /// like `||` and `&&` that can span lines.
178    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
179        let saved = self.pos;
180        self.skip_newlines();
181        if self.check(kind) {
182            true
183        } else {
184            self.pos = saved;
185            false
186        }
187    }
188
189    /// Check if current token is an identifier with the given name (without consuming it).
190    pub(super) fn check_identifier(&self, name: &str) -> bool {
191        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
192    }
193
194    /// `gen` is contextual so existing identifiers named `gen` keep working.
195    /// It starts a stream declaration only when followed by `fn`.
196    pub(super) fn check_contextual_gen_fn(&self) -> bool {
197        if !self.check_identifier("gen") {
198            return false;
199        }
200        matches!(
201            self.tokens.get(self.pos + 1).map(|t| &t.kind),
202            Some(TokenKind::Fn)
203        )
204    }
205
206    pub(super) fn advance(&mut self) {
207        if self.pos < self.tokens.len() {
208            self.pos += 1;
209        }
210    }
211
212    pub(super) fn consume(
213        &mut self,
214        kind: &TokenKind,
215        expected: &str,
216    ) -> Result<Token, ParserError> {
217        self.skip_newlines();
218        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
219        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
220            return Err(self.make_error(expected));
221        }
222        let tok = tok.clone();
223        self.advance();
224        Ok(tok)
225    }
226
227    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
228        self.skip_newlines();
229        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
230        if let TokenKind::Identifier(name) = &tok.kind {
231            let name = name.clone();
232            self.advance();
233            Ok(name)
234        } else {
235            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
236            // a general unexpected token so the error is actionable.
237            let kw_name = harn_lexer::KEYWORDS
238                .iter()
239                .find(|&&kw| kw == tok.kind.to_string());
240            if let Some(kw) = kw_name {
241                Err(ParserError::Unexpected {
242                    got: format!("'{kw}' (reserved keyword)"),
243                    expected: expected.into(),
244                    span: tok.span,
245                })
246            } else {
247                Err(self.make_error(expected))
248            }
249        }
250    }
251
252    pub(super) fn consume_contextual_keyword(
253        &mut self,
254        name: &str,
255        expected: &str,
256    ) -> Result<Token, ParserError> {
257        self.skip_newlines();
258        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
259        if matches!(&tok.kind, TokenKind::Identifier(id) if id == name) {
260            let tok = tok.clone();
261            self.advance();
262            Ok(tok)
263        } else {
264            Err(self.make_error(expected))
265        }
266    }
267
268    /// Like `consume_identifier`, but also accepts keywords as identifiers.
269    /// Used for property access (e.g., `obj.type`) and dict keys where
270    /// keywords are valid member names.
271    pub(super) fn consume_identifier_or_keyword(
272        &mut self,
273        expected: &str,
274    ) -> Result<String, ParserError> {
275        self.skip_newlines();
276        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
277        if let TokenKind::Identifier(name) = &tok.kind {
278            let name = name.clone();
279            self.advance();
280            return Ok(name);
281        }
282        let name = match &tok.kind {
283            TokenKind::Pipeline => "pipeline",
284            TokenKind::Extends => "extends",
285            TokenKind::Override => "override",
286            TokenKind::Let => "let",
287            TokenKind::Var => "var",
288            TokenKind::If => "if",
289            TokenKind::Else => "else",
290            TokenKind::For => "for",
291            TokenKind::In => "in",
292            TokenKind::Match => "match",
293            TokenKind::Retry => "retry",
294            TokenKind::Parallel => "parallel",
295            TokenKind::Return => "return",
296            TokenKind::Import => "import",
297            TokenKind::True => "true",
298            TokenKind::False => "false",
299            TokenKind::Nil => "nil",
300            TokenKind::Try => "try",
301            TokenKind::Catch => "catch",
302            TokenKind::Throw => "throw",
303            TokenKind::Fn => "fn",
304            TokenKind::Spawn => "spawn",
305            TokenKind::While => "while",
306            TokenKind::TypeKw => "type",
307            TokenKind::Enum => "enum",
308            TokenKind::Struct => "struct",
309            TokenKind::Interface => "interface",
310            TokenKind::Emit => "emit",
311            TokenKind::Pub => "pub",
312            TokenKind::From => "from",
313            TokenKind::To => "to",
314            TokenKind::Tool => "tool",
315            TokenKind::Exclusive => "exclusive",
316            TokenKind::Guard => "guard",
317            TokenKind::Deadline => "deadline",
318            TokenKind::Defer => "defer",
319            TokenKind::Yield => "yield",
320            TokenKind::Mutex => "mutex",
321            TokenKind::Break => "break",
322            TokenKind::Continue => "continue",
323            TokenKind::Impl => "impl",
324            _ => return Err(self.make_error(expected)),
325        };
326        let name = name.to_string();
327        self.advance();
328        Ok(name)
329    }
330
331    pub(super) fn skip_newlines(&mut self) {
332        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
333            self.pos += 1;
334        }
335    }
336
337    /// Consume an optional semicolon statement separator followed by any
338    /// number of newlines, or one-or-more newlines on their own.
339    ///
340    /// This is intentionally narrower than `skip_newlines()`: semicolons are
341    /// only legal between already-parsed list items, not in arbitrary parse
342    /// positions.
343    pub(super) fn consume_statement_separator(&mut self) -> bool {
344        let mut consumed = false;
345        if self.check(&TokenKind::Semicolon) {
346            self.advance();
347            consumed = true;
348        }
349        let start = self.pos;
350        self.skip_newlines();
351        consumed || self.pos != start
352    }
353
354    pub(super) fn require_statement_separator(
355        &self,
356        prev_end_line: usize,
357        expected_item: &str,
358    ) -> Result<(), ParserError> {
359        let Some(tok) = self.current() else {
360            return Ok(());
361        };
362        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
363            return Ok(());
364        }
365        Err(ParserError::Unexpected {
366            got: tok.kind.to_string(),
367            expected: format!("{expected_item} separator (`;` or newline)"),
368            span: tok.span,
369        })
370    }
371
372    pub(super) fn make_error(&self, expected: &str) -> ParserError {
373        if let Some(tok) = self.tokens.get(self.pos) {
374            if tok.kind == TokenKind::Eof {
375                return ParserError::UnexpectedEof {
376                    expected: expected.into(),
377                    span: tok.span,
378                };
379            }
380            ParserError::Unexpected {
381                got: tok.kind.to_string(),
382                expected: expected.into(),
383                span: tok.span,
384            }
385        } else {
386            ParserError::UnexpectedEof {
387                expected: expected.into(),
388                span: self.prev_span(),
389            }
390        }
391    }
392
393    pub(super) fn error(&self, expected: &str) -> ParserError {
394        self.make_error(expected)
395    }
396}