Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3
4use super::error::ParserError;
5
6/// Recursive descent parser for Harn.
7pub struct Parser {
8    pub(super) tokens: Vec<Token>,
9    pub(super) pos: usize,
10    pub(super) errors: Vec<ParserError>,
11}
12
13impl Parser {
14    pub fn new(tokens: Vec<Token>) -> Self {
15        Self {
16            tokens,
17            pos: 0,
18            errors: Vec::new(),
19        }
20    }
21
22    pub(super) fn current_span(&self) -> Span {
23        self.tokens
24            .get(self.pos)
25            .map(|t| t.span)
26            .unwrap_or(Span::dummy())
27    }
28
29    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
30        self.tokens.get(self.pos).map(|t| &t.kind)
31    }
32
33    pub(super) fn prev_span(&self) -> Span {
34        if self.pos > 0 {
35            self.tokens[self.pos - 1].span
36        } else {
37            Span::dummy()
38        }
39    }
40
41    /// Parse a complete .harn file. Reports multiple errors via recovery.
42    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
43        let mut nodes = Vec::new();
44        self.skip_newlines();
45
46        while !self.is_at_end() {
47            // Recovery may leave us pointing at a stray `}` at top level; skip it.
48            if self.check(&TokenKind::RBrace) {
49                self.advance();
50                self.skip_newlines();
51                continue;
52            }
53
54            let result = if self.check(&TokenKind::Import) {
55                self.parse_import()
56            } else if self.check(&TokenKind::At) {
57                self.parse_attributed_decl()
58            } else if self.check(&TokenKind::Pipeline) {
59                self.parse_pipeline()
60            } else {
61                self.parse_statement()
62            };
63
64            match result {
65                Ok(node) => {
66                    let end_line = node.span.end_line;
67                    nodes.push(node);
68                    let consumed_sep = self.consume_statement_separator();
69                    if !consumed_sep && !self.is_at_end() {
70                        self.require_statement_separator(end_line, "top-level item")?;
71                    }
72                }
73                Err(err) => {
74                    self.errors.push(err);
75                    self.synchronize();
76                }
77            }
78        }
79
80        if let Some(first) = self.errors.first() {
81            return Err(first.clone());
82        }
83        Ok(nodes)
84    }
85
86    /// Return all accumulated parser errors (after `parse()` returns).
87    pub fn all_errors(&self) -> &[ParserError] {
88        &self.errors
89    }
90
91    /// Check if the current token is one that starts a statement.
92    pub(super) fn is_statement_start(&self) -> bool {
93        matches!(
94            self.current_kind(),
95            Some(
96                TokenKind::Let
97                    | TokenKind::Var
98                    | TokenKind::If
99                    | TokenKind::For
100                    | TokenKind::While
101                    | TokenKind::Match
102                    | TokenKind::Retry
103                    | TokenKind::Return
104                    | TokenKind::Throw
105                    | TokenKind::Fn
106                    | TokenKind::Pub
107                    | TokenKind::Try
108                    | TokenKind::Select
109                    | TokenKind::Pipeline
110                    | TokenKind::Import
111                    | TokenKind::Parallel
112                    | TokenKind::Enum
113                    | TokenKind::Struct
114                    | TokenKind::Interface
115                    | TokenKind::Guard
116                    | TokenKind::Require
117                    | TokenKind::Deadline
118                    | TokenKind::Yield
119                    | TokenKind::Mutex
120                    | TokenKind::Defer
121                    | TokenKind::Break
122                    | TokenKind::Continue
123                    | TokenKind::Tool
124                    | TokenKind::Skill
125                    | TokenKind::Impl
126            )
127        )
128    }
129
130    /// Advance past tokens until we reach a likely statement boundary.
131    pub(super) fn synchronize(&mut self) {
132        while !self.is_at_end() {
133            if self.check(&TokenKind::Semicolon) {
134                self.advance();
135                self.skip_newlines();
136                return;
137            }
138            if self.check(&TokenKind::Newline) {
139                self.advance();
140                if self.is_at_end() || self.is_statement_start() {
141                    return;
142                }
143                continue;
144            }
145            if self.check(&TokenKind::RBrace) {
146                return;
147            }
148            self.advance();
149        }
150    }
151
152    pub(super) fn is_at_end(&self) -> bool {
153        self.pos >= self.tokens.len()
154            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
155    }
156
157    pub(super) fn current(&self) -> Option<&Token> {
158        self.tokens.get(self.pos)
159    }
160
161    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
162        self.tokens.get(self.pos + 1).map(|t| &t.kind)
163    }
164
165    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
166        self.tokens.get(self.pos + offset).map(|t| &t.kind)
167    }
168
169    pub(super) fn check(&self, kind: &TokenKind) -> bool {
170        self.current()
171            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
172            .unwrap_or(false)
173    }
174
175    /// Check for `kind`, skipping newlines first; used for binary operators
176    /// like `||` and `&&` that can span lines.
177    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
178        let saved = self.pos;
179        self.skip_newlines();
180        if self.check(kind) {
181            true
182        } else {
183            self.pos = saved;
184            false
185        }
186    }
187
188    /// Check if current token is an identifier with the given name (without consuming it).
189    pub(super) fn check_identifier(&self, name: &str) -> bool {
190        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
191    }
192
193    pub(super) fn advance(&mut self) {
194        if self.pos < self.tokens.len() {
195            self.pos += 1;
196        }
197    }
198
199    pub(super) fn consume(
200        &mut self,
201        kind: &TokenKind,
202        expected: &str,
203    ) -> Result<Token, ParserError> {
204        self.skip_newlines();
205        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
206        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
207            return Err(self.make_error(expected));
208        }
209        let tok = tok.clone();
210        self.advance();
211        Ok(tok)
212    }
213
214    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
215        self.skip_newlines();
216        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
217        if let TokenKind::Identifier(name) = &tok.kind {
218            let name = name.clone();
219            self.advance();
220            Ok(name)
221        } else {
222            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
223            // a general unexpected token so the error is actionable.
224            let kw_name = harn_lexer::KEYWORDS
225                .iter()
226                .find(|&&kw| kw == tok.kind.to_string());
227            if let Some(kw) = kw_name {
228                Err(ParserError::Unexpected {
229                    got: format!("'{kw}' (reserved keyword)"),
230                    expected: expected.into(),
231                    span: tok.span,
232                })
233            } else {
234                Err(self.make_error(expected))
235            }
236        }
237    }
238
239    /// Like `consume_identifier`, but also accepts keywords as identifiers.
240    /// Used for property access (e.g., `obj.type`) and dict keys where
241    /// keywords are valid member names.
242    pub(super) fn consume_identifier_or_keyword(
243        &mut self,
244        expected: &str,
245    ) -> Result<String, ParserError> {
246        self.skip_newlines();
247        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
248        if let TokenKind::Identifier(name) = &tok.kind {
249            let name = name.clone();
250            self.advance();
251            return Ok(name);
252        }
253        let name = match &tok.kind {
254            TokenKind::Pipeline => "pipeline",
255            TokenKind::Extends => "extends",
256            TokenKind::Override => "override",
257            TokenKind::Let => "let",
258            TokenKind::Var => "var",
259            TokenKind::If => "if",
260            TokenKind::Else => "else",
261            TokenKind::For => "for",
262            TokenKind::In => "in",
263            TokenKind::Match => "match",
264            TokenKind::Retry => "retry",
265            TokenKind::Parallel => "parallel",
266            TokenKind::Return => "return",
267            TokenKind::Import => "import",
268            TokenKind::True => "true",
269            TokenKind::False => "false",
270            TokenKind::Nil => "nil",
271            TokenKind::Try => "try",
272            TokenKind::Catch => "catch",
273            TokenKind::Throw => "throw",
274            TokenKind::Fn => "fn",
275            TokenKind::Spawn => "spawn",
276            TokenKind::While => "while",
277            TokenKind::TypeKw => "type",
278            TokenKind::Enum => "enum",
279            TokenKind::Struct => "struct",
280            TokenKind::Interface => "interface",
281            TokenKind::Pub => "pub",
282            TokenKind::From => "from",
283            TokenKind::To => "to",
284            TokenKind::Tool => "tool",
285            TokenKind::Exclusive => "exclusive",
286            TokenKind::Guard => "guard",
287            TokenKind::Deadline => "deadline",
288            TokenKind::Defer => "defer",
289            TokenKind::Yield => "yield",
290            TokenKind::Mutex => "mutex",
291            TokenKind::Break => "break",
292            TokenKind::Continue => "continue",
293            TokenKind::Impl => "impl",
294            _ => return Err(self.make_error(expected)),
295        };
296        let name = name.to_string();
297        self.advance();
298        Ok(name)
299    }
300
301    pub(super) fn skip_newlines(&mut self) {
302        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
303            self.pos += 1;
304        }
305    }
306
307    /// Consume an optional semicolon statement separator followed by any
308    /// number of newlines, or one-or-more newlines on their own.
309    ///
310    /// This is intentionally narrower than `skip_newlines()`: semicolons are
311    /// only legal between already-parsed list items, not in arbitrary parse
312    /// positions.
313    pub(super) fn consume_statement_separator(&mut self) -> bool {
314        let mut consumed = false;
315        if self.check(&TokenKind::Semicolon) {
316            self.advance();
317            consumed = true;
318        }
319        let start = self.pos;
320        self.skip_newlines();
321        consumed || self.pos != start
322    }
323
324    pub(super) fn require_statement_separator(
325        &self,
326        prev_end_line: usize,
327        expected_item: &str,
328    ) -> Result<(), ParserError> {
329        let Some(tok) = self.current() else {
330            return Ok(());
331        };
332        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
333            return Ok(());
334        }
335        Err(ParserError::Unexpected {
336            got: tok.kind.to_string(),
337            expected: format!("{expected_item} separator (`;` or newline)"),
338            span: tok.span,
339        })
340    }
341
342    pub(super) fn make_error(&self, expected: &str) -> ParserError {
343        if let Some(tok) = self.tokens.get(self.pos) {
344            if tok.kind == TokenKind::Eof {
345                return ParserError::UnexpectedEof {
346                    expected: expected.into(),
347                    span: tok.span,
348                };
349            }
350            ParserError::Unexpected {
351                got: tok.kind.to_string(),
352                expected: expected.into(),
353                span: tok.span,
354            }
355        } else {
356            ParserError::UnexpectedEof {
357                expected: expected.into(),
358                span: self.prev_span(),
359            }
360        }
361    }
362
363    pub(super) fn error(&self, expected: &str) -> ParserError {
364        self.make_error(expected)
365    }
366}