Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3use std::collections::HashSet;
4
5use super::error::ParserError;
6
7/// Recursive descent parser for Harn.
8pub struct Parser {
9    pub(super) tokens: Vec<Token>,
10    pub(super) pos: usize,
11    pub(super) errors: Vec<ParserError>,
12    pub(super) struct_names: HashSet<String>,
13}
14
15impl Parser {
16    pub fn new(tokens: Vec<Token>) -> Self {
17        Self {
18            tokens,
19            pos: 0,
20            errors: Vec::new(),
21            struct_names: HashSet::new(),
22        }
23    }
24
25    pub(super) fn current_span(&self) -> Span {
26        self.tokens
27            .get(self.pos)
28            .map(|t| t.span)
29            .unwrap_or(Span::dummy())
30    }
31
32    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
33        self.tokens.get(self.pos).map(|t| &t.kind)
34    }
35
36    pub(super) fn prev_span(&self) -> Span {
37        if self.pos > 0 {
38            self.tokens[self.pos - 1].span
39        } else {
40            Span::dummy()
41        }
42    }
43
44    /// Parse a complete .harn file. Reports multiple errors via recovery.
45    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
46        let mut nodes = Vec::new();
47        self.skip_newlines();
48
49        while !self.is_at_end() {
50            // Recovery may leave us pointing at a stray `}` at top level; skip it.
51            if self.check(&TokenKind::RBrace) {
52                self.advance();
53                self.skip_newlines();
54                continue;
55            }
56
57            let result = if self.check(&TokenKind::Import) {
58                self.parse_import()
59            } else if self.check(&TokenKind::At) {
60                self.parse_attributed_decl()
61            } else if self.check(&TokenKind::Pipeline) {
62                self.parse_pipeline()
63            } else {
64                self.parse_statement()
65            };
66
67            match result {
68                Ok(node) => {
69                    let end_line = node.span.end_line;
70                    nodes.push(node);
71                    let consumed_sep = self.consume_statement_separator();
72                    if !consumed_sep && !self.is_at_end() {
73                        self.require_statement_separator(end_line, "top-level item")?;
74                    }
75                }
76                Err(err) => {
77                    self.errors.push(err);
78                    self.synchronize();
79                }
80            }
81        }
82
83        if let Some(first) = self.errors.first() {
84            return Err(first.clone());
85        }
86        Ok(nodes)
87    }
88
89    /// Return all accumulated parser errors (after `parse()` returns).
90    pub fn all_errors(&self) -> &[ParserError] {
91        &self.errors
92    }
93
94    /// Check if the current token is one that starts a statement.
95    pub(super) fn is_statement_start(&self) -> bool {
96        matches!(
97            self.current_kind(),
98            Some(
99                TokenKind::Let
100                    | TokenKind::Var
101                    | TokenKind::If
102                    | TokenKind::For
103                    | TokenKind::While
104                    | TokenKind::Match
105                    | TokenKind::Retry
106                    | TokenKind::Return
107                    | TokenKind::Throw
108                    | TokenKind::Fn
109                    | TokenKind::Pub
110                    | TokenKind::Try
111                    | TokenKind::Select
112                    | TokenKind::Pipeline
113                    | TokenKind::Import
114                    | TokenKind::Parallel
115                    | TokenKind::Enum
116                    | TokenKind::Struct
117                    | TokenKind::Interface
118                    | TokenKind::Guard
119                    | TokenKind::Require
120                    | TokenKind::Deadline
121                    | TokenKind::Yield
122                    | TokenKind::Mutex
123                    | TokenKind::Defer
124                    | TokenKind::Break
125                    | TokenKind::Continue
126                    | TokenKind::Tool
127                    | TokenKind::Skill
128                    | TokenKind::Impl
129            )
130        )
131    }
132
133    /// Advance past tokens until we reach a likely statement boundary.
134    pub(super) fn synchronize(&mut self) {
135        while !self.is_at_end() {
136            if self.check(&TokenKind::Semicolon) {
137                self.advance();
138                self.skip_newlines();
139                return;
140            }
141            if self.check(&TokenKind::Newline) {
142                self.advance();
143                if self.is_at_end() || self.is_statement_start() {
144                    return;
145                }
146                continue;
147            }
148            if self.check(&TokenKind::RBrace) {
149                return;
150            }
151            self.advance();
152        }
153    }
154
155    pub(super) fn is_at_end(&self) -> bool {
156        self.pos >= self.tokens.len()
157            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
158    }
159
160    pub(super) fn current(&self) -> Option<&Token> {
161        self.tokens.get(self.pos)
162    }
163
164    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
165        self.tokens.get(self.pos + 1).map(|t| &t.kind)
166    }
167
168    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
169        self.tokens.get(self.pos + offset).map(|t| &t.kind)
170    }
171
172    pub(super) fn check(&self, kind: &TokenKind) -> bool {
173        self.current()
174            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
175            .unwrap_or(false)
176    }
177
178    /// Check for `kind`, skipping newlines first; used for binary operators
179    /// like `||` and `&&` that can span lines.
180    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
181        let saved = self.pos;
182        self.skip_newlines();
183        if self.check(kind) {
184            true
185        } else {
186            self.pos = saved;
187            false
188        }
189    }
190
191    /// Check if current token is an identifier with the given name (without consuming it).
192    pub(super) fn check_identifier(&self, name: &str) -> bool {
193        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
194    }
195
196    pub(super) fn advance(&mut self) {
197        if self.pos < self.tokens.len() {
198            self.pos += 1;
199        }
200    }
201
202    pub(super) fn consume(
203        &mut self,
204        kind: &TokenKind,
205        expected: &str,
206    ) -> Result<Token, ParserError> {
207        self.skip_newlines();
208        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
209        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
210            return Err(self.make_error(expected));
211        }
212        let tok = tok.clone();
213        self.advance();
214        Ok(tok)
215    }
216
217    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
218        self.skip_newlines();
219        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
220        if let TokenKind::Identifier(name) = &tok.kind {
221            let name = name.clone();
222            self.advance();
223            Ok(name)
224        } else {
225            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
226            // a general unexpected token so the error is actionable.
227            let kw_name = harn_lexer::KEYWORDS
228                .iter()
229                .find(|&&kw| kw == tok.kind.to_string());
230            if let Some(kw) = kw_name {
231                Err(ParserError::Unexpected {
232                    got: format!("'{kw}' (reserved keyword)"),
233                    expected: expected.into(),
234                    span: tok.span,
235                })
236            } else {
237                Err(self.make_error(expected))
238            }
239        }
240    }
241
242    /// Like `consume_identifier`, but also accepts keywords as identifiers.
243    /// Used for property access (e.g., `obj.type`) and dict keys where
244    /// keywords are valid member names.
245    pub(super) fn consume_identifier_or_keyword(
246        &mut self,
247        expected: &str,
248    ) -> Result<String, ParserError> {
249        self.skip_newlines();
250        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
251        if let TokenKind::Identifier(name) = &tok.kind {
252            let name = name.clone();
253            self.advance();
254            return Ok(name);
255        }
256        let name = match &tok.kind {
257            TokenKind::Pipeline => "pipeline",
258            TokenKind::Extends => "extends",
259            TokenKind::Override => "override",
260            TokenKind::Let => "let",
261            TokenKind::Var => "var",
262            TokenKind::If => "if",
263            TokenKind::Else => "else",
264            TokenKind::For => "for",
265            TokenKind::In => "in",
266            TokenKind::Match => "match",
267            TokenKind::Retry => "retry",
268            TokenKind::Parallel => "parallel",
269            TokenKind::Return => "return",
270            TokenKind::Import => "import",
271            TokenKind::True => "true",
272            TokenKind::False => "false",
273            TokenKind::Nil => "nil",
274            TokenKind::Try => "try",
275            TokenKind::Catch => "catch",
276            TokenKind::Throw => "throw",
277            TokenKind::Fn => "fn",
278            TokenKind::Spawn => "spawn",
279            TokenKind::While => "while",
280            TokenKind::TypeKw => "type",
281            TokenKind::Enum => "enum",
282            TokenKind::Struct => "struct",
283            TokenKind::Interface => "interface",
284            TokenKind::Pub => "pub",
285            TokenKind::From => "from",
286            TokenKind::To => "to",
287            TokenKind::Tool => "tool",
288            TokenKind::Exclusive => "exclusive",
289            TokenKind::Guard => "guard",
290            TokenKind::Deadline => "deadline",
291            TokenKind::Defer => "defer",
292            TokenKind::Yield => "yield",
293            TokenKind::Mutex => "mutex",
294            TokenKind::Break => "break",
295            TokenKind::Continue => "continue",
296            TokenKind::Impl => "impl",
297            _ => return Err(self.make_error(expected)),
298        };
299        let name = name.to_string();
300        self.advance();
301        Ok(name)
302    }
303
304    pub(super) fn skip_newlines(&mut self) {
305        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
306            self.pos += 1;
307        }
308    }
309
310    /// Consume an optional semicolon statement separator followed by any
311    /// number of newlines, or one-or-more newlines on their own.
312    ///
313    /// This is intentionally narrower than `skip_newlines()`: semicolons are
314    /// only legal between already-parsed list items, not in arbitrary parse
315    /// positions.
316    pub(super) fn consume_statement_separator(&mut self) -> bool {
317        let mut consumed = false;
318        if self.check(&TokenKind::Semicolon) {
319            self.advance();
320            consumed = true;
321        }
322        let start = self.pos;
323        self.skip_newlines();
324        consumed || self.pos != start
325    }
326
327    pub(super) fn require_statement_separator(
328        &self,
329        prev_end_line: usize,
330        expected_item: &str,
331    ) -> Result<(), ParserError> {
332        let Some(tok) = self.current() else {
333            return Ok(());
334        };
335        if tok.kind == TokenKind::Eof || tok.span.line != prev_end_line {
336            return Ok(());
337        }
338        Err(ParserError::Unexpected {
339            got: tok.kind.to_string(),
340            expected: format!("{expected_item} separator (`;` or newline)"),
341            span: tok.span,
342        })
343    }
344
345    pub(super) fn make_error(&self, expected: &str) -> ParserError {
346        if let Some(tok) = self.tokens.get(self.pos) {
347            if tok.kind == TokenKind::Eof {
348                return ParserError::UnexpectedEof {
349                    expected: expected.into(),
350                    span: tok.span,
351                };
352            }
353            ParserError::Unexpected {
354                got: tok.kind.to_string(),
355                expected: expected.into(),
356                span: tok.span,
357            }
358        } else {
359            ParserError::UnexpectedEof {
360                expected: expected.into(),
361                span: self.prev_span(),
362            }
363        }
364    }
365
366    pub(super) fn error(&self, expected: &str) -> ParserError {
367        self.make_error(expected)
368    }
369}