Skip to main content

harn_parser/parser/
state.rs

1use crate::ast::*;
2use harn_lexer::{Span, Token, TokenKind};
3use std::collections::HashSet;
4
5use super::error::ParserError;
6
/// Recursive descent parser for Harn.
///
/// Holds the token list being parsed, a cursor into it, and the errors
/// recorded so far.
pub struct Parser {
    /// Tokens being parsed; indexed by `pos`.
    pub(super) tokens: Vec<Token>,
    /// Index into `tokens` of the current (not yet consumed) token.
    pub(super) pos: usize,
    /// Errors recorded by `parse()`; exposed through `all_errors()`.
    pub(super) errors: Vec<ParserError>,
    /// Set of names, empty on construction.
    /// NOTE(review): presumably the struct names declared so far, filled in
    /// by other parser modules — confirm against the callers.
    pub(super) struct_names: HashSet<String>,
}
14
15impl Parser {
16    pub fn new(tokens: Vec<Token>) -> Self {
17        Self {
18            tokens,
19            pos: 0,
20            errors: Vec::new(),
21            struct_names: HashSet::new(),
22        }
23    }
24
25    pub(super) fn current_span(&self) -> Span {
26        self.tokens
27            .get(self.pos)
28            .map(|t| t.span)
29            .unwrap_or(Span::dummy())
30    }
31
32    pub(super) fn current_kind(&self) -> Option<&TokenKind> {
33        self.tokens.get(self.pos).map(|t| &t.kind)
34    }
35
36    pub(super) fn prev_span(&self) -> Span {
37        if self.pos > 0 {
38            self.tokens[self.pos - 1].span
39        } else {
40            Span::dummy()
41        }
42    }
43
44    /// Parse a complete .harn file. Reports multiple errors via recovery.
45    pub fn parse(&mut self) -> Result<Vec<SNode>, ParserError> {
46        let mut nodes = Vec::new();
47        self.skip_newlines();
48
49        while !self.is_at_end() {
50            // Recovery may leave us pointing at a stray `}` at top level; skip it.
51            if self.check(&TokenKind::RBrace) {
52                self.advance();
53                self.skip_newlines();
54                continue;
55            }
56
57            let result = if self.check(&TokenKind::Import) {
58                self.parse_import()
59            } else if self.check(&TokenKind::At) {
60                self.parse_attributed_decl()
61            } else if self.check(&TokenKind::Pipeline) {
62                self.parse_pipeline()
63            } else {
64                self.parse_statement()
65            };
66
67            match result {
68                Ok(node) => nodes.push(node),
69                Err(err) => {
70                    self.errors.push(err);
71                    self.synchronize();
72                }
73            }
74            self.skip_newlines();
75        }
76
77        if let Some(first) = self.errors.first() {
78            return Err(first.clone());
79        }
80        Ok(nodes)
81    }
82
83    /// Return all accumulated parser errors (after `parse()` returns).
84    pub fn all_errors(&self) -> &[ParserError] {
85        &self.errors
86    }
87
88    /// Check if the current token is one that starts a statement.
89    pub(super) fn is_statement_start(&self) -> bool {
90        matches!(
91            self.current_kind(),
92            Some(
93                TokenKind::Let
94                    | TokenKind::Var
95                    | TokenKind::If
96                    | TokenKind::For
97                    | TokenKind::While
98                    | TokenKind::Match
99                    | TokenKind::Retry
100                    | TokenKind::Return
101                    | TokenKind::Throw
102                    | TokenKind::Fn
103                    | TokenKind::Pub
104                    | TokenKind::Try
105                    | TokenKind::Select
106                    | TokenKind::Pipeline
107                    | TokenKind::Import
108                    | TokenKind::Parallel
109                    | TokenKind::Enum
110                    | TokenKind::Struct
111                    | TokenKind::Interface
112                    | TokenKind::Guard
113                    | TokenKind::Require
114                    | TokenKind::Deadline
115                    | TokenKind::Yield
116                    | TokenKind::Mutex
117                    | TokenKind::Tool
118            )
119        )
120    }
121
122    /// Advance past tokens until we reach a likely statement boundary.
123    pub(super) fn synchronize(&mut self) {
124        while !self.is_at_end() {
125            if self.check(&TokenKind::Newline) {
126                self.advance();
127                if self.is_at_end() || self.is_statement_start() {
128                    return;
129                }
130                continue;
131            }
132            if self.check(&TokenKind::RBrace) {
133                return;
134            }
135            self.advance();
136        }
137    }
138
139    pub(super) fn is_at_end(&self) -> bool {
140        self.pos >= self.tokens.len()
141            || matches!(self.tokens.get(self.pos), Some(t) if t.kind == TokenKind::Eof)
142    }
143
144    pub(super) fn current(&self) -> Option<&Token> {
145        self.tokens.get(self.pos)
146    }
147
148    pub(super) fn peek_kind(&self) -> Option<&TokenKind> {
149        self.tokens.get(self.pos + 1).map(|t| &t.kind)
150    }
151
152    pub(super) fn peek_kind_at(&self, offset: usize) -> Option<&TokenKind> {
153        self.tokens.get(self.pos + offset).map(|t| &t.kind)
154    }
155
156    pub(super) fn check(&self, kind: &TokenKind) -> bool {
157        self.current()
158            .map(|t| std::mem::discriminant(&t.kind) == std::mem::discriminant(kind))
159            .unwrap_or(false)
160    }
161
162    /// Check for `kind`, skipping newlines first; used for binary operators
163    /// like `||` and `&&` that can span lines.
164    pub(super) fn check_skip_newlines(&mut self, kind: &TokenKind) -> bool {
165        let saved = self.pos;
166        self.skip_newlines();
167        if self.check(kind) {
168            true
169        } else {
170            self.pos = saved;
171            false
172        }
173    }
174
175    /// Check if current token is an identifier with the given name (without consuming it).
176    pub(super) fn check_identifier(&self, name: &str) -> bool {
177        matches!(self.current().map(|t| &t.kind), Some(TokenKind::Identifier(s)) if s == name)
178    }
179
180    pub(super) fn advance(&mut self) {
181        if self.pos < self.tokens.len() {
182            self.pos += 1;
183        }
184    }
185
186    pub(super) fn consume(
187        &mut self,
188        kind: &TokenKind,
189        expected: &str,
190    ) -> Result<Token, ParserError> {
191        self.skip_newlines();
192        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
193        if std::mem::discriminant(&tok.kind) != std::mem::discriminant(kind) {
194            return Err(self.make_error(expected));
195        }
196        let tok = tok.clone();
197        self.advance();
198        Ok(tok)
199    }
200
201    pub(super) fn consume_identifier(&mut self, expected: &str) -> Result<String, ParserError> {
202        self.skip_newlines();
203        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
204        if let TokenKind::Identifier(name) = &tok.kind {
205            let name = name.clone();
206            self.advance();
207            Ok(name)
208        } else {
209            // Distinguish reserved-keyword misuse (e.g. `for tool in list`) from
210            // a general unexpected token so the error is actionable.
211            let kw_name = harn_lexer::KEYWORDS
212                .iter()
213                .find(|&&kw| kw == tok.kind.to_string());
214            if let Some(kw) = kw_name {
215                Err(ParserError::Unexpected {
216                    got: format!("'{kw}' (reserved keyword)"),
217                    expected: expected.into(),
218                    span: tok.span,
219                })
220            } else {
221                Err(self.make_error(expected))
222            }
223        }
224    }
225
226    /// Like `consume_identifier`, but also accepts keywords as identifiers.
227    /// Used for property access (e.g., `obj.type`) and dict keys where
228    /// keywords are valid member names.
229    pub(super) fn consume_identifier_or_keyword(
230        &mut self,
231        expected: &str,
232    ) -> Result<String, ParserError> {
233        self.skip_newlines();
234        let tok = self.current().ok_or_else(|| self.make_error(expected))?;
235        if let TokenKind::Identifier(name) = &tok.kind {
236            let name = name.clone();
237            self.advance();
238            return Ok(name);
239        }
240        let name = match &tok.kind {
241            TokenKind::Pipeline => "pipeline",
242            TokenKind::Extends => "extends",
243            TokenKind::Override => "override",
244            TokenKind::Let => "let",
245            TokenKind::Var => "var",
246            TokenKind::If => "if",
247            TokenKind::Else => "else",
248            TokenKind::For => "for",
249            TokenKind::In => "in",
250            TokenKind::Match => "match",
251            TokenKind::Retry => "retry",
252            TokenKind::Parallel => "parallel",
253            TokenKind::Return => "return",
254            TokenKind::Import => "import",
255            TokenKind::True => "true",
256            TokenKind::False => "false",
257            TokenKind::Nil => "nil",
258            TokenKind::Try => "try",
259            TokenKind::Catch => "catch",
260            TokenKind::Throw => "throw",
261            TokenKind::Fn => "fn",
262            TokenKind::Spawn => "spawn",
263            TokenKind::While => "while",
264            TokenKind::TypeKw => "type",
265            TokenKind::Enum => "enum",
266            TokenKind::Struct => "struct",
267            TokenKind::Interface => "interface",
268            TokenKind::Pub => "pub",
269            TokenKind::From => "from",
270            TokenKind::To => "to",
271            TokenKind::Tool => "tool",
272            TokenKind::Exclusive => "exclusive",
273            TokenKind::Guard => "guard",
274            TokenKind::Deadline => "deadline",
275            TokenKind::Defer => "defer",
276            TokenKind::Yield => "yield",
277            TokenKind::Mutex => "mutex",
278            TokenKind::Break => "break",
279            TokenKind::Continue => "continue",
280            TokenKind::Impl => "impl",
281            _ => return Err(self.make_error(expected)),
282        };
283        let name = name.to_string();
284        self.advance();
285        Ok(name)
286    }
287
288    pub(super) fn skip_newlines(&mut self) {
289        while self.pos < self.tokens.len() && self.tokens[self.pos].kind == TokenKind::Newline {
290            self.pos += 1;
291        }
292    }
293
294    pub(super) fn make_error(&self, expected: &str) -> ParserError {
295        if let Some(tok) = self.tokens.get(self.pos) {
296            if tok.kind == TokenKind::Eof {
297                return ParserError::UnexpectedEof {
298                    expected: expected.into(),
299                    span: tok.span,
300                };
301            }
302            ParserError::Unexpected {
303                got: tok.kind.to_string(),
304                expected: expected.into(),
305                span: tok.span,
306            }
307        } else {
308            ParserError::UnexpectedEof {
309                expected: expected.into(),
310                span: self.prev_span(),
311            }
312        }
313    }
314
315    pub(super) fn error(&self, expected: &str) -> ParserError {
316        self.make_error(expected)
317    }
318}