Skip to main content

nash_parse/
lib.rs

1use bumpalo::Bump;
2use nash_region::{Located, Position, Region};
3
4mod declaration;
5pub mod error;
6mod exposing;
7mod expression;
8mod import;
9mod keyword;
10mod module;
11mod number;
12mod pattern;
13mod space;
14mod string;
15mod symbol;
16mod type_;
17
18pub type Row = u16;
19pub type Col = u16;
20
21/// Saved parser state for backtracking.
22#[derive(Clone, Copy)]
23struct ParserState {
24    pos: usize,
25    indent: u16,
26    row: Row,
27    col: Col,
28}
29
30/// Parser for Nash source code.
31///
32/// Combines the arena allocator with parsing state for a unified API.
33/// All parsed AST nodes are allocated in the provided bump arena.
34///
35/// The source bytes should already be allocated in the arena (via `bump.alloc_str`),
36/// so all string slices in the resulting AST share the `'a` lifetime.
37pub struct Parser<'a> {
38    /// Arena allocator for AST nodes
39    bump: &'a Bump,
40    /// Source bytes (UTF-8, already in arena)
41    src: &'a [u8],
42    /// Current byte position
43    pos: usize,
44    /// Current indentation level (for layout-sensitive parsing)
45    indent: u16,
46    /// Current row (1-indexed)
47    row: Row,
48    /// Current column (1-indexed)
49    col: Col,
50}
51
52impl<'a> Parser<'a> {
53    /// Create a new parser for the given source bytes.
54    ///
55    /// The source should already be allocated in the arena.
56    pub fn new(bump: &'a Bump, src: &'a [u8]) -> Self {
57        Parser {
58            bump,
59            src,
60            pos: 0,
61            indent: 1, // Start at 1 like Elm - top-level declarations are at column 1
62            row: 1,
63            col: 1,
64        }
65    }
66
67    // -------------------------------------------------------------------------
68    // Position & State
69    // -------------------------------------------------------------------------
70
71    /// Current position as (row, col).
72    #[inline]
73    pub fn position(&self) -> (Row, Col) {
74        (self.row, self.col)
75    }
76
77    /// Get the current position as a `Position`.
78    #[inline]
79    pub fn get_position(&self) -> Position {
80        Position::new(self.row, self.col)
81    }
82
83    /// Create a `Located` value spanning from `start` to the current position,
84    /// allocated directly in the arena.
85    #[inline]
86    pub fn add_end<T>(&self, start: Position, value: T) -> &'a Located<T> {
87        let end = self.get_position();
88        self.alloc(Located::at(Region::new(start, end), value))
89    }
90
91    /// Current row (1-indexed).
92    #[inline]
93    pub fn row(&self) -> Row {
94        self.row
95    }
96
97    /// Current column (1-indexed).
98    #[inline]
99    pub fn col(&self) -> Col {
100        self.col
101    }
102
103    /// Current indentation level.
104    #[inline]
105    pub fn indent(&self) -> u16 {
106        self.indent
107    }
108
109    /// Set the indentation level.
110    #[inline]
111    pub fn set_indent(&mut self, indent: u16) {
112        self.indent = indent;
113    }
114
115    /// Run a parser with the current column as the indent level,
116    /// then restore the old indent level.
117    ///
118    /// Mirrors Elm's `withIndent`:
119    /// ```haskell
120    /// withIndent (Parser parser) =
121    ///   Parser $ \(State src pos end oldIndent row col) cok eok cerr eerr ->
122    ///     let
123    ///       cok' a (State s p e _ r c) = cok a (State s p e oldIndent r c)
124    ///       eok' a (State s p e _ r c) = eok a (State s p e oldIndent r c)
125    ///     in
126    ///     parser (State src pos end col row col) cok' eok' cerr eerr
127    /// ```
128    pub fn with_indent<T, E>(
129        &mut self,
130        parser: impl FnOnce(&mut Self) -> Result<T, E>,
131    ) -> Result<T, E> {
132        let old_indent = self.indent;
133        self.indent = self.col;
134        let result = parser(self);
135        self.indent = old_indent;
136        result
137    }
138
139    /// Run a parser with indent set to (current column - backset),
140    /// then restore the old indent level.
141    ///
142    /// Mirrors Elm's `withBacksetIndent`:
143    /// ```haskell
144    /// withBacksetIndent backset (Parser parser) =
145    ///   Parser $ \(State src pos end oldIndent row col) cok eok cerr eerr ->
146    ///     parser (State src pos end (col - backset) row col) ...
147    /// ```
148    pub fn with_backset_indent<T, E>(
149        &mut self,
150        backset: u16,
151        parser: impl FnOnce(&mut Self) -> Result<T, E>,
152    ) -> Result<T, E> {
153        let old_indent = self.indent;
154        self.indent = self.col.saturating_sub(backset);
155        let result = parser(self);
156        self.indent = old_indent;
157        result
158    }
159
160    /// Check if we've reached the end of input.
161    #[inline]
162    pub fn is_eof(&self) -> bool {
163        self.pos >= self.src.len()
164    }
165
166    /// Save current parser state for backtracking.
167    #[inline]
168    fn save_state(&self) -> ParserState {
169        ParserState {
170            pos: self.pos,
171            indent: self.indent,
172            row: self.row,
173            col: self.col,
174        }
175    }
176
177    /// Restore parser state for backtracking.
178    #[inline]
179    fn restore_state(&mut self, state: ParserState) {
180        self.pos = state.pos;
181        self.indent = state.indent;
182        self.row = state.row;
183        self.col = state.col;
184    }
185
186    // -------------------------------------------------------------------------
187    // Combinators
188    // -------------------------------------------------------------------------
189
190    /// Try multiple parsers in order, returning the first success.
191    ///
192    /// Mirrors Elm's `oneOf`:
193    /// ```haskell
194    /// oneOf :: (Row -> Col -> x) -> [Parser x a] -> Parser x a
195    /// ```
196    ///
197    /// Key semantics:
198    /// - If a parser fails without consuming input, try the next one
199    /// - If a parser fails after consuming input, propagate the error (committed)
200    /// - If all parsers fail without consuming, call `to_error(row, col)`
201    ///
202    /// # Example
203    /// ```ignore
204    /// parser.one_of(
205    ///     error::Expr::Start,
206    ///     vec![
207    ///         Box::new(|p: &mut Parser| p.string(start)),
208    ///         Box::new(|p| p.number(start)),
209    ///     ],
210    /// )
211    /// ```
212    #[allow(clippy::type_complexity)]
213    pub fn one_of<T, E>(
214        &mut self,
215        to_error: impl FnOnce(Row, Col) -> E,
216        parsers: Vec<Box<dyn FnOnce(&mut Self) -> Result<T, E> + '_>>,
217    ) -> Result<T, E> {
218        let initial_state = self.save_state();
219
220        for parser in parsers {
221            let before = self.save_state();
222            match parser(self) {
223                Ok(value) => return Ok(value),
224                Err(e) => {
225                    // Did we consume any input?
226                    if self.pos != before.pos {
227                        // Committed - propagate error
228                        return Err(e);
229                    }
230                    // No input consumed - restore and try next
231                    self.restore_state(before);
232                }
233            }
234        }
235
236        // All parsers failed without consuming - restore to initial and return error
237        self.restore_state(initial_state);
238        let (row, col) = self.position();
239        Err(to_error(row, col))
240    }
241
242    /// Like `one_of` but returns a fallback value if nothing matches.
243    ///
244    /// Mirrors Elm's `oneOfWithFallback`:
245    /// ```haskell
246    /// oneOfWithFallback :: [Parser x a] -> a -> Parser x a
247    /// ```
248    #[allow(clippy::type_complexity)]
249    pub fn one_of_with_fallback<T, E>(
250        &mut self,
251        parsers: Vec<Box<dyn FnOnce(&mut Self) -> Result<T, E> + '_>>,
252        fallback: T,
253    ) -> Result<T, E> {
254        let initial_state = self.save_state();
255
256        for parser in parsers {
257            let before = self.save_state();
258            match parser(self) {
259                Ok(value) => return Ok(value),
260                Err(e) => {
261                    // Did we consume any input?
262                    if self.pos != before.pos {
263                        // Committed - propagate error
264                        return Err(e);
265                    }
266                    // No input consumed - restore and try next
267                    self.restore_state(before);
268                }
269            }
270        }
271
272        // All parsers failed without consuming - return fallback
273        self.restore_state(initial_state);
274        Ok(fallback)
275    }
276
277    /// Parse with error context wrapping.
278    ///
279    /// Mirrors Elm's `inContext`:
280    /// ```haskell
281    /// inContext :: (x -> Row -> Col -> y) -> Parser y start -> Parser x a -> Parser y a
282    /// ```
283    ///
284    /// 1. Saves the starting position
285    /// 2. Runs `start_parser` - if it fails without consuming, returns that error
286    /// 3. If start succeeds, runs `body_parser`
287    /// 4. If body fails, wraps the error using `add_context` at the original position
288    ///
289    /// The `add_context` closure receives the bump allocator so it can allocate wrapped errors.
290    ///
291    /// This is used to provide better error context, e.g., "error in list expression".
292    pub fn in_context<T, StartErr, BodyErr, ContextErr>(
293        &mut self,
294        add_context: impl FnOnce(&'a Bump, BodyErr, Row, Col) -> ContextErr,
295        start_parser: impl FnOnce(&mut Self) -> Result<(), StartErr>,
296        body_parser: impl FnOnce(&mut Self) -> Result<T, BodyErr>,
297    ) -> Result<T, ContextErr>
298    where
299        StartErr: Into<ContextErr>,
300    {
301        let (start_row, start_col) = self.position();
302
303        // Try to parse start token
304        match start_parser(self) {
305            Ok(()) => {
306                // Start succeeded, now parse body
307                match body_parser(self) {
308                    Ok(value) => Ok(value),
309                    Err(body_err) => {
310                        // Wrap body error with context at original position
311                        Err(add_context(self.bump, body_err, start_row, start_col))
312                    }
313                }
314            }
315            Err(start_err) => {
316                // Start failed - convert to context error type
317                Err(start_err.into())
318            }
319        }
320    }
321
322    /// Transform errors from one type to another with position context.
323    ///
324    /// Mirrors Elm's `specialize`:
325    /// ```haskell
326    /// specialize :: (x -> Row -> Col -> y) -> Parser x a -> Parser y a
327    /// ```
328    ///
329    /// Runs the parser and wraps any error with the context at the starting position.
330    /// The `add_context` closure receives the bump allocator so it can allocate wrapped errors.
331    pub fn specialize<T, InnerErr, OuterErr>(
332        &mut self,
333        add_context: impl FnOnce(&'a Bump, InnerErr, Row, Col) -> OuterErr,
334        parser: impl FnOnce(&mut Self) -> Result<T, InnerErr>,
335    ) -> Result<T, OuterErr> {
336        let (start_row, start_col) = self.position();
337
338        match parser(self) {
339            Ok(value) => Ok(value),
340            Err(inner_err) => Err(add_context(self.bump, inner_err, start_row, start_col)),
341        }
342    }
343
344    // -------------------------------------------------------------------------
345    // Single-byte parsing
346    // -------------------------------------------------------------------------
347
348    /// Parse a single expected byte.
349    ///
350    /// Mirrors Elm's `word1`:
351    /// ```haskell
352    /// word1 :: Word8 -> (Row -> Col -> x) -> Parser x ()
353    /// ```
354    ///
355    /// Returns `Ok(())` and advances if the byte matches.
356    /// Returns `Err` without consuming if it doesn't match.
357    #[inline]
358    pub fn word1<E>(
359        &mut self,
360        expected: u8,
361        to_error: impl FnOnce(Row, Col) -> E,
362    ) -> Result<(), E> {
363        if self.peek() == Some(expected) {
364            self.advance();
365            Ok(())
366        } else {
367            let (row, col) = self.position();
368            Err(to_error(row, col))
369        }
370    }
371
372    /// Parse two expected consecutive bytes.
373    ///
374    /// Mirrors Elm's `word2`:
375    /// ```haskell
376    /// word2 :: Word8 -> Word8 -> (Row -> Col -> x) -> Parser x ()
377    /// ```
378    #[inline]
379    pub fn word2<E>(
380        &mut self,
381        b1: u8,
382        b2: u8,
383        to_error: impl FnOnce(Row, Col) -> E,
384    ) -> Result<(), E> {
385        if self.peek() == Some(b1) && self.peek_at(1) == Some(b2) {
386            self.advance();
387            self.advance();
388            Ok(())
389        } else {
390            let (row, col) = self.position();
391            Err(to_error(row, col))
392        }
393    }
394
395    // -------------------------------------------------------------------------
396    // Peeking
397    // -------------------------------------------------------------------------
398
399    /// Peek at the current byte without consuming it.
400    #[inline]
401    pub fn peek(&self) -> Option<u8> {
402        self.src.get(self.pos).copied()
403    }
404
405    /// Peek at a byte at the given offset from current position.
406    #[inline]
407    pub fn peek_at(&self, offset: usize) -> Option<u8> {
408        self.src.get(self.pos + offset).copied()
409    }
410
411    /// Get the remaining bytes from current position.
412    #[inline]
413    pub fn remaining(&self) -> &'a [u8] {
414        &self.src[self.pos..]
415    }
416
417    // -------------------------------------------------------------------------
418    // Advancing
419    // -------------------------------------------------------------------------
420
421    /// Advance by one byte, updating row/col for newlines.
422    #[inline]
423    pub fn advance(&mut self) {
424        if let Some(b) = self.peek() {
425            self.pos += 1;
426            if b == b'\n' {
427                self.row += 1;
428                self.col = 1;
429            } else {
430                self.col += 1;
431            }
432        }
433    }
434
435    /// Advance by n bytes, tracking newlines.
436    #[inline]
437    pub fn advance_by(&mut self, n: usize) {
438        for _ in 0..n {
439            self.advance();
440        }
441    }
442
443    // -------------------------------------------------------------------------
444    // Allocation helpers
445    // -------------------------------------------------------------------------
446
447    /// Allocate a value in the arena.
448    #[inline]
449    pub fn alloc<T>(&self, value: T) -> &'a T {
450        self.bump.alloc(value)
451    }
452
453    /// Allocate a slice in the arena by copying.
454    #[inline]
455    pub fn alloc_slice_copy<T: Copy>(&self, slice: &[T]) -> &'a [T] {
456        self.bump.alloc_slice_copy(slice)
457    }
458
459    /// Allocate a string in the arena (for constructed strings like escape sequences).
460    #[inline]
461    pub fn alloc_str(&self, s: &str) -> &'a str {
462        self.bump.alloc_str(s)
463    }
464}
465
466#[cfg(test)]
467mod tests {
468    use super::*;
469
470    #[test]
471    fn test_parser_new() {
472        let bump = Bump::new();
473        let src = bump.alloc_str("hello");
474        let parser = Parser::new(&bump, src.as_bytes());
475
476        assert_eq!(parser.row(), 1);
477        assert_eq!(parser.col(), 1);
478        assert_eq!(parser.peek(), Some(b'h'));
479        assert!(!parser.is_eof());
480    }
481
482    #[test]
483    fn test_parser_advance() {
484        let bump = Bump::new();
485        let src = bump.alloc_str("ab\ncd");
486        let mut parser = Parser::new(&bump, src.as_bytes());
487
488        assert_eq!(parser.position(), (1, 1));
489        parser.advance(); // 'a'
490        assert_eq!(parser.position(), (1, 2));
491        parser.advance(); // 'b'
492        assert_eq!(parser.position(), (1, 3));
493        parser.advance(); // '\n'
494        assert_eq!(parser.position(), (2, 1));
495        parser.advance(); // 'c'
496        assert_eq!(parser.position(), (2, 2));
497    }
498
499    #[test]
500    fn test_parser_eof() {
501        let bump = Bump::new();
502        let src = bump.alloc_str("x");
503        let mut parser = Parser::new(&bump, src.as_bytes());
504
505        assert!(!parser.is_eof());
506        parser.advance();
507        assert!(parser.is_eof());
508        assert_eq!(parser.peek(), None);
509    }
510}