nash_parse/lib.rs
1use bumpalo::Bump;
2use nash_region::{Located, Position, Region};
3
4mod declaration;
5pub mod error;
6mod exposing;
7mod expression;
8mod import;
9mod keyword;
10mod module;
11mod number;
12mod pattern;
13mod space;
14mod string;
15mod symbol;
16mod type_;
17
18pub type Row = u16;
19pub type Col = u16;
20
21/// Saved parser state for backtracking.
22#[derive(Clone, Copy)]
23struct ParserState {
24 pos: usize,
25 indent: u16,
26 row: Row,
27 col: Col,
28}
29
30/// Parser for Nash source code.
31///
32/// Combines the arena allocator with parsing state for a unified API.
33/// All parsed AST nodes are allocated in the provided bump arena.
34///
35/// The source bytes should already be allocated in the arena (via `bump.alloc_str`),
36/// so all string slices in the resulting AST share the `'a` lifetime.
37pub struct Parser<'a> {
38 /// Arena allocator for AST nodes
39 bump: &'a Bump,
40 /// Source bytes (UTF-8, already in arena)
41 src: &'a [u8],
42 /// Current byte position
43 pos: usize,
44 /// Current indentation level (for layout-sensitive parsing)
45 indent: u16,
46 /// Current row (1-indexed)
47 row: Row,
48 /// Current column (1-indexed)
49 col: Col,
50}
51
52impl<'a> Parser<'a> {
53 /// Create a new parser for the given source bytes.
54 ///
55 /// The source should already be allocated in the arena.
56 pub fn new(bump: &'a Bump, src: &'a [u8]) -> Self {
57 Parser {
58 bump,
59 src,
60 pos: 0,
61 indent: 1, // Start at 1 like Elm - top-level declarations are at column 1
62 row: 1,
63 col: 1,
64 }
65 }
66
67 // -------------------------------------------------------------------------
68 // Position & State
69 // -------------------------------------------------------------------------
70
71 /// Current position as (row, col).
72 #[inline]
73 pub fn position(&self) -> (Row, Col) {
74 (self.row, self.col)
75 }
76
77 /// Get the current position as a `Position`.
78 #[inline]
79 pub fn get_position(&self) -> Position {
80 Position::new(self.row, self.col)
81 }
82
83 /// Create a `Located` value spanning from `start` to the current position,
84 /// allocated directly in the arena.
85 #[inline]
86 pub fn add_end<T>(&self, start: Position, value: T) -> &'a Located<T> {
87 let end = self.get_position();
88 self.alloc(Located::at(Region::new(start, end), value))
89 }
90
91 /// Current row (1-indexed).
92 #[inline]
93 pub fn row(&self) -> Row {
94 self.row
95 }
96
97 /// Current column (1-indexed).
98 #[inline]
99 pub fn col(&self) -> Col {
100 self.col
101 }
102
103 /// Current indentation level.
104 #[inline]
105 pub fn indent(&self) -> u16 {
106 self.indent
107 }
108
109 /// Set the indentation level.
110 #[inline]
111 pub fn set_indent(&mut self, indent: u16) {
112 self.indent = indent;
113 }
114
115 /// Run a parser with the current column as the indent level,
116 /// then restore the old indent level.
117 ///
118 /// Mirrors Elm's `withIndent`:
119 /// ```haskell
120 /// withIndent (Parser parser) =
121 /// Parser $ \(State src pos end oldIndent row col) cok eok cerr eerr ->
122 /// let
123 /// cok' a (State s p e _ r c) = cok a (State s p e oldIndent r c)
124 /// eok' a (State s p e _ r c) = eok a (State s p e oldIndent r c)
125 /// in
126 /// parser (State src pos end col row col) cok' eok' cerr eerr
127 /// ```
128 pub fn with_indent<T, E>(
129 &mut self,
130 parser: impl FnOnce(&mut Self) -> Result<T, E>,
131 ) -> Result<T, E> {
132 let old_indent = self.indent;
133 self.indent = self.col;
134 let result = parser(self);
135 self.indent = old_indent;
136 result
137 }
138
139 /// Run a parser with indent set to (current column - backset),
140 /// then restore the old indent level.
141 ///
142 /// Mirrors Elm's `withBacksetIndent`:
143 /// ```haskell
144 /// withBacksetIndent backset (Parser parser) =
145 /// Parser $ \(State src pos end oldIndent row col) cok eok cerr eerr ->
146 /// parser (State src pos end (col - backset) row col) ...
147 /// ```
148 pub fn with_backset_indent<T, E>(
149 &mut self,
150 backset: u16,
151 parser: impl FnOnce(&mut Self) -> Result<T, E>,
152 ) -> Result<T, E> {
153 let old_indent = self.indent;
154 self.indent = self.col.saturating_sub(backset);
155 let result = parser(self);
156 self.indent = old_indent;
157 result
158 }
159
160 /// Check if we've reached the end of input.
161 #[inline]
162 pub fn is_eof(&self) -> bool {
163 self.pos >= self.src.len()
164 }
165
166 /// Save current parser state for backtracking.
167 #[inline]
168 fn save_state(&self) -> ParserState {
169 ParserState {
170 pos: self.pos,
171 indent: self.indent,
172 row: self.row,
173 col: self.col,
174 }
175 }
176
177 /// Restore parser state for backtracking.
178 #[inline]
179 fn restore_state(&mut self, state: ParserState) {
180 self.pos = state.pos;
181 self.indent = state.indent;
182 self.row = state.row;
183 self.col = state.col;
184 }
185
186 // -------------------------------------------------------------------------
187 // Combinators
188 // -------------------------------------------------------------------------
189
190 /// Try multiple parsers in order, returning the first success.
191 ///
192 /// Mirrors Elm's `oneOf`:
193 /// ```haskell
194 /// oneOf :: (Row -> Col -> x) -> [Parser x a] -> Parser x a
195 /// ```
196 ///
197 /// Key semantics:
198 /// - If a parser fails without consuming input, try the next one
199 /// - If a parser fails after consuming input, propagate the error (committed)
200 /// - If all parsers fail without consuming, call `to_error(row, col)`
201 ///
202 /// # Example
203 /// ```ignore
204 /// parser.one_of(
205 /// error::Expr::Start,
206 /// vec![
207 /// Box::new(|p: &mut Parser| p.string(start)),
208 /// Box::new(|p| p.number(start)),
209 /// ],
210 /// )
211 /// ```
212 #[allow(clippy::type_complexity)]
213 pub fn one_of<T, E>(
214 &mut self,
215 to_error: impl FnOnce(Row, Col) -> E,
216 parsers: Vec<Box<dyn FnOnce(&mut Self) -> Result<T, E> + '_>>,
217 ) -> Result<T, E> {
218 let initial_state = self.save_state();
219
220 for parser in parsers {
221 let before = self.save_state();
222 match parser(self) {
223 Ok(value) => return Ok(value),
224 Err(e) => {
225 // Did we consume any input?
226 if self.pos != before.pos {
227 // Committed - propagate error
228 return Err(e);
229 }
230 // No input consumed - restore and try next
231 self.restore_state(before);
232 }
233 }
234 }
235
236 // All parsers failed without consuming - restore to initial and return error
237 self.restore_state(initial_state);
238 let (row, col) = self.position();
239 Err(to_error(row, col))
240 }
241
242 /// Like `one_of` but returns a fallback value if nothing matches.
243 ///
244 /// Mirrors Elm's `oneOfWithFallback`:
245 /// ```haskell
246 /// oneOfWithFallback :: [Parser x a] -> a -> Parser x a
247 /// ```
248 #[allow(clippy::type_complexity)]
249 pub fn one_of_with_fallback<T, E>(
250 &mut self,
251 parsers: Vec<Box<dyn FnOnce(&mut Self) -> Result<T, E> + '_>>,
252 fallback: T,
253 ) -> Result<T, E> {
254 let initial_state = self.save_state();
255
256 for parser in parsers {
257 let before = self.save_state();
258 match parser(self) {
259 Ok(value) => return Ok(value),
260 Err(e) => {
261 // Did we consume any input?
262 if self.pos != before.pos {
263 // Committed - propagate error
264 return Err(e);
265 }
266 // No input consumed - restore and try next
267 self.restore_state(before);
268 }
269 }
270 }
271
272 // All parsers failed without consuming - return fallback
273 self.restore_state(initial_state);
274 Ok(fallback)
275 }
276
277 /// Parse with error context wrapping.
278 ///
279 /// Mirrors Elm's `inContext`:
280 /// ```haskell
281 /// inContext :: (x -> Row -> Col -> y) -> Parser y start -> Parser x a -> Parser y a
282 /// ```
283 ///
284 /// 1. Saves the starting position
285 /// 2. Runs `start_parser` - if it fails without consuming, returns that error
286 /// 3. If start succeeds, runs `body_parser`
287 /// 4. If body fails, wraps the error using `add_context` at the original position
288 ///
289 /// The `add_context` closure receives the bump allocator so it can allocate wrapped errors.
290 ///
291 /// This is used to provide better error context, e.g., "error in list expression".
292 pub fn in_context<T, StartErr, BodyErr, ContextErr>(
293 &mut self,
294 add_context: impl FnOnce(&'a Bump, BodyErr, Row, Col) -> ContextErr,
295 start_parser: impl FnOnce(&mut Self) -> Result<(), StartErr>,
296 body_parser: impl FnOnce(&mut Self) -> Result<T, BodyErr>,
297 ) -> Result<T, ContextErr>
298 where
299 StartErr: Into<ContextErr>,
300 {
301 let (start_row, start_col) = self.position();
302
303 // Try to parse start token
304 match start_parser(self) {
305 Ok(()) => {
306 // Start succeeded, now parse body
307 match body_parser(self) {
308 Ok(value) => Ok(value),
309 Err(body_err) => {
310 // Wrap body error with context at original position
311 Err(add_context(self.bump, body_err, start_row, start_col))
312 }
313 }
314 }
315 Err(start_err) => {
316 // Start failed - convert to context error type
317 Err(start_err.into())
318 }
319 }
320 }
321
322 /// Transform errors from one type to another with position context.
323 ///
324 /// Mirrors Elm's `specialize`:
325 /// ```haskell
326 /// specialize :: (x -> Row -> Col -> y) -> Parser x a -> Parser y a
327 /// ```
328 ///
329 /// Runs the parser and wraps any error with the context at the starting position.
330 /// The `add_context` closure receives the bump allocator so it can allocate wrapped errors.
331 pub fn specialize<T, InnerErr, OuterErr>(
332 &mut self,
333 add_context: impl FnOnce(&'a Bump, InnerErr, Row, Col) -> OuterErr,
334 parser: impl FnOnce(&mut Self) -> Result<T, InnerErr>,
335 ) -> Result<T, OuterErr> {
336 let (start_row, start_col) = self.position();
337
338 match parser(self) {
339 Ok(value) => Ok(value),
340 Err(inner_err) => Err(add_context(self.bump, inner_err, start_row, start_col)),
341 }
342 }
343
344 // -------------------------------------------------------------------------
345 // Single-byte parsing
346 // -------------------------------------------------------------------------
347
348 /// Parse a single expected byte.
349 ///
350 /// Mirrors Elm's `word1`:
351 /// ```haskell
352 /// word1 :: Word8 -> (Row -> Col -> x) -> Parser x ()
353 /// ```
354 ///
355 /// Returns `Ok(())` and advances if the byte matches.
356 /// Returns `Err` without consuming if it doesn't match.
357 #[inline]
358 pub fn word1<E>(
359 &mut self,
360 expected: u8,
361 to_error: impl FnOnce(Row, Col) -> E,
362 ) -> Result<(), E> {
363 if self.peek() == Some(expected) {
364 self.advance();
365 Ok(())
366 } else {
367 let (row, col) = self.position();
368 Err(to_error(row, col))
369 }
370 }
371
372 /// Parse two expected consecutive bytes.
373 ///
374 /// Mirrors Elm's `word2`:
375 /// ```haskell
376 /// word2 :: Word8 -> Word8 -> (Row -> Col -> x) -> Parser x ()
377 /// ```
378 #[inline]
379 pub fn word2<E>(
380 &mut self,
381 b1: u8,
382 b2: u8,
383 to_error: impl FnOnce(Row, Col) -> E,
384 ) -> Result<(), E> {
385 if self.peek() == Some(b1) && self.peek_at(1) == Some(b2) {
386 self.advance();
387 self.advance();
388 Ok(())
389 } else {
390 let (row, col) = self.position();
391 Err(to_error(row, col))
392 }
393 }
394
395 // -------------------------------------------------------------------------
396 // Peeking
397 // -------------------------------------------------------------------------
398
399 /// Peek at the current byte without consuming it.
400 #[inline]
401 pub fn peek(&self) -> Option<u8> {
402 self.src.get(self.pos).copied()
403 }
404
405 /// Peek at a byte at the given offset from current position.
406 #[inline]
407 pub fn peek_at(&self, offset: usize) -> Option<u8> {
408 self.src.get(self.pos + offset).copied()
409 }
410
411 /// Get the remaining bytes from current position.
412 #[inline]
413 pub fn remaining(&self) -> &'a [u8] {
414 &self.src[self.pos..]
415 }
416
417 // -------------------------------------------------------------------------
418 // Advancing
419 // -------------------------------------------------------------------------
420
421 /// Advance by one byte, updating row/col for newlines.
422 #[inline]
423 pub fn advance(&mut self) {
424 if let Some(b) = self.peek() {
425 self.pos += 1;
426 if b == b'\n' {
427 self.row += 1;
428 self.col = 1;
429 } else {
430 self.col += 1;
431 }
432 }
433 }
434
435 /// Advance by n bytes, tracking newlines.
436 #[inline]
437 pub fn advance_by(&mut self, n: usize) {
438 for _ in 0..n {
439 self.advance();
440 }
441 }
442
443 // -------------------------------------------------------------------------
444 // Allocation helpers
445 // -------------------------------------------------------------------------
446
447 /// Allocate a value in the arena.
448 #[inline]
449 pub fn alloc<T>(&self, value: T) -> &'a T {
450 self.bump.alloc(value)
451 }
452
453 /// Allocate a slice in the arena by copying.
454 #[inline]
455 pub fn alloc_slice_copy<T: Copy>(&self, slice: &[T]) -> &'a [T] {
456 self.bump.alloc_slice_copy(slice)
457 }
458
459 /// Allocate a string in the arena (for constructed strings like escape sequences).
460 #[inline]
461 pub fn alloc_str(&self, s: &str) -> &'a str {
462 self.bump.alloc_str(s)
463 }
464}
465
466#[cfg(test)]
467mod tests {
468 use super::*;
469
470 #[test]
471 fn test_parser_new() {
472 let bump = Bump::new();
473 let src = bump.alloc_str("hello");
474 let parser = Parser::new(&bump, src.as_bytes());
475
476 assert_eq!(parser.row(), 1);
477 assert_eq!(parser.col(), 1);
478 assert_eq!(parser.peek(), Some(b'h'));
479 assert!(!parser.is_eof());
480 }
481
482 #[test]
483 fn test_parser_advance() {
484 let bump = Bump::new();
485 let src = bump.alloc_str("ab\ncd");
486 let mut parser = Parser::new(&bump, src.as_bytes());
487
488 assert_eq!(parser.position(), (1, 1));
489 parser.advance(); // 'a'
490 assert_eq!(parser.position(), (1, 2));
491 parser.advance(); // 'b'
492 assert_eq!(parser.position(), (1, 3));
493 parser.advance(); // '\n'
494 assert_eq!(parser.position(), (2, 1));
495 parser.advance(); // 'c'
496 assert_eq!(parser.position(), (2, 2));
497 }
498
499 #[test]
500 fn test_parser_eof() {
501 let bump = Bump::new();
502 let src = bump.alloc_str("x");
503 let mut parser = Parser::new(&bump, src.as_bytes());
504
505 assert!(!parser.is_eof());
506 parser.advance();
507 assert!(parser.is_eof());
508 assert_eq!(parser.peek(), None);
509 }
510}