parse_it/
parser.rs

//! Basic definitions for working with the parser.
//!
//! If you're looking for a convenient way to parse data, you don't need to dive into
//! the details of the parser. The [`ParseIt::parse`] method abstracts away all the
//! complexity, making it easy to use.
//!
//! However, if you're interested in learning more about how the parser works under the
//! hood, you can refer to the [`ParserState`] documentation.
//!
//! [`ParseIt::parse`]: crate::ParseIt::parse
11
12use std::{cell::RefCell, fmt::Debug, rc::Rc};
13
14use crate::lexer::{Lexer, Span};
15
16/// An error that occurred during parsing.
17#[derive(Debug)]
18pub struct Error {
19    /// The span in the source code where the error occurred.
20    pub span: Span,
21}
22
23impl Error {
24    /// Create a new error from the given span.
25    pub fn new(span: Span) -> Self {
26        Self { span }
27    }
28}
29
/// The inner state of a parser.
///
/// `ParserState` is a cursor over the lexer and keeps track of the current position
/// in the source code. It is used to drive the parsing process.
///
/// # Writing a Parser
///
/// A parser is a function `Fn(&mut ParserState<L>) -> Result<T, Error>` that takes a
/// `&mut ParserState` as input and returns the parsed result or an error.
///
/// The common use case is to call the [`parse`](ParserState::parse) method to
/// read a token from the lexer and advance the state by one token.
///
/// ```
/// # use parse_it::*;
/// fn parse_abc(state: &mut ParserState<CharLexer>) -> Result<char, Error> {
///     state.parse('a')?;
///     state.parse('b')?;
///     state.parse('c')?;
///     Ok('c')
/// }
///
/// let mut state = ParserState::new(CharLexer::new("abc"));
/// parse_abc(&mut state).unwrap();
/// assert!(state.is_empty());
/// ```
///
/// Consuming tokens requires exclusive access (`&mut ParserState`). Only the
/// debugging stack maintained by [`push`](ParserState::push) and
/// [`pop`](ParserState::pop) uses interior mutability: it is shared between a
/// state and all of its forks, and can be updated through a `&ParserState`.
///
/// # Speculative Parsing
///
/// `ParserState` allows you to create a fork of the current state via the
/// [`fork`](ParserState::fork) method, and join it back to the original state
/// later via the [`advance_to`](ParserState::advance_to) method. This is useful
/// for speculative parsing.
///
/// It's important to note that `ParserState` can only move forward and not
/// backward. When joining a fork back to the original state, it must be
/// ensured that the fork is at a position beyond or equal to the original
/// state; [`advance_to`](ParserState::advance_to) panics otherwise.
///
/// ```
/// # use parse_it::*;
/// fn parse_option(
///     state: &mut ParserState<CharLexer>,
///     parser: impl Fn(&mut ParserState<CharLexer>) -> Result<char, Error>
/// ) -> Result<Option<char>, Error> {
///     let fork = &mut state.fork();
///     match parser(fork) {
///         Ok(c) => {
///             state.advance_to(fork);
///             Ok(Some(c))
///         }
///         Err(_) => Ok(None),
///     }
/// }
///
/// let mut state = ParserState::new(CharLexer::new("aaa"));
/// assert_eq!(parse_option(&mut state, |state| state.parse('a')).unwrap(), Some('a'));
/// assert_eq!(parse_option(&mut state, |state| state.parse('b')).unwrap(), None);
/// ```
pub struct ParserState<L> {
    /// The lexer this state reads tokens from; each fork owns its own lexer.
    lexer: L,
    /// Debugging stack of `(name, span end)` entries. The `Rc` handle is cloned
    /// on fork, so every fork observes and updates the same stack.
    stack: Rc<RefCell<Vec<(&'static str, usize)>>>,
}
97
98impl<'a, L: Lexer<'a>> ParserState<L> {
99    /// Create a new parser state from the given lexer.
100    pub fn new(lexer: L) -> Self {
101        Self {
102            lexer,
103            stack: Rc::new(RefCell::new(Vec::new())),
104        }
105    }
106
107    /// Get the current parsing position.
108    pub fn pos(&self) -> &L::Position {
109        self.lexer.pos()
110    }
111
112    /// Advance to the next token.
113    fn next(&mut self) -> Option<L::Token> {
114        self.lexer.next()
115    }
116
117    /// Consume the next token if it matches the given token.
118    pub fn parse_with<T>(
119        &mut self,
120        matches: impl FnOnce(L::Token) -> Option<T>,
121    ) -> Result<T, Error> {
122        self.next()
123            .and_then(matches)
124            .ok_or_else(|| Error::new(self.lexer.span()))
125    }
126
127    /// Consume the next token if it matches the given token via [`PartialEq`].
128    pub fn parse<T>(&mut self, terminal: T) -> Result<L::Token, Error>
129    where
130        L::Token: PartialEq<T>,
131    {
132        self.parse_with(|tt| tt.eq(&terminal).then_some(tt))
133    }
134
135    /// Report an error at the current position.
136    pub fn error(&self) -> Error {
137        Error::new(self.lexer.span())
138    }
139
140    /// Whether the parser is at the end of the input.
141    pub fn is_empty(&self) -> bool {
142        self.lexer.is_empty()
143    }
144
145    /// Advance the state to the given state.
146    ///
147    /// # Panics
148    /// Panics if the given state is before the current state.
149    pub fn advance_to(&mut self, other: &Self) {
150        self.advance_to_pos(other.lexer.pos())
151    }
152
153    /// Advance the state to the given position.
154    ///
155    /// # Panics
156    /// Panics if the given position is before the current position.
157    pub fn advance_to_pos(&mut self, pos: &L::Position) {
158        assert!(pos >= self.lexer.pos(), "you cannot rewind");
159        self.lexer.advance_to_pos(pos);
160    }
161
162    /// Create a fork of the current state for speculative parsing.
163    pub fn fork(&self) -> Self {
164        Self {
165            lexer: self.lexer.fork(),
166            stack: self.stack.clone(),
167        }
168    }
169
170    /// Push the given name onto the stack (for debugging purposes).
171    pub fn push(&self, name: &'static str) {
172        self.stack.borrow_mut().push((name, self.lexer.span().end));
173    }
174
175    /// Pop the last name from the stack (for debugging purposes).
176    pub fn pop(&self) {
177        self.stack.borrow_mut().pop();
178    }
179
180    /// Get the current stack (for debugging purposes).
181    pub fn debug(&self) -> String {
182        format!("{:?}", self.stack.borrow())
183    }
184}