parse_it/parser.rs
1//! Basic definitions for working with the parser.
2//!
3//! If you're looking for a convenient way to parse data, you don't need to dive into
4//! the details of the parser. The [`ParseIt::parse`] method abstracts away all the
5//! complexity, making it easy to use.
6//!
7//! However, if you're interested in learning more about how the parser works under the
8//! hood, you can refer to the [`ParserState`] documentation.
9//!
10//! [`ParseIt::parse`]: crate::ParseIt::parse
11
12use std::{cell::RefCell, fmt::Debug, rc::Rc};
13
14use crate::lexer::{Lexer, Span};
15
16/// An error that occurred during parsing.
17#[derive(Debug)]
18pub struct Error {
19 /// The span in the source code where the error occurred.
20 pub span: Span,
21}
22
23impl Error {
24 /// Create a new error from the given span.
25 pub fn new(span: Span) -> Self {
26 Self { span }
27 }
28}
29
/// The inner state of a parser.
///
/// `ParserState` is a cursor over the lexer and keeps track of the current position
/// in the source code. It is used to drive the parsing process.
///
/// # Writing a Parser
///
/// A parser is a function `Fn(&mut ParserState<L>) -> Result<T, Error>` that takes a
/// `&mut ParserState` as input and returns the parsed result or an error.
///
/// The common use case is to call the [`parse`](ParserState::parse) method to
/// read a token from the lexer and advance the state by one token.
///
/// ```
/// # use parse_it::*;
/// fn parse_abc(state: &mut ParserState<CharLexer>) -> Result<char, Error> {
///     state.parse('a')?;
///     state.parse('b')?;
///     state.parse('c')?;
///     Ok('c')
/// }
///
/// let mut state = ParserState::new(CharLexer::new("abc"));
/// parse_abc(&mut state).unwrap();
/// assert!(state.is_empty());
/// ```
///
/// Consuming a token moves the cursor, which is why parsers take the state by
/// `&mut`. The only part of the state that uses interior mutability is the
/// debugging stack (see [`push`](ParserState::push) and
/// [`pop`](ParserState::pop)), which a state shares with all of its forks.
///
/// # Speculative Parsing
///
/// `ParserState` allows you to create a fork of the current state via the
/// [`fork`](ParserState::fork) method, and join it back to the original state
/// later via the [`advance_to`](ParserState::advance_to) method. This is useful
/// for speculative parsing: [`parse`](ParserState::parse) takes the token from
/// the lexer before checking it, so a failed attempt should be made on a fork
/// that is simply dropped afterwards.
///
/// It's important to note that `ParserState` can only move forward and not
/// backward. When joining a fork back to the original state, it must be
/// ensured that the fork is at a position beyond or equal to the original
/// state.
///
/// ```
/// # use parse_it::*;
/// fn parse_option(
///     state: &mut ParserState<CharLexer>,
///     parser: impl Fn(&mut ParserState<CharLexer>) -> Result<char, Error>
/// ) -> Result<Option<char>, Error> {
///     let fork = &mut state.fork();
///     match parser(fork) {
///         Ok(c) => {
///             state.advance_to(fork);
///             Ok(Some(c))
///         }
///         Err(_) => Ok(None),
///     }
/// }
///
/// let mut state = ParserState::new(CharLexer::new("aaa"));
/// assert_eq!(parse_option(&mut state, |state| state.parse('a')).unwrap(), Some('a'));
/// assert_eq!(parse_option(&mut state, |state| state.parse('b')).unwrap(), None);
/// ```
pub struct ParserState<L> {
    /// The underlying lexer; its position is the parser's cursor.
    lexer: L,
    /// Debugging stack of `(name, span end)` entries, shared between a state
    /// and its forks through the reference-counted cell.
    stack: Rc<RefCell<Vec<(&'static str, usize)>>>,
}
97
98impl<'a, L: Lexer<'a>> ParserState<L> {
99 /// Create a new parser state from the given lexer.
100 pub fn new(lexer: L) -> Self {
101 Self {
102 lexer,
103 stack: Rc::new(RefCell::new(Vec::new())),
104 }
105 }
106
107 /// Get the current parsing position.
108 pub fn pos(&self) -> &L::Position {
109 self.lexer.pos()
110 }
111
112 /// Advance to the next token.
113 fn next(&mut self) -> Option<L::Token> {
114 self.lexer.next()
115 }
116
117 /// Consume the next token if it matches the given token.
118 pub fn parse_with<T>(
119 &mut self,
120 matches: impl FnOnce(L::Token) -> Option<T>,
121 ) -> Result<T, Error> {
122 self.next()
123 .and_then(matches)
124 .ok_or_else(|| Error::new(self.lexer.span()))
125 }
126
127 /// Consume the next token if it matches the given token via [`PartialEq`].
128 pub fn parse<T>(&mut self, terminal: T) -> Result<L::Token, Error>
129 where
130 L::Token: PartialEq<T>,
131 {
132 self.parse_with(|tt| tt.eq(&terminal).then_some(tt))
133 }
134
135 /// Report an error at the current position.
136 pub fn error(&self) -> Error {
137 Error::new(self.lexer.span())
138 }
139
140 /// Whether the parser is at the end of the input.
141 pub fn is_empty(&self) -> bool {
142 self.lexer.is_empty()
143 }
144
145 /// Advance the state to the given state.
146 ///
147 /// # Panics
148 /// Panics if the given state is before the current state.
149 pub fn advance_to(&mut self, other: &Self) {
150 self.advance_to_pos(other.lexer.pos())
151 }
152
153 /// Advance the state to the given position.
154 ///
155 /// # Panics
156 /// Panics if the given position is before the current position.
157 pub fn advance_to_pos(&mut self, pos: &L::Position) {
158 assert!(pos >= self.lexer.pos(), "you cannot rewind");
159 self.lexer.advance_to_pos(pos);
160 }
161
162 /// Create a fork of the current state for speculative parsing.
163 pub fn fork(&self) -> Self {
164 Self {
165 lexer: self.lexer.fork(),
166 stack: self.stack.clone(),
167 }
168 }
169
170 /// Push the given name onto the stack (for debugging purposes).
171 pub fn push(&self, name: &'static str) {
172 self.stack.borrow_mut().push((name, self.lexer.span().end));
173 }
174
175 /// Pop the last name from the stack (for debugging purposes).
176 pub fn pop(&self) {
177 self.stack.borrow_mut().pop();
178 }
179
180 /// Get the current stack (for debugging purposes).
181 pub fn debug(&self) -> String {
182 format!("{:?}", self.stack.borrow())
183 }
184}