parse_it/parser.rs
1//! Basic definitions for working with the parser.
2//!
3//! If you're looking for a convenient way to parse data, you don't need to dive into
4//! the details of the parser. The [`ParseIt::parse`] method abstracts away all the
5//! complexity, making it easy to use.
6//!
7//! However, if you're interested in learning more about how the parser works under the
8//! hood, you can refer to the [`ParserState`] documentation.
9//!
10//! [`ParseIt::parse`]: crate::ParseIt::parse
11
12use std::{cell::RefCell, fmt::Debug, rc::Rc};
13
14use crate::{
15 lexer::{Cursor, LexerState, Span, TryConvert},
16 LexIt,
17};
18
19/// An error that occurred during parsing.
20#[derive(Debug)]
21pub struct Error {
22 /// The span in the source code where the error occurred.
23 pub span: Span,
24}
25
26impl Error {
27 /// Create a new error from the given span.
28 pub fn new(span: Span) -> Self {
29 Self { span }
30 }
31}
32
33/// The inner state of a parser.
34///
35/// `ParserState` is a cursor over the lexer and keeps track of the current position
36/// in the source code. It is used to drive the parsing process.
37///
38/// # Writing a Parser
39///
40/// A parser is a function `Fn(&ParserState) -> Result<T, Error>`, that takes a
41/// `&ParserState` as input and returns the parsed result or an error.
42///
43/// The common use case is to call the [`parse`](ParserState::parse) method to
44/// read a token from the lexer and advance the state by one token.
45///
46/// ```
47/// # use parse_it::*;
48/// fn parse_abc(state: &mut ParserState<CharLexer>) -> Result<char, Error> {
49/// state.parse_char('a')?;
50/// state.parse_char('b')?;
51/// state.parse_char('c')?;
52/// Ok('c')
53/// }
54///
55/// let mut state = ParserState::new("abc");
56/// parse_abc(&mut state).unwrap();
57/// assert!(state.is_empty());
58/// ```
59///
60/// Please note that `ParserState` uses interior mutability to share its state
61/// between parsers. This means that even if a parser takes a `&ParserState`,
62/// the state can still be mutated.
63///
64/// # Speculative Parsing
65///
66/// `ParserState` allows you to create a fork of the current state via the
67/// [`fork`](ParserState::fork) method, and join it back to the original state
68/// later via the [`advance_to`](ParserState::advance_to) method. This is useful
69/// for speculative parsing.
70///
71/// It's important to note that `ParserState` can only move forward and not
72/// backward. When joining a fork back to the original state, it must be
73/// ensured that the fork is at a position beyond or equal to the original
74/// state.
75///
76/// ```
77/// # use parse_it::*;
78/// fn parse_option(
79/// state: &mut ParserState<CharLexer>,
80/// parser: impl Fn(&mut ParserState<CharLexer>) -> Result<char, Error>
81/// ) -> Result<Option<char>, Error> {
82/// let fork = &mut state.fork();
83/// match parser(fork) {
84/// Ok(c) => {
85/// state.advance_to(fork);
86/// Ok(Some(c))
87/// }
88/// Err(_) => Ok(None),
89/// }
90/// }
91///
92/// let mut state = ParserState::new("aaa");
93/// assert_eq!(parse_option(&mut state, |state| state.parse_char('a')).unwrap(), Some('a'));
94/// assert_eq!(parse_option(&mut state, |state| state.parse_char('b')).unwrap(), None);
95/// ```
96pub struct ParserState<'a, L> {
97 lexer: L,
98 lexbuf: LexerState<'a>,
99 stack: Rc<RefCell<Vec<(&'static str, usize)>>>,
100}
101
102impl<'a, L: LexIt + Clone> ParserState<'a, L> {
103 /// Create a new parser state from the given lexer.
104 pub fn new(input: &'a str) -> Self {
105 Self {
106 lexer: L::new(),
107 lexbuf: LexerState::new(input),
108 stack: Rc::new(RefCell::new(Vec::new())),
109 }
110 }
111
112 /// Get the current parsing position.
113 pub fn cursor(&self) -> Cursor {
114 self.lexbuf.cursor()
115 }
116
117 /// Advance to the next token.
118 fn next(&mut self) -> Option<L::Token<'a>> {
119 self.lexer.next(&mut self.lexbuf)
120 }
121
122 /// Consume the next token if it matches the given token.
123 pub fn parse_with<T>(
124 &mut self,
125 matches: impl FnOnce(L::Token<'a>) -> Option<T>,
126 ) -> Result<T, Error> {
127 self.next().and_then(matches).ok_or_else(|| self.error())
128 }
129
130 /// Parse a token that can be converted to the given type.
131 pub fn parse_type<T>(&mut self) -> Result<T, Error>
132 where
133 L::Token<'a>: TryConvert<T>,
134 T: PartialEq,
135 {
136 self.parse_with(|tt| tt.try_convert())
137 }
138
139 /// Parse a token that exactly matches the given character.
140 pub fn parse_char(&mut self, c: char) -> Result<char, Error> {
141 self.next().ok_or_else(|| self.error())?;
142 let lexeme = self.lexbuf.lexeme();
143 let mut chars = lexeme.chars();
144 let ch = chars.next().ok_or_else(|| self.error())?;
145 if ch == c && chars.as_str().is_empty() {
146 Ok(ch)
147 } else {
148 Err(self.error())
149 }
150 }
151
152 /// Parse a token that exactly matches the given string.
153 pub fn parse_str(&mut self, literal: &'a str) -> Result<&str, Error> {
154 self.next().ok_or_else(|| self.error())?;
155 let lexeme = self.lexbuf.lexeme();
156 if lexeme == literal {
157 Ok(lexeme)
158 } else {
159 Err(self.error())
160 }
161 }
162
163 /// Report an error at the current position.
164 pub fn error(&self) -> Error {
165 Error::new(self.lexbuf.span())
166 }
167
168 /// Whether the parser is at the end of the input.
169 pub fn is_empty(&self) -> bool {
170 self.lexbuf.is_empty()
171 }
172
173 /// Advance the state to the given state.
174 ///
175 /// # Panics
176 /// Panics if the given state is before the current state.
177 pub fn advance_to(&mut self, other: &Self) {
178 self.advance_to_cursor(other.lexbuf.cursor())
179 }
180
181 /// Advance the state to the given position.
182 ///
183 /// # Panics
184 /// Panics if the given position is before the current position.
185 pub fn advance_to_cursor(&mut self, cursor: Cursor) {
186 assert!(cursor >= self.lexbuf.cursor(), "you cannot rewind");
187 self.lexbuf.advance_to_cursor(cursor);
188 }
189
190 /// Create a fork of the current state for speculative parsing.
191 pub fn fork(&self) -> Self {
192 Self {
193 lexer: self.lexer.clone(),
194 lexbuf: self.lexbuf.clone(),
195 stack: self.stack.clone(),
196 }
197 }
198
199 /// Push the given name onto the stack (for debugging purposes).
200 pub fn push(&self, name: &'static str) {
201 self.stack.borrow_mut().push((name, self.lexbuf.span().end));
202 }
203
204 /// Pop the last name from the stack (for debugging purposes).
205 pub fn pop(&self) {
206 self.stack.borrow_mut().pop();
207 }
208
209 /// Get the current stack (for debugging purposes).
210 pub fn debug(&self) -> String {
211 format!("{:?}", self.stack.borrow())
212 }
213}