parse_it/parser.rs
1//! Basic definitions for working with the parser.
2//!
3//! If you're looking for a convenient way to parse data, you don't need to dive into
4//! the details of the parser. The [`ParseIt::parse`] method abstracts away all the
5//! complexity, making it easy to use.
6//!
7//! However, if you're interested in learning more about how the parser works under the
8//! hood, you can refer to the [`ParserState`] documentation.
9//!
10//! [`ParseIt::parse`]: crate::ParseIt::parse
11
12use std::cell::{Cell, RefCell};
13use std::fmt::Debug;
14use std::rc::Rc;
15
16use crate::lexer::Lexer;
17
/// A half-open range `[start, end)` of offsets into the source code.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Span {
    /// Offset of the first position covered by the span (inclusive).
    pub start: usize,
    /// Offset just past the last position covered by the span (exclusive).
    pub end: usize,
}
26
27/// An error that occurred during parsing.
28#[derive(Debug)]
29pub struct Error {
30 /// The span in the source code where the error occurred.
31 pub span: Span,
32}
33
34impl Error {
35 /// Create a new error from the given span.
36 pub fn new(span: Span) -> Self {
37 Self { span }
38 }
39}
40
41/// The inner state of a parser.
42///
43/// `ParserState` is a cursor over the lexer and keeps track of the current position
44/// in the source code. It is used to drive the parsing process.
45///
46/// # Writing a Parser
47///
48/// A parser is a function `Fn(&ParserState) -> Result<T, Error>`, that takes a
49/// `&ParserState` as input and returns the parsed result or an error.
50///
51/// The common use case is to call the [`parse`](ParserState::parse) method to
52/// read a token from the lexer and advance the state by one token.
53///
54/// ```
55/// # use parse_it::*;
56/// fn parse_abc(state: &ParserState<CharLexer>) -> Result<char, Error> {
57/// state.parse('a')?;
58/// state.parse('b')?;
59/// state.parse('c')?;
60/// Ok('c')
61/// }
62///
63/// let state = ParserState::new(CharLexer::new("abc"));
64/// parse_abc(&state).unwrap();
65/// assert!(state.is_empty());
66/// ```
67///
68/// Please note that `ParserState` uses interior mutability to share its state
69/// between parsers. This means that even if a parser takes a `&ParserState`,
70/// the state can still be mutated.
71///
72/// # Speculative Parsing
73///
74/// `ParserState` allows you to create a fork of the current state via the
75/// [`fork`](ParserState::fork) method, and join it back to the original state
76/// later via the [`advance_to`](ParserState::advance_to) method. This is useful
77/// for speculative parsing.
78///
79/// It's important to note that `ParserState` can only move forward and not
80/// backward. When joining a fork back to the original state, it must be
81/// ensured that the fork is at a position beyond or equal to the original
82/// state.
83///
84/// ```
85/// # use parse_it::*;
86/// fn parse_option(
87/// state: &ParserState<CharLexer>,
88/// parser: impl Fn(&ParserState<CharLexer>) -> Result<char, Error>
89/// ) -> Result<Option<char>, Error> {
90/// let fork = state.fork();
91/// match parser(&fork) {
92/// Ok(c) => {
93/// state.advance_to(&fork);
94/// Ok(Some(c))
95/// }
96/// Err(_) => Ok(None),
97/// }
98/// }
99///
100/// let state = ParserState::new(CharLexer::new("aaa"));
101/// assert_eq!(parse_option(&state, |state| state.parse('a')).unwrap(), Some('a'));
102/// assert_eq!(parse_option(&state, |state| state.parse('b')).unwrap(), None);
103/// ```
104pub struct ParserState<L> {
105 span: Cell<Span>,
106 lexer: L,
107 stack: Rc<RefCell<Vec<(&'static str, usize)>>>,
108}
109
110impl<'a, L: Lexer<'a>> ParserState<L> {
111 /// Create a new parser state from the given lexer.
112 pub fn new(lexer: L) -> Self {
113 Self {
114 span: Cell::new(Span { start: 0, end: 0 }),
115 lexer,
116 stack: Rc::new(RefCell::new(Vec::new())),
117 }
118 }
119
120 /// Get the current parsing position.
121 pub fn pos(&self) -> L::Position {
122 self.lexer.pos()
123 }
124
125 /// Advance to the next token.
126 fn next(&self) -> Option<L::Token> {
127 match self.lexer.next() {
128 (Some(token), advance) => {
129 let Span { end, .. } = self.span.get();
130 self.span.set(Span {
131 start: end,
132 end: end + advance,
133 });
134 Some(token)
135 }
136 _ => None,
137 }
138 }
139
140 /// Consume the next token if it matches the given token.
141 pub fn parse(&self, token: L::Token) -> Result<L::Token, Error> {
142 match self.next() {
143 Some(tt) if tt == token => Ok(tt),
144 _ => Err(self.error()),
145 }
146 }
147
148 /// Report an error at the current position.
149 pub fn error(&self) -> Error {
150 Error::new(self.span.get())
151 }
152
153 /// Whether the parser is at the end of the input.
154 pub fn is_empty(&self) -> bool {
155 self.lexer.is_empty()
156 }
157
158 /// Advance the state to the given state.
159 ///
160 /// # Panics
161 /// Panics if the given state is before the current state.
162 pub fn advance_to(&self, other: &Self) {
163 self.advance_to_pos(other.lexer.pos())
164 }
165
166 /// Advance the state to the given position.
167 ///
168 /// # Panics
169 /// Panics if the given position is before the current position.
170 pub fn advance_to_pos(&self, pos: L::Position) {
171 assert!(pos >= self.lexer.pos(), "you cannot rewind");
172 self.lexer.advance_to_pos(pos);
173 }
174
175 /// Create a fork of the current state for speculative parsing.
176 pub fn fork(&self) -> Self {
177 Self {
178 span: self.span.clone(),
179 lexer: self.lexer.fork(),
180 stack: self.stack.clone(),
181 }
182 }
183
184 /// Push the given name onto the stack (for debugging purposes).
185 pub fn push(&self, name: &'static str) {
186 self.stack.borrow_mut().push((name, self.span.get().end));
187 }
188
189 /// Pop the last name from the stack (for debugging purposes).
190 pub fn pop(&self) {
191 self.stack.borrow_mut().pop();
192 }
193
194 /// Get the current stack (for debugging purposes).
195 pub fn debug(&self) -> String {
196 format!("{:?}", self.stack.borrow())
197 }
198}