parse_it/
parser.rs

1//! Basic definitions for working with the parser.
2//!
3//! If you're looking for a convenient way to parse data, you don't need to dive into
4//! the details of the parser. The [`ParseIt::parse`] method abstracts away all the
5//! complexity, making it easy to use.
6//!
7//! However, if you're interested in learning more about how the parser works under the
8//! hood, you can refer to the [`ParserState`] documentation.
9//!
10//! [`ParseIt::parse`]: crate::ParseIt::parse
11
12use std::cell::{Cell, RefCell};
13use std::fmt::Debug;
14use std::rc::Rc;
15
16use crate::lexer::Lexer;
17
/// A region of the source code, expressed as a half-open range.
///
/// The region covers every position from `start` up to, but not including,
/// `end`; a span with `start == end` is therefore empty.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Where the span begins (inclusive).
    pub start: usize,
    /// Where the span stops (exclusive).
    pub end: usize,
}
26
27/// An error that occurred during parsing.
28#[derive(Debug)]
29pub struct Error {
30    /// The span in the source code where the error occurred.
31    pub span: Span,
32}
33
34impl Error {
35    /// Create a new error from the given span.
36    pub fn new(span: Span) -> Self {
37        Self { span }
38    }
39}
40
41/// The inner state of a parser.
42///
43/// `ParserState` is a cursor over the lexer and keeps track of the current position
44/// in the source code. It is used to drive the parsing process.
45///
46/// # Writing a Parser
47///
48/// A parser is a function `Fn(&ParserState) -> Result<T, Error>`, that takes a 
49/// `&ParserState` as input and returns the parsed result or an error. 
50///
51/// The common use case is to call the [`parse`](ParserState::parse) method to
52/// read a token from the lexer and advance the state by one token.
53///
54/// ```
55/// # use parse_it::*;
56/// fn parse_abc(state: &ParserState<CharLexer>) -> Result<char, Error> {
57///     state.parse('a')?;
58///     state.parse('b')?;
59///     state.parse('c')?;
60///     Ok('c')
61/// }
62///
63/// let state = ParserState::new(CharLexer::new("abc"));
64/// parse_abc(&state).unwrap();
65/// assert!(state.is_empty());
66/// ```
67/// 
68/// Please note that `ParserState` uses interior mutability to share its state
69/// between parsers. This means that even if a parser takes a `&ParserState`,
70/// the state can still be mutated.
71///
72/// # Speculative Parsing
73///
74/// `ParserState` allows you to create a fork of the current state via the
75/// [`fork`](ParserState::fork) method, and join it back to the original state
76/// later via the [`advance_to`](ParserState::advance_to) method. This is useful
77/// for speculative parsing.
78///
79/// It's important to note that `ParserState` can only move forward and not
80/// backward. When joining a fork back to the original state, it must be
81/// ensured that the fork is at a position beyond or equal to the original
82/// state.
83///
84/// ```
85/// # use parse_it::*;
86/// fn parse_option(
87///     state: &ParserState<CharLexer>,
88///     parser: impl Fn(&ParserState<CharLexer>) -> Result<char, Error>
89/// ) -> Result<Option<char>, Error> {
90///     let fork = state.fork();
91///     match parser(&fork) {
92///         Ok(c) => {
93///             state.advance_to(&fork);
94///             Ok(Some(c))
95///         }
96///         Err(_) => Ok(None),
97///     }
98/// }
99///
100/// let state = ParserState::new(CharLexer::new("aaa"));
101/// assert_eq!(parse_option(&state, |state| state.parse('a')).unwrap(), Some('a'));
102/// assert_eq!(parse_option(&state, |state| state.parse('b')).unwrap(), None);
103/// ```
104pub struct ParserState<L> {
105    span: Cell<Span>,
106    lexer: L,
107    stack: Rc<RefCell<Vec<(&'static str, usize)>>>,
108}
109
110impl<'a, L: Lexer<'a>> ParserState<L> {
111    /// Create a new parser state from the given lexer.
112    pub fn new(lexer: L) -> Self {
113        Self {
114            span: Cell::new(Span { start: 0, end: 0 }),
115            lexer,
116            stack: Rc::new(RefCell::new(Vec::new())),
117        }
118    }
119
120    /// Get the current parsing position.
121    pub fn pos(&self) -> L::Position {
122        self.lexer.pos()
123    }
124
125    /// Advance to the next token.
126    fn next(&self) -> Option<L::Token> {
127        match self.lexer.next() {
128            (Some(token), advance) => {
129                let Span { end, .. } = self.span.get();
130                self.span.set(Span {
131                    start: end,
132                    end: end + advance,
133                });
134                Some(token)
135            }
136            _ => None,
137        }
138    }
139
140    /// Consume the next token if it matches the given token.
141    pub fn parse(&self, token: L::Token) -> Result<L::Token, Error> {
142        match self.next() {
143            Some(tt) if tt == token => Ok(tt),
144            _ => Err(self.error()),
145        }
146    }
147
148    /// Report an error at the current position.
149    pub fn error(&self) -> Error {
150        Error::new(self.span.get())
151    }
152
153    /// Whether the parser is at the end of the input.
154    pub fn is_empty(&self) -> bool {
155        self.lexer.is_empty()
156    }
157
158    /// Advance the state to the given state.
159    ///
160    /// # Panics
161    /// Panics if the given state is before the current state.
162    pub fn advance_to(&self, other: &Self) {
163        self.advance_to_pos(other.lexer.pos())
164    }
165
166    /// Advance the state to the given position.
167    ///
168    /// # Panics
169    /// Panics if the given position is before the current position.
170    pub fn advance_to_pos(&self, pos: L::Position) {
171        assert!(pos >= self.lexer.pos(), "you cannot rewind");
172        self.lexer.advance_to_pos(pos);
173    }
174
175    /// Create a fork of the current state for speculative parsing.
176    pub fn fork(&self) -> Self {
177        Self {
178            span: self.span.clone(),
179            lexer: self.lexer.fork(),
180            stack: self.stack.clone(),
181        }
182    }
183
184    /// Push the given name onto the stack (for debugging purposes).
185    pub fn push(&self, name: &'static str) {
186        self.stack.borrow_mut().push((name, self.span.get().end));
187    }
188
189    /// Pop the last name from the stack (for debugging purposes).
190    pub fn pop(&self) {
191        self.stack.borrow_mut().pop();
192    }
193
194    /// Get the current stack (for debugging purposes).
195    pub fn debug(&self) -> String {
196        format!("{:?}", self.stack.borrow())
197    }
198}