parse_it/
parser.rs

1//! Basic definitions for working with the parser.
2//!
3//! If you're looking for a convenient way to parse data, you don't need to dive into
4//! the details of the parser. The [`ParseIt::parse`] method abstracts away all the
5//! complexity, making it easy to use.
6//!
7//! However, if you're interested in learning more about how the parser works under the
8//! hood, you can refer to the [`ParserState`] documentation.
9//!
10//! [`ParseIt::parse`]: crate::ParseIt::parse
11
12use std::{cell::RefCell, fmt::Debug, rc::Rc};
13
14use crate::{
15    lexer::{Cursor, LexerState, Span, TryConvert},
16    LexIt,
17};
18
19/// An error that occurred during parsing.
20#[derive(Debug)]
21pub struct Error {
22    /// The span in the source code where the error occurred.
23    pub span: Span,
24}
25
26impl Error {
27    /// Create a new error from the given span.
28    pub fn new(span: Span) -> Self {
29        Self { span }
30    }
31}
32
33/// The inner state of a parser.
34///
35/// `ParserState` is a cursor over the lexer and keeps track of the current position
36/// in the source code. It is used to drive the parsing process.
37///
38/// # Writing a Parser
39///
40/// A parser is a function `Fn(&ParserState) -> Result<T, Error>`, that takes a
41/// `&ParserState` as input and returns the parsed result or an error.
42///
43/// The common use case is to call the [`parse`](ParserState::parse) method to
44/// read a token from the lexer and advance the state by one token.
45///
46/// ```
47/// # use parse_it::*;
48/// fn parse_abc(state: &mut ParserState<CharLexer>) -> Result<char, Error> {
49///     state.parse_char('a')?;
50///     state.parse_char('b')?;
51///     state.parse_char('c')?;
52///     Ok('c')
53/// }
54///
55/// let mut state = ParserState::new("abc");
56/// parse_abc(&mut state).unwrap();
57/// assert!(state.is_empty());
58/// ```
59///
60/// Please note that `ParserState` uses interior mutability to share its state
61/// between parsers. This means that even if a parser takes a `&ParserState`,
62/// the state can still be mutated.
63///
64/// # Speculative Parsing
65///
66/// `ParserState` allows you to create a fork of the current state via the
67/// [`fork`](ParserState::fork) method, and join it back to the original state
68/// later via the [`advance_to`](ParserState::advance_to) method. This is useful
69/// for speculative parsing.
70///
71/// It's important to note that `ParserState` can only move forward and not
72/// backward. When joining a fork back to the original state, it must be
73/// ensured that the fork is at a position beyond or equal to the original
74/// state.
75///
76/// ```
77/// # use parse_it::*;
78/// fn parse_option(
79///     state: &mut ParserState<CharLexer>,
80///     parser: impl Fn(&mut ParserState<CharLexer>) -> Result<char, Error>
81/// ) -> Result<Option<char>, Error> {
82///     let fork = &mut state.fork();
83///     match parser(fork) {
84///         Ok(c) => {
85///             state.advance_to(fork);
86///             Ok(Some(c))
87///         }
88///         Err(_) => Ok(None),
89///     }
90/// }
91///
92/// let mut state = ParserState::new("aaa");
93/// assert_eq!(parse_option(&mut state, |state| state.parse_char('a')).unwrap(), Some('a'));
94/// assert_eq!(parse_option(&mut state, |state| state.parse_char('b')).unwrap(), None);
95/// ```
96pub struct ParserState<'a, L> {
97    lexer: L,
98    lexbuf: LexerState<'a>,
99    stack: Rc<RefCell<Vec<(&'static str, usize)>>>,
100}
101
102impl<'a, L: LexIt + Clone> ParserState<'a, L> {
103    /// Create a new parser state from the given lexer.
104    pub fn new(input: &'a str) -> Self {
105        Self {
106            lexer: L::new(),
107            lexbuf: LexerState::new(input),
108            stack: Rc::new(RefCell::new(Vec::new())),
109        }
110    }
111
112    /// Get the current parsing position.
113    pub fn cursor(&self) -> Cursor {
114        self.lexbuf.cursor()
115    }
116
117    /// Advance to the next token.
118    fn next(&mut self) -> Option<L::Token<'a>> {
119        self.lexer.next(&mut self.lexbuf)
120    }
121
122    /// Consume the next token if it matches the given token.
123    pub fn parse_with<T>(
124        &mut self,
125        matches: impl FnOnce(L::Token<'a>) -> Option<T>,
126    ) -> Result<T, Error> {
127        self.next().and_then(matches).ok_or_else(|| self.error())
128    }
129
130    /// Parse a token that can be converted to the given type.
131    pub fn parse_type<T>(&mut self) -> Result<T, Error>
132    where
133        L::Token<'a>: TryConvert<T>,
134        T: PartialEq,
135    {
136        self.parse_with(|tt| tt.try_convert())
137    }
138
139    /// Parse a token that exactly matches the given character.
140    pub fn parse_char(&mut self, c: char) -> Result<char, Error> {
141        self.next().ok_or_else(|| self.error())?;
142        let lexeme = self.lexbuf.lexeme();
143        let mut chars = lexeme.chars();
144        let ch = chars.next().ok_or_else(|| self.error())?;
145        if ch == c && chars.as_str().is_empty() {
146            Ok(ch)
147        } else {
148            Err(self.error())
149        }
150    }
151
152    /// Parse a token that exactly matches the given string.
153    pub fn parse_str(&mut self, literal: &'a str) -> Result<&str, Error> {
154        self.next().ok_or_else(|| self.error())?;
155        let lexeme = self.lexbuf.lexeme();
156        if lexeme == literal {
157            Ok(lexeme)
158        } else {
159            Err(self.error())
160        }
161    }
162
163    /// Report an error at the current position.
164    pub fn error(&self) -> Error {
165        Error::new(self.lexbuf.span())
166    }
167
168    /// Whether the parser is at the end of the input.
169    pub fn is_empty(&self) -> bool {
170        self.lexbuf.is_empty()
171    }
172
173    /// Advance the state to the given state.
174    ///
175    /// # Panics
176    /// Panics if the given state is before the current state.
177    pub fn advance_to(&mut self, other: &Self) {
178        self.advance_to_cursor(other.lexbuf.cursor())
179    }
180
181    /// Advance the state to the given position.
182    ///
183    /// # Panics
184    /// Panics if the given position is before the current position.
185    pub fn advance_to_cursor(&mut self, cursor: Cursor) {
186        assert!(cursor >= self.lexbuf.cursor(), "you cannot rewind");
187        self.lexbuf.advance_to_cursor(cursor);
188    }
189
190    /// Create a fork of the current state for speculative parsing.
191    pub fn fork(&self) -> Self {
192        Self {
193            lexer: self.lexer.clone(),
194            lexbuf: self.lexbuf.clone(),
195            stack: self.stack.clone(),
196        }
197    }
198
199    /// Push the given name onto the stack (for debugging purposes).
200    pub fn push(&self, name: &'static str) {
201        self.stack.borrow_mut().push((name, self.lexbuf.span().end));
202    }
203
204    /// Pop the last name from the stack (for debugging purposes).
205    pub fn pop(&self) {
206        self.stack.borrow_mut().pop();
207    }
208
209    /// Get the current stack (for debugging purposes).
210    pub fn debug(&self) -> String {
211        format!("{:?}", self.stack.borrow())
212    }
213}