sprout/lib.rs
1#![allow(dead_code)]
2
3mod token_match;
4mod compare;
5
6pub mod tokenize;
7pub mod parse;
8pub mod prelude;
9
10use std::fmt;
11use std::hash::Hash;
12use colored::Colorize;
13use parse::Grammar;
14use tokenize::Alphabet;
15use trees::Tree;
16
/// A location inside the parsed text.
///
/// A position identifies one character in two ways: by its `line` together
/// with `char` (the offset inside that line), and by `index` (the offset
/// inside the whole text). `line` is counted from 1, `char` from 0.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TextPosition {
    /// Number of the line that contains the character.
    ///
    /// **Important**: line numbers start at 1, so the first line is line 1, _not_ line 0.
    pub line: usize,
    /// Offset of the character within its line.
    pub char: usize,
    /// Offset of the character within the whole text.
    pub index: usize,
}
33
34impl TextPosition {
35 /// Constructs a new `TextPosition`.
36 ///
37 /// **Important**: While this constructor won't prevent you from doing it,
38 /// you shouldn't enter 0 as a value for `line`. The `line` value of a text position
39 /// is usually assumed to start at 1.
40 pub fn new(line: usize, char: usize, index: usize) -> Self {
41 TextPosition { line, char, index }
42 }
43}
44
45impl fmt::Display for TextPosition {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 write!(f, "{}:{}", self.line, self.char)
48 }
49}
50
51/// A node in the [`AST`]. It represents an occurrence of one of the procedures
52/// from the grammar in the text. As such, it has three properties:
53///
54/// - `proc`: The procedure that this node represents
55/// - `text`: The exact string that was matched in the text to produce the node ([`String`])
56/// - `pos`: The position of the beginning of this node's text segment in the text ([`TextPosition`])
57///
58#[derive(Debug, PartialEq, Clone)]
59pub struct ASTNode<PN: fmt::Debug + PartialEq + Copy> {
60 /// The procedure that this node represents
61 pub proc: PN,
62 /// The exact string that was matched in the text to produce this node
63 pub text: String,
64 /// The position of the beginning of this node's text segment in the text
65 pub pos: TextPosition
66}
67
68impl<PN: fmt::Debug + PartialEq + Copy> ASTNode<PN> {
69 /// Constructs a new [`ASTNode`].
70 ///
71 /// See also: [`TextPosition`]
72 ///
73 /// # Examples
74 ///
75 /// Basic usage
76 /// ```
77 /// # use sprout::prelude::*;
78 /// #
79 /// # #[derive(std::fmt::Debug, PartialEq, Clone, Copy)]
80 /// # enum Proc { SomeProcName }
81 /// #
82 /// ASTNode::new(Proc::SomeProcName, "abc".to_string(), TextPosition::new(6, 9, 123));
83 /// ```
84 pub fn new(proc: PN, text: String, pos: TextPosition) -> Self {
85 ASTNode { proc, text, pos }
86 }
87}
88
89impl<PN: fmt::Debug + fmt::Display + PartialEq + Copy> fmt::Display for ASTNode<PN> {
90 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91 write!(f, "{:?}[\"{}\"]", self.proc, self.text)
92 }
93}
94
/// An AST (abstract syntax tree). Its nodes represent occurrences of grammar procedures
/// in the parsed text.
///
/// `AST` is a type alias for a [`trees::Tree`] of [`ASTNode`]s.
pub type AST<PN> = Tree<ASTNode<PN>>;
100
101/// An error object returned when a part of the parsing process failed.
102/// It contains `message`, a [`String`] that describes what went wrong,
103/// and `pos`, a [`TextPosition`] that points to the problematic location in the text
104/// whose parsing was attempted.
105#[derive(Debug, PartialEq, Eq)]
106pub struct ParsingError {
107 pub message: String,
108 pub pos: TextPosition,
109 pub source: Option<String>
110}
111
112impl ParsingError {
113 /// Constructs a new [`ParsingError`].
114 ///
115 /// See also: [`TextPosition`]
116 ///
117 /// # Examples
118 ///
119 /// Basic usage
120 /// ```
121 /// # use sprout::prelude::*;
122 /// ParsingError::new("something went wrong!".to_string(), TextPosition::new(2, 3, 5), None);
123 /// ```
124 pub fn new(message: String, pos: TextPosition, source: Option<String>) -> Self {
125 ParsingError { message, pos, source }
126 }
127
128 fn find_src_offset_index(&self, source: &String, start: isize, incr: isize, max: isize) -> (usize, bool) {
129 let mut offset = start;
130 let mut ellipsis = false;
131 loop {
132 let char = source.chars().nth((self.pos.index as isize + offset) as usize);
133 if char.is_none() || char == Some('\n') {
134 offset -= incr;
135 break;
136 }
137
138 offset += incr;
139 if offset == max {
140 ellipsis = true;
141 break;
142 }
143 }
144 ((self.pos.index as isize + offset) as usize, ellipsis)
145 }
146
147 fn format_source_pointer(&self, f: &mut fmt::Formatter<'_>, source: &String) -> fmt::Result {
148 let (first_index, start_ellipsis) = self.find_src_offset_index(source, -1, -1, -20);
149 let (last_index, end_ellipsis) = self.find_src_offset_index(source, 0, 1, 19);
150
151 if start_ellipsis { write!(f, "...")?; }
152 write!(f, "{}", &source[first_index .. last_index + 1])?;
153 if end_ellipsis { write!(f, "...")?; }
154 writeln!(f)?;
155
156 let mut pointer_offset = self.pos.index - first_index;
157 if start_ellipsis { pointer_offset += 3 }
158
159 write!(f, "{}", " ".repeat(pointer_offset))?;
160 writeln!(f, "{}", "^".bold().red())
161 }
162}
163
164impl fmt::Display for ParsingError {
165 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166 writeln!(f, "{}: {} ({})", "Parsing error".red().bold(), self.message, self.pos)?;
167 if let Some(source) = &self.source {
168 writeln!(f)?;
169 self.format_source_pointer(f, source)?;
170 }
171 Ok(())
172 }
173}
174
175/// The `Parser` is instantiated with an [`Alphabet`] and [`Grammar`], and provides the
176/// `parse` method to directly compile a [`String`] to an [`AST`].
177///
178/// # Examples
179///
180/// Simple example
181/// ```
182/// # use sprout::prelude::*;
183/// #
184/// // Define an enum for our tokens
185/// #[derive(Clone, Copy, Debug, Eq, PartialEq)]
186/// enum Token { X }
187///
188/// // Define an enum for our procedures
189/// #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
190/// enum Proc { AllTheX }
191///
192/// // Implement std::fmt::Display for our two enums for error message generation
193/// impl std::fmt::Display for Token {
194/// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195/// match self {
196/// Self::X => write!(f, "x")
197/// }
198/// }
199/// }
200///
201/// impl std::fmt::Display for Proc {
202/// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203/// match self {
204/// Self::AllTheX => write!(f, "all the x!")
205/// }
206/// }
207/// }
208///
209/// // import our tokens for convenience (this is not required)
210/// use Token::*;
211/// use Proc::*;
212///
213/// // Define alphabet and grammar and create parser
214/// let parser = Parser::new(
215/// alphabet! {
216/// X => "x"
217/// },
218/// grammar! {
219/// #AllTheX => (X)*
220/// }
221/// );
222///
223/// // Parse a test string
224/// let result = parser.parse(AllTheX, "xxxxxxxx".to_string());
225/// assert!(result.is_ok())
226///
227/// ```
228///
229/// For a detailed explanation on how to define alphabets and grammars, see [`alphabet`] and [`grammar`].
230#[derive(Debug)]
231pub struct Parser<PN: Eq + Hash + Copy + fmt::Display + fmt::Debug, TN: Eq + Copy + fmt::Display + fmt::Debug> {
232 alphabet: Alphabet<TN>,
233 grammar: Grammar<PN, TN>
234}
235
236impl<PN: Eq + Hash + Copy + fmt::Display + fmt::Debug, TN: Eq + Copy + fmt::Display + fmt::Debug> Parser<PN, TN> {
237 /// Constructs a new parser from an [`Alphabet`] and a [`Grammar`].
238 ///
239 /// For a complete usage example, see [`Parser`].
240 ///
241 /// See also: [`Alphabet`], [`Grammar`].
242 pub fn new(alphabet: Alphabet<TN>, grammar: Grammar<PN, TN>) -> Self {
243 Parser { alphabet, grammar }
244 }
245
246 /// Parse a [`String`], according to some `proc` defined in the grammar of this parser,
247 /// to an [`AST`].
248 ///
249 /// Returns a `Result` with [`ParsingError`] as the error type.
250 ///
251 /// # Examples
252 ///
253 /// Parsing a string
254 /// ```
255 /// # use sprout::prelude::*;
256 /// # use std::fmt::{Display, Formatter, Result};
257 /// #
258 /// # #[derive(Clone, Copy, Debug, Eq, PartialEq)]
259 /// # enum Token { Number }
260 /// #
261 /// # impl Display for Token {
262 /// # fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!(f, "number") }
263 /// # }
264 /// # #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
265 /// # enum Proc { Number }
266 /// #
267 /// # impl Display for Proc {
268 /// # fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!(f, "number") }
269 /// # }
270 /// #
271 /// # let parser = Parser::new(alphabet!(Token::Number => "[0-9]+"), grammar!(#Proc::Number => Token::Number));
272 /// #
273 /// // Given the grammar contains a procedure Proc::Number
274 /// let result = parser.parse(Proc::Number, "3453".to_string());
275 /// ```
276 ///
277 /// For more details, see [`Parser`]
278 pub fn parse(&self, proc: PN, text: String) -> Result<AST<PN>, ParsingError> {
279 let result = self.alphabet.tokenize(text.clone()).and_then(|tokens| {
280 self.grammar.parse(proc, &tokens)
281 });
282 match result {
283 Ok(ast) => Ok(ast),
284 Err(mut error) => {
285 error.source = Some(text);
286 Err(error)
287 }
288 }
289 }
290}
291
#[cfg(test)]
mod tests {
    use colored::Colorize;

    use crate::{ParsingError, TextPosition};

    /// Builds the error under test: a fixed message at the given position,
    /// with `source` attached so that the excerpt is rendered.
    fn error_at(pos: TextPosition, source: &str) -> ParsingError {
        ParsingError::new("Error happened here".to_string(), pos, Some(source.to_string()))
    }

    /// Builds the expected rendering: the heading with `pos`, a blank line,
    /// the `excerpt`, and the marker indented by `pointer_column` spaces.
    ///
    /// The indentation is given as a number rather than as literal spaces so
    /// that the expected column is explicit and cannot be miscounted.
    fn expected_output(pos: &str, excerpt: &str, pointer_column: usize) -> String {
        format!(
            "{}: Error happened here ({})\n\n{}\n{}{}\n",
            "Parsing error".red().bold(),
            pos,
            excerpt,
            " ".repeat(pointer_column),
            "^".red().bold()
        )
    }

    #[test]
    fn parsing_error_with_small_char_should_format_properly() {
        let parsing_error = error_at(TextPosition::new(2, 1, 5), "123\n456789\nsgfde");

        // Index 5 is the second character of the second line.
        assert_eq!(parsing_error.to_string(), expected_output("2:1", "456789", 1));
    }

    #[test]
    fn parsing_error_should_format_properly() {
        let parsing_error = error_at(TextPosition::new(2, 5, 9), "123\n456789\nsgfde");

        // Index 9 is the last character of the second line.
        assert_eq!(parsing_error.to_string(), expected_output("2:5", "456789", 5));
    }

    #[test]
    fn parsing_error_should_format_ellipses_properly() {
        let parsing_error = error_at(
            TextPosition::new(2, 43, 48),
            "123\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa456789aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nsgfde"
        );

        // 20 characters are shown before the error, preceded by the 3-character ellipsis.
        assert_eq!(
            parsing_error.to_string(),
            expected_output("2:43", "...aaaaaaaaaaaaaaaa456789aaaaaaaaaaaaaaaaaa...", 23)
        );
    }
}