sprout/
lib.rs

1#![allow(dead_code)]
2
3mod token_match;
4mod compare;
5
6pub mod tokenize;
7pub mod parse;
8pub mod prelude;
9
10use std::fmt;
11use std::hash::Hash;
12use colored::Colorize;
13use parse::Grammar;
14use tokenize::Alphabet;
15use trees::Tree;
16
/// A location inside a piece of text.
///
/// The location is expressed twice: relative to its line (`line` and `char`)
/// and globally (`index`). `line` starts at 1, and `char` at 0.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TextPosition {
	/// The line of text this position points to.
	///
	/// **Important**: The first line has line number 1, _not_ 0.
	pub line: usize,
	/// The index, within its line, of the character this position points to.
	pub char: usize,
	/// The index, within the whole text, of the character this position points to.
	pub index: usize,
}
33
34impl TextPosition {
35	/// Constructs a new `TextPosition`.
36	/// 
37	/// **Important**: While this constructor won't prevent you from doing it,
38	/// you shouldn't enter 0 as a value for `line`. The `line` value of a text position
39	/// is usually assumed to start at 1.
40    pub fn new(line: usize, char: usize, index: usize) -> Self {
41        TextPosition { line, char, index }
42    }
43}
44
45impl fmt::Display for TextPosition {
46    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47        write!(f, "{}:{}", self.line, self.char)
48    }
49}
50
51/// A node in the [`AST`]. It represents an occurrence of one of the procedures
52/// from the grammar in the text. As such, it has three properties:
53/// 
54/// - `proc`: The procedure that this node represents
55/// - `text`: The exact string that was matched in the text to produce the node ([`String`])
56/// - `pos`: The position of the beginning of this node's text segment in the text ([`TextPosition`])
57/// 
58#[derive(Debug, PartialEq, Clone)]
59pub struct ASTNode<PN: fmt::Debug + PartialEq + Copy> {
60	/// The procedure that this node represents
61	pub proc: PN,
62	/// The exact string that was matched in the text to produce this node
63	pub text: String,
64	/// The position of the beginning of this node's text segment in the text
65	pub pos: TextPosition
66}
67
68impl<PN: fmt::Debug + PartialEq + Copy> ASTNode<PN> {
69	/// Constructs a new [`ASTNode`].
70	/// 
71	/// See also: [`TextPosition`]
72	/// 
73	/// # Examples
74	/// 
75	/// Basic usage
76	/// ```
77	/// # use sprout::prelude::*;
78	/// #
79	/// # #[derive(std::fmt::Debug, PartialEq, Clone, Copy)]
80	/// # enum Proc { SomeProcName }
81	/// #
82	/// ASTNode::new(Proc::SomeProcName, "abc".to_string(), TextPosition::new(6, 9, 123));
83	/// ```
84	pub fn new(proc: PN, text: String, pos: TextPosition) -> Self {
85		ASTNode { proc, text, pos }
86	}
87}
88
89impl<PN: fmt::Debug + fmt::Display + PartialEq + Copy> fmt::Display for ASTNode<PN> {
90	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
91		write!(f, "{:?}[\"{}\"]", self.proc, self.text)
92	}
93}
94
95/// An AST (abstract syntax tree). Its nodes and represent occurrences of grammar procedures
96/// in the parsed text.
97/// 
98/// `AST` is a type alias for a [`trees::Tree`] of [`ASTNode`]s.
99pub type AST<PN> = Tree<ASTNode<PN>>;
100
101/// An error object returned when a part of the parsing process failed.
102/// It contains `message`, a [`String`] that describes what went wrong, 
103/// and `pos`, a [`TextPosition`] that points to the problematic location in the text
104/// whose parsing was attempted.
105#[derive(Debug, PartialEq, Eq)]
106pub struct ParsingError {
107	pub message: String,
108	pub pos: TextPosition,
109	pub source: Option<String>
110}
111
112impl ParsingError {
113	/// Constructs a new [`ParsingError`].
114	/// 
115	/// See also: [`TextPosition`]
116	/// 
117	/// # Examples
118	/// 
119	/// Basic usage
120	/// ```
121	/// # use sprout::prelude::*;
122	/// ParsingError::new("something went wrong!".to_string(), TextPosition::new(2, 3, 5), None);
123	/// ```
124	pub fn new(message: String, pos: TextPosition, source: Option<String>) -> Self {
125		ParsingError { message, pos, source }
126	}
127
128	fn find_src_offset_index(&self, source: &String, start: isize, incr: isize, max: isize) -> (usize, bool) {
129		let mut offset = start;
130		let mut ellipsis = false;
131		loop {
132			let char = source.chars().nth((self.pos.index as isize + offset) as usize);
133			if char.is_none() || char == Some('\n') {
134				offset -= incr;
135				break;
136			}
137
138			offset += incr;
139			if offset == max {
140				ellipsis = true;
141				break;
142			}
143		}
144		((self.pos.index as isize + offset) as usize, ellipsis)
145	}
146
147	fn format_source_pointer(&self, f: &mut fmt::Formatter<'_>, source: &String) -> fmt::Result {
148		let (first_index, start_ellipsis) = self.find_src_offset_index(source, -1, -1, -20);
149		let (last_index, end_ellipsis) = self.find_src_offset_index(source, 0, 1, 19);
150
151		if start_ellipsis { write!(f, "...")?; }
152		write!(f, "{}", &source[first_index .. last_index + 1])?;
153		if end_ellipsis { write!(f, "...")?; }
154		writeln!(f)?;
155
156		let mut pointer_offset = self.pos.index - first_index;
157		if start_ellipsis { pointer_offset += 3 }
158
159		write!(f, "{}", " ".repeat(pointer_offset))?;
160		writeln!(f, "{}", "^".bold().red())
161	}
162}
163
164impl fmt::Display for ParsingError {
165	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166		writeln!(f, "{}: {} ({})", "Parsing error".red().bold(), self.message, self.pos)?;
167		if let Some(source) = &self.source {
168			writeln!(f)?;
169			self.format_source_pointer(f, source)?;
170		}
171		Ok(())
172	}
173}
174
175/// The `Parser` is instantiated with an [`Alphabet`] and [`Grammar`], and provides the
176/// `parse` method to directly compile a [`String`] to an [`AST`].
177/// 
178/// # Examples
179/// 
180/// Simple example
181/// ```
182/// # use sprout::prelude::*;
183/// #
184/// // Define an enum for our tokens
185/// #[derive(Clone, Copy, Debug, Eq, PartialEq)]
186/// enum Token { X }
187///
188/// // Define an enum for our procedures
189/// #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
190/// enum Proc { AllTheX }
191/// 
192/// // Implement std::fmt::Display for our two enums for error message generation
193/// impl std::fmt::Display for Token {
194/// 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
195/// 		match self {
196/// 			Self::X => write!(f, "x")
197/// 		}
198/// 	}
199/// }
200/// 
201/// impl std::fmt::Display for Proc {
202/// 	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
203/// 		match self {
204/// 			Self::AllTheX => write!(f, "all the x!")
205/// 		}
206/// 	}
207/// }
208///
209/// // import our tokens for convenience (this is not required)
210/// use Token::*;
211/// use Proc::*;
212/// 
213/// // Define alphabet and grammar and create parser
214/// let parser = Parser::new(
215/// 	alphabet! {
216/// 		X => "x"
217/// 	},
218/// 	grammar! {
219/// 		#AllTheX => (X)*
220/// 	}
221/// );
222/// 
223/// // Parse a test string
224/// let result = parser.parse(AllTheX, "xxxxxxxx".to_string());
225/// assert!(result.is_ok())
226/// 
227/// ```
228/// 
229/// For a detailed explanation on how to define alphabets and grammars, see [`alphabet`] and [`grammar`].
230#[derive(Debug)]
231pub struct Parser<PN: Eq + Hash + Copy + fmt::Display + fmt::Debug, TN: Eq + Copy + fmt::Display + fmt::Debug> {
232	alphabet: Alphabet<TN>,
233	grammar: Grammar<PN, TN>
234}
235
236impl<PN: Eq + Hash + Copy + fmt::Display + fmt::Debug, TN: Eq + Copy + fmt::Display + fmt::Debug> Parser<PN, TN> {
237	/// Constructs a new parser from an [`Alphabet`] and a [`Grammar`].
238	/// 
239	/// For a complete usage example, see [`Parser`].
240	/// 
241	/// See also: [`Alphabet`], [`Grammar`].
242	pub fn new(alphabet: Alphabet<TN>, grammar: Grammar<PN, TN>) -> Self {
243		Parser { alphabet, grammar }
244	}
245
246	/// Parse a [`String`], according to some `proc` defined in the grammar of this parser,
247	/// to an [`AST`].
248	/// 
249	/// Returns a `Result` with [`ParsingError`] as the error type.
250	/// 
251	/// # Examples
252	/// 
253	/// Parsing a string
254	/// ```
255	/// # use sprout::prelude::*;
256	/// # use std::fmt::{Display, Formatter, Result};
257	/// #
258	/// # #[derive(Clone, Copy, Debug, Eq, PartialEq)]
259	/// # enum Token { Number }
260	/// #
261	/// # impl Display for Token {
262	/// # 	fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!(f, "number") }
263	/// # }
264	/// # #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
265	/// # enum Proc { Number }
266	/// #
267	/// # impl Display for Proc {
268	/// # 	fn fmt(&self, f: &mut Formatter<'_>) -> Result { write!(f, "number") }
269	/// # }
270	/// #
271	/// # let parser = Parser::new(alphabet!(Token::Number => "[0-9]+"), grammar!(#Proc::Number => Token::Number));
272	/// #
273	/// // Given the grammar contains a procedure Proc::Number
274	/// let result = parser.parse(Proc::Number, "3453".to_string());
275	/// ```
276	/// 
277	/// For more details, see [`Parser`]
278	pub fn parse(&self, proc: PN, text: String) -> Result<AST<PN>, ParsingError> {
279		let result = self.alphabet.tokenize(text.clone()).and_then(|tokens| {
280			self.grammar.parse(proc, &tokens)
281		});
282		match result {
283			Ok(ast) => Ok(ast),
284			Err(mut error) => {
285				error.source = Some(text);
286				Err(error)
287			}
288		}
289	}
290}
291
#[cfg(test)]
mod tests {
	use colored::Colorize;

	use crate::{ParsingError, TextPosition};

	/// Three short lines; no excerpt taken from it needs to be truncated.
	const SHORT_SOURCE: &str = "123\n456789\nsgfde";

	/// Builds a `ParsingError` carrying the message shared by all tests and
	/// returns its `Display` output.
	fn render(pos: TextPosition, source: &str) -> String {
		let error = ParsingError::new(
			"Error happened here".to_string(),
			pos,
			Some(source.to_string())
		);
		error.to_string()
	}

	#[test]
	fn parsing_error_with_small_char_should_format_properly() {
		let expected = format!(
			"{}: Error happened here (2:1)\n\n456789\n {}\n",
			"Parsing error".red().bold(),
			"^".red().bold()
		);

		assert_eq!(render(TextPosition::new(2, 1, 5), SHORT_SOURCE), expected);
	}

	#[test]
	fn parsing_error_should_format_properly() {
		let expected = format!(
			"{}: Error happened here (2:5)\n\n456789\n     {}\n",
			"Parsing error".red().bold(),
			"^".red().bold()
		);

		assert_eq!(render(TextPosition::new(2, 5, 9), SHORT_SOURCE), expected);
	}

	#[test]
	fn parsing_error_should_format_ellipses_properly() {
		// The second line is long enough to be cut off on both sides.
		let source = "123\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa456789aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\nsgfde";
		let expected = format!(
			"{}: Error happened here (2:43)\n\n...aaaaaaaaaaaaaaaa456789aaaaaaaaaaaaaaaaaa...\n                       {}\n",
			"Parsing error".red().bold(),
			"^".red().bold()
		);

		assert_eq!(render(TextPosition::new(2, 43, 48), source), expected);
	}
}