wagon_lexer/
lib.rs

#![warn(missing_docs)]
//! WAGon Lexers
//!
//! Provides lexers for the WAGon DSL, as well as helper iterators which can switch between lexers on the fly.
//! Most likely, all you will care about are [`LexerBridge`] and [`Tokens`].
//!
//! # Example
//! ```rust
//! use wagon_lexer::{Tokens, LexerBridge, LexResult};
//! use wagon_ident::Ident;
//! # use wagon_lexer::productions::Productions;
//! # use wagon_lexer::math::Math;
//! # use wagon_lexer::metadata::Metadata;
//!
//! let s = r#"
//! meta: "data";
//! ============
//! S -> A;
//! "#;
//! let lexer = LexerBridge::new(s);
//! let tokens: Vec<LexResult> = lexer.collect();
//! assert_eq!(tokens, vec![
//!     Ok(Tokens::MetadataToken(Metadata::Identifier("meta".into()))),
//!     Ok(Tokens::MetadataToken(Metadata::Colon)),
//!     Ok(Tokens::MathToken(Math::LitString("data".to_string()))),
//!     Ok(Tokens::MathToken(Math::Semi)),
//!     Ok(Tokens::MetadataToken(Metadata::Delim)),
//!     Ok(Tokens::ProductionToken(Productions::Identifier(Ident::Unknown("S".to_string())))),
//!     Ok(Tokens::ProductionToken(Productions::Produce)),
//!     Ok(Tokens::ProductionToken(Productions::Identifier(Ident::Unknown("A".to_string())))),
//!     Ok(Tokens::ProductionToken(Productions::Semi))
//! ]);
//! ```

pub(crate) mod ident;
/// The lexer for the Math DSL.
pub mod math;
/// The lexer for the Grammar DSL.
pub mod productions;
/// The lexer for the metadata section.
pub mod metadata;

use metadata::Metadata;
use wagon_utils::{ResultNext, Peek, ResultPeek, Spannable};
use logos::{Logos, Span};
use std::{fmt::{self, Display}, collections::VecDeque, error::Error};
use productions::Productions;
use math::Math;
use wagon_ident::Ident;
use replace_with::replace_with_or_abort;

/// An enum for any errors that may occur during lexing.
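///
/// # Example
/// A minimal sketch of how an error displays itself and reports its span:
/// ```rust
/// use wagon_lexer::LexingError;
/// use wagon_utils::Spannable;
///
/// let err = LexingError::UnexpectedCharacter("@".to_string(), 0..1);
/// assert_eq!(err.to_string(), "Encountered unexpected character @");
/// assert_eq!(err.span(), 0..1);
/// ```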
#[derive(Default, Debug, Clone, Eq, PartialEq)]
pub enum LexingError {
	/// Error for any unknown reason. Usually when a character is encountered that cannot be lexed.
	#[default]
	UnknownError,
	/// Error for an unexpected character.
	UnexpectedCharacter(String, Span),
	/// Error when encountering EOF earlier than expected.
	UnexpectedEOF(Span),
	/// Error when parsing an integer.
	ParseIntError(std::num::ParseIntError, Span),
	/// Error when parsing a float.
	ParseFloatError(std::num::ParseFloatError, Span)
}

/// The result of each lex step is either a token or an error.
pub type LexResult = Result<Tokens, LexingError>;

impl Display for LexingError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnknownError => write!(f, "Encountered unknown error!"),
            Self::UnexpectedCharacter(c, _) => write!(f, "Encountered unexpected character {c}"),
            Self::UnexpectedEOF(_) => write!(f, "Got EOF but expected more characters!"),
            Self::ParseIntError(p, _) => p.fmt(f),
            Self::ParseFloatError(p, _) => p.fmt(f),
        }
    }
}

impl Error for LexingError {}

impl Spannable for LexingError {
    fn span(&self) -> Span {
        match self {
            Self::UnknownError => Span::default(),
            Self::UnexpectedCharacter(_, s) | Self::UnexpectedEOF(s) | Self::ParseIntError(_, s) | Self::ParseFloatError(_, s) => s.to_owned(),
        }
    }
}

#[derive(Debug)]
struct MetaLexer<'source> {
	lexer: logos::Lexer<'source, Metadata>,
	peeked: VecDeque<Result<Metadata, LexingError>>
}

/// Peek implemented as a FIFO queue.
///
/// Any time `peek` is called, the main lexer is advanced and the result is stored in the `peeked` queue. If `next` is called,
/// it takes from `peeked` first before taking from the lexer itself.
impl Peek for MetaLexer<'_> {
    fn peek(&mut self) -> Option<&Self::Item> {
        // Advance the underlying lexer and queue the result so `next` can replay it later.
        let item = self.lexer.next()?;
        self.peeked.push_back(item);
        self.peeked.back()
    }

    fn peek_mut(&mut self) -> Option<&mut Self::Item> {
        let item = self.lexer.next()?;
        self.peeked.push_back(item);
        self.peeked.back_mut()
    }
}

impl Iterator for MetaLexer<'_> {
    type Item = Result<Metadata, LexingError>;

    fn next(&mut self) -> Option<Self::Item> {
        // Drain previously peeked tokens before advancing the lexer itself.
        self.peeked.pop_front().or_else(|| self.lexer.next())
    }
}

impl<'source> MetaLexer<'source> {
	const fn new(lexer: logos::Lexer<'source, Metadata>) -> Self {
		Self{lexer, peeked: VecDeque::new()}
	}
}

#[derive(Debug)]
enum Lexer<'source> {
	Productions(logos::Lexer<'source, Productions>),
	Math(logos::Lexer<'source, Math>),
	Metadata(MetaLexer<'source>),
}

impl<'source> Lexer<'source> {
	fn new(s: &'source str) -> Self {
		// Decide which lexer to start in by peeking at the first two tokens through the metadata lexer.
		let mut meta_lexer = MetaLexer::new(Metadata::lexer(s));
		let fst = meta_lexer.peek();
		if matches!(fst, Some(Ok(Metadata::Identifier(_)))) {
			// An identifier followed by a colon is a `key: value` metadata attribute.
			if matches!(meta_lexer.peek(), Some(Ok(Metadata::Colon))) {
				Self::Metadata(meta_lexer)
			} else {
				Self::Productions(Productions::lexer(s))
			}
		} else if !matches!(fst, Some(Err(_))) {
			// Any other token the metadata lexer accepts (such as `include`) also starts a metadata section.
			Self::Metadata(meta_lexer)
		} else {
			// Anything the metadata lexer rejects means the input starts with the grammar itself.
			Self::Productions(Productions::lexer(s))
		}
	}
}

/// An enum that holds the different types of tokens for the different lexers.
#[derive(Debug, PartialEq, Clone)]
pub enum Tokens {
	/// Tokens created by the [Productions] lexer.
	ProductionToken(Productions),
	/// Tokens created by the [Math] lexer.
	MathToken(Math),
	/// Tokens created by the [Metadata] lexer.
	MetadataToken(Metadata)
}

impl fmt::Display for Tokens {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::ProductionToken(t) => t.fmt(f),
            Self::MathToken(t) => t.fmt(f),
            Self::MetadataToken(t) => t.fmt(f)
        }
    }
}

impl Default for Tokens {
    fn default() -> Self {
        Self::ProductionToken(Productions::Identifier(Ident::default()))
    }
}

/// A struct which automatically switches between the different lexers based on context.
#[allow(clippy::option_option)]
pub struct LexerBridge<'source> {
	lexer: Lexer<'source>,
	counter: u16,
	in_meta: bool,
	peeked: Option<Option<<LexerBridge<'source> as IntoIterator>::Item>>
}

impl<'source> LexerBridge<'source> {
	/// Initialize the `LexerBridge`.
	#[must_use] pub fn new(s: &'source str) -> Self {
		let lexer = Lexer::new(s);
		let counter = 0;
		let in_meta = matches!(lexer, Lexer::Metadata(_));
		let peeked = None;
		Self { lexer, counter, in_meta, peeked }
	}

	/// Inspect the part of the input string the lexer is currently at.
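	///
	/// # Example
	/// A small sketch using the grammar from the crate-level example:
	/// ```rust
	/// use wagon_lexer::LexerBridge;
	///
	/// let mut lexer = LexerBridge::new("S -> A;");
	/// lexer.next(); // lex `S`
	/// assert_eq!(lexer.slice(), "S");
	/// ```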
	#[must_use] pub fn slice(&self) -> &str {
		match &self.lexer {
			Lexer::Productions(l) => l.slice(),
			Lexer::Math(l) => l.slice(),
			Lexer::Metadata(l) => l.lexer.slice()
		}
	}

	pub(crate) fn _morph_to_math<Token>(curr: logos::Lexer<'source, Token>) -> Lexer<'source>
	where
		Token: Logos<'source, Source = <Math as Logos<'source>>::Source>,
		Token::Extras: Into<<Math as Logos<'source>>::Extras>,
	{
		Lexer::Math(curr.morph())
	}

	pub(crate) fn _morph_to_productions<Token>(curr: logos::Lexer<'source, Token>) -> Lexer<'source>
	where
		Token: Logos<'source, Source = <Productions as Logos<'source>>::Source>,
		Token::Extras: Into<<Productions as Logos<'source>>::Extras>,
	{
		Lexer::Productions(curr.morph())
	}

	pub(crate) fn _morph_to_metadata<Token>(curr: logos::Lexer<'source, Token>) -> Lexer<'source>
	where
		Token: Logos<'source, Source = <Metadata as Logos<'source>>::Source>,
		Token::Extras: Into<<Metadata as Logos<'source>>::Extras>,
	{
		Lexer::Metadata(MetaLexer::new(curr.morph()))
	}

	/// Make the `LexerBridge` use the [Math] lexer.
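	///
	/// # Example
	/// A sketch of switching manually. This assumes `3 + 4` by itself starts out in the
	/// [Productions] lexer, since it is not a valid metadata header:
	/// ```rust
	/// use wagon_lexer::{LexerBridge, Tokens};
	/// use wagon_lexer::math::Math;
	///
	/// let mut lexer = LexerBridge::new("3 + 4");
	/// lexer.morph_to_math();
	/// assert_eq!(lexer.next(), Some(Ok(Tokens::MathToken(Math::LitInt(3)))));
	/// assert_eq!(lexer.next(), Some(Ok(Tokens::MathToken(Math::Add))));
	/// ```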
	pub fn morph_to_math(&mut self) {
		replace_with_or_abort(&mut self.lexer, |lexer| match lexer {
			Lexer::Productions(prod) => Self::_morph_to_math(prod),
			Lexer::Math(_) => lexer,
			Lexer::Metadata(meta) => Self::_morph_to_math(meta.lexer),
		});
	}

	/// Make the `LexerBridge` use the [Productions] lexer.
	pub fn morph_to_productions(&mut self) {
		replace_with_or_abort(&mut self.lexer, |lexer| match lexer {
			Lexer::Productions(_) => lexer,
			Lexer::Math(math) => Self::_morph_to_productions(math),
			Lexer::Metadata(meta) => Self::_morph_to_productions(meta.lexer),
		});
	}

	/// Make the `LexerBridge` use the [Metadata] lexer.
	pub fn morph_to_metadata(&mut self) {
		replace_with_or_abort(&mut self.lexer, |lexer| match lexer {
			Lexer::Productions(prod) => Self::_morph_to_metadata(prod),
			Lexer::Math(math) => Self::_morph_to_metadata(math),
			Lexer::Metadata(_) => lexer,
		});
	}
}

/// Implemented like [`std::iter::Peekable`].
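///
/// # Example
/// A quick sketch of the contract: peeking buffers a token without consuming it.
/// ```rust
/// use wagon_lexer::LexerBridge;
/// use wagon_utils::Peek;
///
/// let mut lexer = LexerBridge::new("S -> A;");
/// let peeked = lexer.peek().cloned();
/// assert_eq!(peeked, lexer.next());
/// ```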
impl<'source> Peek for LexerBridge<'source> {
    fn peek(&mut self) -> Option<&Self::Item> {
        let next = self.peeked.take().map_or_else(|| self.next(), |x| x);
        self.peeked.get_or_insert(next).as_ref()
    }

    fn peek_mut(&mut self) -> Option<&mut Self::Item> {
        let next = self.peeked.take().map_or_else(|| self.next(), |x| x);
        self.peeked.get_or_insert(next).as_mut()
    }
}

impl<'source> ResultNext<Tokens, LexingError> for LexerBridge<'source> {
    fn next_result(&mut self) -> Result<Tokens, LexingError> {
        match self.next() {
            // Enrich a generic lexing failure with the offending slice and its span.
            Some(Err(LexingError::UnknownError)) => Err(LexingError::UnexpectedCharacter(self.slice().to_string(), self.span())),
            Some(x) => x,
            None => Err(LexingError::UnexpectedEOF(self.span())),
        }
    }
}

impl<'source> ResultPeek<Tokens, LexingError> for LexerBridge<'source> {
    fn peek_result(&mut self) -> Result<&Tokens, LexingError> {
        // Buffer the next token first so that `slice` and `span` refer to the peeked token.
        let next = self.peeked.take().map_or_else(|| self.next(), |x| x);
        let slice = self.slice().to_string();
        let span = self.span();
        let peek = self.peeked.get_or_insert(next).as_ref();
        match peek {
            Some(Err(LexingError::UnknownError)) => Err(LexingError::UnexpectedCharacter(slice, span)),
            Some(Ok(x)) => Ok(x),
            Some(Err(e)) => Err(e.clone()),
            None => Err(LexingError::UnexpectedEOF(span)),
        }
    }
}

impl Spannable for LexerBridge<'_> {
	fn span(&self) -> Span {
		self.lexer.span()
	}
}

impl Spannable for Lexer<'_> {
	fn span(&self) -> Span {
		match self {
			Self::Productions(l) => l.span(),
			Self::Math(l) => l.span(),
			Self::Metadata(l) => l.lexer.span(),
		}
	}
}

trait TypeDetect {
	fn detect(inp: &str, span: Span) -> Result<Self, LexingError> where Self: Sized;
}

impl<'source> Iterator for LexerBridge<'source> {
	type Item = Result<Tokens, LexingError>;

	fn next(&mut self) -> Option<Self::Item> {
		use Tokens::{MathToken, MetadataToken, ProductionToken};
		type MorphFunc<'source> = Option<Box<dyn Fn(&mut LexerBridge<'source>)>>;
		// Return a previously peeked token first, if there is one.
		if let Some(v) = self.peeked.take() {
			v
		} else {
			// Fetch the next token and decide whether the bridge should switch lexers afterwards.
			let (morph_func, item): (MorphFunc, Option<Result<Tokens, LexingError>>) = match &mut self.lexer {
				Lexer::Productions(prod) => {
					let result = prod.next();
					let ret_func: MorphFunc = match result {
						// `[` or `{` opens an expression, which is lexed by the Math lexer.
						Some(Ok(Productions::LBr | Productions::LCur)) => Some(Box::new(Self::morph_to_math)),
						_ => None
					};
					(ret_func, result.map(|prod| prod.map(ProductionToken)))
				},
				Lexer::Math(math) => {
					let result = math.next();
					let ret_func: MorphFunc = match result {
						// Track nested `[` so we only switch back on the matching `]`.
						Some(Ok(Math::LBr)) => {self.counter += 1; None},
						Some(Ok(Math::RCur)) => Some(Box::new(Self::morph_to_productions)),
						Some(Ok(Math::RBr)) if self.counter == 0 => Some(Box::new(Self::morph_to_productions)),
						// A `;` inside the metadata section ends the attribute value.
						Some(Ok(Math::Semi)) if self.in_meta => Some(Box::new(Self::morph_to_metadata)),
						Some(Ok(Math::RBr)) => {self.counter -= 1; None},
						_ => None
					};
					(ret_func, result.map(|math| math.map(MathToken)))
				},
				Lexer::Metadata(meta) => {
					let result = meta.next();
					let ret_func: MorphFunc = match result {
						// The `===` delimiter ends the metadata section for good.
						Some(Ok(Metadata::Delim)) => {
							self.in_meta = false;
							Some(Box::new(Self::morph_to_productions))
						},
						// Attribute values after a `:` are lexed by the Math lexer.
						Some(Ok(Metadata::Colon)) => Some(Box::new(Self::morph_to_math)),
						_ => None
					};
					(ret_func, result.map(|m| m.map(MetadataToken)))
				},
			};
			if let Some(fun) = morph_func {
				fun(self);
			}
			item
		}
	}
}

#[cfg(test)]
/// A helper method to assert that a lexer will encounter a given list of tokens.
///
/// # Panics
/// This assert panics if it reaches the end of the token stream before it is done checking all expected tokens.
pub fn assert_lex<'a, Token>(
    source: &'a Token::Source,
    tokens: &[Result<Token, Token::Error>],
) where
    Token: Logos<'a> + fmt::Debug + PartialEq,
    Token::Extras: Default,
{
    let mut lex = Token::lexer(source);

    for token in tokens {
        assert_eq!(
            &lex.next().expect("Unexpected end"),
            token
        );
    }

    assert_eq!(lex.next(), None);
}

#[cfg(test)]
mod tests {

	use super::{LexerBridge, Tokens, Tokens::*};
	use super::LexingError;
	use super::productions::Productions;
	use super::math::Math;
	use super::metadata::Metadata;
	use super::Ident::*;

	use pretty_assertions::assert_eq;

	#[test]
	fn test_mode_switching() {
		let s = "[3 + 2.3 - &x] 'a thing' X {y = 4 + ($z['key'] < 3);}";
		let lexer = LexerBridge::new(s);
		let results: Vec<Result<Tokens, LexingError>> = lexer.collect();
		let expect = vec![
			Ok(ProductionToken(Productions::LBr)),
			Ok(MathToken(Math::LitInt(3))),
			Ok(MathToken(Math::Add)),
			Ok(MathToken(Math::LitFloat(2.3))),
			Ok(MathToken(Math::Sub)),
			Ok(MathToken(Math::Identifier(Synth("x".to_string())))),
			Ok(MathToken(Math::RBr)),
			Ok(ProductionToken(Productions::LitString("a thing".to_string()))),
			Ok(ProductionToken(Productions::Identifier(Unknown("X".to_string())))),
			Ok(ProductionToken(Productions::LCur)),
			Ok(MathToken(Math::Identifier(Unknown("y".to_string())))),
			Ok(MathToken(Math::Assigns)),
			Ok(MathToken(Math::LitInt(4))),
			Ok(MathToken(Math::Add)),
			Ok(MathToken(Math::LPar)),
			Ok(MathToken(Math::Identifier(Local("z".to_string())))),
			Ok(MathToken(Math::LBr)),
			Ok(MathToken(Math::LitString("key".to_string()))),
			Ok(MathToken(Math::RBr)),
			Ok(MathToken(Math::Lt)),
			Ok(MathToken(Math::LitInt(3))),
			Ok(MathToken(Math::RPar)),
			Ok(MathToken(Math::Semi)),
			Ok(MathToken(Math::RCur))
		];
		assert_eq!(results, expect);
	}

	#[test]
	fn test_mode_switching_complex() {
		let s = r#"
		include some::path;
		left: "right";
		true_val: true;
		===================
		S -> [2] A;
		"#;
		let lexer = LexerBridge::new(s);
		let results: Vec<Result<Tokens, LexingError>> = lexer.collect();
		let expect = vec![
			Ok(MetadataToken(Metadata::Include)),
			Ok(MetadataToken(Metadata::Path("some::path".to_string()))),
			Ok(MetadataToken(Metadata::Semi)),
			Ok(MetadataToken(Metadata::Identifier("left".into()))),
			Ok(MetadataToken(Metadata::Colon)),
			Ok(MathToken(Math::LitString("right".to_string()))),
			Ok(MathToken(Math::Semi)),
			Ok(MetadataToken(Metadata::Identifier("true_val".into()))),
			Ok(MetadataToken(Metadata::Colon)),
			Ok(MathToken(Math::LitBool(true))),
			Ok(MathToken(Math::Semi)),
			Ok(MetadataToken(Metadata::Delim)),
			Ok(ProductionToken(Productions::Identifier(Unknown("S".to_string())))),
			Ok(ProductionToken(Productions::Produce)),
			Ok(ProductionToken(Productions::LBr)),
			Ok(MathToken(Math::LitInt(2))),
			Ok(MathToken(Math::RBr)),
			Ok(ProductionToken(Productions::Identifier(Unknown("A".to_string())))),
			Ok(ProductionToken(Productions::Semi))
		];
		assert_eq!(results, expect);
	}
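
	// The tests below are small additional sketches of the peeking helpers. They
	// assume `left:` lexes to an identifier plus a colon, exactly as it does in
	// `test_mode_switching_complex` above.
	#[test]
	fn test_meta_lexer_peek_is_fifo() {
		use super::{MetaLexer, Peek};
		use logos::Logos;

		let mut lexer = MetaLexer::new(Metadata::lexer("left:"));
		// Each `peek` advances the underlying lexer and queues the result...
		assert_eq!(lexer.peek(), Some(&Ok(Metadata::Identifier("left".into()))));
		assert_eq!(lexer.peek(), Some(&Ok(Metadata::Colon)));
		// ...while `next` drains the queue in FIFO order before touching the lexer again.
		assert_eq!(lexer.next(), Some(Ok(Metadata::Identifier("left".into()))));
		assert_eq!(lexer.next(), Some(Ok(Metadata::Colon)));
		assert_eq!(lexer.next(), None);
	}

	#[test]
	fn test_next_result_on_eof() {
		use wagon_utils::ResultNext;

		// On an exhausted input, `next_result` turns `None` into an `UnexpectedEOF` error.
		let mut lexer = LexerBridge::new("");
		assert!(matches!(lexer.next_result(), Err(LexingError::UnexpectedEOF(_))));
	}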
}