sphinx/lexer/
rules.rs

1pub mod strmatcher;
2pub mod general;
3pub mod literals;
4pub mod keywords;
5pub mod comments;
6
7pub use general::*;
8
9
10use std::error::Error;
11use crate::lexer::Token;
12
13// Helpers
14
15// "word" characters are the alphabetic/alphanumeric chars + '_' (underscore)
/// Classification of "word" characters: letters/digits plus `_` (underscore).
///
/// The Unicode-aware predicates come first, followed by their ASCII-only
/// counterparts.
trait WordChar {
    /// `true` for `_` or an alphabetic character.
    fn is_word_alphabetic(&self) -> bool;

    /// `true` for `_` or an alphabetic/numeric character.
    fn is_word_alphanumeric(&self) -> bool;

    /// `true` for `_` or an ASCII alphabetic character.
    fn is_word_ascii_alphabetic(&self) -> bool;

    /// `true` for `_` or an ASCII alphabetic/numeric character.
    fn is_word_ascii_alphanumeric(&self) -> bool;
}
22
23impl WordChar for char {
24    fn is_word_alphanumeric(&self) -> bool {
25        *self == '_' || self.is_alphanumeric()
26    }
27
28    fn is_word_alphabetic(&self) -> bool {
29        *self == '_' || self.is_alphabetic()
30    }
31
32    fn is_word_ascii_alphanumeric(&self) -> bool {
33        *self == '_' || self.is_ascii_alphanumeric()
34    }
35
36    fn is_word_ascii_alphabetic(&self) -> bool {
37        *self == '_' || self.is_ascii_alphabetic()
38    }
39}
40
41// Match Result
42
/// The state of a lexer rule with respect to the characters it has been given.
///
/// Derives `Debug`, `PartialEq` and `Eq` in addition to `Clone`/`Copy` so that
/// states can be printed in diagnostics and compared directly (e.g. in
/// `assert_eq!`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MatchResult {
    /// Has not consumed enough characters to produce a valid token, but could
    /// if given further correct input.
    IncompleteMatch,

    /// Has consumed enough characters to produce a valid token and may still
    /// accept further correct input. A rule should either remain in this state
    /// or drop to [`MatchResult::NoMatch`] if given incorrect input.
    CompleteMatch,

    /// Not a match for the characters that have been given. A rule should
    /// remain in this state until it is reset.
    NoMatch,
}
55
56impl MatchResult {
57    pub fn is_match(&self) -> bool {
58        match self {
59            MatchResult::IncompleteMatch | MatchResult::CompleteMatch => true,
60            MatchResult::NoMatch => false,
61        }
62    }
63    
64    pub fn is_complete_match(&self) -> bool { matches!(self, MatchResult::CompleteMatch) }
65    
66    pub fn is_incomplete_match(&self) -> bool { matches!(self, MatchResult::IncompleteMatch) }
67}
68
69// Lexer Rules
/// Type-erased, boxed error used as the failure type of `LexerRule::get_token`.
/// (`Box<dyn Error>` already implies the `'static` bound.)
type TokenError = Box<dyn Error>;
71
72pub trait LexerRule: __LexerRule_Clone {
73    fn reset(&mut self);
74    
75    fn current_state(&self) -> MatchResult;
76    
77    // like feed, but only modifies the LexerRule state if would match
78    // return the match state if ch was passed to feed()
79    fn try_match(&mut self, prev: Option<char>, next: char) -> MatchResult;
80    
81    // should always panic if current_state() is not MatchResult::CompleteMatch
82    // and produce an error if the Token could not be produced for some other reason
83    // e.g. attempting to read an integer literal that overflows
84    fn get_token(&self) -> Result<Token, TokenError>;
85}
86
87
88// In order to use LexerBuilder as a Lexer factory, we need to be able to clone LexerRules.
89// Since each LexerRule is actually a state machine, and in fact likely constitutes most of the state
90// used by a Lexer, each new Lexer must own its own LexerRules.
91
// Unfortunately, `Clone` cannot simply be made a supertrait: `Clone::clone()` returns `Self`,
// and a trait that requires it is not object safe, so `dyn LexerRule` could not exist.
// However, we can work around this to make Box<dyn LexerRule> cloneable...
94
95#[allow(non_camel_case_types)]
96pub trait __LexerRule_Clone {
97    fn __clone_box(&self) -> Box<dyn LexerRule>;
98}
99
100impl<T> __LexerRule_Clone for T where T: 'static + LexerRule + Clone {
101    fn __clone_box(&self) -> Box<dyn LexerRule> { Box::new(self.clone()) }
102}
103
104impl Clone for Box<dyn LexerRule> {
105    fn clone(&self) -> Box<dyn LexerRule> { self.__clone_box() }
106}