sphinx/lexer/rules/
literals.rs

1use core::str::FromStr;
2use crate::language;
3use crate::lexer::Token;
4use crate::lexer::rules::{MatchResult, LexerRule, WordChar, TokenError};
5use crate::lexer::rules::strmatcher::StrMatcher;
6
7pub mod string;
8
9// Identifiers
10
/// Lexer rule state for identifiers: holds the characters accepted so far.
#[derive(Clone)]
pub struct IdentifierRule {
    /// Text of the identifier matched up to this point.
    buf: String,
}

impl IdentifierRule {
    /// Creates a rule with an empty buffer.
    pub fn new() -> Self {
        Self { buf: String::new() }
    }
}

impl Default for IdentifierRule {
    /// Same as [`IdentifierRule::new`].
    fn default() -> Self {
        IdentifierRule::new()
    }
}
25
26// Identifiers are ( :alphanumeric: | '_' ), first character cannot be a digit
27impl LexerRule for IdentifierRule {
28    fn reset(&mut self) {
29        self.buf.clear();
30    }
31    
32    fn current_state(&self) -> MatchResult { 
33        if self.buf.is_empty() {
34            MatchResult::IncompleteMatch
35        } else {
36            MatchResult::CompleteMatch
37        }
38    }
39    
40    fn try_match(&mut self, prev: Option<char>, next: char) -> MatchResult {
41        
42        let valid;
43        if self.buf.is_empty() {
44            let at_word_start = prev.map(|c| !c.is_word_ascii_alphanumeric()).unwrap_or(true);
45            valid = at_word_start && next.is_word_ascii_alphabetic();
46        } else {
47            valid = next.is_word_ascii_alphanumeric();
48        }
49        
50        if valid {
51            self.buf.push(next);
52            
53            MatchResult::CompleteMatch
54        } else {
55            MatchResult::NoMatch
56        }
57    }
58    
59    fn get_token(&self) -> Result<Token, TokenError> {
60        debug_assert!(self.current_state().is_complete_match());
61        Ok(Token::Identifier(self.buf.clone()))
62    }
63}
64
65#[derive(Clone)]
66pub struct LabelRule {
67    buf: String,
68    prefix: StrMatcher<'static>,
69}
70
71impl LabelRule {
72    pub fn new(prefix: &'static str) -> Self {
73        LabelRule {
74            buf: String::new(),
75            prefix: StrMatcher::case_sensitive(prefix),
76        }
77    }
78}
79
80impl LexerRule for LabelRule {
81    fn reset(&mut self) {
82        self.buf.clear();
83        self.prefix.reset();
84    }
85    
86    fn current_state(&self) -> MatchResult {
87        let match_result = self.prefix.last_match_result();
88        
89        if !match_result.is_complete_match() {
90            match_result
91        } else if self.buf.is_empty() {
92            MatchResult::IncompleteMatch
93        } else {
94            MatchResult::CompleteMatch
95        }
96    }
97    
98    fn try_match(&mut self, prev: Option<char>, next: char) -> MatchResult {
99        // don't match if the last char was word alphanumeric
100        let at_word_start = prev.map(|c| !c.is_word_ascii_alphanumeric()).unwrap_or(true);
101        
102        if self.buf.is_empty() && self.prefix.count() == 0 && !at_word_start {
103            return MatchResult::NoMatch;
104        }
105        
106        if !self.prefix.last_match_result().is_complete_match() {
107            let match_result = self.prefix.try_match(next);
108            if match_result.is_complete_match() {
109                return MatchResult::IncompleteMatch;
110            }
111            return match_result;
112        }
113        
114        if next.is_word_ascii_alphanumeric() {
115            self.buf.push(next);
116            
117            MatchResult::CompleteMatch
118        } else {
119            MatchResult::NoMatch
120        }
121    }
122    
123    fn get_token(&self) -> Result<Token, TokenError> {
124        debug_assert!(self.current_state().is_complete_match());
125        Ok(Token::Label(self.buf.clone()))
126    }
127}
128
129// Plain Integer Literals
130
/// Lexer rule state for unprefixed integer literals.
#[derive(Clone)]
pub struct IntegerLiteralRule {
    /// Digits accepted so far.
    buf: String,
}

impl IntegerLiteralRule {
    /// Creates a rule with an empty digit buffer.
    pub fn new() -> Self {
        Self { buf: String::new() }
    }
}

impl Default for IntegerLiteralRule {
    /// Same as [`IntegerLiteralRule::new`].
    fn default() -> Self {
        IntegerLiteralRule::new()
    }
}
145
146impl LexerRule for IntegerLiteralRule {
147    fn reset(&mut self) {
148        self.buf.clear();
149    }
150    
151    fn current_state(&self) -> MatchResult {
152        if self.buf.is_empty() {
153            MatchResult::IncompleteMatch
154        } else {
155            MatchResult::CompleteMatch
156        }
157    }
158    
159    fn try_match(&mut self, prev: Option<char>, next: char) -> MatchResult {
160        if self.buf.is_empty() && matches!(prev, Some(c) if c.is_ascii_digit()) {
161            return MatchResult::NoMatch;
162        }
163        
164        if next.is_ascii_digit() {
165            self.buf.push(next);
166            
167            MatchResult::CompleteMatch
168        } else {
169            MatchResult::NoMatch
170        }
171    }
172    
173    fn get_token(&self) -> Result<Token, TokenError> {
174        debug_assert!(self.current_state().is_complete_match());
175        
176        let conversion = language::IntType::from_str_radix(self.buf.as_str(), 10);
177        match conversion {
178            Ok(value) => Ok(Token::IntegerLiteral(value)),
179            
180            // most likely the value overflowed language::IntType
181            Err(err) => Err(Box::new(err)),
182        }
183    }
184    
185}
186
187#[derive(Clone)]
188pub struct PrefixedIntegerLiteralRule {
189    buf: String,
190    prefix: StrMatcher<'static>,
191    radix: u32,
192}
193
194impl PrefixedIntegerLiteralRule {
195    pub fn new(prefix: &'static str, radix: u32) -> Self {
196        PrefixedIntegerLiteralRule {
197            buf: String::new(),
198            prefix: StrMatcher::ascii_case_insensitive(prefix),
199            radix,
200        }
201    }
202}
203
204impl LexerRule for PrefixedIntegerLiteralRule {
205    fn reset(&mut self) {
206        self.buf.clear();
207        self.prefix.reset();
208    }
209    
210    fn current_state(&self) -> MatchResult {
211        if self.buf.is_empty() {
212            MatchResult::IncompleteMatch
213        } else {
214            self.prefix.last_match_result()
215        }
216    }
217    
218    fn try_match(&mut self, prev: Option<char>, next: char) -> MatchResult {
219        if self.buf.is_empty() && self.prefix.count() == 0 && matches!(prev, Some(c) if c.is_ascii_digit()) {
220            return MatchResult::NoMatch;
221        }
222        
223        if !self.prefix.last_match_result().is_complete_match() {
224            return self.prefix.try_match(next);
225        }
226        
227        if next.is_ascii_hexdigit() {
228            self.buf.push(next);
229            
230            MatchResult::CompleteMatch
231        } else {
232            MatchResult::NoMatch
233        }
234    }
235    
236    fn get_token(&self) -> Result<Token, TokenError> {
237        debug_assert!(self.current_state().is_complete_match());
238        
239        let conversion = language::IntType::from_str_radix(self.buf.as_str(), self.radix);
240        match conversion {
241            Ok(value) => Ok(Token::IntegerLiteral(value)),
242            
243            // most likely the value overflowed language::IntType
244            Err(err) => Err(Box::new(err)),
245        }
246    }
247    
248}
249
250// Floating-Point Literals
251
/// Lexer rule state for floating-point literals.
#[derive(Clone)]
pub struct FloatLiteralRule {
    /// Characters accepted so far.
    buf: String,
    /// Set once a `.` has been accepted.
    point: bool,
    /// Set once an `e`/`E` has been accepted.
    exp: bool,
    /// Most recently accepted character, if any.
    last: Option<char>,
}

impl FloatLiteralRule {
    /// Creates a rule with an empty buffer and no point or exponent seen.
    pub fn new() -> Self {
        FloatLiteralRule {
            last: None,
            exp: false,
            point: false,
            buf: String::new(),
        }
    }
}

impl Default for FloatLiteralRule {
    /// Same as [`FloatLiteralRule::new`].
    fn default() -> Self {
        FloatLiteralRule::new()
    }
}
274
275impl LexerRule for FloatLiteralRule {
276    fn reset(&mut self) {
277        self.buf.clear();
278        self.point = false;
279        self.exp = false;
280        self.last = None;
281    }
282    
283    fn current_state(&self) -> MatchResult {
284        if self.buf.is_empty() || matches!(self.last, Some('e' | 'E')) {
285            MatchResult::IncompleteMatch
286        } else {
287            MatchResult::CompleteMatch
288        }
289    }
290    
291    fn try_match(&mut self, prev: Option<char>, next: char) -> MatchResult {
292        if self.buf.is_empty() && (matches!(prev, Some(c) if c.is_ascii_digit()) || matches!(prev, Some('e' | 'E' | '.'))) {
293            return MatchResult::NoMatch;
294        }
295        
296        if next == '.' {
297            if self.point || self.exp {
298                return MatchResult::NoMatch;
299            }
300            
301            self.point = true;
302            self.buf.push(next);
303            self.last = Some(next);
304            return MatchResult::CompleteMatch;
305        }
306        
307        if matches!(next, 'e' | 'E') {
308            if self.exp {
309                return MatchResult::NoMatch;
310            }
311            
312            self.exp = true;
313            self.buf.push(next);
314            self.last = Some(next);
315            return MatchResult::IncompleteMatch;
316        }
317        
318        if next.is_ascii_digit() {
319            self.buf.push(next);
320            self.last = Some(next);
321            MatchResult::CompleteMatch
322        } else {
323            MatchResult::NoMatch
324        }
325    }
326    
327    fn get_token(&self) -> Result<Token, TokenError> {
328        debug_assert!(self.current_state().is_complete_match());
329        
330        let conversion = language::FloatType::from_str(self.buf.as_str());
331        match conversion {
332            Ok(value) => Ok(Token::FloatLiteral(value)),
333            
334            // most likely the value overflowed language::IntType
335            Err(err) => Err(Box::new(err)),
336        }
337    }
338    
339}