rslua_march1917/
lexer.rs

1use crate::tokens::{Token, TokenType, TokenValue};
2use crate::types::{FloatType, IntType, Number, Source};
3use crate::{debuggable, error, success};
4use std::str;
5
// Mutable cursor state threaded through every reader while scanning a buffer.
struct Context<'a> {
    buffer: &'a str,       // full source text being lexed
    current: usize,        // byte index of the next unread byte in `buffer`
    line: usize,           // current line, 1-based
    col: usize,            // current column, 1-based (counted in bytes)
    old_pos: usize,        // snapshot of `current` taken by save()
    old_line: usize,       // snapshot of `line` taken by save()
    old_col: usize,        // snapshot of `col` taken by save()
    offset: usize,         // index the next emitted token will get
    comment_offset: usize, // token index tracked across comment runs -- see add_token
}
18
19impl<'a> Context<'a> {
20    pub fn new(buffer: &'a str) -> Self {
21        Context::<'a> {
22            buffer,
23            current: 0,
24            line: 1,
25            col: 1,
26            old_pos: 0,
27            old_line: 0,
28            old_col: 0,
29            offset: 0,
30            comment_offset: 0,
31        }
32    }
33
34    pub fn save(&mut self) {
35        self.old_pos = self.current;
36        self.old_col = self.col;
37        self.old_line = self.line;
38    }
39
40    pub fn get_saved_source(&self) -> Source {
41        Source {
42            pos: self.old_pos,
43            length: self.current - self.old_pos,
44            line: self.old_line,
45            col: self.old_col,
46        }
47    }
48
49    pub fn next(&mut self) {
50        self.skip(1);
51    }
52
53    pub fn skip(&mut self, n: usize) {
54        self.current += n;
55        self.col += n;
56    }
57
58    // eat n chars, and write these chars to output
59    pub fn write_into(&mut self, n: usize, output: &mut Vec<u8>) {
60        if let Some(slice) = self.buffer.as_bytes().get(self.current..(self.current + n)) {
61            output.extend_from_slice(slice);
62            self.skip(n);
63        }
64    }
65
66    pub fn inc_line(&mut self) {
67        self.col = 1;
68        self.line += 1;
69    }
70
71    pub fn get(&self) -> Option<u8> {
72        self.get_ahead(0)
73    }
74
75    pub fn get_next(&self) -> Option<u8> {
76        self.get_ahead(1)
77    }
78
79    pub fn get_ahead(&self, index: usize) -> Option<u8> {
80        self.buffer.as_bytes().get(self.current + index).copied()
81    }
82}
83
/// Options controlling how the lexer materializes strings and comments.
pub struct LexerConfig {
    // if use origin string, lexer won't escape special chars and keep the quotes or string boundaries.
    pub use_origin_string: bool,
    // reserve comments or not: when true, comments are emitted as
    // SComment/MComment tokens instead of being discarded
    pub reserve_comments: bool,
}
90
91impl LexerConfig {
92    pub fn default() -> Self {
93        LexerConfig {
94            use_origin_string: false,
95            reserve_comments: false,
96        }
97    }
98}
99
/// Tokenizer: turns source text into a flat `Vec<Token>` via `run`.
pub struct Lexer {
    debug: bool,         // flag consumed by the debuggable!() hook -- presumably enables tracing; confirm
    config: LexerConfig, // string/comment handling options
    tokens: Vec<Token>,  // output of the most recent run()
}
105
// Error produced on invalid input; the message carries line/column info
// (formatted by the lex_error! macro below).
#[derive(Debug)]
pub struct LexError(String);

// A reader either yields a token (type + value), yields nothing (whitespace,
// line breaks, skipped comments), or fails with a LexError.
type LexResult = Result<Option<(TokenType, TokenValue)>, LexError>;
110
// Build and return a LexError whose message includes the current line and
// column. `$self` is the Lexer (needed by the error! helper), `$ctx` the
// lexing Context.
macro_rules! lex_error {
    ($self:ident, $ctx:ident, $msg:expr) => {
        error!(
            $self,
            LexError,
            format!("[lex error] {} at line [{}:{}].", $msg, $ctx.line, $ctx.col)
        )
    };
}
120
121impl<'a> Lexer {
    /// Create a lexer with debugging off and the default configuration.
    pub fn new() -> Self {
        Lexer {
            debug: false,
            tokens: Vec::<Token>::new(),
            config: LexerConfig::default(),
        }
    }

    /// Replace the lexer configuration (string handling / comment retention).
    pub fn set_config(&mut self, config: LexerConfig) {
        self.config = config;
    }
133
    /// Tokenize `input`, returning the full token stream (terminated by an
    /// `Eos` token) or the first lexical error encountered.
    pub fn run(&mut self, input: &'a str) -> Result<&Vec<Token>, LexError> {
        self.reset();
        let mut ctx = Context::new(input);
        loop {
            // remember where this token starts so add_token can record its span
            ctx.save();
            if let Some(c) = ctx.get() {
                // dispatch on the first byte; order matters -- line breaks,
                // spaces and digits must be tried before the fallback reader
                if let Some((token_type, token_value)) = match c {
                    _ if Lexer::is_line_break(c) => self.read_line_break(&mut ctx)?,
                    _ if Lexer::is_space(c) => self.read_space(&mut ctx)?,
                    _ if Lexer::is_digit(c) => self.read_number(&mut ctx)?,
                    b'-' if self.check_next(&ctx, '-') => self.read_comment(&mut ctx)?,
                    b'=' => self.read_eq_assign(&mut ctx)?,
                    b'<' => self.read_le_shl_lt(&mut ctx)?,
                    b'>' => self.read_ge_shr_gt(&mut ctx)?,
                    b'/' if self.check_next(&ctx, '/') => self.read_idiv(&mut ctx)?,
                    b'~' => self.read_ne_xor(&mut ctx)?,
                    b':' => self.read_colon(&mut ctx)?,
                    b'.' => self.read_attr_concat_dots_numbers(&mut ctx)?,
                    b'"' | b'\'' => self.read_short_string(&mut ctx)?,
                    b'[' if self.check_next2(&ctx, '[', '=') => self.read_long_string(&mut ctx)?,
                    _ => self.read_other_tokens(&mut ctx)?,
                } {
                    // readers returning Ok(None) (whitespace, skipped
                    // comments) produce no token
                    self.add_token(&mut ctx, token_type, token_value);
                }
            } else {
                // append eos and return tokens
                self.add_token(&mut ctx, TokenType::Eos, TokenValue::None);
                return Ok(&self.tokens);
            }
        }
    }

    /// Tokens produced by the most recent `run`.
    pub fn tokens(&self) -> &Vec<Token> {
        &self.tokens
    }
169
170    fn read_line_break(&self, ctx: &mut Context) -> LexResult {
171        let old = ctx.get();
172        ctx.next();
173
174        // skip \r\n or \n\r
175        if old != ctx.get() && self.check_current_if(ctx, |c| Lexer::is_line_break(c)) {
176            ctx.next();
177        }
178
179        ctx.inc_line();
180        Ok(None)
181    }
182
183    fn read_space(&self, ctx: &mut Context) -> LexResult {
184        ctx.next();
185        Ok(None)
186    }
187
    /// Read a comment starting at "--": a long (bracketed) comment when a
    /// long bracket follows, otherwise a single-line comment. Yields a token
    /// only when `reserve_comments` is enabled.
    fn read_comment(&mut self, ctx: &mut Context) -> LexResult {
        ctx.skip(2);
        // "--[=*[" introduces a multi-line comment with long-string rules
        let sep_count = self.try_read_long_string_boundary(ctx, b'[');
        if sep_count >= 0 {
            let comment = self.read_long_string_impl(ctx, sep_count as usize, "comment")?;
            if self.config.reserve_comments {
                success!((TokenType::MComment, TokenValue::Str(comment)))
            } else {
                Ok(None)
            }
        } else {
            self.read_short_comment(ctx)
        }
    }

    /// Read a single-line comment body: everything up to (but not including)
    /// the next line break, which is left for the main loop to consume.
    fn read_short_comment(&mut self, ctx: &mut Context) -> LexResult {
        let mut bytes: Vec<u8> = Vec::new();
        while let Some(c) = ctx.get() {
            if Lexer::is_line_break(c) {
                break;
            }
            ctx.write_into(1, &mut bytes);
        }
        if let Ok(comment) = str::from_utf8(&bytes) {
            if self.config.reserve_comments {
                success!((TokenType::SComment, TokenValue::Str(comment.to_string())))
            } else {
                Ok(None)
            }
        } else {
            lex_error!(self, ctx, "invalid single line comment")
        }
    }
221
222    // if next char equals c, return t1, else return t2
223    fn read_token2(
224        &mut self,
225        ctx: &mut Context,
226        c: char,
227        t1: TokenType,
228        t2: TokenType,
229    ) -> LexResult {
230        ctx.next();
231        if self.check_current(ctx, c) {
232            ctx.next();
233            return success!((t1, TokenValue::None));
234        }
235        success!((t2, TokenValue::None))
236    }
237
238    // if next char equals c1, return t1, else if equals t2, return t2, else return t3
239    fn read_token3(
240        &mut self,
241        ctx: &mut Context,
242        c1: char,
243        c2: char,
244        t1: TokenType,
245        t2: TokenType,
246        t3: TokenType,
247    ) -> LexResult {
248        ctx.next();
249        if self.check_current(ctx, c1) {
250            ctx.next();
251            success!((t1, TokenValue::None))
252        } else if self.check_current(ctx, c2) {
253            ctx.next();
254            success!((t2, TokenValue::None))
255        } else {
256            success!((t3, TokenValue::None))
257        }
258    }
259
    /// "==" -> Eq, "=" -> Assign.
    fn read_eq_assign(&mut self, ctx: &mut Context) -> LexResult {
        self.read_token2(ctx, '=', TokenType::Eq, TokenType::Assign)
    }

    /// "<=" -> Le, "<<" -> Shl, "<" -> Lt.
    fn read_le_shl_lt(&mut self, ctx: &mut Context) -> LexResult {
        self.read_token3(ctx, '=', '<', TokenType::Le, TokenType::Shl, TokenType::Lt)
    }

    /// ">=" -> Ge, ">>" -> Shr, ">" -> Gt.
    fn read_ge_shr_gt(&mut self, ctx: &mut Context) -> LexResult {
        self.read_token3(ctx, '=', '>', TokenType::Ge, TokenType::Shr, TokenType::Gt)
    }

    /// "//" -> IDiv; only dispatched when the next char is already known to be '/'.
    fn read_idiv(&mut self, ctx: &mut Context) -> LexResult {
        ctx.skip(2);
        success!((TokenType::IDiv, TokenValue::None))
    }

    /// "~=" -> Ne, "~" -> BXor.
    fn read_ne_xor(&mut self, ctx: &mut Context) -> LexResult {
        self.read_token2(ctx, '=', TokenType::Ne, TokenType::BXor)
    }

    /// "::" -> DbColon, ":" -> Colon.
    fn read_colon(&mut self, ctx: &mut Context) -> LexResult {
        self.read_token2(ctx, ':', TokenType::DbColon, TokenType::Colon)
    }
284
    /// Disambiguate tokens starting with '.': "..." (Dots), ".." (Concat),
    /// a fractional number like ".5", or a lone '.' (Attr).
    fn read_attr_concat_dots_numbers(&mut self, ctx: &mut Context) -> LexResult {
        if self.check_next(ctx, '.') {
            // at least ".."; step onto the second dot
            ctx.next();
            if self.check_next(ctx, '.') {
                // "..." -> consume the remaining two dots
                ctx.skip(2);
                return success!((TokenType::Dots, TokenValue::None));
            } else {
                // ".." -> consume the second dot
                ctx.next();
                return success!((TokenType::Concat, TokenValue::None));
            }
        }
        // ".<digit>" is a number literal with no integer part
        if let Some(c) = ctx.get_next() {
            if Lexer::is_digit(c) {
                return self.read_number(ctx);
            }
        }
        ctx.next();
        success!((TokenType::Attr, TokenValue::None))
    }
304
    /// Read an integer or float literal: decimal or 0x/0X-prefixed hex, with
    /// an optional exponent (e/E for decimal, p/P for hex) and optional
    /// exponent sign. The collected text is validated by str_to_num.
    fn read_number(&mut self, ctx: &mut Context) -> LexResult {
        let mut expo = ('E', 'e');
        let mut num_str: Vec<u8> = Vec::new();
        let mut hex = false;
        if self.check_current(ctx, '0') && self.check_next2(ctx, 'x', 'X') {
            // hex literals use a binary exponent marker instead of e/E
            expo = ('P', 'p');
            ctx.write_into(2, &mut num_str);
            hex = true;
        }
        // accept digits of the active base plus '.'; malformed shapes (e.g.
        // several dots) are rejected later when str_to_num fails to parse
        let is_digit = |c| {
            (hex && Lexer::is_hex_digit(c)) || (!hex && Lexer::is_digit(c)) || (c as char) == '.'
        };
        loop {
            if self.check_current_if(ctx, is_digit) {
                ctx.write_into(1, &mut num_str)
            } else if self.check_current2(ctx, expo.0, expo.1) {
                // exponent marker, optionally followed by a sign
                ctx.write_into(1, &mut num_str);
                if self.check_current2(ctx, '-', '+') {
                    ctx.write_into(1, &mut num_str)
                }
            } else {
                break;
            }
        }
        if let Ok(string) = str::from_utf8(&num_str) {
            let num = Lexer::str_to_num(string);
            match num {
                Number::Int(n) => success!((TokenType::Int, TokenValue::Int(n))),
                Number::Float(n) => success!((TokenType::Flt, TokenValue::Float(n))),
                _ => lex_error!(self, ctx, "malformed number"),
            }
        } else {
            // num_str only ever holds ASCII bytes, so utf8 conversion cannot fail
            unreachable!();
        }
    }
340
341    fn try_read_hexa(&mut self, ctx: &mut Context) -> Option<u8> {
342        ctx.get().filter(|c| Lexer::is_hex_digit(*c)).map(|c| {
343            ctx.next();
344            Lexer::to_hex_digit(c)
345        })
346    }
347
348    fn try_read_hex_esc(&mut self, ctx: &mut Context) -> Result<u8, LexError> {
349        if let Some(p1) = self.try_read_hexa(ctx) {
350            if let Some(p2) = self.try_read_hexa(ctx) {
351                return Ok((p1 << 4) + p2);
352            }
353        }
354        lex_error!(self, ctx, "hexadecimal digit expected")
355    }
356
    /// Read a "\u{XXXX}" escape (cursor is just past the 'u'), appending the
    /// codepoint to `bytes` as UTF-8.
    /// NOTE(review): reaching end of input right after the 'u' returns Ok and
    /// leaves the caller to report the unterminated string.
    fn try_read_utf8_esc(
        &mut self,
        ctx: &mut Context,
        bytes: &mut Vec<u8>,
    ) -> Result<(), LexError> {
        if let Some(c) = ctx.get() {
            if c != b'{' {
                return lex_error!(self, ctx, "missing '{'");
            }
            ctx.next();
            if let Some(c) = self.try_read_hexa(ctx) {
                let mut r = c as u32;
                while let Some(c) = self.try_read_hexa(ctx) {
                    r = (r << 4) + (c as u32);
                    // reject values beyond the Unicode range immediately so
                    // the accumulator cannot overflow
                    if r > 0x10FFFF {
                        return lex_error!(self, ctx, "UTF-8 value too large");
                    }
                }
                if self.check_current(ctx, '}') {
                    if let Some(c) = std::char::from_u32(r) {
                        // encode the codepoint into UTF-8 bytes
                        let mut string = String::new();
                        string.push(c);
                        bytes.append(&mut string.into_bytes());
                        ctx.next();
                    } else {
                        // from_u32 rejects surrogate codepoints
                        return lex_error!(self, ctx, "invalid utf8 codepoint");
                    }
                } else {
                    return lex_error!(self, ctx, "missing '}'");
                }
            } else {
                return lex_error!(self, ctx, "hexadecimal digit expected");
            }
        }
        Ok(())
    }

    /// Read a decimal escape "\ddd" -- one to three digits, the first already
    /// consumed and passed as `first_place`. The value must fit in one byte.
    fn try_read_dec_esc(
        &mut self,
        ctx: &mut Context,
        bytes: &mut Vec<u8>,
        first_place: u8,
    ) -> Result<(), LexError> {
        let mut r: u32 = Lexer::to_digit(first_place) as u32;
        let mut i = 0;
        while let Some(c) = ctx.get() {
            i += 1;
            if i > 2 || !Lexer::is_digit(c) {
                // three digits at most
                break;
            }
            r = r * 10 + Lexer::to_digit(c) as u32;
            ctx.next();
        }
        if r > 0xFF {
            lex_error!(self, ctx, "decimal escape too large")
        } else {
            bytes.push(r as u8);
            Ok(())
        }
    }
418
419    fn try_read_esc(&mut self, ctx: &mut Context, bytes: &mut Vec<u8>) -> Result<(), LexError> {
420        ctx.next();
421        if let Some(next) = ctx.get() {
422            ctx.next();
423            match next {
424                b'n' => bytes.push(b'\n'),
425                b'r' => bytes.push(b'\r'),
426                b't' => bytes.push(b'\t'),
427                b'a' => bytes.push(b'\x07'),
428                b'b' => bytes.push(b'\x08'),
429                b'v' => bytes.push(b'\x0B'),
430                b'f' => bytes.push(b'\x0C'),
431                b'x' => {
432                    let v = self.try_read_hex_esc(ctx)?;
433                    bytes.push(v);
434                }
435                b'u' => self.try_read_utf8_esc(ctx, bytes)?,
436                b'\r' | b'\n' => {
437                    bytes.push(b'\n');
438                    ctx.inc_line();
439                }
440                b'\\' | b'"' | b'\'' => bytes.push(next),
441                b'z' => {
442                    // skip the following span of white-space characters, including line breaks
443                    while let Some(c) = ctx.get() {
444                        match c {
445                            _ if Lexer::is_space(c) => ctx.next(),
446                            _ if Lexer::is_line_break(c) => {
447                                ctx.next();
448                                ctx.inc_line();
449                            }
450                            _ => break,
451                        }
452                    }
453                }
454                _ if Lexer::is_digit(next) => self.try_read_dec_esc(ctx, bytes, next)?,
455                _ => {
456                    return lex_error!(self, ctx, "invalid escape sequence");
457                }
458            }
459        }
460        Ok(())
461    }
462
463    fn read_short_string(&mut self, ctx: &mut Context) -> LexResult {
464        let mut bytes: Vec<u8> = Vec::new();
465        let start = ctx.get();
466        if self.config.use_origin_string {
467            bytes.push(start.unwrap());
468        }
469        ctx.next();
470        let unfinished_error: &'static str = "unfinished string";
471        while ctx.get() != start {
472            match ctx.get() {
473                Some(b'\\') if self.config.use_origin_string => ctx.write_into(2, &mut bytes),
474                Some(b'\\') => self.try_read_esc(ctx, &mut bytes)?,
475                Some(c) => {
476                    if Lexer::is_line_break(c) {
477                        return lex_error!(self, ctx, unfinished_error);
478                    } else {
479                        bytes.push(c);
480                        ctx.next();
481                    }
482                }
483                None => return lex_error!(self, ctx, unfinished_error),
484            }
485        }
486        if self.config.use_origin_string {
487            bytes.push(ctx.get().unwrap());
488        }
489        if let Ok(string) = String::from_utf8(bytes) {
490            ctx.next();
491            success!((TokenType::String, TokenValue::Str(string)))
492        } else {
493            lex_error!(self, ctx, "invalid utf8 string")
494        }
495    }
496
497    // return count of '===' if a long string, otherwise return -1
498    fn try_read_long_string_boundary(&self, ctx: &mut Context, sep: u8) -> i8 {
499        if self.check_current(ctx, sep as char) {
500            let mut sep_count = 0;
501            loop {
502                if let Some(c) = ctx.get_ahead(sep_count + 1) {
503                    match c {
504                        b'=' => sep_count += 1,
505                        _ if c == sep => {
506                            ctx.skip(sep_count + 2);
507                            return sep_count as i8;
508                        }
509                        _ => break,
510                    };
511                }
512            }
513        }
514        -1
515    }
516
    /// Shared body reader for long strings and long comments; the opening
    /// bracket with `sep_count` '='s has already been consumed. Returns the
    /// content (including the brackets when `use_origin_string` is set), or
    /// an error mentioning `sem` ("string"/"comment") if no closer is found.
    fn read_long_string_impl(
        &mut self,
        ctx: &mut Context,
        sep_count: usize,
        sem: &str,
    ) -> Result<String, LexError> {
        let line = ctx.line;
        let mut start = 0;

        if self.config.use_origin_string {
            // include the already-consumed opener "[=*[" in the slice
            start = ctx.current - 2 - sep_count;
        }

        // skip first line break
        if self.check_current_if(ctx, |c| Lexer::is_line_break(c)) {
            self.read_line_break(ctx)?;
        }

        if !self.config.use_origin_string {
            // content starts after the opener (and the optional leading line break)
            start = ctx.current;
        }

        while let Some(c) = ctx.get() {
            match c {
                b']' => {
                    // only a closer with the same separator count ends the literal
                    if self.try_read_long_string_boundary(ctx, b']') == sep_count as i8 {
                        let length;
                        if self.config.use_origin_string {
                            length = ctx.current - start;
                        } else {
                            // exclude the closer, which the boundary reader consumed
                            length = ctx.current - 2 - sep_count - start;
                        }
                        if let Some(slice) = ctx.buffer.get(start..(start + length)) {
                            return Ok(slice.to_string());
                        }
                    } else {
                        ctx.next();
                    }
                }
                _ if Lexer::is_line_break(c) => {
                    self.read_line_break(ctx)?;
                }
                _ => ctx.next(),
            }
        }
        lex_error!(
            self,
            ctx,
            &format!("unfinished long {} (starting at line {})", sem, line)
        )
    }
569
570    fn read_long_string(&mut self, ctx: &mut Context) -> LexResult {
571        let sep_count = self.try_read_long_string_boundary(ctx, b'[');
572        if sep_count >= 0 {
573            let string = self.read_long_string_impl(ctx, sep_count as usize, "string")?;
574            return success!((TokenType::String, TokenValue::Str(string)));
575        }
576        unreachable!()
577    }
578
    /// Fallback reader: single-byte punctuation, then keywords/identifiers.
    fn read_other_tokens(&mut self, ctx: &mut Context) -> LexResult {
        if let Some(c) = ctx.get() {
            // single-byte punctuation tokens
            let token_type = match c {
                b'+' => Some(TokenType::Add),
                b'-' => Some(TokenType::Minus),
                b'*' => Some(TokenType::Mul),
                b'/' => Some(TokenType::Div),
                b'%' => Some(TokenType::Mod),
                b'^' => Some(TokenType::Pow),
                b'#' => Some(TokenType::Len),
                b'&' => Some(TokenType::BAnd),
                b'|' => Some(TokenType::BOr),
                b'(' => Some(TokenType::Lp),
                b')' => Some(TokenType::Rp),
                b'[' => Some(TokenType::Ls),
                b']' => Some(TokenType::Rs),
                b'{' => Some(TokenType::Lb),
                b'}' => Some(TokenType::Rb),
                b';' => Some(TokenType::Semi),
                b',' => Some(TokenType::Comma),
                _ => None,
            };

            if let Some(t) = token_type {
                ctx.next();
                return success!((t, TokenValue::None));
            } else if self.check_current_if(ctx, |c| Lexer::is_valid_name_start(c)) {
                // identifier: collect name chars, then check the keyword table
                let mut word: Vec<u8> = Vec::new();
                ctx.write_into(1, &mut word);
                while self.check_current_if(ctx, |c| Lexer::is_valid_name(c)) {
                    ctx.write_into(1, &mut word);
                }
                if let Ok(s) = str::from_utf8(&word) {
                    if let Some(t) = TokenType::from_keyword(s) {
                        return success!((t, TokenValue::None));
                    } else {
                        return success!((TokenType::Name, TokenValue::Str(s.to_string())));
                    }
                }
            } else {
                return lex_error!(self, ctx, &format!("unknown token near {}", c as char));
            }
        }
        // run() only dispatches here with a current char, and `word` is ASCII
        unreachable!()
    }
624
625    fn reset(&mut self) {
626        self.tokens.clear();
627    }
628
629    fn is_line_break(c: u8) -> bool {
630        match c {
631            b'\r' | b'\n' => true,
632            _ => false,
633        }
634    }
635
636    fn is_space(c: u8) -> bool {
637        match c {
638            b' ' | b'\t' | b'\x0B' | b'\x0C' => true,
639            _ => false,
640        }
641    }
642
643    fn is_digit(c: u8) -> bool {
644        match c {
645            b'0' | b'1' | b'2' | b'3' | b'4' | b'5' | b'6' | b'7' | b'8' | b'9' => true,
646            _ => false,
647        }
648    }
649
650    fn is_hex_digit(c: u8) -> bool {
651        match c {
652            b'a' | b'b' | b'c' | b'd' | b'e' | b'f' | b'A' | b'B' | b'C' | b'D' | b'E' | b'F' => {
653                true
654            }
655            _ if Lexer::is_digit(c) => true,
656            _ => false,
657        }
658    }
659
660    fn is_alpha(c: u8) -> bool {
661        (c as char).is_ascii_alphabetic()
662    }
663
664    fn is_valid_name_start(c: u8) -> bool {
665        Lexer::is_alpha(c) || Lexer::is_digit(c) || c == b'_'
666    }
667
668    fn is_valid_name(c: u8) -> bool {
669        Lexer::is_valid_name_start(c) || Lexer::is_alpha(c)
670    }
671
672    fn to_digit(c: u8) -> u8 {
673        c - b'0'
674    }
675
676    fn to_hex_digit(c: u8) -> u8 {
677        if c >= b'0' && c <= b'9' {
678            return c - b'0';
679        } else {
680            return ((c as char).to_ascii_lowercase() as u8) - b'a' + 10;
681        }
682    }
683
684    fn skip_spaces(bytes: &[u8], i: usize) -> usize {
685        let mut index = i;
686        while index < bytes.len() && bytes[index] == b' ' {
687            index += 1;
688        }
689        index
690    }
691
692    fn starts_with_0x(bytes: &[u8], i: usize) -> bool {
693        bytes.len() > i + 2
694            && bytes[i + 0] == b'0'
695            && (bytes[i + 1] == b'x' || bytes[i + 1] == b'X')
696    }
697
698    // get number sign, return (sign, index)
699    fn get_sign(bytes: &[u8], i: usize) -> (IntType, usize) {
700        let (mut sign, mut index) = (1, i);
701        if index < bytes.len() {
702            if bytes[index] == b'-' {
703                index += 1;
704                sign = -1;
705            } else if bytes[i] == b'+' {
706                index += 1;
707            }
708        }
709        (sign, index)
710    }
711
712    pub fn str_to_int(s: &str) -> Option<IntType> {
713        let bytes = s.as_bytes();
714        let len = bytes.len();
715        let mut r: IntType = 0;
716        let mut i = 0;
717        let mut empty = true;
718        i = Lexer::skip_spaces(bytes, i);
719        let (sign, mut i) = Lexer::get_sign(bytes, i);
720        if Lexer::starts_with_0x(bytes, i) {
721            i += 2;
722            while i < len && Lexer::is_hex_digit(bytes[i]) {
723                r = (r << 4) + (Lexer::to_hex_digit(bytes[i]) as IntType);
724                i += 1;
725                empty = false;
726            }
727        } else {
728            while i < len && Lexer::is_digit(bytes[i]) {
729                r = r * 10 + (Lexer::to_digit(bytes[i]) as IntType);
730                i += 1;
731                empty = false;
732            }
733        }
734        i = Lexer::skip_spaces(bytes, i);
735        if empty || i != len {
736            None
737        } else {
738            Some((r as IntType) * sign)
739        }
740    }
741
742    pub fn str_to_float(s: &str) -> Option<FloatType> {
743        let bytes = s.as_bytes();
744        let mut i = 0;
745        i = Lexer::skip_spaces(bytes, i);
746        if Lexer::starts_with_0x(bytes, i) {
747            Lexer::str_to_hex_float(&bytes[2..])
748        } else {
749            match s.parse::<FloatType>() {
750                Ok(f) => Some(f),
751                Err(_e) => None,
752            }
753        }
754    }
755
756    pub fn str_to_hex_float(bytes: &[u8]) -> Option<FloatType> {
757        let (sign, mut i) = Lexer::get_sign(bytes, 0);
758        let mut has_dot = false;
759        let mut e: IntType = 0;
760        let mut r = 0.0;
761        let mut empty = true;
762        while i < bytes.len() {
763            match bytes[i] {
764                b'.' => {
765                    if has_dot {
766                        // can't have more than one dot.
767                        return None;
768                    } else {
769                        has_dot = true;
770                    }
771                }
772                _ if Lexer::is_hex_digit(bytes[i]) => {
773                    r = r * 16.0 + Lexer::to_hex_digit(bytes[i]) as FloatType;
774                    if has_dot {
775                        e -= 1;
776                    }
777                    empty = false;
778                }
779                _ => break,
780            }
781            i += 1;
782        }
783        e *= 4;
784        if i < bytes.len() && (bytes[i] == b'p' || bytes[i] == b'P') {
785            i += 1;
786            let (esign, mut index) = Lexer::get_sign(bytes, i);
787            let mut exp_value = 0;
788            let mut exp_empty = true;
789            while index < bytes.len() {
790                if Lexer::is_digit(bytes[index]) {
791                    exp_empty = false;
792                    exp_value = exp_value * 10 + Lexer::to_digit(bytes[index]) as IntType;
793                } else {
794                    break;
795                }
796                index += 1;
797            }
798            if exp_empty {
799                return None;
800            }
801            e += exp_value * esign;
802            i = index;
803        }
804        r = r * (2 as FloatType).powf(e as FloatType);
805        Lexer::skip_spaces(bytes, i);
806        if empty || i != bytes.len() {
807            None
808        } else {
809            Some(r * (sign as FloatType))
810        }
811    }
812
813    fn str_to_num(s: &str) -> Number {
814        if let Some(i) = Lexer::str_to_int(s) {
815            Number::Int(i)
816        } else if let Some(f) = Lexer::str_to_float(s) {
817            Number::Float(f)
818        } else {
819            Number::None
820        }
821    }
822
823    fn check(&self, src: Option<u8>, target: char) -> bool {
824        match src {
825            Some(c) => c as char == target,
826            None => false,
827        }
828    }
829
830    fn check_if(&self, src: Option<u8>, f: impl Fn(u8) -> bool) -> bool {
831        match src {
832            Some(c) => f(c),
833            None => false,
834        }
835    }
836
837    fn check_current(&self, ctx: &Context, c: char) -> bool {
838        self.check(ctx.get(), c)
839    }
840
841    fn check_current2(&self, ctx: &Context, c1: char, c2: char) -> bool {
842        self.check(ctx.get(), c1) || self.check(ctx.get(), c2)
843    }
844
845    fn check_current_if(&self, ctx: &Context, f: impl Fn(u8) -> bool) -> bool {
846        self.check_if(ctx.get(), f)
847    }
848
849    fn check_next(&self, ctx: &Context, c: char) -> bool {
850        self.check(ctx.get_next(), c)
851    }
852
853    fn check_next2(&self, ctx: &Context, c1: char, c2: char) -> bool {
854        self.check(ctx.get_next(), c1) || self.check(ctx.get_next(), c2)
855    }
856
    /// Append a token whose source span was captured by the last `ctx.save()`.
    fn add_token(&mut self, ctx: &mut Context, t: TokenType, value: TokenValue) {
        let source = ctx.get_saved_source();
        self.tokens.push(Token {
            t,
            value,
            source,
            offset: ctx.offset,
            comment_offset: ctx.comment_offset,
        });
        ctx.offset += 1;
        // comment_offset only advances past non-comment tokens, so it lags
        // behind `offset` while comment tokens are emitted -- presumably each
        // token records the index of the comment run preceding it; confirm
        // against the consumers of Token::comment_offset
        if t != TokenType::SComment && t != TokenType::MComment {
            ctx.comment_offset = ctx.offset;
        }
    }
871
872    debuggable!();
873}