lib_ruby_parser/lexer/
parse_numeric.rs

1use crate::lexer::TokAdd;
2use crate::maybe_byte::*;
3use crate::source::buffer::*;
4use crate::Lexer;
5use crate::TokenBuf;
6use crate::{lex_states::*, DiagnosticMessage};
7
/// Bit flag recorded when a numeric literal carries the `r` suffix.
const NUM_SUFFIX_R: i8 = 0b01;
/// Bit flag recorded when a numeric literal carries the `i` suffix.
const NUM_SUFFIX_I: i8 = 0b10;
/// Mask that permits both the `r` and the `i` suffix.
const NUM_SUFFIX_ALL: i8 = NUM_SUFFIX_R | NUM_SUFFIX_I;
11
12impl Lexer {
13    pub(crate) fn parse_numeric(&mut self, prefix: u8) -> i32 {
14        let mut c = MaybeByte::new(prefix);
15
16        let mut is_float: bool = false;
17        let mut seen_point: Option<usize> = None;
18        let mut seen_e: bool = false;
19        let mut nondigit: Option<MaybeByte> = None;
20        let suffix: i8;
21
22        self.lex_state.set(EXPR_END);
23        self.newtok();
24        if c == b'-' || c == b'+' {
25            self.tokadd(c);
26            c = self.nextc();
27        }
28        if c == b'0' {
29            let start = self.toklen();
30            c = self.nextc();
31            if c == b'x' || c == b'X' {
32                // hexadecimal
33                self.tokadd(c);
34                c = self.nextc();
35                if !c.is_eof() && c.is_hexdigit() {
36                    loop {
37                        if c == b'_' {
38                            if nondigit.is_some() {
39                                break;
40                            }
41                            nondigit = Some(c);
42                            self.tokadd(c);
43                            c = self.nextc();
44                            if c.is_eof() {
45                                break;
46                            }
47                            continue;
48                        }
49                        if !c.is_hexdigit() {
50                            break;
51                        }
52                        nondigit = None;
53                        self.tokadd(c);
54
55                        c = self.nextc();
56                        if c.is_eof() {
57                            break;
58                        }
59                    }
60                }
61                self.buffer.pushback(c);
62                self.tokfix();
63                if self.toklen() == start + 1 {
64                    return self.no_digits();
65                } else if let Some(MaybeByte::Some(byte)) = nondigit {
66                    return self.trailing_uc(byte);
67                }
68                suffix = self.number_literal_suffix(NUM_SUFFIX_ALL);
69                let mut tok = self.tokenbuf.take();
70                tok.prepend(b"0");
71                return self.set_integer_literal(&mut tok, suffix);
72            }
73            if c == b'b' || c == b'B' {
74                // binary
75                self.tokadd(c);
76                c = self.nextc();
77                if c == b'0' || c == b'1' {
78                    loop {
79                        if c == b'_' {
80                            if nondigit.is_some() {
81                                break;
82                            }
83                            nondigit = Some(c);
84                            self.tokadd(c);
85                            c = self.nextc();
86                            if c.is_eof() {
87                                break;
88                            }
89                            continue;
90                        }
91                        if c != b'0' && c != b'1' {
92                            break;
93                        }
94                        nondigit = None;
95                        self.tokadd(c);
96
97                        c = self.nextc();
98                        if c.is_eof() {
99                            break;
100                        }
101                    }
102                }
103                self.buffer.pushback(c);
104                self.tokfix();
105                if self.toklen() == start + 1 {
106                    return self.no_digits();
107                } else if let Some(MaybeByte::Some(byte)) = nondigit {
108                    return self.trailing_uc(byte);
109                }
110                suffix = self.number_literal_suffix(NUM_SUFFIX_ALL);
111                let mut tok = self.tokenbuf.take();
112                tok.prepend(b"0");
113                return self.set_integer_literal(&mut tok, suffix);
114            }
115            if c == b'd' || c == b'D' {
116                // decimal
117                self.tokadd(c);
118                c = self.nextc();
119                if !c.is_eof() && c.is_digit() {
120                    loop {
121                        if c == b'_' {
122                            if nondigit.is_some() {
123                                break;
124                            }
125                            nondigit = Some(c);
126                            self.tokadd(c);
127                            c = self.nextc();
128                            if c.is_eof() {
129                                break;
130                            }
131                            continue;
132                        }
133                        if !c.is_digit() {
134                            break;
135                        }
136                        nondigit = None;
137                        self.tokadd(c);
138
139                        c = self.nextc();
140                        if c.is_eof() {
141                            break;
142                        }
143                    }
144                }
145                self.buffer.pushback(c);
146                self.tokfix();
147                if self.toklen() == start + 1 {
148                    return self.no_digits();
149                } else if let Some(MaybeByte::Some(byte)) = nondigit {
150                    return self.trailing_uc(byte);
151                }
152                suffix = self.number_literal_suffix(NUM_SUFFIX_ALL);
153                let mut tok = self.tokenbuf.take();
154                tok.prepend(b"0");
155                return self.set_integer_literal(&mut tok, suffix);
156            }
157            if c == b'_' {
158                // 0_0
159                if let Some(result) = self.parse_octal(&mut c, &mut nondigit, start) {
160                    return result;
161                }
162            }
163            if c == b'o' || c == b'O' {
164                self.tokadd(c);
165                // prefixed octal
166                c = self.nextc();
167                if c.is_eof() || c == b'_' || !c.is_digit() {
168                    return self.no_digits();
169                }
170            }
171            // `c` here is a MaybeByte that implements PartialOrd<u8>
172            #[allow(clippy::manual_range_contains)]
173            if c >= b'0' && c <= b'7' {
174                // octal
175                if let Some(result) = self.parse_octal(&mut c, &mut nondigit, start) {
176                    return result;
177                }
178            }
179            if c > b'7' && c <= b'9' {
180                self.invalid_octal();
181            } else if c == b'.' || c == b'e' || c == b'E' {
182                self.tokadd(b'0');
183            } else {
184                self.buffer.pushback(c);
185                suffix = self.number_literal_suffix(NUM_SUFFIX_ALL);
186
187                let mut tok = self.tokenbuf.take();
188                tok.push(b'0');
189                return self.set_integer_literal(&mut tok, suffix);
190            }
191        }
192
193        loop {
194            match c.as_option() {
195                Some(b'0') | Some(b'1') | Some(b'2') | Some(b'3') | Some(b'4') | Some(b'5')
196                | Some(b'6') | Some(b'7') | Some(b'8') | Some(b'9') => {
197                    nondigit = None;
198                    self.tokadd(c);
199                }
200
201                Some(b'.') => {
202                    if let Some(MaybeByte::Some(byte)) = nondigit {
203                        return self.trailing_uc(byte);
204                    }
205                    if seen_point.is_some() || seen_e {
206                        return self.decode_num(c, nondigit, is_float, seen_e);
207                    } else {
208                        let c0 = self.nextc();
209                        if c.is_eof() || !c0.is_digit() {
210                            self.buffer.pushback(c0);
211                            return self.decode_num(c, nondigit, is_float, seen_e);
212                        }
213                        c = c0;
214                    }
215                    seen_point = Some(self.toklen());
216                    self.tokadd(b'.');
217                    self.tokadd(c);
218                    is_float = true;
219                    nondigit = None;
220                }
221
222                Some(b'e') | Some(b'E') => {
223                    if let Some(nondigit_value) = &nondigit {
224                        self.buffer.pushback(c);
225                        c = *nondigit_value;
226                        return self.decode_num(c, nondigit, is_float, seen_e);
227                    }
228                    if seen_e {
229                        return self.decode_num(c, nondigit, is_float, seen_e);
230                    }
231                    nondigit = Some(c);
232                    c = self.nextc();
233                    if c != b'-' && c != b'+' && !c.is_digit() {
234                        self.buffer.pushback(c);
235                        nondigit = None;
236                        return self.decode_num(c, nondigit, is_float, seen_e);
237                    }
238                    self.tokadd(nondigit.expect("nondigit must be set"));
239                    seen_e = true;
240                    is_float = true;
241                    self.tokadd(c);
242                    nondigit = if c == b'-' || c == b'+' {
243                        Some(c)
244                    } else {
245                        None
246                    };
247                }
248
249                Some(b'_') => {
250                    self.tokadd(c);
251                    if nondigit.is_some() {
252                        return self.decode_num(c, nondigit, is_float, seen_e);
253                    }
254                    nondigit = Some(c);
255                }
256
257                _ => return self.decode_num(c, nondigit, is_float, seen_e),
258            }
259
260            c = self.nextc();
261        }
262    }
263
264    fn parse_octal(
265        &mut self,
266        c: &mut MaybeByte,
267        nondigit: &mut Option<MaybeByte>,
268        start: usize,
269    ) -> Option<i32> {
270        loop {
271            if *c == b'_' {
272                if nondigit.is_some() {
273                    break;
274                }
275                *nondigit = Some(*c);
276                self.tokadd(*c);
277                *c = self.nextc();
278                if c.is_eof() {
279                    break;
280                }
281                continue;
282            }
283            if *c < b'0' || *c > b'9' {
284                break;
285            }
286            if *c > b'7' {
287                self.invalid_octal();
288                return None;
289            }
290            *nondigit = None;
291            self.tokadd(*c);
292
293            *c = self.nextc();
294            if c.is_eof() {
295                break;
296            }
297        }
298
299        if self.toklen() > start {
300            self.buffer.pushback(c);
301            self.tokfix();
302            if let Some(MaybeByte::Some(byte)) = nondigit {
303                return Some(self.trailing_uc(*byte));
304            }
305            let suffix = self.number_literal_suffix(NUM_SUFFIX_ALL);
306            let mut tok = self.tokenbuf.take();
307            tok.prepend(b"0");
308            return Some(self.set_integer_literal(&mut tok, suffix));
309        }
310        if let Some(MaybeByte::Some(byte)) = nondigit {
311            self.buffer.pushback(c);
312            return Some(self.trailing_uc(*byte));
313        }
314
315        None
316    }
317
318    fn invalid_octal(&mut self) -> i32 {
319        self.yyerror0(DiagnosticMessage::InvalidOctalDigit {});
320        Self::END_OF_INPUT
321    }
322
323    fn trailing_uc(&mut self, nondigit: u8) -> i32 {
324        self.literal_flush(self.buffer.pcur - 1);
325        self.yyerror0(DiagnosticMessage::TrailingCharInNumber { c: nondigit });
326        Self::END_OF_INPUT
327    }
328
329    fn decode_num(
330        &mut self,
331        c: MaybeByte,
332        nondigit: Option<MaybeByte>,
333        is_float: bool,
334        seen_e: bool,
335    ) -> i32 {
336        self.buffer.pushback(c);
337        if let Some(MaybeByte::Some(byte)) = nondigit {
338            self.trailing_uc(byte);
339        }
340        self.parse_numeric_footer(is_float, seen_e)
341    }
342
343    fn parse_numeric_footer(&mut self, is_float: bool, seen_e: bool) -> i32 {
344        self.tokfix();
345        if is_float {
346            let mut token_type: i32 = Self::tFLOAT;
347
348            let suffix =
349                self.number_literal_suffix(if seen_e { NUM_SUFFIX_I } else { NUM_SUFFIX_ALL });
350            let mut tokenbuf = if (suffix & NUM_SUFFIX_R) != 0 {
351                let mut value = self.tokenbuf.take();
352                value.push(b'r');
353                token_type = Self::tRATIONAL;
354                value
355            } else {
356                self.tokenbuf.take()
357            };
358            // we don't parse the number
359            return self.set_number_literal(&mut tokenbuf, token_type, suffix);
360        }
361        let suffix = self.number_literal_suffix(NUM_SUFFIX_ALL);
362        let mut tokenbuf = self.tokenbuf.take();
363        self.set_integer_literal(&mut tokenbuf, suffix)
364    }
365
366    fn set_number_literal(&mut self, value: &mut TokenBuf, token_type: i32, suffix: i8) -> i32 {
367        let mut token_type = token_type;
368        if suffix & NUM_SUFFIX_I != 0 {
369            value.push(b'i');
370            token_type = Self::tIMAGINARY;
371        }
372        self.set_yylval_literal(value);
373        self.lex_state.set(EXPR_END);
374        token_type
375    }
376
377    fn no_digits(&mut self) -> i32 {
378        self.yyerror0(DiagnosticMessage::NumericLiteralWithoutDigits {});
379        if self.buffer.peek(b'_') {
380            self.nextc();
381        }
382        self.set_integer_literal(&mut TokenBuf::new(b"0"), 0)
383    }
384
385    fn number_literal_suffix(&mut self, mask: i8) -> i8 {
386        let mut c: MaybeByte;
387        let mut mask = mask;
388        let mut result: i8 = 0;
389        let lastp = self.buffer.pcur;
390
391        loop {
392            c = self.nextc();
393            if c.is_eof() {
394                break;
395            }
396
397            if (mask & NUM_SUFFIX_I != 0) && c == b'i' {
398                result |= mask & NUM_SUFFIX_I;
399                mask &= !NUM_SUFFIX_I;
400                // r after i, rational of complex is disallowed
401                mask &= !NUM_SUFFIX_R;
402                continue;
403            }
404            if (mask & NUM_SUFFIX_R != 0) && c == b'r' {
405                result |= mask & NUM_SUFFIX_R;
406                mask &= !NUM_SUFFIX_R;
407                continue;
408            }
409            if !c.is_ascii() || c.is_alpha() || c == b'_' {
410                self.buffer.pcur = lastp;
411                // self.literal_flush(self.buffer.pcur);
412                return 0;
413            }
414            self.buffer.pushback(c);
415            break;
416        }
417
418        result
419    }
420
421    fn set_integer_literal(&mut self, value: &mut TokenBuf, suffix: i8) -> i32 {
422        let mut token_type = Self::tINTEGER;
423        if suffix & NUM_SUFFIX_R != 0 {
424            value.push(b'r');
425            token_type = Self::tRATIONAL;
426        }
427        self.set_number_literal(value, token_type, suffix)
428    }
429}