text_scanner/ext/
python.rs

1use crate::{CharExt, ScanResult, Scanner, ScannerResult};
2
/// Reserved words of the Python language.
///
/// The entries are laid out row by row, five per line, mirroring the keyword
/// table in the language reference.
///
/// Reference: <https://docs.python.org/3/reference/lexical_analysis.html#keywords>
#[rustfmt::skip]
pub const PYTHON_KEYWORDS: &[&str] = &[
    "False", "await", "else", "import", "pass",
    "None", "break", "except", "in", "raise",
    "True", "class", "finally", "is", "return",
    "and", "continue", "for", "lambda", "try",
    "as", "def", "from", "nonlocal", "while",
    "assert", "del", "global", "not", "with",
    "async", "elif", "if", "or", "yield",
];
10
/// Identifiers that are only treated as keywords in specific contexts.
///
/// Reference: <https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords>
pub const PYTHON_SOFT_KEYWORDS: &[&str] = &[
    "match",
    "case",
    "_",
];
13
/// Python operators together with the non-bracket delimiters.
///
/// `"@"` is listed by the language reference both as an operator and as a
/// delimiter; it appears only once here, in the operator group.
///
/// Reference: <https://docs.python.org/3/reference/lexical_analysis.html#operators>
/// Reference: <https://docs.python.org/3/reference/lexical_analysis.html#delimiters>
#[rustfmt::skip]
pub const PYTHON_OPERATORS: &[&str] = &[
    // Operators
    "+", "-", "*", "**", "/", "//", "%", "@",
    "<<", ">>", "&", "|", "^", "~", ":=",
    "<", ">", "<=", ">=", "==", "!=",
    // Delimiters (brackets live in `PYTHON_DELIMITERS`)
    ",", ":", ".", ";", "=", "->",
    // Augmented assignment
    "+=", "-=", "*=", "/=", "//=", "%=", "@=",
    "&=", "|=", "^=", ">>=", "<<=", "**=",
];
23
/// Bracketing delimiters: parentheses, square brackets and curly braces.
///
/// Reference: <https://docs.python.org/3/reference/lexical_analysis.html#delimiters>
pub const PYTHON_DELIMITERS: &[&str] = &[
    "(", ")",
    "[", "]",
    "{", "}",
];
26
27/// [`Scanner`] extension for scanning Python tokens.
28///
29/// _Based on Python 3.11._
30pub trait PythonScannerExt<'text>: crate::private::Sealed {
31    fn scan_python_line_comment(&mut self) -> ScannerResult<'text, &'text str>;
32
33    fn scan_python_explicit_line_joiner(&mut self) -> ScannerResult<'text, &'text str>;
34
35    fn scan_python_identifier(&mut self) -> ScannerResult<'text, &'text str>;
36    fn scan_python_keyword(&mut self) -> ScannerResult<'text, &'text str>;
37    fn scan_python_soft_keyword(&mut self) -> ScannerResult<'text, &'text str>;
38
39    fn scan_python_operator(&mut self) -> ScannerResult<'text, &'text str>;
40    fn scan_python_delimiter(&mut self) -> ScannerResult<'text, &'text str>;
41
42    fn scan_python_int_dec(&mut self) -> ScannerResult<'text, &'text str>;
43    fn scan_python_int_hex(&mut self) -> ScannerResult<'text, &'text str>;
44    fn scan_python_int_oct(&mut self) -> ScannerResult<'text, &'text str>;
45    fn scan_python_int_bin(&mut self) -> ScannerResult<'text, &'text str>;
46    fn scan_python_float(&mut self) -> ScannerResult<'text, &'text str>;
47
48    fn scan_python_string(&mut self) -> ScannerResult<'text, &'text str>;
49    fn scan_python_short_string(&mut self) -> ScannerResult<'text, &'text str>;
50    fn scan_python_long_string(&mut self) -> ScannerResult<'text, &'text str>;
51
52    fn scan_python_bytes(&mut self) -> ScannerResult<'text, &'text str>;
53    fn scan_python_short_bytes(&mut self) -> ScannerResult<'text, &'text str>;
54    fn scan_python_long_bytes(&mut self) -> ScannerResult<'text, &'text str>;
55}
56
57impl<'text> PythonScannerExt<'text> for Scanner<'text> {
58    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#comments
59    fn scan_python_line_comment(&mut self) -> ScannerResult<'text, &'text str> {
60        self.scan_with(|scanner| {
61            scanner.accept_char('#')?;
62            scanner.skip_until_char_any(&['\n', '\r']);
63            Ok(())
64        })
65    }
66
67    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#explicit-line-joining
68    fn scan_python_explicit_line_joiner(&mut self) -> ScannerResult<'text, &'text str> {
69        self.scan_with(|scanner| {
70            let (r, _c) = scanner.accept_char('\\')?;
71
72            if !scanner.has_remaining_text() {
73                return Ok(());
74            }
75
76            let remaining = scanner.remaining_text();
77            if remaining.starts_with('\n') || remaining.starts_with("\r\n") || (remaining == "\r") {
78                return Ok(());
79            }
80
81            Err(scanner.ranged_text(r))
82        })
83    }
84
85    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#identifiers
86    fn scan_python_identifier(&mut self) -> ScannerResult<'text, &'text str> {
87        self.scan_with(|scanner| {
88            scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
89            scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
90            Ok(())
91        })
92    }
93
94    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#keywords
95    fn scan_python_keyword(&mut self) -> ScannerResult<'text, &'text str> {
96        self.scan_with(|scanner| {
97            let (r, s) = scanner.scan_python_identifier()?;
98            if s.is_python_keyword() {
99                Ok(())
100            } else {
101                Err((r, s))
102            }
103        })
104    }
105
106    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#soft-keywords
107    fn scan_python_soft_keyword(&mut self) -> ScannerResult<'text, &'text str> {
108        self.scan_with(|scanner| {
109            let (r, s) = scanner.scan_python_identifier()?;
110            if s.is_python_soft_keyword() {
111                Ok(())
112            } else {
113                Err((r, s))
114            }
115        })
116    }
117
118    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#operators
119    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#delimiters
120    fn scan_python_operator(&mut self) -> ScannerResult<'text, &'text str> {
121        self.scan_with(|scanner| {
122            let (r, c) = scanner.next()?;
123            match c {
124                '=' => {
125                    _ = scanner.accept_char('=');
126                }
127                '/' => {
128                    _ = scanner.accept_char('/');
129                    _ = scanner.accept_char('=');
130                }
131                '-' => {
132                    _ = scanner.accept_char_any(&['=', '>']);
133                }
134                '+' | '%' | '&' | '|' | '^' => {
135                    _ = scanner.accept_char('=');
136                }
137                '*' => {
138                    _ = scanner.accept_char('*');
139                    _ = scanner.accept_char('=');
140                }
141                '<' => {
142                    _ = scanner.accept_char('<');
143                    _ = scanner.accept_char('=');
144                }
145                '>' => {
146                    _ = scanner.accept_char('>');
147                    _ = scanner.accept_char('=');
148                }
149                '@' => {
150                    _ = scanner.accept_char('=');
151                }
152                ':' => {
153                    _ = scanner.accept_char('=');
154                }
155                '!' => {
156                    scanner.accept_char('=')?;
157                }
158                ',' | '.' | ';' | '~' => {}
159                _ => return Err(scanner.ranged_text(r)),
160            }
161            Ok(())
162        })
163    }
164
165    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#delimiters
166    fn scan_python_delimiter(&mut self) -> ScannerResult<'text, &'text str> {
167        let (r, c) = self.peek()?;
168        let ret = self.ranged_text(r);
169        match c {
170            '(' | ')' | '[' | ']' | '{' | '}' => {
171                self.cursor = ret.0.end;
172                Ok(ret)
173            }
174            _ => Err(ret),
175        }
176    }
177
178    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
179    fn scan_python_int_dec(&mut self) -> ScannerResult<'text, &'text str> {
180        self.scan_with(|scanner| {
181            scanner.accept_if_ext(char::is_ascii_digit)?;
182            scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
183            Ok(())
184        })
185    }
186
187    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
188    fn scan_python_int_hex(&mut self) -> ScannerResult<'text, &'text str> {
189        self.scan_with(|scanner| {
190            scanner.accept_char('0')?;
191            scanner.accept_char_any(&['x', 'X'])?;
192
193            scanner.skip_while_char('_');
194            scanner.accept_if_ext(char::is_ascii_hexdigit)?;
195
196            scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
197
198            Ok(())
199        })
200    }
201
202    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
203    fn scan_python_int_oct(&mut self) -> ScannerResult<'text, &'text str> {
204        self.scan_with(|scanner| {
205            scanner.accept_char('0')?;
206            scanner.accept_char_any(&['o', 'O'])?;
207
208            scanner.skip_while_char('_');
209            scanner.accept_if(CharExt::is_ascii_octdigit)?;
210
211            scanner.skip_while(|c| CharExt::is_ascii_octdigit(c) || (c == '_'));
212
213            Ok(())
214        })
215    }
216
217    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#integer-literals
218    fn scan_python_int_bin(&mut self) -> ScannerResult<'text, &'text str> {
219        self.scan_with(|scanner| {
220            scanner.accept_char('0')?;
221            scanner.accept_char_any(&['b', 'B'])?;
222
223            scanner.skip_while_char('_');
224            scanner.accept_if(CharExt::is_ascii_bindigit)?;
225
226            scanner.skip_while(|c| c.is_ascii_bindigit() || (c == '_'));
227
228            Ok(())
229        })
230    }
231
232    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#floating-point-literals
233    fn scan_python_float(&mut self) -> ScannerResult<'text, &'text str> {
234        self.scan_with(|scanner| {
235            let mut int_range = None;
236
237            if scanner.accept_char('.').is_ok() {
238                scanner.scan_python_int_dec()?;
239            } else {
240                int_range = Some(scanner.scan_python_int_dec()?.0);
241
242                if scanner.accept_char('.').is_ok() {
243                    int_range = None;
244                    _ = scanner.scan_python_int_dec();
245                }
246            }
247
248            if scanner.accept_char_any(&['e', 'E']).is_ok() {
249                _ = scanner.accept_char_any(&['+', '-']);
250
251                scanner.skip_while_char('_');
252                scanner.accept_if_ext(char::is_ascii_digit)?;
253                scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
254            } else if let Some(r) = int_range {
255                return Err(scanner.ranged_text(r));
256            }
257
258            Ok(())
259        })
260    }
261
262    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
263    #[inline]
264    fn scan_python_string(&mut self) -> ScannerResult<'text, &'text str> {
265        self.scan_python_long_string()
266            .or_else(|_| self.scan_python_short_string())
267    }
268
269    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
270    fn scan_python_short_string(&mut self) -> ScannerResult<'text, &'text str> {
271        self.scan_with(|scanner| {
272            scan_python_string_prefix(scanner)?;
273            scan_python_short_string(scanner)?;
274            Ok(())
275        })
276    }
277
278    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
279    fn scan_python_long_string(&mut self) -> ScannerResult<'text, &'text str> {
280        self.scan_with(|scanner| {
281            scan_python_string_prefix(scanner)?;
282            scan_python_long_string(scanner)?;
283            Ok(())
284        })
285    }
286
287    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
288    #[inline]
289    fn scan_python_bytes(&mut self) -> ScannerResult<'text, &'text str> {
290        self.scan_python_long_bytes()
291            .or_else(|_| self.scan_python_short_bytes())
292    }
293
294    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
295    fn scan_python_short_bytes(&mut self) -> ScannerResult<'text, &'text str> {
296        self.scan_with(|scanner| {
297            scan_python_bytes_prefix(scanner)?;
298            // Note: Does not validate the bytes string contents
299            scan_python_short_string(scanner)?;
300            Ok(())
301        })
302    }
303
304    // Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
305    fn scan_python_long_bytes(&mut self) -> ScannerResult<'text, &'text str> {
306        self.scan_with(|scanner| {
307            scan_python_bytes_prefix(scanner)?;
308            // Note: Does not validate the bytes string contents
309            scan_python_long_string(scanner)?;
310            Ok(())
311        })
312    }
313}
314
315// Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
316#[inline]
317fn scan_python_string_prefix<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
318    let c = match scanner.accept_char_any(&['r', 'R', 'f', 'F', 'u', 'U']) {
319        Ok((_r, c)) => c,
320        Err(_) => return Ok(()),
321    };
322
323    match c {
324        'f' | 'F' => {
325            _ = scanner.accept_char_any(&['r', 'R']);
326        }
327        'r' | 'R' => {
328            _ = scanner.accept_char_any(&['f', 'F']);
329        }
330        'u' | 'U' => {}
331        _ => unreachable!(),
332    }
333
334    Ok(())
335}
336
337// Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
338#[inline]
339fn scan_python_bytes_prefix<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
340    let c = match scanner.accept_char_any(&['b', 'B', 'r', 'R']) {
341        Ok((_r, c)) => c,
342        Err(_) => return Ok(()),
343    };
344
345    match c {
346        'b' | 'B' => {
347            _ = scanner.accept_char_any(&['r', 'R']);
348        }
349        'r' | 'R' => {
350            scanner.accept_char_any(&['b', 'B'])?;
351        }
352        _ => unreachable!(),
353    }
354    Ok(())
355}
356
357// Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
358#[inline]
359fn scan_python_short_string<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
360    let (_r, quote) = scanner.accept_char_any(&['"', '\''])?;
361
362    loop {
363        scanner.skip_until_char_any(&[quote, '\\', '\n']);
364        match scanner.peek() {
365            Ok((_r, c)) if c == quote => {
366                _ = scanner.next();
367                break;
368            }
369            Ok((_r, '\\')) => {
370                _ = scanner.next();
371                // Skip the next character as it is escaped
372                // Note: Technically any character is not valid
373                _ = scanner.next();
374            }
375            Ok((_r, '\n')) => break,
376            Ok(_) => unreachable!(),
377            Err(_) => break,
378        }
379    }
380
381    Ok(())
382}
383
384// Reference: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
385#[inline]
386fn scan_python_long_string<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
387    let (_r, quote) = scanner.accept_char_any(&['"', '\''])?;
388    scanner.accept_char(quote)?;
389    scanner.accept_char(quote)?;
390
391    'scan: loop {
392        scanner.skip_until_char_any(&[quote, '\\']);
393        match scanner.peek() {
394            Ok((_r, c)) if c == quote => {
395                _ = scanner.next();
396
397                for _ in 0..2 {
398                    if scanner.accept_char(quote).is_err() {
399                        continue 'scan;
400                    }
401                }
402
403                break;
404            }
405            Ok((_r, '\\')) => {
406                _ = scanner.next();
407                // Skip the next character as it is escaped
408                // Note: Technically any character is not valid
409                _ = scanner.next();
410            }
411            Ok(_) => unreachable!(),
412            Err(_) => break,
413        }
414    }
415
416    Ok(())
417}
418
/// [`str`] extension for checking if a `&str` is e.g. a Python keyword.
///
/// Every check is an exact, case-sensitive comparison against the
/// corresponding constant list.
pub trait PythonStrExt {
    /// Returns `true` if `self` is listed in [`PYTHON_KEYWORDS`].
    fn is_python_keyword(&self) -> bool;
    /// Returns `true` if `self` is listed in [`PYTHON_SOFT_KEYWORDS`].
    fn is_python_soft_keyword(&self) -> bool;
    /// Returns `true` if `self` is listed in [`PYTHON_OPERATORS`].
    fn is_python_operator(&self) -> bool;
    /// Returns `true` if `self` is listed in [`PYTHON_DELIMITERS`].
    fn is_python_delimiter(&self) -> bool;
}
426
427impl PythonStrExt for str {
428    #[inline]
429    fn is_python_keyword(&self) -> bool {
430        PYTHON_KEYWORDS.contains(&self)
431    }
432
433    #[inline]
434    fn is_python_soft_keyword(&self) -> bool {
435        PYTHON_SOFT_KEYWORDS.contains(&self)
436    }
437
438    #[inline]
439    fn is_python_operator(&self) -> bool {
440        PYTHON_OPERATORS.contains(&self)
441    }
442
443    #[inline]
444    fn is_python_delimiter(&self) -> bool {
445        PYTHON_DELIMITERS.contains(&self)
446    }
447}
448
449#[cfg(test)]
450mod tests {
451    use super::*;
452
#[test]
fn test_python_line_comment() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Empty comment
        ("#", Ok((0..1, "#")), ""),
        ("#\n", Ok((0..1, "#")), "\n"),
        ("#\r\n", Ok((0..1, "#")), "\r\n"),
        // Comment with content
        ("# Line Comment", Ok((0..14, "# Line Comment")), ""),
        ("# Line Comment\n", Ok((0..14, "# Line Comment")), "\n"),
        ("# Line Comment\r\n", Ok((0..14, "# Line Comment")), "\r\n"),
        // Input that does not start with `#`
        ("", Err((0..0, "")), ""),
        (" #", Err((0..0, "")), " #"),
        (" #\n", Err((0..0, "")), " #\n"),
        (" #\r\n", Err((0..0, "")), " #\r\n"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_line_comment(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
480
#[test]
fn test_python_explicit_line_joiner() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Backslash at the end of the text or directly before a line break
        ("\\", Ok((0..1, "\\")), ""),
        ("\\\n", Ok((0..1, "\\")), "\n"),
        ("\\\r\n", Ok((0..1, "\\")), "\r\n"),
        ("\\\r", Ok((0..1, "\\")), "\r"),
        // Followed by a word, with and without a line break in between
        ("\\Foo", Err((0..1, "\\")), "\\Foo"),
        ("\\\nFoo", Ok((0..1, "\\")), "\nFoo"),
        ("\\\r\nFoo", Ok((0..1, "\\")), "\r\nFoo"),
        // Followed by a space
        ("\\ Foo", Err((0..1, "\\")), "\\ Foo"),
        ("\\\n Foo", Ok((0..1, "\\")), "\n Foo"),
        ("\\\r\n Foo", Ok((0..1, "\\")), "\r\n Foo"),
        // A `\r` that is not the very last character
        ("\\\rFoo", Err((0..1, "\\")), "\\\rFoo"),
        ("\\ Foo", Err((0..1, "\\")), "\\ Foo"),
        ("\\ \rFoo", Err((0..1, "\\")), "\\ \rFoo"),
        // Two backslashes in a row
        ("\\\\", Err((0..1, "\\")), "\\\\"),
        ("\\\\\n", Err((0..1, "\\")), "\\\\\n"),
        ("\\\\\r\n", Err((0..1, "\\")), "\\\\\r\n"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_explicit_line_joiner(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
516
#[test]
fn test_python_identifier() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Letters and underscores
        ("x", Ok((0..1, "x")), ""),
        ("_", Ok((0..1, "_")), ""),
        ("x_", Ok((0..2, "x_")), ""),
        ("xyz", Ok((0..3, "xyz")), ""),
        ("x_y_z", Ok((0..5, "x_y_z")), ""),
        ("_x_y_z_", Ok((0..7, "_x_y_z_")), ""),
        // Digits after the first character
        ("x1", Ok((0..2, "x1")), ""),
        ("_1", Ok((0..2, "_1")), ""),
        // Trailing whitespace
        ("x ", Ok((0..1, "x")), " "),
        ("x\t", Ok((0..1, "x")), "\t"),
        ("x\n", Ok((0..1, "x")), "\n"),
        ("x\r\n", Ok((0..1, "x")), "\r\n"),
        // Trailing punctuation
        ("x-", Ok((0..1, "x")), "-"),
        ("x+", Ok((0..1, "x")), "+"),
        ("x()", Ok((0..1, "x")), "()"),
        // Underscore, digit and minus combined
        ("_2-", Ok((0..2, "_2")), "-"),
        ("_-2", Ok((0..1, "_")), "-2"),
        // Empty input and leading whitespace
        ("", Err((0..0, "")), ""),
        (" x", Err((0..0, "")), " x"),
        ("\tx", Err((0..0, "")), "\tx"),
        ("\nx", Err((0..0, "")), "\nx"),
        // Invalid first character
        ("1x", Err((0..0, "")), "1x"),
        ("-x", Err((0..0, "")), "-x"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_identifier(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
561
#[test]
fn test_python_keyword() {
    // Every listed keyword must scan back as exactly itself
    for keyword in PYTHON_KEYWORDS.iter().copied() {
        let mut scanner = Scanner::new(keyword);

        let scanned = scanner.scan_python_keyword().map(|(_, text)| text);
        assert_eq!(scanned, Ok(keyword));
    }
}
571
#[test]
fn test_python_soft_keyword() {
    // Every listed soft keyword must scan back as exactly itself
    for keyword in PYTHON_SOFT_KEYWORDS.iter().copied() {
        let mut scanner = Scanner::new(keyword);

        let scanned = scanner.scan_python_soft_keyword().map(|(_, text)| text);
        assert_eq!(scanned, Ok(keyword));
    }
}
581
#[test]
fn test_python_operator() {
    // Every listed operator must scan back as exactly itself
    for operator in PYTHON_OPERATORS.iter().copied() {
        let mut scanner = Scanner::new(operator);

        let scanned = scanner.scan_python_operator().map(|(_, text)| text);
        assert_eq!(scanned, Ok(operator));
    }
}
591
#[test]
fn test_python_delimiter() {
    // Every listed delimiter must scan back as exactly itself
    for delimiter in PYTHON_DELIMITERS.iter().copied() {
        let mut scanner = Scanner::new(delimiter);

        let scanned = scanner.scan_python_delimiter().map(|(_, text)| text);
        assert_eq!(scanned, Ok(delimiter));
    }
}
601
#[test]
fn test_python_int_dec() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Plain digits
        ("0", Ok((0..1, "0")), ""),
        ("1", Ok((0..1, "1")), ""),
        ("123", Ok((0..3, "123")), ""),
        ("1234567890", Ok((0..10, "1234567890")), ""),
        // Followed by an operator
        ("0+", Ok((0..1, "0")), "+"),
        // Underscores
        ("1_2", Ok((0..3, "1_2")), ""),
        // FIXME: ("1_2_", Ok((0..3, "1_2")), ""),
        ("_1_2", Err((0..0, "")), "_1_2"),
        // Leading zeros and repeated underscores
        // FIXME: ("0123", Err((0..1, "0")), "0123"),
        // FIXME: ("12__34", Err((0..3, "12_")), "12__34"),
        // A sign is not part of the literal
        ("-0", Err((0..0, "")), "-0"),
        ("-123", Err((0..0, "")), "-123"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_int_dec(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
633
#[test]
fn test_python_int_hex() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Lowercase `x` prefix
        ("0x0", Ok((0..3, "0x0")), ""),
        ("0xF", Ok((0..3, "0xF")), ""),
        ("0xf", Ok((0..3, "0xf")), ""),
        ("0xFF", Ok((0..4, "0xFF")), ""),
        // Uppercase `X` prefix
        ("0X0", Ok((0..3, "0X0")), ""),
        ("0XF", Ok((0..3, "0XF")), ""),
        ("0Xf", Ok((0..3, "0Xf")), ""),
        ("0XFF", Ok((0..4, "0XFF")), ""),
        // Longer literals
        ("0xFFF", Ok((0..5, "0xFFF")), ""),
        ("0xFFFFFF", Ok((0..8, "0xFFFFFF")), ""),
        ("0xFFFFFFFFFFFF", Ok((0..14, "0xFFFFFFFFFFFF")), ""),
        ("0x0123456789ABCDEF", Ok((0..18, "0x0123456789ABCDEF")), ""),
        ("0x0123456789abcdef", Ok((0..18, "0x0123456789abcdef")), ""),
        // Followed by an operator
        ("0xFF+", Ok((0..4, "0xFF")), "+"),
        // Underscores
        ("0xF_F", Ok((0..5, "0xF_F")), ""),
        ("0x_FF", Ok((0..5, "0x_FF")), ""),
        ("0x_F_F", Ok((0..6, "0x_F_F")), ""),
        ("0x_", Err((0..3, "0x_")), "0x_"),
        // FIXME: ("0xF__F", Err((0..4, "0xF_")), "0xF__F"),
        // Incomplete literal
        ("0", Err((0..1, "0")), "0"),
        ("0x", Err((0..2, "0x")), "0x"),
        // Wrong leading digit
        ("1x", Err((0..0, "")), "1x"),
        ("1xF", Err((0..0, "")), "1xF"),
        ("1xFF", Err((0..0, "")), "1xFF"),
        // A sign is not part of the literal
        ("-0xFF", Err((0..0, "")), "-0xFF"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_int_hex(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
681
#[test]
fn test_python_int_oct() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Lowercase `o` prefix
        ("0o0", Ok((0..3, "0o0")), ""),
        ("0o7", Ok((0..3, "0o7")), ""),
        ("0o00", Ok((0..4, "0o00")), ""),
        ("0o77", Ok((0..4, "0o77")), ""),
        ("0o1234567", Ok((0..9, "0o1234567")), ""),
        // Uppercase `O` prefix
        ("0O0", Ok((0..3, "0O0")), ""),
        ("0O7", Ok((0..3, "0O7")), ""),
        ("0O00", Ok((0..4, "0O00")), ""),
        ("0O77", Ok((0..4, "0O77")), ""),
        ("0O1234567", Ok((0..9, "0O1234567")), ""),
        // Followed by an operator
        ("0o77+", Ok((0..4, "0o77")), "+"),
        // Underscores
        ("0o7_7", Ok((0..5, "0o7_7")), ""),
        ("0o_77", Ok((0..5, "0o_77")), ""),
        ("0o_7_7", Ok((0..6, "0o_7_7")), ""),
        ("0o_", Err((0..3, "0o_")), "0o_"),
        // FIXME: ("0o7__7", Err((0..4, "0o7_")), "0o7__7"),
        // Incomplete literal
        ("0", Err((0..1, "0")), "0"),
        ("0o", Err((0..2, "0o")), "0o"),
        // Wrong leading digit
        ("1", Err((0..0, "")), "1"),
        ("1o", Err((0..0, "")), "1o"),
        ("1o77", Err((0..0, "")), "1o77"),
        ("1o2345670", Err((0..0, "")), "1o2345670"),
        // A sign is not part of the literal
        ("-0o77", Err((0..0, "")), "-0o77"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_int_oct(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
726
#[test]
fn test_python_int_bin() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Single digit, both prefix cases, including an invalid digit
        ("0b0", Ok((0..3, "0b0")), ""),
        ("0b1", Ok((0..3, "0b1")), ""),
        ("0b2", Err((0..2, "0b")), "0b2"),
        ("0B0", Ok((0..3, "0B0")), ""),
        ("0B1", Ok((0..3, "0B1")), ""),
        ("0B2", Err((0..2, "0B")), "0B2"),
        // Multiple digits
        ("0b0000", Ok((0..6, "0b0000")), ""),
        ("0b1111", Ok((0..6, "0b1111")), ""),
        ("0b0011", Ok((0..6, "0b0011")), ""),
        ("0b1100", Ok((0..6, "0b1100")), ""),
        // Wrong leading digit
        ("1b0", Err((0..0, "")), "1b0"),
        ("1b1", Err((0..0, "")), "1b1"),
        ("1B0", Err((0..0, "")), "1B0"),
        ("1B1", Err((0..0, "")), "1B1"),
        // Followed by an operator
        ("0b0+", Ok((0..3, "0b0")), "+"),
        ("0b1+", Ok((0..3, "0b1")), "+"),
        // Leading sign or whitespace
        ("-0b0", Err((0..0, "")), "-0b0"),
        ("-0b1", Err((0..0, "")), "-0b1"),
        (" 0b0", Err((0..0, "")), " 0b0"),
        (" 0b1", Err((0..0, "")), " 0b1"),
        // Underscores
        ("0b1_1", Ok((0..5, "0b1_1")), ""),
        ("0b_11", Ok((0..5, "0b_11")), ""),
        ("0b_1_1", Ok((0..6, "0b_1_1")), ""),
        ("0b_", Err((0..3, "0b_")), "0b_"),
        // FIXME: ("0b1__1", Err((0..4, "0b1_")), "0b1__1"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_int_bin(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
772
#[test]
fn test_python_float() {
    // Each row: (input, expected scan result, text left unconsumed)
    let table = [
        // Digits on either or both sides of the point
        ("1.", Ok((0..2, "1.")), ""),
        (".2", Ok((0..2, ".2")), ""),
        ("1.2", Ok((0..3, "1.2")), ""),
        // Exponents, signed and unsigned, both cases
        ("1.2E3", Ok((0..5, "1.2E3")), ""),
        ("1.2E+3", Ok((0..6, "1.2E+3")), ""),
        ("1.2E-3", Ok((0..6, "1.2E-3")), ""),
        ("1.2e3", Ok((0..5, "1.2e3")), ""),
        ("1.2e+3", Ok((0..6, "1.2e+3")), ""),
        ("1.2e-3", Ok((0..6, "1.2e-3")), ""),
        // Longer literals
        ("12345.", Ok((0..6, "12345.")), ""),
        (".12345", Ok((0..6, ".12345")), ""),
        ("12345.12345", Ok((0..11, "12345.12345")), ""),
        ("12345.12345E+12345", Ok((0..18, "12345.12345E+12345")), ""),
        // Exponent without a point, leading zeros, underscores
        ("0e0", Ok((0..3, "0e0")), ""),
        (".001", Ok((0..4, ".001")), ""),
        ("1e100", Ok((0..5, "1e100")), ""),
        ("3.14_15_93", Ok((0..10, "3.14_15_93")), ""),
        // Trailing whitespace
        ("1. ", Ok((0..2, "1.")), " "),
        (".2 ", Ok((0..2, ".2")), " "),
        ("1.2 ", Ok((0..3, "1.2")), " "),
        ("1.2\n", Ok((0..3, "1.2")), "\n"),
        // Followed by an operator
        ("1.+", Ok((0..2, "1.")), "+"),
        (".2+", Ok((0..2, ".2")), "+"),
        ("1.2+", Ok((0..3, "1.2")), "+"),
        // Leading whitespace
        (" 1.", Err((0..0, "")), " 1."),
        (" .2", Err((0..0, "")), " .2"),
        (" 1.2", Err((0..0, "")), " 1.2"),
        // Plain integers are not floats
        ("0", Err((0..1, "0")), "0"),
        ("100", Err((0..3, "100")), "100"),
        // A sign is not part of the literal
        ("-1", Err((0..0, "")), "-1"),
        ("-1.", Err((0..0, "")), "-1."),
        ("-.2", Err((0..0, "")), "-.2"),
        ("-1.2", Err((0..0, "")), "-1.2"),
    ];

    for (input, want, rest) in table {
        let mut scanner = Scanner::new(input);

        assert_eq!(scanner.scan_python_float(), want);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
829
830    #[test]
831    fn test_python_short_string_double_quote() {
832        let cases = [
833            // text, expected, remaining text
834            ("\"\"", Ok((0..2, "\"\"")), ""),
835            ("\" \"", Ok((0..3, "\" \"")), ""),
836            ("\"Foo Bar\"", Ok((0..9, "\"Foo Bar\"")), ""),
837            //
838            ("\"Foo \n Bar\"", Ok((0..5, "\"Foo ")), "\n Bar\""),
839            ("\"Foo \\n Bar\"", Ok((0..12, "\"Foo \\n Bar\"")), ""),
840            //
841            ("\"Foo \\\" Bar\"", Ok((0..12, "\"Foo \\\" Bar\"")), ""),
842            ("\"Foo \\\n Bar\"", Ok((0..12, "\"Foo \\\n Bar\"")), ""),
843            //
844            ("\"\" ", Ok((0..2, "\"\"")), " "),
845            ("\"\"\t", Ok((0..2, "\"\"")), "\t"),
846            ("\"\"\n", Ok((0..2, "\"\"")), "\n"),
847            ("\"\"\r\n", Ok((0..2, "\"\"")), "\r\n"),
848            //
849            ("", Err((0..0, "")), ""),
850            //
851            (" \"\"", Err((0..0, "")), " \"\""),
852            ("\t\"\"", Err((0..0, "")), "\t\"\""),
853            ("\n\"\"", Err((0..0, "")), "\n\"\""),
854            //
855            ("\"", Ok((0..1, "\"")), ""),
856            ("\" ", Ok((0..2, "\" ")), ""),
857            ("\"\n", Ok((0..1, "\"")), "\n"),
858            ("\"Foo\n", Ok((0..4, "\"Foo")), "\n"),
859            ("\"Foo\nBar\"", Ok((0..4, "\"Foo")), "\nBar\""),
860            //
861            ("r\"\"", Ok((0..3, "r\"\"")), ""),
862            ("u\"\"", Ok((0..3, "u\"\"")), ""),
863            ("R\"\"", Ok((0..3, "R\"\"")), ""),
864            ("U\"\"", Ok((0..3, "U\"\"")), ""),
865            ("f\"\"", Ok((0..3, "f\"\"")), ""),
866            ("F\"\"", Ok((0..3, "F\"\"")), ""),
867            ("fr\"\"", Ok((0..4, "fr\"\"")), ""),
868            ("Fr\"\"", Ok((0..4, "Fr\"\"")), ""),
869            ("fR\"\"", Ok((0..4, "fR\"\"")), ""),
870            ("FR\"\"", Ok((0..4, "FR\"\"")), ""),
871            ("rf\"\"", Ok((0..4, "rf\"\"")), ""),
872            ("rF\"\"", Ok((0..4, "rF\"\"")), ""),
873            ("Rf\"\"", Ok((0..4, "Rf\"\"")), ""),
874            ("RF\"\"", Ok((0..4, "RF\"\"")), ""),
875        ];
876
877        for (text, expected, remaining) in cases {
878            let mut scanner = Scanner::new(text);
879
880            let actual = scanner.scan_python_short_string();
881            assert_eq!(actual, expected);
882
883            assert_eq!(scanner.remaining_text(), remaining);
884        }
885    }
886
887    #[test]
888    fn test_python_short_string_single_quote() {
889        let cases = [
890            // text, expected, remaining text
891            ("''", Ok((0..2, "''")), ""),
892            ("' '", Ok((0..3, "' '")), ""),
893            ("'Foo Bar'", Ok((0..9, "'Foo Bar'")), ""),
894            //
895            ("'Foo \n Bar'", Ok((0..5, "'Foo ")), "\n Bar'"),
896            ("'Foo \\n Bar'", Ok((0..12, "'Foo \\n Bar'")), ""),
897            //
898            ("'Foo \\' Bar'", Ok((0..12, "'Foo \\' Bar'")), ""),
899            ("'Foo \\\n Bar'", Ok((0..12, "'Foo \\\n Bar'")), ""),
900            //
901            ("'' ", Ok((0..2, "''")), " "),
902            ("''\t", Ok((0..2, "''")), "\t"),
903            ("''\n", Ok((0..2, "''")), "\n"),
904            ("''\r\n", Ok((0..2, "''")), "\r\n"),
905            //
906            ("", Err((0..0, "")), ""),
907            //
908            (" ''", Err((0..0, "")), " ''"),
909            ("\t''", Err((0..0, "")), "\t''"),
910            ("\n''", Err((0..0, "")), "\n''"),
911            //
912            ("'", Ok((0..1, "'")), ""),
913            ("' ", Ok((0..2, "' ")), ""),
914            ("'\n", Ok((0..1, "'")), "\n"),
915            ("'Foo\n", Ok((0..4, "'Foo")), "\n"),
916            ("'Foo\nBar'", Ok((0..4, "'Foo")), "\nBar'"),
917            //
918            ("r''", Ok((0..3, "r''")), ""),
919            ("u''", Ok((0..3, "u''")), ""),
920            ("R''", Ok((0..3, "R''")), ""),
921            ("U''", Ok((0..3, "U''")), ""),
922            ("f''", Ok((0..3, "f''")), ""),
923            ("F''", Ok((0..3, "F''")), ""),
924            ("fr''", Ok((0..4, "fr''")), ""),
925            ("Fr''", Ok((0..4, "Fr''")), ""),
926            ("fR''", Ok((0..4, "fR''")), ""),
927            ("FR''", Ok((0..4, "FR''")), ""),
928            ("rf''", Ok((0..4, "rf''")), ""),
929            ("rF''", Ok((0..4, "rF''")), ""),
930            ("Rf''", Ok((0..4, "Rf''")), ""),
931            ("RF''", Ok((0..4, "RF''")), ""),
932        ];
933
934        for (text, expected, remaining) in cases {
935            let mut scanner = Scanner::new(text);
936
937            let actual = scanner.scan_python_short_string();
938            assert_eq!(actual, expected);
939
940            assert_eq!(scanner.remaining_text(), remaining);
941        }
942    }
943
944    #[test]
945    fn test_python_long_string_double_quote() {
946        #[rustfmt::skip]
947        let cases = [
948            // text, expected, remaining text
949            ("\"\"\"\"\"\"", Ok((0..6, "\"\"\"\"\"\"")), ""),
950            ("\"\"\" \"\"\"", Ok((0..7, "\"\"\" \"\"\"")), ""),
951            ("\"\"\"Foo Bar\"\"\"", Ok((0..13, "\"\"\"Foo Bar\"\"\"")), ""),
952            //
953            ("\"\"\"Foo\nBar\"\"\"", Ok((0..13, "\"\"\"Foo\nBar\"\"\"")), ""),
954            //
955            ("\"\"\" \" \"\" \"\"\"", Ok((0..12, "\"\"\" \" \"\" \"\"\"")), ""),
956            ("\"\"\"\\\"\"\"Foo\"\"\"", Ok((0..13, "\"\"\"\\\"\"\"Foo\"\"\"")), ""),
957            ("\"\"\"\"Foo\"\"\"\"", Ok((0..10, "\"\"\"\"Foo\"\"\"")), "\""),
958            //
959            ("\"\"\"Foo'''\"\"\"\"", Ok((0..12, "\"\"\"Foo'''\"\"\"")), "\""),
960            //
961            ("\"\"\"Foo\"\"", Ok((0..8, "\"\"\"Foo\"\"")), ""),
962            ("\"\"\"Foo\n\"\"", Ok((0..9, "\"\"\"Foo\n\"\"")), ""),
963            //
964            ("r\"\"\"\"\"\"", Ok((0..7, "r\"\"\"\"\"\"")), ""),
965            ("u\"\"\"\"\"\"", Ok((0..7, "u\"\"\"\"\"\"")), ""),
966            ("R\"\"\"\"\"\"", Ok((0..7, "R\"\"\"\"\"\"")), ""),
967            ("U\"\"\"\"\"\"", Ok((0..7, "U\"\"\"\"\"\"")), ""),
968            ("f\"\"\"\"\"\"", Ok((0..7, "f\"\"\"\"\"\"")), ""),
969            ("F\"\"\"\"\"\"", Ok((0..7, "F\"\"\"\"\"\"")), ""),
970            ("fr\"\"\"\"\"\"", Ok((0..8, "fr\"\"\"\"\"\"")), ""),
971            ("Fr\"\"\"\"\"\"", Ok((0..8, "Fr\"\"\"\"\"\"")), ""),
972            ("fR\"\"\"\"\"\"", Ok((0..8, "fR\"\"\"\"\"\"")), ""),
973            ("FR\"\"\"\"\"\"", Ok((0..8, "FR\"\"\"\"\"\"")), ""),
974            ("rf\"\"\"\"\"\"", Ok((0..8, "rf\"\"\"\"\"\"")), ""),
975            ("rF\"\"\"\"\"\"", Ok((0..8, "rF\"\"\"\"\"\"")), ""),
976            ("Rf\"\"\"\"\"\"", Ok((0..8, "Rf\"\"\"\"\"\"")), ""),
977            ("RF\"\"\"\"\"\"", Ok((0..8, "RF\"\"\"\"\"\"")), ""),
978        ];
979
980        for (text, expected, remaining) in cases {
981            let mut scanner = Scanner::new(text);
982
983            let actual = scanner.scan_python_long_string();
984            assert_eq!(actual, expected);
985
986            assert_eq!(scanner.remaining_text(), remaining);
987        }
988    }
989
990    #[test]
991    fn test_python_long_string_single_quote() {
992        let cases = [
993            // text, expected, remaining text
994            ("''''''", Ok((0..6, "''''''")), ""),
995            ("''' '''", Ok((0..7, "''' '''")), ""),
996            ("'''Foo Bar'''", Ok((0..13, "'''Foo Bar'''")), ""),
997            //
998            ("'''Foo\nBar'''", Ok((0..13, "'''Foo\nBar'''")), ""),
999            //
1000            ("''' ' '' '''", Ok((0..12, "''' ' '' '''")), ""),
1001            ("'''\\'''Foo'''", Ok((0..13, "'''\\'''Foo'''")), ""),
1002            ("''''Foo''''", Ok((0..10, "''''Foo'''")), "'"),
1003            //
1004            ("'''Foo\"\"\"''''", Ok((0..12, "'''Foo\"\"\"'''")), "'"),
1005            //
1006            ("'''Foo''", Ok((0..8, "'''Foo''")), ""),
1007            ("'''Foo\n''", Ok((0..9, "'''Foo\n''")), ""),
1008            //
1009            ("r''''''", Ok((0..7, "r''''''")), ""),
1010            ("u''''''", Ok((0..7, "u''''''")), ""),
1011            ("R''''''", Ok((0..7, "R''''''")), ""),
1012            ("U''''''", Ok((0..7, "U''''''")), ""),
1013            ("f''''''", Ok((0..7, "f''''''")), ""),
1014            ("F''''''", Ok((0..7, "F''''''")), ""),
1015            ("fr''''''", Ok((0..8, "fr''''''")), ""),
1016            ("Fr''''''", Ok((0..8, "Fr''''''")), ""),
1017            ("fR''''''", Ok((0..8, "fR''''''")), ""),
1018            ("FR''''''", Ok((0..8, "FR''''''")), ""),
1019            ("rf''''''", Ok((0..8, "rf''''''")), ""),
1020            ("rF''''''", Ok((0..8, "rF''''''")), ""),
1021            ("Rf''''''", Ok((0..8, "Rf''''''")), ""),
1022            ("RF''''''", Ok((0..8, "RF''''''")), ""),
1023        ];
1024
1025        for (text, expected, remaining) in cases {
1026            let mut scanner = Scanner::new(text);
1027
1028            let actual = scanner.scan_python_long_string();
1029            assert_eq!(actual, expected);
1030
1031            assert_eq!(scanner.remaining_text(), remaining);
1032        }
1033    }
1034}