1use crate::{CharExt, ScanResult, Scanner, ScannerResult};
2
/// Python's reserved (hard) keywords.
///
/// The entries are laid out five per row; the order of the slice is part of
/// the public value and must not be changed.
#[rustfmt::skip]
pub const PYTHON_KEYWORDS: &[&str] = &[
    "False",  "await",    "else",    "import",   "pass",
    "None",   "break",    "except",  "in",       "raise",
    "True",   "class",    "finally", "is",       "return",
    "and",    "continue", "for",     "lambda",   "try",
    "as",     "def",      "from",    "nonlocal", "while",
    "assert", "del",      "global",  "not",      "with",
    "async",  "elif",     "if",      "or",       "yield",
];
10
/// Python's soft keywords: identifiers that are only reserved in specific
/// syntactic contexts.
pub const PYTHON_SOFT_KEYWORDS: &[&str] = &[
    "match",
    "case",
    "_",
];
13
/// Every operator and punctuation token recognised by
/// `scan_python_operator`.
///
/// Brackets are listed separately in `PYTHON_DELIMITERS`. The order of the
/// slice is part of the public value and must not be changed.
#[rustfmt::skip]
pub const PYTHON_OPERATORS: &[&str] = &[
    // Arithmetic, bitwise and walrus operators
    "+", "-", "*", "**", "/", "//", "%", "@",
    "<<", ">>", "&", "|", "^", "~", ":=",
    // Comparisons
    "<", ">", "<=", ">=", "==", "!=",
    // Punctuation
    ",", ":", ".", ";", "=", "->",
    // Augmented assignments
    "+=", "-=", "*=", "/=", "//=", "%=", "@=",
    "&=", "|=", "^=", ">>=", "<<=", "**=",
];
23
/// The bracket tokens recognised by `scan_python_delimiter`: parentheses,
/// square brackets and curly braces.
#[rustfmt::skip]
pub const PYTHON_DELIMITERS: &[&str] = &[
    "(", ")",
    "[", "]",
    "{", "}",
];
26
27pub trait PythonScannerExt<'text>: crate::private::Sealed {
31 fn scan_python_line_comment(&mut self) -> ScannerResult<'text, &'text str>;
32
33 fn scan_python_explicit_line_joiner(&mut self) -> ScannerResult<'text, &'text str>;
34
35 fn scan_python_identifier(&mut self) -> ScannerResult<'text, &'text str>;
36 fn scan_python_keyword(&mut self) -> ScannerResult<'text, &'text str>;
37 fn scan_python_soft_keyword(&mut self) -> ScannerResult<'text, &'text str>;
38
39 fn scan_python_operator(&mut self) -> ScannerResult<'text, &'text str>;
40 fn scan_python_delimiter(&mut self) -> ScannerResult<'text, &'text str>;
41
42 fn scan_python_int_dec(&mut self) -> ScannerResult<'text, &'text str>;
43 fn scan_python_int_hex(&mut self) -> ScannerResult<'text, &'text str>;
44 fn scan_python_int_oct(&mut self) -> ScannerResult<'text, &'text str>;
45 fn scan_python_int_bin(&mut self) -> ScannerResult<'text, &'text str>;
46 fn scan_python_float(&mut self) -> ScannerResult<'text, &'text str>;
47
48 fn scan_python_string(&mut self) -> ScannerResult<'text, &'text str>;
49 fn scan_python_short_string(&mut self) -> ScannerResult<'text, &'text str>;
50 fn scan_python_long_string(&mut self) -> ScannerResult<'text, &'text str>;
51
52 fn scan_python_bytes(&mut self) -> ScannerResult<'text, &'text str>;
53 fn scan_python_short_bytes(&mut self) -> ScannerResult<'text, &'text str>;
54 fn scan_python_long_bytes(&mut self) -> ScannerResult<'text, &'text str>;
55}
56
57impl<'text> PythonScannerExt<'text> for Scanner<'text> {
58 fn scan_python_line_comment(&mut self) -> ScannerResult<'text, &'text str> {
60 self.scan_with(|scanner| {
61 scanner.accept_char('#')?;
62 scanner.skip_until_char_any(&['\n', '\r']);
63 Ok(())
64 })
65 }
66
67 fn scan_python_explicit_line_joiner(&mut self) -> ScannerResult<'text, &'text str> {
69 self.scan_with(|scanner| {
70 let (r, _c) = scanner.accept_char('\\')?;
71
72 if !scanner.has_remaining_text() {
73 return Ok(());
74 }
75
76 let remaining = scanner.remaining_text();
77 if remaining.starts_with('\n') || remaining.starts_with("\r\n") || (remaining == "\r") {
78 return Ok(());
79 }
80
81 Err(scanner.ranged_text(r))
82 })
83 }
84
85 fn scan_python_identifier(&mut self) -> ScannerResult<'text, &'text str> {
87 self.scan_with(|scanner| {
88 scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
89 scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
90 Ok(())
91 })
92 }
93
94 fn scan_python_keyword(&mut self) -> ScannerResult<'text, &'text str> {
96 self.scan_with(|scanner| {
97 let (r, s) = scanner.scan_python_identifier()?;
98 if s.is_python_keyword() {
99 Ok(())
100 } else {
101 Err((r, s))
102 }
103 })
104 }
105
106 fn scan_python_soft_keyword(&mut self) -> ScannerResult<'text, &'text str> {
108 self.scan_with(|scanner| {
109 let (r, s) = scanner.scan_python_identifier()?;
110 if s.is_python_soft_keyword() {
111 Ok(())
112 } else {
113 Err((r, s))
114 }
115 })
116 }
117
118 fn scan_python_operator(&mut self) -> ScannerResult<'text, &'text str> {
121 self.scan_with(|scanner| {
122 let (r, c) = scanner.next()?;
123 match c {
124 '=' => {
125 _ = scanner.accept_char('=');
126 }
127 '/' => {
128 _ = scanner.accept_char('/');
129 _ = scanner.accept_char('=');
130 }
131 '-' => {
132 _ = scanner.accept_char_any(&['=', '>']);
133 }
134 '+' | '%' | '&' | '|' | '^' => {
135 _ = scanner.accept_char('=');
136 }
137 '*' => {
138 _ = scanner.accept_char('*');
139 _ = scanner.accept_char('=');
140 }
141 '<' => {
142 _ = scanner.accept_char('<');
143 _ = scanner.accept_char('=');
144 }
145 '>' => {
146 _ = scanner.accept_char('>');
147 _ = scanner.accept_char('=');
148 }
149 '@' => {
150 _ = scanner.accept_char('=');
151 }
152 ':' => {
153 _ = scanner.accept_char('=');
154 }
155 '!' => {
156 scanner.accept_char('=')?;
157 }
158 ',' | '.' | ';' | '~' => {}
159 _ => return Err(scanner.ranged_text(r)),
160 }
161 Ok(())
162 })
163 }
164
165 fn scan_python_delimiter(&mut self) -> ScannerResult<'text, &'text str> {
167 let (r, c) = self.peek()?;
168 let ret = self.ranged_text(r);
169 match c {
170 '(' | ')' | '[' | ']' | '{' | '}' => {
171 self.cursor = ret.0.end;
172 Ok(ret)
173 }
174 _ => Err(ret),
175 }
176 }
177
178 fn scan_python_int_dec(&mut self) -> ScannerResult<'text, &'text str> {
180 self.scan_with(|scanner| {
181 scanner.accept_if_ext(char::is_ascii_digit)?;
182 scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
183 Ok(())
184 })
185 }
186
187 fn scan_python_int_hex(&mut self) -> ScannerResult<'text, &'text str> {
189 self.scan_with(|scanner| {
190 scanner.accept_char('0')?;
191 scanner.accept_char_any(&['x', 'X'])?;
192
193 scanner.skip_while_char('_');
194 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
195
196 scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
197
198 Ok(())
199 })
200 }
201
202 fn scan_python_int_oct(&mut self) -> ScannerResult<'text, &'text str> {
204 self.scan_with(|scanner| {
205 scanner.accept_char('0')?;
206 scanner.accept_char_any(&['o', 'O'])?;
207
208 scanner.skip_while_char('_');
209 scanner.accept_if(CharExt::is_ascii_octdigit)?;
210
211 scanner.skip_while(|c| CharExt::is_ascii_octdigit(c) || (c == '_'));
212
213 Ok(())
214 })
215 }
216
217 fn scan_python_int_bin(&mut self) -> ScannerResult<'text, &'text str> {
219 self.scan_with(|scanner| {
220 scanner.accept_char('0')?;
221 scanner.accept_char_any(&['b', 'B'])?;
222
223 scanner.skip_while_char('_');
224 scanner.accept_if(CharExt::is_ascii_bindigit)?;
225
226 scanner.skip_while(|c| c.is_ascii_bindigit() || (c == '_'));
227
228 Ok(())
229 })
230 }
231
232 fn scan_python_float(&mut self) -> ScannerResult<'text, &'text str> {
234 self.scan_with(|scanner| {
235 let mut int_range = None;
236
237 if scanner.accept_char('.').is_ok() {
238 scanner.scan_python_int_dec()?;
239 } else {
240 int_range = Some(scanner.scan_python_int_dec()?.0);
241
242 if scanner.accept_char('.').is_ok() {
243 int_range = None;
244 _ = scanner.scan_python_int_dec();
245 }
246 }
247
248 if scanner.accept_char_any(&['e', 'E']).is_ok() {
249 _ = scanner.accept_char_any(&['+', '-']);
250
251 scanner.skip_while_char('_');
252 scanner.accept_if_ext(char::is_ascii_digit)?;
253 scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
254 } else if let Some(r) = int_range {
255 return Err(scanner.ranged_text(r));
256 }
257
258 Ok(())
259 })
260 }
261
262 #[inline]
264 fn scan_python_string(&mut self) -> ScannerResult<'text, &'text str> {
265 self.scan_python_long_string()
266 .or_else(|_| self.scan_python_short_string())
267 }
268
269 fn scan_python_short_string(&mut self) -> ScannerResult<'text, &'text str> {
271 self.scan_with(|scanner| {
272 scan_python_string_prefix(scanner)?;
273 scan_python_short_string(scanner)?;
274 Ok(())
275 })
276 }
277
278 fn scan_python_long_string(&mut self) -> ScannerResult<'text, &'text str> {
280 self.scan_with(|scanner| {
281 scan_python_string_prefix(scanner)?;
282 scan_python_long_string(scanner)?;
283 Ok(())
284 })
285 }
286
287 #[inline]
289 fn scan_python_bytes(&mut self) -> ScannerResult<'text, &'text str> {
290 self.scan_python_long_bytes()
291 .or_else(|_| self.scan_python_short_bytes())
292 }
293
294 fn scan_python_short_bytes(&mut self) -> ScannerResult<'text, &'text str> {
296 self.scan_with(|scanner| {
297 scan_python_bytes_prefix(scanner)?;
298 scan_python_short_string(scanner)?;
300 Ok(())
301 })
302 }
303
304 fn scan_python_long_bytes(&mut self) -> ScannerResult<'text, &'text str> {
306 self.scan_with(|scanner| {
307 scan_python_bytes_prefix(scanner)?;
308 scan_python_long_string(scanner)?;
310 Ok(())
311 })
312 }
313}
314
315#[inline]
317fn scan_python_string_prefix<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
318 let c = match scanner.accept_char_any(&['r', 'R', 'f', 'F', 'u', 'U']) {
319 Ok((_r, c)) => c,
320 Err(_) => return Ok(()),
321 };
322
323 match c {
324 'f' | 'F' => {
325 _ = scanner.accept_char_any(&['r', 'R']);
326 }
327 'r' | 'R' => {
328 _ = scanner.accept_char_any(&['f', 'F']);
329 }
330 'u' | 'U' => {}
331 _ => unreachable!(),
332 }
333
334 Ok(())
335}
336
337#[inline]
339fn scan_python_bytes_prefix<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
340 let c = match scanner.accept_char_any(&['b', 'B', 'r', 'R']) {
341 Ok((_r, c)) => c,
342 Err(_) => return Ok(()),
343 };
344
345 match c {
346 'b' | 'B' => {
347 _ = scanner.accept_char_any(&['r', 'R']);
348 }
349 'r' | 'R' => {
350 scanner.accept_char_any(&['b', 'B'])?;
351 }
352 _ => unreachable!(),
353 }
354 Ok(())
355}
356
357#[inline]
359fn scan_python_short_string<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
360 let (_r, quote) = scanner.accept_char_any(&['"', '\''])?;
361
362 loop {
363 scanner.skip_until_char_any(&[quote, '\\', '\n']);
364 match scanner.peek() {
365 Ok((_r, c)) if c == quote => {
366 _ = scanner.next();
367 break;
368 }
369 Ok((_r, '\\')) => {
370 _ = scanner.next();
371 _ = scanner.next();
374 }
375 Ok((_r, '\n')) => break,
376 Ok(_) => unreachable!(),
377 Err(_) => break,
378 }
379 }
380
381 Ok(())
382}
383
384#[inline]
386fn scan_python_long_string<'text>(scanner: &mut Scanner<'text>) -> ScanResult<'text> {
387 let (_r, quote) = scanner.accept_char_any(&['"', '\''])?;
388 scanner.accept_char(quote)?;
389 scanner.accept_char(quote)?;
390
391 'scan: loop {
392 scanner.skip_until_char_any(&[quote, '\\']);
393 match scanner.peek() {
394 Ok((_r, c)) if c == quote => {
395 _ = scanner.next();
396
397 for _ in 0..2 {
398 if scanner.accept_char(quote).is_err() {
399 continue 'scan;
400 }
401 }
402
403 break;
404 }
405 Ok((_r, '\\')) => {
406 _ = scanner.next();
407 _ = scanner.next();
410 }
411 Ok(_) => unreachable!(),
412 Err(_) => break,
413 }
414 }
415
416 Ok(())
417}
418
/// Classification helpers that test a whole string against the Python
/// token tables.
pub trait PythonStrExt {
    /// Returns `true` if the string is a hard keyword.
    fn is_python_keyword(&self) -> bool;
    /// Returns `true` if the string is a soft keyword.
    fn is_python_soft_keyword(&self) -> bool;
    /// Returns `true` if the string is an operator or punctuation token.
    fn is_python_operator(&self) -> bool;
    /// Returns `true` if the string is a bracket token.
    fn is_python_delimiter(&self) -> bool;
}
426
427impl PythonStrExt for str {
428 #[inline]
429 fn is_python_keyword(&self) -> bool {
430 PYTHON_KEYWORDS.contains(&self)
431 }
432
433 #[inline]
434 fn is_python_soft_keyword(&self) -> bool {
435 PYTHON_SOFT_KEYWORDS.contains(&self)
436 }
437
438 #[inline]
439 fn is_python_operator(&self) -> bool {
440 PYTHON_OPERATORS.contains(&self)
441 }
442
443 #[inline]
444 fn is_python_delimiter(&self) -> bool {
445 PYTHON_DELIMITERS.contains(&self)
446 }
447}
448
449#[cfg(test)]
450mod tests {
451 use super::*;
452
453 #[test]
454 fn test_python_line_comment() {
455 let cases = [
456 ("#", Ok((0..1, "#")), ""),
458 ("#\n", Ok((0..1, "#")), "\n"),
459 ("#\r\n", Ok((0..1, "#")), "\r\n"),
460 ("# Line Comment", Ok((0..14, "# Line Comment")), ""),
462 ("# Line Comment\n", Ok((0..14, "# Line Comment")), "\n"),
463 ("# Line Comment\r\n", Ok((0..14, "# Line Comment")), "\r\n"),
464 ("", Err((0..0, "")), ""),
466 (" #", Err((0..0, "")), " #"),
467 (" #\n", Err((0..0, "")), " #\n"),
468 (" #\r\n", Err((0..0, "")), " #\r\n"),
469 ];
470
471 for (text, expected, remaining) in cases {
472 let mut scanner = Scanner::new(text);
473
474 let actual = scanner.scan_python_line_comment();
475 assert_eq!(actual, expected);
476
477 assert_eq!(scanner.remaining_text(), remaining);
478 }
479 }
480
481 #[test]
482 fn test_python_explicit_line_joiner() {
483 let cases = [
484 ("\\", Ok((0..1, "\\")), ""),
486 ("\\\n", Ok((0..1, "\\")), "\n"),
487 ("\\\r\n", Ok((0..1, "\\")), "\r\n"),
488 ("\\\r", Ok((0..1, "\\")), "\r"),
489 ("\\Foo", Err((0..1, "\\")), "\\Foo"),
491 ("\\\nFoo", Ok((0..1, "\\")), "\nFoo"),
492 ("\\\r\nFoo", Ok((0..1, "\\")), "\r\nFoo"),
493 ("\\ Foo", Err((0..1, "\\")), "\\ Foo"),
495 ("\\\n Foo", Ok((0..1, "\\")), "\n Foo"),
496 ("\\\r\n Foo", Ok((0..1, "\\")), "\r\n Foo"),
497 ("\\\rFoo", Err((0..1, "\\")), "\\\rFoo"),
499 ("\\ Foo", Err((0..1, "\\")), "\\ Foo"),
500 ("\\ \rFoo", Err((0..1, "\\")), "\\ \rFoo"),
501 ("\\\\", Err((0..1, "\\")), "\\\\"),
503 ("\\\\\n", Err((0..1, "\\")), "\\\\\n"),
504 ("\\\\\r\n", Err((0..1, "\\")), "\\\\\r\n"),
505 ];
506
507 for (text, expected, remaining) in cases {
508 let mut scanner = Scanner::new(text);
509
510 let actual = scanner.scan_python_explicit_line_joiner();
511 assert_eq!(actual, expected);
512
513 assert_eq!(scanner.remaining_text(), remaining);
514 }
515 }
516
517 #[test]
518 fn test_python_identifier() {
519 let cases = [
520 ("x", Ok((0..1, "x")), ""),
522 ("_", Ok((0..1, "_")), ""),
523 ("x_", Ok((0..2, "x_")), ""),
524 ("xyz", Ok((0..3, "xyz")), ""),
525 ("x_y_z", Ok((0..5, "x_y_z")), ""),
526 ("_x_y_z_", Ok((0..7, "_x_y_z_")), ""),
527 ("x1", Ok((0..2, "x1")), ""),
529 ("_1", Ok((0..2, "_1")), ""),
530 ("x ", Ok((0..1, "x")), " "),
532 ("x\t", Ok((0..1, "x")), "\t"),
533 ("x\n", Ok((0..1, "x")), "\n"),
534 ("x\r\n", Ok((0..1, "x")), "\r\n"),
535 ("x-", Ok((0..1, "x")), "-"),
537 ("x+", Ok((0..1, "x")), "+"),
538 ("x()", Ok((0..1, "x")), "()"),
539 ("_2-", Ok((0..2, "_2")), "-"),
541 ("_-2", Ok((0..1, "_")), "-2"),
542 ("", Err((0..0, "")), ""),
544 (" x", Err((0..0, "")), " x"),
545 ("\tx", Err((0..0, "")), "\tx"),
546 ("\nx", Err((0..0, "")), "\nx"),
547 ("1x", Err((0..0, "")), "1x"),
549 ("-x", Err((0..0, "")), "-x"),
550 ];
551
552 for (text, expected, remaining) in cases {
553 let mut scanner = Scanner::new(text);
554
555 let actual = scanner.scan_python_identifier();
556 assert_eq!(actual, expected);
557
558 assert_eq!(scanner.remaining_text(), remaining);
559 }
560 }
561
562 #[test]
563 fn test_python_keyword() {
564 for &expected in PYTHON_KEYWORDS {
565 let mut scanner = Scanner::new(expected);
566
567 let actual = scanner.scan_python_keyword().map(|(_r, kw)| kw);
568 assert_eq!(actual, Ok(expected));
569 }
570 }
571
572 #[test]
573 fn test_python_soft_keyword() {
574 for &expected in PYTHON_SOFT_KEYWORDS {
575 let mut scanner = Scanner::new(expected);
576
577 let actual = scanner.scan_python_soft_keyword().map(|(_r, kw)| kw);
578 assert_eq!(actual, Ok(expected));
579 }
580 }
581
582 #[test]
583 fn test_python_operator() {
584 for &expected in PYTHON_OPERATORS {
585 let mut scanner = Scanner::new(expected);
586
587 let actual = scanner.scan_python_operator().map(|(_r, kw)| kw);
588 assert_eq!(actual, Ok(expected));
589 }
590 }
591
592 #[test]
593 fn test_python_delimiter() {
594 for &expected in PYTHON_DELIMITERS {
595 let mut scanner = Scanner::new(expected);
596
597 let actual = scanner.scan_python_delimiter().map(|(_r, kw)| kw);
598 assert_eq!(actual, Ok(expected));
599 }
600 }
601
602 #[test]
603 fn test_python_int_dec() {
604 let cases = [
605 ("0", Ok((0..1, "0")), ""),
607 ("1", Ok((0..1, "1")), ""),
608 ("123", Ok((0..3, "123")), ""),
609 ("1234567890", Ok((0..10, "1234567890")), ""),
610 ("0+", Ok((0..1, "0")), "+"),
612 ("1_2", Ok((0..3, "1_2")), ""),
614 ("_1_2", Err((0..0, "")), "_1_2"),
616 ("-0", Err((0..0, "")), "-0"),
621 ("-123", Err((0..0, "")), "-123"),
622 ];
623
624 for (text, expected, remaining) in cases {
625 let mut scanner = Scanner::new(text);
626
627 let actual = scanner.scan_python_int_dec();
628 assert_eq!(actual, expected);
629
630 assert_eq!(scanner.remaining_text(), remaining);
631 }
632 }
633
634 #[test]
635 fn test_python_int_hex() {
636 let cases = [
637 ("0x0", Ok((0..3, "0x0")), ""),
639 ("0xF", Ok((0..3, "0xF")), ""),
640 ("0xf", Ok((0..3, "0xf")), ""),
641 ("0xFF", Ok((0..4, "0xFF")), ""),
642 ("0X0", Ok((0..3, "0X0")), ""),
644 ("0XF", Ok((0..3, "0XF")), ""),
645 ("0Xf", Ok((0..3, "0Xf")), ""),
646 ("0XFF", Ok((0..4, "0XFF")), ""),
647 ("0xFFF", Ok((0..5, "0xFFF")), ""),
649 ("0xFFFFFF", Ok((0..8, "0xFFFFFF")), ""),
650 ("0xFFFFFFFFFFFF", Ok((0..14, "0xFFFFFFFFFFFF")), ""),
651 ("0x0123456789ABCDEF", Ok((0..18, "0x0123456789ABCDEF")), ""),
652 ("0x0123456789abcdef", Ok((0..18, "0x0123456789abcdef")), ""),
653 ("0xFF+", Ok((0..4, "0xFF")), "+"),
655 ("0xF_F", Ok((0..5, "0xF_F")), ""),
657 ("0x_FF", Ok((0..5, "0x_FF")), ""),
658 ("0x_F_F", Ok((0..6, "0x_F_F")), ""),
659 ("0x_", Err((0..3, "0x_")), "0x_"),
660 ("0", Err((0..1, "0")), "0"),
663 ("0x", Err((0..2, "0x")), "0x"),
664 ("1x", Err((0..0, "")), "1x"),
666 ("1xF", Err((0..0, "")), "1xF"),
667 ("1xFF", Err((0..0, "")), "1xFF"),
668 ("-0xFF", Err((0..0, "")), "-0xFF"),
670 ];
671
672 for (text, expected, remaining) in cases {
673 let mut scanner = Scanner::new(text);
674
675 let actual = scanner.scan_python_int_hex();
676 assert_eq!(actual, expected);
677
678 assert_eq!(scanner.remaining_text(), remaining);
679 }
680 }
681
682 #[test]
683 fn test_python_int_oct() {
684 let cases = [
685 ("0o0", Ok((0..3, "0o0")), ""),
687 ("0o7", Ok((0..3, "0o7")), ""),
688 ("0o00", Ok((0..4, "0o00")), ""),
689 ("0o77", Ok((0..4, "0o77")), ""),
690 ("0o1234567", Ok((0..9, "0o1234567")), ""),
691 ("0O0", Ok((0..3, "0O0")), ""),
693 ("0O7", Ok((0..3, "0O7")), ""),
694 ("0O00", Ok((0..4, "0O00")), ""),
695 ("0O77", Ok((0..4, "0O77")), ""),
696 ("0O1234567", Ok((0..9, "0O1234567")), ""),
697 ("0o77+", Ok((0..4, "0o77")), "+"),
699 ("0o7_7", Ok((0..5, "0o7_7")), ""),
701 ("0o_77", Ok((0..5, "0o_77")), ""),
702 ("0o_7_7", Ok((0..6, "0o_7_7")), ""),
703 ("0o_", Err((0..3, "0o_")), "0o_"),
704 ("0", Err((0..1, "0")), "0"),
707 ("0o", Err((0..2, "0o")), "0o"),
708 ("1", Err((0..0, "")), "1"),
710 ("1o", Err((0..0, "")), "1o"),
711 ("1o77", Err((0..0, "")), "1o77"),
712 ("1o2345670", Err((0..0, "")), "1o2345670"),
713 ("-0o77", Err((0..0, "")), "-0o77"),
715 ];
716
717 for (text, expected, remaining) in cases {
718 let mut scanner = Scanner::new(text);
719
720 let actual = scanner.scan_python_int_oct();
721 assert_eq!(actual, expected);
722
723 assert_eq!(scanner.remaining_text(), remaining);
724 }
725 }
726
727 #[test]
728 fn test_python_int_bin() {
729 let cases = [
730 ("0b0", Ok((0..3, "0b0")), ""),
732 ("0b1", Ok((0..3, "0b1")), ""),
733 ("0b2", Err((0..2, "0b")), "0b2"),
734 ("0B0", Ok((0..3, "0B0")), ""),
735 ("0B1", Ok((0..3, "0B1")), ""),
736 ("0B2", Err((0..2, "0B")), "0B2"),
737 ("0b0000", Ok((0..6, "0b0000")), ""),
739 ("0b1111", Ok((0..6, "0b1111")), ""),
740 ("0b0011", Ok((0..6, "0b0011")), ""),
741 ("0b1100", Ok((0..6, "0b1100")), ""),
742 ("1b0", Err((0..0, "")), "1b0"),
744 ("1b1", Err((0..0, "")), "1b1"),
745 ("1B0", Err((0..0, "")), "1B0"),
746 ("1B1", Err((0..0, "")), "1B1"),
747 ("0b0+", Ok((0..3, "0b0")), "+"),
749 ("0b1+", Ok((0..3, "0b1")), "+"),
750 ("-0b0", Err((0..0, "")), "-0b0"),
752 ("-0b1", Err((0..0, "")), "-0b1"),
753 (" 0b0", Err((0..0, "")), " 0b0"),
754 (" 0b1", Err((0..0, "")), " 0b1"),
755 ("0b1_1", Ok((0..5, "0b1_1")), ""),
757 ("0b_11", Ok((0..5, "0b_11")), ""),
758 ("0b_1_1", Ok((0..6, "0b_1_1")), ""),
759 ("0b_", Err((0..3, "0b_")), "0b_"),
760 ];
762
763 for (text, expected, remaining) in cases {
764 let mut scanner = Scanner::new(text);
765
766 let actual = scanner.scan_python_int_bin();
767 assert_eq!(actual, expected);
768
769 assert_eq!(scanner.remaining_text(), remaining);
770 }
771 }
772
773 #[test]
774 fn test_python_float() {
775 let cases = [
776 ("1.", Ok((0..2, "1.")), ""),
778 (".2", Ok((0..2, ".2")), ""),
779 ("1.2", Ok((0..3, "1.2")), ""),
780 ("1.2E3", Ok((0..5, "1.2E3")), ""),
782 ("1.2E+3", Ok((0..6, "1.2E+3")), ""),
783 ("1.2E-3", Ok((0..6, "1.2E-3")), ""),
784 ("1.2e3", Ok((0..5, "1.2e3")), ""),
785 ("1.2e+3", Ok((0..6, "1.2e+3")), ""),
786 ("1.2e-3", Ok((0..6, "1.2e-3")), ""),
787 ("12345.", Ok((0..6, "12345.")), ""),
789 (".12345", Ok((0..6, ".12345")), ""),
790 ("12345.12345", Ok((0..11, "12345.12345")), ""),
791 ("12345.12345E+12345", Ok((0..18, "12345.12345E+12345")), ""),
792 ("0e0", Ok((0..3, "0e0")), ""),
794 (".001", Ok((0..4, ".001")), ""),
795 ("1e100", Ok((0..5, "1e100")), ""),
796 ("3.14_15_93", Ok((0..10, "3.14_15_93")), ""),
797 ("1. ", Ok((0..2, "1.")), " "),
799 (".2 ", Ok((0..2, ".2")), " "),
800 ("1.2 ", Ok((0..3, "1.2")), " "),
801 ("1.2\n", Ok((0..3, "1.2")), "\n"),
802 ("1.+", Ok((0..2, "1.")), "+"),
804 (".2+", Ok((0..2, ".2")), "+"),
805 ("1.2+", Ok((0..3, "1.2")), "+"),
806 (" 1.", Err((0..0, "")), " 1."),
808 (" .2", Err((0..0, "")), " .2"),
809 (" 1.2", Err((0..0, "")), " 1.2"),
810 ("0", Err((0..1, "0")), "0"),
812 ("100", Err((0..3, "100")), "100"),
813 ("-1", Err((0..0, "")), "-1"),
815 ("-1.", Err((0..0, "")), "-1."),
816 ("-.2", Err((0..0, "")), "-.2"),
817 ("-1.2", Err((0..0, "")), "-1.2"),
818 ];
819
820 for (text, expected, remaining) in cases {
821 let mut scanner = Scanner::new(text);
822
823 let actual = scanner.scan_python_float();
824 assert_eq!(actual, expected);
825
826 assert_eq!(scanner.remaining_text(), remaining);
827 }
828 }
829
830 #[test]
831 fn test_python_short_string_double_quote() {
832 let cases = [
833 ("\"\"", Ok((0..2, "\"\"")), ""),
835 ("\" \"", Ok((0..3, "\" \"")), ""),
836 ("\"Foo Bar\"", Ok((0..9, "\"Foo Bar\"")), ""),
837 ("\"Foo \n Bar\"", Ok((0..5, "\"Foo ")), "\n Bar\""),
839 ("\"Foo \\n Bar\"", Ok((0..12, "\"Foo \\n Bar\"")), ""),
840 ("\"Foo \\\" Bar\"", Ok((0..12, "\"Foo \\\" Bar\"")), ""),
842 ("\"Foo \\\n Bar\"", Ok((0..12, "\"Foo \\\n Bar\"")), ""),
843 ("\"\" ", Ok((0..2, "\"\"")), " "),
845 ("\"\"\t", Ok((0..2, "\"\"")), "\t"),
846 ("\"\"\n", Ok((0..2, "\"\"")), "\n"),
847 ("\"\"\r\n", Ok((0..2, "\"\"")), "\r\n"),
848 ("", Err((0..0, "")), ""),
850 (" \"\"", Err((0..0, "")), " \"\""),
852 ("\t\"\"", Err((0..0, "")), "\t\"\""),
853 ("\n\"\"", Err((0..0, "")), "\n\"\""),
854 ("\"", Ok((0..1, "\"")), ""),
856 ("\" ", Ok((0..2, "\" ")), ""),
857 ("\"\n", Ok((0..1, "\"")), "\n"),
858 ("\"Foo\n", Ok((0..4, "\"Foo")), "\n"),
859 ("\"Foo\nBar\"", Ok((0..4, "\"Foo")), "\nBar\""),
860 ("r\"\"", Ok((0..3, "r\"\"")), ""),
862 ("u\"\"", Ok((0..3, "u\"\"")), ""),
863 ("R\"\"", Ok((0..3, "R\"\"")), ""),
864 ("U\"\"", Ok((0..3, "U\"\"")), ""),
865 ("f\"\"", Ok((0..3, "f\"\"")), ""),
866 ("F\"\"", Ok((0..3, "F\"\"")), ""),
867 ("fr\"\"", Ok((0..4, "fr\"\"")), ""),
868 ("Fr\"\"", Ok((0..4, "Fr\"\"")), ""),
869 ("fR\"\"", Ok((0..4, "fR\"\"")), ""),
870 ("FR\"\"", Ok((0..4, "FR\"\"")), ""),
871 ("rf\"\"", Ok((0..4, "rf\"\"")), ""),
872 ("rF\"\"", Ok((0..4, "rF\"\"")), ""),
873 ("Rf\"\"", Ok((0..4, "Rf\"\"")), ""),
874 ("RF\"\"", Ok((0..4, "RF\"\"")), ""),
875 ];
876
877 for (text, expected, remaining) in cases {
878 let mut scanner = Scanner::new(text);
879
880 let actual = scanner.scan_python_short_string();
881 assert_eq!(actual, expected);
882
883 assert_eq!(scanner.remaining_text(), remaining);
884 }
885 }
886
887 #[test]
888 fn test_python_short_string_single_quote() {
889 let cases = [
890 ("''", Ok((0..2, "''")), ""),
892 ("' '", Ok((0..3, "' '")), ""),
893 ("'Foo Bar'", Ok((0..9, "'Foo Bar'")), ""),
894 ("'Foo \n Bar'", Ok((0..5, "'Foo ")), "\n Bar'"),
896 ("'Foo \\n Bar'", Ok((0..12, "'Foo \\n Bar'")), ""),
897 ("'Foo \\' Bar'", Ok((0..12, "'Foo \\' Bar'")), ""),
899 ("'Foo \\\n Bar'", Ok((0..12, "'Foo \\\n Bar'")), ""),
900 ("'' ", Ok((0..2, "''")), " "),
902 ("''\t", Ok((0..2, "''")), "\t"),
903 ("''\n", Ok((0..2, "''")), "\n"),
904 ("''\r\n", Ok((0..2, "''")), "\r\n"),
905 ("", Err((0..0, "")), ""),
907 (" ''", Err((0..0, "")), " ''"),
909 ("\t''", Err((0..0, "")), "\t''"),
910 ("\n''", Err((0..0, "")), "\n''"),
911 ("'", Ok((0..1, "'")), ""),
913 ("' ", Ok((0..2, "' ")), ""),
914 ("'\n", Ok((0..1, "'")), "\n"),
915 ("'Foo\n", Ok((0..4, "'Foo")), "\n"),
916 ("'Foo\nBar'", Ok((0..4, "'Foo")), "\nBar'"),
917 ("r''", Ok((0..3, "r''")), ""),
919 ("u''", Ok((0..3, "u''")), ""),
920 ("R''", Ok((0..3, "R''")), ""),
921 ("U''", Ok((0..3, "U''")), ""),
922 ("f''", Ok((0..3, "f''")), ""),
923 ("F''", Ok((0..3, "F''")), ""),
924 ("fr''", Ok((0..4, "fr''")), ""),
925 ("Fr''", Ok((0..4, "Fr''")), ""),
926 ("fR''", Ok((0..4, "fR''")), ""),
927 ("FR''", Ok((0..4, "FR''")), ""),
928 ("rf''", Ok((0..4, "rf''")), ""),
929 ("rF''", Ok((0..4, "rF''")), ""),
930 ("Rf''", Ok((0..4, "Rf''")), ""),
931 ("RF''", Ok((0..4, "RF''")), ""),
932 ];
933
934 for (text, expected, remaining) in cases {
935 let mut scanner = Scanner::new(text);
936
937 let actual = scanner.scan_python_short_string();
938 assert_eq!(actual, expected);
939
940 assert_eq!(scanner.remaining_text(), remaining);
941 }
942 }
943
944 #[test]
945 fn test_python_long_string_double_quote() {
946 #[rustfmt::skip]
947 let cases = [
948 ("\"\"\"\"\"\"", Ok((0..6, "\"\"\"\"\"\"")), ""),
950 ("\"\"\" \"\"\"", Ok((0..7, "\"\"\" \"\"\"")), ""),
951 ("\"\"\"Foo Bar\"\"\"", Ok((0..13, "\"\"\"Foo Bar\"\"\"")), ""),
952 ("\"\"\"Foo\nBar\"\"\"", Ok((0..13, "\"\"\"Foo\nBar\"\"\"")), ""),
954 ("\"\"\" \" \"\" \"\"\"", Ok((0..12, "\"\"\" \" \"\" \"\"\"")), ""),
956 ("\"\"\"\\\"\"\"Foo\"\"\"", Ok((0..13, "\"\"\"\\\"\"\"Foo\"\"\"")), ""),
957 ("\"\"\"\"Foo\"\"\"\"", Ok((0..10, "\"\"\"\"Foo\"\"\"")), "\""),
958 ("\"\"\"Foo'''\"\"\"\"", Ok((0..12, "\"\"\"Foo'''\"\"\"")), "\""),
960 ("\"\"\"Foo\"\"", Ok((0..8, "\"\"\"Foo\"\"")), ""),
962 ("\"\"\"Foo\n\"\"", Ok((0..9, "\"\"\"Foo\n\"\"")), ""),
963 ("r\"\"\"\"\"\"", Ok((0..7, "r\"\"\"\"\"\"")), ""),
965 ("u\"\"\"\"\"\"", Ok((0..7, "u\"\"\"\"\"\"")), ""),
966 ("R\"\"\"\"\"\"", Ok((0..7, "R\"\"\"\"\"\"")), ""),
967 ("U\"\"\"\"\"\"", Ok((0..7, "U\"\"\"\"\"\"")), ""),
968 ("f\"\"\"\"\"\"", Ok((0..7, "f\"\"\"\"\"\"")), ""),
969 ("F\"\"\"\"\"\"", Ok((0..7, "F\"\"\"\"\"\"")), ""),
970 ("fr\"\"\"\"\"\"", Ok((0..8, "fr\"\"\"\"\"\"")), ""),
971 ("Fr\"\"\"\"\"\"", Ok((0..8, "Fr\"\"\"\"\"\"")), ""),
972 ("fR\"\"\"\"\"\"", Ok((0..8, "fR\"\"\"\"\"\"")), ""),
973 ("FR\"\"\"\"\"\"", Ok((0..8, "FR\"\"\"\"\"\"")), ""),
974 ("rf\"\"\"\"\"\"", Ok((0..8, "rf\"\"\"\"\"\"")), ""),
975 ("rF\"\"\"\"\"\"", Ok((0..8, "rF\"\"\"\"\"\"")), ""),
976 ("Rf\"\"\"\"\"\"", Ok((0..8, "Rf\"\"\"\"\"\"")), ""),
977 ("RF\"\"\"\"\"\"", Ok((0..8, "RF\"\"\"\"\"\"")), ""),
978 ];
979
980 for (text, expected, remaining) in cases {
981 let mut scanner = Scanner::new(text);
982
983 let actual = scanner.scan_python_long_string();
984 assert_eq!(actual, expected);
985
986 assert_eq!(scanner.remaining_text(), remaining);
987 }
988 }
989
990 #[test]
991 fn test_python_long_string_single_quote() {
992 let cases = [
993 ("''''''", Ok((0..6, "''''''")), ""),
995 ("''' '''", Ok((0..7, "''' '''")), ""),
996 ("'''Foo Bar'''", Ok((0..13, "'''Foo Bar'''")), ""),
997 ("'''Foo\nBar'''", Ok((0..13, "'''Foo\nBar'''")), ""),
999 ("''' ' '' '''", Ok((0..12, "''' ' '' '''")), ""),
1001 ("'''\\'''Foo'''", Ok((0..13, "'''\\'''Foo'''")), ""),
1002 ("''''Foo''''", Ok((0..10, "''''Foo'''")), "'"),
1003 ("'''Foo\"\"\"''''", Ok((0..12, "'''Foo\"\"\"'''")), "'"),
1005 ("'''Foo''", Ok((0..8, "'''Foo''")), ""),
1007 ("'''Foo\n''", Ok((0..9, "'''Foo\n''")), ""),
1008 ("r''''''", Ok((0..7, "r''''''")), ""),
1010 ("u''''''", Ok((0..7, "u''''''")), ""),
1011 ("R''''''", Ok((0..7, "R''''''")), ""),
1012 ("U''''''", Ok((0..7, "U''''''")), ""),
1013 ("f''''''", Ok((0..7, "f''''''")), ""),
1014 ("F''''''", Ok((0..7, "F''''''")), ""),
1015 ("fr''''''", Ok((0..8, "fr''''''")), ""),
1016 ("Fr''''''", Ok((0..8, "Fr''''''")), ""),
1017 ("fR''''''", Ok((0..8, "fR''''''")), ""),
1018 ("FR''''''", Ok((0..8, "FR''''''")), ""),
1019 ("rf''''''", Ok((0..8, "rf''''''")), ""),
1020 ("rF''''''", Ok((0..8, "rF''''''")), ""),
1021 ("Rf''''''", Ok((0..8, "Rf''''''")), ""),
1022 ("RF''''''", Ok((0..8, "RF''''''")), ""),
1023 ];
1024
1025 for (text, expected, remaining) in cases {
1026 let mut scanner = Scanner::new(text);
1027
1028 let actual = scanner.scan_python_long_string();
1029 assert_eq!(actual, expected);
1030
1031 assert_eq!(scanner.remaining_text(), remaining);
1032 }
1033 }
1034}