text_scanner/ext/
rust.rs

1use crate::{CharExt, Scanner, ScannerResult};
2
3/// [`Scanner`] extension for scanning Rust tokens.
4///
5/// **Note:** When using the `scan_rust_*()` methods, the order they are
6/// called matters.
7pub trait RustScannerExt<'text>: crate::private::Sealed {
8    /// Scans a single [Rust line comment].
9    ///
10    /// **Note:** This has the same lifetime as the original `text`,
11    /// so the scanner can continue to be used while this exists.
12    ///
13    /// # Example
14    ///
15    /// ```rust
16    /// use text_scanner::{ext::RustScannerExt, Scanner};
17    ///
18    /// let text = r#"
19    ///   // Line Comment
20    ///   //! Inner Doc Comment
21    ///   /// Outer Doc Comment
22    /// "#;
23    ///
24    /// let comments = [
25    ///     (3..18,  "// Line Comment"),
26    ///     (21..42, "//! Inner Doc Comment"),
27    ///     (45..66, "/// Outer Doc Comment"),
28    /// ];
29    ///
30    /// let mut scanner = Scanner::new(text);
31    /// for comment in comments {
32    ///     scanner.skip_whitespace();
33    ///     assert_eq!(scanner.scan_rust_line_comment(), Ok(comment));
34    /// }
35    ///
36    /// # scanner.skip_whitespace();
37    /// # assert_eq!(scanner.remaining_text(), "");
38    /// ```
39    ///
40    /// [Rust line comment]: https://doc.rust-lang.org/reference/comments.html
41    fn scan_rust_line_comment(&mut self) -> ScannerResult<'text, &'text str>;
42
43    /// Scans a single [Rust block comment].
44    ///
45    /// **Note:** Rust block comment **allow** nested block comments.
46    ///
47    /// **Note:** This has the same lifetime as the original `text`,
48    /// so the scanner can continue to be used while this exists.
49    ///
50    /// # Example
51    ///
52    /// ```rust
53    /// use text_scanner::{ext::RustScannerExt, Scanner};
54    ///
55    /// let text = r#"
56    ///   /* Block Comment */
57    ///
58    ///   /* Multi
59    ///   // Line
60    ///      Block
61    ///      Comment */
62    ///
63    ///   /* Multi
64    ///   // Line /*
65    ///      Nested
66    ///   /* Block */
67    ///      Comment */ */
68    ///
69    ///   /* Unterminated Block Comment
70    /// "#;
71    ///
72    /// let comments = [
73    ///     (3..22,    "/* Block Comment */"),
74    ///     (26..71,   "/* Multi\n  // Line\n     Block\n     Comment */"),
75    ///     (75..141,  "/* Multi\n  // Line /*\n     Nested\n  /* Block */\n     Comment */ */"),
76    ///     (145..175, "/* Unterminated Block Comment\n"),
77    /// ];
78    ///
79    /// let mut scanner = Scanner::new(text);
80    /// for comment in comments {
81    ///     scanner.skip_whitespace();
82    ///     assert_eq!(scanner.scan_rust_block_comment(), Ok(comment));
83    /// }
84    ///
85    /// # scanner.skip_whitespace();
86    /// # assert_eq!(scanner.remaining_text(), "");
87    /// ```
88    ///
89    /// [Rust block comment]: https://doc.rust-lang.org/reference/comments.html
90    fn scan_rust_block_comment(&mut self) -> ScannerResult<'text, &'text str>;
91
92    /// Scans a single [Rust identifier].
93    ///
94    /// **Note:** This **does not** differentiate between [Rust identifier]s
95    /// and [Rust keyword]s. If needed manually check if the returned `Ok` string slice
96    /// is a [Rust keyword] or not.
97    ///
98    /// **Note:** This has the same lifetime as the original `text`,
99    /// so the scanner can continue to be used while this exists.
100    ///
101    /// # Example
102    ///
103    /// ```rust
104    /// use text_scanner::{ext::RustScannerExt, Scanner};
105    ///
106    /// let text = r#"
107    ///   foo
108    ///   foo_bar
109    ///   _foo_
110    ///   æøå
111    ///   ľúbiť
112    ///   東京
113    /// "#;
114    ///
115    /// let idents = [
116    ///     (3..6,   "foo"),
117    ///     (9..16,  "foo_bar"),
118    ///     (19..24, "_foo_"),
119    ///     (27..33, "æøå"),
120    ///     (36..44, "ľúbiť"),
121    ///     (47..53, "東京"),
122    /// ];
123    ///
124    /// let mut scanner = Scanner::new(text);
125    /// for ident in idents {
126    ///     scanner.skip_whitespace();
127    ///     assert_eq!(scanner.scan_rust_identifier(), Ok(ident));
128    /// }
129    ///
130    /// # scanner.skip_whitespace();
131    /// # assert_eq!(scanner.remaining_text(), "");
132    /// ```
133    ///
134    /// [Rust identifier]: https://doc.rust-lang.org/reference/identifiers.html
135    /// [Rust keyword]: https://doc.rust-lang.org/reference/keywords.html
136    fn scan_rust_identifier(&mut self) -> ScannerResult<'text, &'text str>;
137
138    /// Scans a single [raw Rust identifier].
139    ///
140    /// **Note:** This **does not** differentiate between [Rust identifier]s
141    /// and [Rust keyword]s. If needed manually check if the returned `Ok` string slice
142    /// is a [Rust keyword] or not.
143    ///
144    /// **Note:** This has the same lifetime as the original `text`,
145    /// so the scanner can continue to be used while this exists.
146    ///
147    /// # Example
148    ///
149    /// ```rust
150    /// use text_scanner::{ext::RustScannerExt, Scanner};
151    ///
152    /// let text = r#"
153    ///   r#foo
154    ///   r#type
155    ///   r#while
156    ///   r#æøå
157    ///   r#ľúbiť
158    ///   r#東京
159    /// "#;
160    ///
161    /// let idents = [
162    ///     (3..8,   "r#foo"),
163    ///     (11..17, "r#type"),
164    ///     (20..27, "r#while"),
165    ///     (30..38, "r#æøå"),
166    ///     (41..51, "r#ľúbiť"),
167    ///     (54..62, "r#東京"),
168    /// ];
169    ///
170    /// let mut scanner = Scanner::new(text);
171    /// for ident in idents {
172    ///     scanner.skip_whitespace();
173    ///     assert_eq!(scanner.scan_rust_raw_identifier(), Ok(ident));
174    /// }
175    ///
176    /// # scanner.skip_whitespace();
177    /// # assert_eq!(scanner.remaining_text(), "");
178    /// ```
179    ///
180    /// [raw Rust identifier]: https://doc.rust-lang.org/reference/identifiers.html
181    /// [Rust identifier]: https://doc.rust-lang.org/reference/identifiers.html
182    /// [Rust keyword]: https://doc.rust-lang.org/reference/keywords.html
183    fn scan_rust_raw_identifier(&mut self) -> ScannerResult<'text, &'text str>;
184
185    /// Scans a single [Rust character].
186    ///
187    /// **Note:** This has the same lifetime as the original `text`,
188    /// so the scanner can continue to be used while this exists.
189    ///
190    /// # Example
191    ///
192    /// ```rust
193    /// use text_scanner::{ext::RustScannerExt, Scanner};
194    ///
195    /// let text = r#"
196    ///   'A'
197    ///   'Æ'
198    ///   'Á'
199    ///   '東'
200    ///   '🦀'
201    ///
202    ///   '"'
203    ///   '\\'
204    ///   '\''
205    ///   '\n'
206    ///   '\0'
207    /// "#;
208    ///
209    /// let chars = [
210    ///     (3..6,     "'A'"),
211    ///     (9..13,    "'Æ'"),
212    ///     (16..20,   "'Á'"),
213    ///     (23..28,   "'東'"),
214    ///     (31..37,   "'🦀'"),
215    ///     (41..44,   "'\"'"),
216    ///     (47..51,   "'\\\\'"),
217    ///     (54..58,   "'\\''"),
218    ///     (61..65, "'\\n'"),
219    ///     (68..72, "'\\0'"),
220    /// ];
221    ///
222    /// let mut scanner = Scanner::new(text);
223    /// for c in chars {
224    ///     scanner.skip_whitespace();
225    ///     assert_eq!(scanner.scan_rust_char(), Ok(c));
226    /// }
227    ///
228    /// # scanner.skip_whitespace();
229    /// # assert_eq!(scanner.remaining_text(), "");
230    /// ```
231    ///
232    /// [Rust character]: https://doc.rust-lang.org/reference/tokens.html#character-literals
233    fn scan_rust_char(&mut self) -> ScannerResult<'text, &'text str>;
234
235    /// Scans a single [Rust string].
236    ///
237    /// **Note:** This has the same lifetime as the original `text`,
238    /// so the scanner can continue to be used while this exists.
239    ///
240    /// # Example
241    ///
242    /// ```rust
243    /// use text_scanner::{ext::RustScannerExt, Scanner};
244    ///
245    /// let text = r#"
246    ///   "Hello World"
247    ///
248    ///   "Rust strings
249    ///    can span multiple
250    ///    lines"
251    ///
252    ///   "Foo \" Bar"
253    ///
254    ///   "Unterminated String
255    /// "#;
256    ///
257    /// let strings = [
258    ///     (3..16,   "\"Hello World\""),
259    ///     (20..64,  "\"Rust strings\n   can span multiple\n   lines\""),
260    ///     (68..80,  "\"Foo \\\" Bar\""),
261    ///     (84..105, "\"Unterminated String\n"),
262    /// ];
263    ///
264    /// let mut scanner = Scanner::new(text);
265    /// for string in strings {
266    ///     scanner.skip_whitespace();
267    ///     assert_eq!(scanner.scan_rust_string(), Ok(string));
268    /// }
269    ///
270    /// # scanner.skip_whitespace();
271    /// # assert_eq!(scanner.remaining_text(), "");
272    /// ```
273    ///
274    /// [Rust string]: https://doc.rust-lang.org/reference/tokens.html#string-literals
275    fn scan_rust_string(&mut self) -> ScannerResult<'text, &'text str>;
276
277    /// Scans a single [raw Rust string].
278    ///
279    /// **Note:** This has the same lifetime as the original `text`,
280    /// so the scanner can continue to be used while this exists.
281    ///
282    /// # Example
283    ///
284    /// ```rust
285    /// use text_scanner::{ext::RustScannerExt, Scanner};
286    ///
287    /// let text = r#####"
288    ///   r#"Hello World"#
289    ///
290    ///   r###"Raw Rust strings"
291    ///       "can span multiple"
292    ///       "lines"###
293    ///
294    ///   r##"Foo #"# Bar"##
295    ///
296    ///   r###"Unterminated String
297    /// "#####;
298    ///
299    /// let raw_strings = [
300    ///     (3..19,    "r#\"Hello World\"#"),
301    ///     (23..88,   "r###\"Raw Rust strings\"\n      \"can span multiple\"\n      \"lines\"###"),
302    ///     (92..110,  "r##\"Foo #\"# Bar\"##"),
303    ///     (114..139, "r###\"Unterminated String\n"),
304    /// ];
305    ///
306    /// let mut scanner = Scanner::new(text);
307    /// for raw_string in raw_strings {
308    ///     scanner.skip_whitespace();
309    ///     assert_eq!(scanner.scan_rust_raw_string(), Ok(raw_string));
310    /// }
311    ///
312    /// # scanner.skip_whitespace();
313    /// # assert_eq!(scanner.remaining_text(), "");
314    /// ```
315    ///
316    /// [raw Rust string]: https://doc.rust-lang.org/reference/tokens.html#string-literals
317    fn scan_rust_raw_string(&mut self) -> ScannerResult<'text, &'text str>;
318
319    /// Scans a single [Rust integer decimal literal].
320    ///
321    /// **Note:** Rust integer literals do not allow a sign in front
322    /// of the literal, i.e. `-10` is two tokens `["-", "10"]`.
323    ///
324    /// **Note:** This has the same lifetime as the original `text`,
325    /// so the scanner can continue to be used while this exists.
326    ///
327    /// # Example
328    ///
329    /// ```rust
330    /// use text_scanner::{ext::RustScannerExt, Scanner};
331    ///
332    /// let text = r#"
333    ///   0
334    ///   123
335    ///
336    ///   1_
337    ///   1__
338    ///   1_2_3
339    ///   1__2__3__
340    /// "#;
341    ///
342    /// let integers = [
343    ///     (3..4,   "0"),
344    ///     (7..10,  "123"),
345    ///     (14..16, "1_"),
346    ///     (19..22, "1__"),
347    ///     (25..30, "1_2_3"),
348    ///     (33..42, "1__2__3__"),
349    /// ];
350    ///
351    /// let mut scanner = Scanner::new(text);
352    /// for integer in integers {
353    ///     scanner.skip_whitespace();
354    ///     assert_eq!(scanner.scan_rust_int_dec(), Ok(integer));
355    /// }
356    ///
357    /// # scanner.skip_whitespace();
358    /// # assert_eq!(scanner.remaining_text(), "");
359    /// ```
360    ///
361    /// [Rust integer decimal literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
362    fn scan_rust_int_dec(&mut self) -> ScannerResult<'text, &'text str>;
363
364    /// Scans a single [Rust integer hex literal].
365    ///
366    /// **Note:** This has the same lifetime as the original `text`,
367    /// so the scanner can continue to be used while this exists.
368    ///
369    /// # Example
370    ///
371    /// ```rust
372    /// use text_scanner::{ext::RustScannerExt, Scanner};
373    ///
374    /// let text = r#"
375    ///   0x0
376    ///   0xFF
377    ///
378    ///   0x_FF_FF_FF_FF_
379    /// "#;
380    ///
381    /// let hex_integers = [
382    ///     (3..6,   "0x0"),
383    ///     (9..13,  "0xFF"),
384    ///     (17..32, "0x_FF_FF_FF_FF_"),
385    /// ];
386    ///
387    /// let mut scanner = Scanner::new(text);
388    /// for hex_integer in hex_integers {
389    ///     scanner.skip_whitespace();
390    ///     assert_eq!(scanner.scan_rust_int_hex(), Ok(hex_integer));
391    /// }
392    ///
393    /// # scanner.skip_whitespace();
394    /// # assert_eq!(scanner.remaining_text(), "");
395    /// ```
396    ///
397    /// [Rust integer hex literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
398    fn scan_rust_int_hex(&mut self) -> ScannerResult<'text, &'text str>;
399
400    /// Scans a single [Rust integer octal literal].
401    ///
402    /// **Note:** This has the same lifetime as the original `text`,
403    /// so the scanner can continue to be used while this exists.
404    ///
405    /// # Example
406    ///
407    /// ```rust
408    /// use text_scanner::{ext::RustScannerExt, Scanner};
409    ///
410    /// let text = r#"
411    ///   0o0
412    ///   0o100
413    ///
414    ///   0o_1_0_0_
415    /// "#;
416    ///
417    /// let oct_integers = [
418    ///     (3..6,   "0o0"),
419    ///     (9..14,  "0o100"),
420    ///     (18..27, "0o_1_0_0_"),
421    /// ];
422    ///
423    /// let mut scanner = Scanner::new(text);
424    /// for oct_integer in oct_integers {
425    ///     scanner.skip_whitespace();
426    ///     assert_eq!(scanner.scan_rust_int_oct(), Ok(oct_integer));
427    /// }
428    ///
429    /// # scanner.skip_whitespace();
430    /// # assert_eq!(scanner.remaining_text(), "");
431    /// ```
432    ///
433    /// [Rust integer octal literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
434    fn scan_rust_int_oct(&mut self) -> ScannerResult<'text, &'text str>;
435
436    /// Scans a single [Rust integer binary literal].
437    ///
438    /// **Note:** This has the same lifetime as the original `text`,
439    /// so the scanner can continue to be used while this exists.
440    ///
441    /// # Example
442    ///
443    /// ```rust
444    /// use text_scanner::{ext::RustScannerExt, Scanner};
445    ///
446    /// let text = r#"
447    ///   0b0
448    ///   0b1
449    ///   0b10
450    ///   0b11
451    ///   0b100
452    ///
453    ///   0b_1_0_0_
454    /// "#;
455    ///
456    /// let bin_integers = [
457    ///     (3..6,   "0b0"),
458    ///     (9..12,  "0b1"),
459    ///     (15..19, "0b10"),
460    ///     (22..26, "0b11"),
461    ///     (29..34, "0b100"),
462    ///     (38..47, "0b_1_0_0_"),
463    /// ];
464    ///
465    /// let mut scanner = Scanner::new(text);
466    /// for bin_integer in bin_integers {
467    ///     scanner.skip_whitespace();
468    ///     assert_eq!(scanner.scan_rust_int_bin(), Ok(bin_integer));
469    /// }
470    ///
471    /// # scanner.skip_whitespace();
472    /// # assert_eq!(scanner.remaining_text(), "");
473    /// ```
474    ///
475    /// [Rust integer binary literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
476    fn scan_rust_int_bin(&mut self) -> ScannerResult<'text, &'text str>;
477
478    /// Scans a single [Rust floating-point literal].
479    ///
480    /// **Note:** This has the same lifetime as the original `text`,
481    /// so the scanner can continue to be used while this exists.
482    ///
483    /// # Example
484    ///
485    /// ```rust
486    /// use text_scanner::{ext::RustScannerExt, Scanner};
487    ///
488    /// let text = r#"
489    ///   12.
490    ///   12.34
491    ///
492    ///   12.
493    ///   12.34
494    ///
495    ///   12.34E56
496    ///   12.34E+56
497    ///   12.34E-56
498    ///
499    ///   1_2_.
500    ///   1_2_.3_4_
501    ///
502    ///   1_2_.3_4_E_5_6_
503    ///   1_2_.3_4_E+_5_6_
504    ///   1_2_.3_4_E-_5_6_
505    /// "#;
506    ///
507    /// let floats = [
508    ///     (3..6,     "12."),
509    ///     (9..14,    "12.34"),
510    ///     (18..21,   "12."),
511    ///     (24..29,   "12.34"),
512    ///     (33..41,   "12.34E56"),
513    ///     (44..53,   "12.34E+56"),
514    ///     (56..65,   "12.34E-56"),
515    ///     (69..74,   "1_2_."),
516    ///     (77..86,   "1_2_.3_4_"),
517    ///     (90..105,  "1_2_.3_4_E_5_6_"),
518    ///     (108..124, "1_2_.3_4_E+_5_6_"),
519    ///     (127..143, "1_2_.3_4_E-_5_6_"),
520    /// ];
521    ///
522    /// let mut scanner = Scanner::new(text);
523    /// for float in floats {
524    ///     scanner.skip_whitespace();
525    ///     assert_eq!(scanner.scan_rust_float(), Ok(float));
526    /// }
527    ///
528    /// # scanner.skip_whitespace();
529    /// # assert_eq!(scanner.remaining_text(), "");
530    /// ```
531    ///
532    /// [Rust floating-point literal]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
533    fn scan_rust_float(&mut self) -> ScannerResult<'text, &'text str>;
534}
535
536impl<'text> RustScannerExt<'text> for Scanner<'text> {
537    // Reference: https://doc.rust-lang.org/reference/comments.html
538    fn scan_rust_line_comment(&mut self) -> ScannerResult<'text, &'text str> {
539        self.scan_with(|scanner| {
540            scanner.accept_str("//")?;
541            scanner.skip_until_char_any(&['\n', '\r']);
542            Ok(())
543        })
544    }
545
546    // Reference: https://doc.rust-lang.org/reference/comments.html
547    fn scan_rust_block_comment(&mut self) -> ScannerResult<'text, &'text str> {
548        self.scan_with(|scanner| {
549            scanner.accept_str("/*")?;
550            let mut open = 1;
551            loop {
552                scanner.skip_until_char_any(&['*', '/']);
553
554                match scanner.next() {
555                    Ok((_r, '*')) => {
556                        if let Ok((_r, '/')) = scanner.next() {
557                            if open == 1 {
558                                break;
559                            }
560                            open -= 1;
561                        }
562                    }
563                    Ok((_r, '/')) => {
564                        if let Ok((_r, '*')) = scanner.next() {
565                            open += 1;
566                        }
567                    }
568                    Ok((_r, _c)) => {}
569                    Err(_) => break,
570                }
571            }
572            Ok(())
573        })
574    }
575
576    // Reference: https://doc.rust-lang.org/reference/identifiers.html
577    fn scan_rust_identifier(&mut self) -> ScannerResult<'text, &'text str> {
578        self.scan_with(|scanner| {
579            scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
580            scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
581            Ok(())
582        })
583    }
584
585    // Reference: https://doc.rust-lang.org/reference/identifiers.html
586    fn scan_rust_raw_identifier(&mut self) -> ScannerResult<'text, &'text str> {
587        self.scan_with(|scanner| {
588            scanner.accept_str("r#")?;
589            scanner.scan_rust_identifier()?;
590            Ok(())
591        })
592    }
593
594    // Reference: https://doc.rust-lang.org/reference/tokens.html#character-literals
595    fn scan_rust_char(&mut self) -> ScannerResult<'text, &'text str> {
596        self.scan_with(|scanner| {
597            scanner.accept_char('\'')?;
598
599            let (_r, c) = scanner.next()?;
600            if c == '\\' {
601                // Skip the next character as it is escaped
602                // Note: Technically any character is not valid
603                _ = scanner.next();
604            }
605
606            scanner.accept_char('\'')?;
607            Ok(())
608        })
609    }
610
611    // Reference: https://doc.rust-lang.org/reference/tokens.html#string-literals
612    fn scan_rust_string(&mut self) -> ScannerResult<'text, &'text str> {
613        self.scan_with(|scanner| {
614            scanner.accept_char('"')?;
615
616            loop {
617                scanner.skip_until_char_any(&['"', '\\']);
618                match scanner.next() {
619                    Ok((_r, '"')) => break,
620                    Ok((_r, '\\')) => {
621                        // Skip the next character as it is escaped
622                        // Note: Technically any character is not valid
623                        _ = scanner.next();
624                    }
625                    Ok(_) => unreachable!(),
626                    Err(_) => break,
627                }
628            }
629
630            Ok(())
631        })
632    }
633
634    // Reference: https://doc.rust-lang.org/reference/tokens.html#raw-string-literals
635    fn scan_rust_raw_string(&mut self) -> ScannerResult<'text, &'text str> {
636        self.scan_with(|scanner| {
637            scanner.accept_char('r')?;
638            let hashes = scanner.skip_while_char('#').0.len();
639            scanner.accept_char('"')?;
640
641            'scan: loop {
642                scanner.skip_until_char('"');
643
644                if scanner.next().is_err() {
645                    break;
646                }
647
648                if hashes > 0 {
649                    for _ in 0..hashes {
650                        if scanner.accept_char('#').is_err() {
651                            continue 'scan;
652                        }
653                    }
654
655                    break;
656                } else {
657                    break;
658                }
659            }
660
661            Ok(())
662        })
663    }
664
665    // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
666    fn scan_rust_int_dec(&mut self) -> ScannerResult<'text, &'text str> {
667        self.scan_with(|scanner| {
668            scanner.accept_if_ext(char::is_ascii_digit)?;
669            scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
670            Ok(())
671        })
672    }
673
674    // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
675    fn scan_rust_int_hex(&mut self) -> ScannerResult<'text, &'text str> {
676        self.scan_with(|scanner| {
677            scanner.accept_str("0x")?;
678
679            scanner.skip_while_char('_');
680            scanner.accept_if_ext(char::is_ascii_hexdigit)?;
681
682            scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
683
684            Ok(())
685        })
686    }
687
688    // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
689    fn scan_rust_int_oct(&mut self) -> ScannerResult<'text, &'text str> {
690        self.scan_with(|scanner| {
691            scanner.accept_str("0o")?;
692
693            scanner.skip_while_char('_');
694            scanner.accept_if(CharExt::is_ascii_octdigit)?;
695
696            scanner.skip_while(|c| CharExt::is_ascii_octdigit(c) || (c == '_'));
697
698            Ok(())
699        })
700    }
701
702    // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
703    fn scan_rust_int_bin(&mut self) -> ScannerResult<'text, &'text str> {
704        self.scan_with(|scanner| {
705            scanner.accept_str("0b")?;
706
707            scanner.skip_while_char('_');
708            scanner.accept_if(CharExt::is_ascii_bindigit)?;
709
710            scanner.skip_while(|c| c.is_ascii_bindigit() || (c == '_'));
711
712            Ok(())
713        })
714    }
715
716    // Reference: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
717    fn scan_rust_float(&mut self) -> ScannerResult<'text, &'text str> {
718        self.scan_with(|scanner| {
719            scanner.scan_rust_int_dec()?;
720            scanner.accept_char('.')?;
721
722            if scanner.scan_rust_int_dec().is_ok() && scanner.accept_char_any(&['e', 'E']).is_ok() {
723                _ = scanner.accept_char_any(&['+', '-']);
724
725                scanner.skip_while_char('_');
726                scanner.accept_if_ext(char::is_ascii_digit)?;
727                scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
728            }
729
730            Ok(())
731        })
732    }
733}
734
#[cfg(test)]
mod tests {
    use super::*;

    /// Scans consecutive line comments, checking both the byte range of
    /// the whitespace skipped before each comment and the comment itself.
    #[test]
    fn test_line_comments() {
        let code = "
            // Line Comment
            // Line Comment\r
            //! Inner Line Doc Comment
            /// Outer Line Doc Comment
            //
            //\t
            ///
        ";

        // (skipped whitespace, comment range, comment)
        let expected = [
            (0..13, 13..28, "// Line Comment"),
            (28..41, 41..56, "// Line Comment"),
            (56..70, 70..96, "//! Inner Line Doc Comment"),
            (96..109, 109..135, "/// Outer Line Doc Comment"),
            (135..148, 148..150, "//"),
            (150..163, 163..166, "//\t"),
            (166..179, 179..182, "///"),
        ];

        let mut scanner = Scanner::new(code);
        for (whitespace, range, comment) in expected {
            assert_eq!(scanner.skip_whitespace().0, whitespace);
            assert_eq!(scanner.scan_rust_line_comment(), Ok((range, comment)));
        }

        assert_eq!(scanner.skip_whitespace().0, 182..191);
        assert_eq!(scanner.remaining_text(), "");
    }

    /// Scans consecutive block comments (single line, multiline, nested,
    /// adjacent and unclosed), checking both the byte range of the
    /// whitespace skipped before each comment and the comment itself.
    #[test]
    fn test_block_comments() {
        let code = "
            /* Single Line Block Comment */
            /* Two Line
            Block Comment */

            /*

            Multiline
            Block
            Comment

            */

            /*

            /* Nested
            // /* Block */
            Comment */

            */

            /**/
            /*
            */
            /**//*
            *//**/

            /* Unclosed Block Comment
        ";

        // (skipped whitespace, comment range, comment)
        let expected = [
            (0..13, 13..44, "/* Single Line Block Comment */"),
            (44..57, 57..97, "/* Two Line\n            Block Comment */"),
            (
                97..111,
                111..190,
                "/*\n\n            Multiline\n            Block\n            Comment\n\n            */",
            ),
            (
                190..204,
                204..295,
                "/*\n\n            /* Nested\n            // /* Block */\n            Comment */\n\n            */",
            ),
            (295..309, 309..313, "/**/"),
            (313..326, 326..343, "/*\n            */"),
            (343..356, 356..360, "/**/"),
            // Adjacent block comments, i.e. no whitespace in between
            (360..360, 360..377, "/*\n            */"),
            (377..377, 377..381, "/**/"),
            (381..395, 395..429, "/* Unclosed Block Comment\n        "),
        ];

        let mut scanner = Scanner::new(code);
        for (whitespace, range, comment) in expected {
            assert_eq!(scanner.skip_whitespace().0, whitespace);
            assert_eq!(scanner.scan_rust_block_comment(), Ok((range, comment)));
        }

        // The unclosed block comment consumed the remaining text
        assert_eq!(scanner.skip_whitespace().0, 429..429);
        assert_eq!(scanner.remaining_text(), "");
    }

    /// Scans an identifier at the start of each input and checks
    /// the text left in the scanner.
    #[test]
    fn test_identifiers() {
        // (input, scanned identifier, text left in the scanner)
        let cases = [
            ("_", Some("_"), ""),
            ("x", Some("x"), ""),
            ("foo", Some("foo"), ""),
            ("_bar", Some("_bar"), ""),
            ("foo_bar_baz__", Some("foo_bar_baz__"), ""),
            ("foo-bar", Some("foo"), "-bar"),
            ("2foo", None, "2foo"),
            ("+foo", None, "+foo"),
        ];

        for (input, want, rest) in cases {
            let mut scanner = Scanner::new(input);

            let got = scanner
                .scan_rust_identifier()
                .ok()
                .map(|(_range, ident)| ident);

            assert_eq!(got, want);
            assert_eq!(scanner.remaining_text(), rest);
        }
    }

    /// Scans a raw identifier at the start of each input and checks
    /// the text left in the scanner.
    #[test]
    fn test_raw_identifiers() {
        // (input, scanned raw identifier, text left in the scanner)
        let cases = [
            ("r#x", Some("r#x"), ""),
            ("r#foo", Some("r#foo"), ""),
            ("r#_foo", Some("r#_foo"), ""),
            ("r#foo_bar_baz__", Some("r#foo_bar_baz__"), ""),
            ("r#type", Some("r#type"), ""),
            ("r#struct", Some("r#struct"), ""),
            // Warning: Technically `r#_` is not allowed in Rust. However, only
            // the format of raw identifiers is scanned, their validity is
            // not verified
            ("r#_", Some("r#_"), ""),
            ("r", None, "r"),
            ("r#", None, "r#"),
            ("r#2", None, "r#2"),
            ("r#2foo", None, "r#2foo"),
        ];

        for (input, want, rest) in cases {
            let mut scanner = Scanner::new(input);

            let got = scanner
                .scan_rust_raw_identifier()
                .ok()
                .map(|(_range, ident)| ident);

            assert_eq!(got, want);
            assert_eq!(scanner.remaining_text(), rest);
        }
    }

    /// Scans a string at the start of each input and checks
    /// the text left in the scanner.
    #[test]
    fn test_strings() {
        // (input, scanned string, text left in the scanner)
        let cases = [
            ("\"\"", Some("\"\""), ""),
            ("\"Hello World\"", Some("\"Hello World\""), ""),
            ("\"Hello\nWorld\"", Some("\"Hello\nWorld\""), ""),
            ("\"Hello\\nWorld\"", Some("\"Hello\\nWorld\""), ""),
            (r#""Hello \" World""#, Some(r#""Hello \" World""#), ""),
            (r#""Hello \\\" World""#, Some(r#""Hello \\\" World""#), ""),
            ("\"No Closing Quote", Some("\"No Closing Quote"), ""),
            (r#""Hello \\" World""#, Some(r#""Hello \\""#), " World\""),
        ];

        for (input, want, rest) in cases {
            let mut scanner = Scanner::new(input);

            let got = scanner.scan_rust_string().ok().map(|(_range, s)| s);

            assert_eq!(got, want);
            assert_eq!(scanner.remaining_text(), rest);
        }
    }

    /// Scans a raw string at the start of each input and checks
    /// the text left in the scanner.
    #[test]
    fn test_raw_strings() {
        // (input, scanned raw string, text left in the scanner)
        let cases = [
            ("r\"\"", Some("r\"\""), ""),
            ("r#\"\"#", Some("r#\"\"#"), ""),
            ("r#\"\n\"\n\"\"#", Some("r#\"\n\"\n\"\"#"), ""),
            ("r#\"Hello \" World\"#", Some("r#\"Hello \" World\"#"), ""),
            (
                "r#####\"Foo #\"# Bar ####\"#### Baz\"#####",
                Some("r#####\"Foo #\"# Bar ####\"#### Baz\"#####"),
                "",
            ),
            (
                "r###\"Foo \"## Bar\" Baz",
                Some("r###\"Foo \"## Bar\" Baz"),
                "",
            ),
            ("r##\"\"#", Some("r##\"\"#"), ""),
            ("r#\"\"##", Some("r#\"\"#"), "#"),
            ("r\"Hello \" World\"", Some("r\"Hello \""), " World\""),
        ];

        for (input, want, rest) in cases {
            let mut scanner = Scanner::new(input);

            let got = scanner.scan_rust_raw_string().ok().map(|(_range, s)| s);

            assert_eq!(got, want);
            assert_eq!(scanner.remaining_text(), rest);
        }
    }
}