text_scanner/ext/rust.rs
1use crate::{CharExt, Scanner, ScannerResult};
2
3/// [`Scanner`] extension for scanning Rust tokens.
4///
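/// **Note:** When using the `scan_rust_*()` methods, the order in which they
/// are called matters, as several token kinds share a common prefix. For
/// example, `r#foo` must be tried with `scan_rust_raw_identifier()` before
/// `scan_rust_identifier()`, since the latter would only scan the leading `r`.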
7pub trait RustScannerExt<'text>: crate::private::Sealed {
8 /// Scans a single [Rust line comment].
9 ///
10 /// **Note:** This has the same lifetime as the original `text`,
11 /// so the scanner can continue to be used while this exists.
12 ///
13 /// # Example
14 ///
15 /// ```rust
16 /// use text_scanner::{ext::RustScannerExt, Scanner};
17 ///
18 /// let text = r#"
19 /// // Line Comment
20 /// //! Inner Doc Comment
21 /// /// Outer Doc Comment
22 /// "#;
23 ///
24 /// let comments = [
25 /// (3..18, "// Line Comment"),
26 /// (21..42, "//! Inner Doc Comment"),
27 /// (45..66, "/// Outer Doc Comment"),
28 /// ];
29 ///
30 /// let mut scanner = Scanner::new(text);
31 /// for comment in comments {
32 /// scanner.skip_whitespace();
33 /// assert_eq!(scanner.scan_rust_line_comment(), Ok(comment));
34 /// }
35 ///
36 /// # scanner.skip_whitespace();
37 /// # assert_eq!(scanner.remaining_text(), "");
38 /// ```
39 ///
40 /// [Rust line comment]: https://doc.rust-lang.org/reference/comments.html
41 fn scan_rust_line_comment(&mut self) -> ScannerResult<'text, &'text str>;
42
43 /// Scans a single [Rust block comment].
44 ///
    /// **Note:** Rust block comments **allow** nested block comments.
46 ///
47 /// **Note:** This has the same lifetime as the original `text`,
48 /// so the scanner can continue to be used while this exists.
49 ///
50 /// # Example
51 ///
52 /// ```rust
53 /// use text_scanner::{ext::RustScannerExt, Scanner};
54 ///
55 /// let text = r#"
56 /// /* Block Comment */
57 ///
58 /// /* Multi
59 /// // Line
60 /// Block
61 /// Comment */
62 ///
63 /// /* Multi
64 /// // Line /*
65 /// Nested
66 /// /* Block */
67 /// Comment */ */
68 ///
69 /// /* Unterminated Block Comment
70 /// "#;
71 ///
72 /// let comments = [
73 /// (3..22, "/* Block Comment */"),
74 /// (26..71, "/* Multi\n // Line\n Block\n Comment */"),
75 /// (75..141, "/* Multi\n // Line /*\n Nested\n /* Block */\n Comment */ */"),
76 /// (145..175, "/* Unterminated Block Comment\n"),
77 /// ];
78 ///
79 /// let mut scanner = Scanner::new(text);
80 /// for comment in comments {
81 /// scanner.skip_whitespace();
82 /// assert_eq!(scanner.scan_rust_block_comment(), Ok(comment));
83 /// }
84 ///
85 /// # scanner.skip_whitespace();
86 /// # assert_eq!(scanner.remaining_text(), "");
87 /// ```
88 ///
89 /// [Rust block comment]: https://doc.rust-lang.org/reference/comments.html
90 fn scan_rust_block_comment(&mut self) -> ScannerResult<'text, &'text str>;
91
92 /// Scans a single [Rust identifier].
93 ///
    /// **Note:** This **does not** differentiate between [Rust identifier]s
    /// and [Rust keyword]s. If needed, manually check whether the returned `Ok`
    /// string slice is a [Rust keyword] or not.
97 ///
98 /// **Note:** This has the same lifetime as the original `text`,
99 /// so the scanner can continue to be used while this exists.
100 ///
101 /// # Example
102 ///
103 /// ```rust
104 /// use text_scanner::{ext::RustScannerExt, Scanner};
105 ///
106 /// let text = r#"
107 /// foo
108 /// foo_bar
109 /// _foo_
110 /// æøå
111 /// ľúbiť
112 /// 東京
113 /// "#;
114 ///
115 /// let idents = [
116 /// (3..6, "foo"),
117 /// (9..16, "foo_bar"),
118 /// (19..24, "_foo_"),
119 /// (27..33, "æøå"),
120 /// (36..44, "ľúbiť"),
121 /// (47..53, "東京"),
122 /// ];
123 ///
124 /// let mut scanner = Scanner::new(text);
125 /// for ident in idents {
126 /// scanner.skip_whitespace();
127 /// assert_eq!(scanner.scan_rust_identifier(), Ok(ident));
128 /// }
129 ///
130 /// # scanner.skip_whitespace();
131 /// # assert_eq!(scanner.remaining_text(), "");
132 /// ```
133 ///
134 /// [Rust identifier]: https://doc.rust-lang.org/reference/identifiers.html
135 /// [Rust keyword]: https://doc.rust-lang.org/reference/keywords.html
136 fn scan_rust_identifier(&mut self) -> ScannerResult<'text, &'text str>;
137
138 /// Scans a single [raw Rust identifier].
139 ///
    /// **Note:** This **does not** differentiate between [Rust identifier]s
    /// and [Rust keyword]s after the `r#` prefix, nor does it reject raw
    /// identifiers that Rust itself disallows, e.g. `r#_`. If needed, manually
    /// check the returned `Ok` string slice.
143 ///
144 /// **Note:** This has the same lifetime as the original `text`,
145 /// so the scanner can continue to be used while this exists.
146 ///
147 /// # Example
148 ///
149 /// ```rust
150 /// use text_scanner::{ext::RustScannerExt, Scanner};
151 ///
152 /// let text = r#"
153 /// r#foo
154 /// r#type
155 /// r#while
156 /// r#æøå
157 /// r#ľúbiť
158 /// r#東京
159 /// "#;
160 ///
161 /// let idents = [
162 /// (3..8, "r#foo"),
163 /// (11..17, "r#type"),
164 /// (20..27, "r#while"),
165 /// (30..38, "r#æøå"),
166 /// (41..51, "r#ľúbiť"),
167 /// (54..62, "r#東京"),
168 /// ];
169 ///
170 /// let mut scanner = Scanner::new(text);
171 /// for ident in idents {
172 /// scanner.skip_whitespace();
173 /// assert_eq!(scanner.scan_rust_raw_identifier(), Ok(ident));
174 /// }
175 ///
176 /// # scanner.skip_whitespace();
177 /// # assert_eq!(scanner.remaining_text(), "");
178 /// ```
179 ///
180 /// [raw Rust identifier]: https://doc.rust-lang.org/reference/identifiers.html
181 /// [Rust identifier]: https://doc.rust-lang.org/reference/identifiers.html
182 /// [Rust keyword]: https://doc.rust-lang.org/reference/keywords.html
183 fn scan_rust_raw_identifier(&mut self) -> ScannerResult<'text, &'text str>;
184
185 /// Scans a single [Rust character].
186 ///
187 /// **Note:** This has the same lifetime as the original `text`,
188 /// so the scanner can continue to be used while this exists.
189 ///
190 /// # Example
191 ///
192 /// ```rust
193 /// use text_scanner::{ext::RustScannerExt, Scanner};
194 ///
195 /// let text = r#"
196 /// 'A'
197 /// 'Æ'
198 /// 'Á'
199 /// '東'
200 /// '🦀'
201 ///
202 /// '"'
203 /// '\\'
204 /// '\''
205 /// '\n'
206 /// '\0'
207 /// "#;
208 ///
209 /// let chars = [
210 /// (3..6, "'A'"),
211 /// (9..13, "'Æ'"),
212 /// (16..20, "'Á'"),
213 /// (23..28, "'東'"),
214 /// (31..37, "'🦀'"),
215 /// (41..44, "'\"'"),
216 /// (47..51, "'\\\\'"),
217 /// (54..58, "'\\''"),
218 /// (61..65, "'\\n'"),
219 /// (68..72, "'\\0'"),
220 /// ];
221 ///
222 /// let mut scanner = Scanner::new(text);
223 /// for c in chars {
224 /// scanner.skip_whitespace();
225 /// assert_eq!(scanner.scan_rust_char(), Ok(c));
226 /// }
227 ///
228 /// # scanner.skip_whitespace();
229 /// # assert_eq!(scanner.remaining_text(), "");
230 /// ```
231 ///
232 /// [Rust character]: https://doc.rust-lang.org/reference/tokens.html#character-literals
233 fn scan_rust_char(&mut self) -> ScannerResult<'text, &'text str>;
234
235 /// Scans a single [Rust string].
236 ///
237 /// **Note:** This has the same lifetime as the original `text`,
238 /// so the scanner can continue to be used while this exists.
239 ///
240 /// # Example
241 ///
242 /// ```rust
243 /// use text_scanner::{ext::RustScannerExt, Scanner};
244 ///
245 /// let text = r#"
246 /// "Hello World"
247 ///
248 /// "Rust strings
249 /// can span multiple
250 /// lines"
251 ///
252 /// "Foo \" Bar"
253 ///
254 /// "Unterminated String
255 /// "#;
256 ///
257 /// let strings = [
258 /// (3..16, "\"Hello World\""),
259 /// (20..64, "\"Rust strings\n can span multiple\n lines\""),
260 /// (68..80, "\"Foo \\\" Bar\""),
261 /// (84..105, "\"Unterminated String\n"),
262 /// ];
263 ///
264 /// let mut scanner = Scanner::new(text);
265 /// for string in strings {
266 /// scanner.skip_whitespace();
267 /// assert_eq!(scanner.scan_rust_string(), Ok(string));
268 /// }
269 ///
270 /// # scanner.skip_whitespace();
271 /// # assert_eq!(scanner.remaining_text(), "");
272 /// ```
273 ///
274 /// [Rust string]: https://doc.rust-lang.org/reference/tokens.html#string-literals
275 fn scan_rust_string(&mut self) -> ScannerResult<'text, &'text str>;
276
277 /// Scans a single [raw Rust string].
278 ///
279 /// **Note:** This has the same lifetime as the original `text`,
280 /// so the scanner can continue to be used while this exists.
281 ///
282 /// # Example
283 ///
284 /// ```rust
285 /// use text_scanner::{ext::RustScannerExt, Scanner};
286 ///
287 /// let text = r#####"
288 /// r#"Hello World"#
289 ///
290 /// r###"Raw Rust strings"
291 /// "can span multiple"
292 /// "lines"###
293 ///
294 /// r##"Foo #"# Bar"##
295 ///
296 /// r###"Unterminated String
297 /// "#####;
298 ///
299 /// let raw_strings = [
300 /// (3..19, "r#\"Hello World\"#"),
301 /// (23..88, "r###\"Raw Rust strings\"\n \"can span multiple\"\n \"lines\"###"),
302 /// (92..110, "r##\"Foo #\"# Bar\"##"),
303 /// (114..139, "r###\"Unterminated String\n"),
304 /// ];
305 ///
306 /// let mut scanner = Scanner::new(text);
307 /// for raw_string in raw_strings {
308 /// scanner.skip_whitespace();
309 /// assert_eq!(scanner.scan_rust_raw_string(), Ok(raw_string));
310 /// }
311 ///
312 /// # scanner.skip_whitespace();
313 /// # assert_eq!(scanner.remaining_text(), "");
314 /// ```
315 ///
    /// [raw Rust string]: https://doc.rust-lang.org/reference/tokens.html#raw-string-literals
317 fn scan_rust_raw_string(&mut self) -> ScannerResult<'text, &'text str>;
318
319 /// Scans a single [Rust integer decimal literal].
320 ///
321 /// **Note:** Rust integer literals do not allow a sign in front
322 /// of the literal, i.e. `-10` is two tokens `["-", "10"]`.
323 ///
324 /// **Note:** This has the same lifetime as the original `text`,
325 /// so the scanner can continue to be used while this exists.
326 ///
327 /// # Example
328 ///
329 /// ```rust
330 /// use text_scanner::{ext::RustScannerExt, Scanner};
331 ///
332 /// let text = r#"
333 /// 0
334 /// 123
335 ///
336 /// 1_
337 /// 1__
338 /// 1_2_3
339 /// 1__2__3__
340 /// "#;
341 ///
342 /// let integers = [
343 /// (3..4, "0"),
344 /// (7..10, "123"),
345 /// (14..16, "1_"),
346 /// (19..22, "1__"),
347 /// (25..30, "1_2_3"),
348 /// (33..42, "1__2__3__"),
349 /// ];
350 ///
351 /// let mut scanner = Scanner::new(text);
352 /// for integer in integers {
353 /// scanner.skip_whitespace();
354 /// assert_eq!(scanner.scan_rust_int_dec(), Ok(integer));
355 /// }
356 ///
357 /// # scanner.skip_whitespace();
358 /// # assert_eq!(scanner.remaining_text(), "");
359 /// ```
360 ///
361 /// [Rust integer decimal literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
362 fn scan_rust_int_dec(&mut self) -> ScannerResult<'text, &'text str>;
363
364 /// Scans a single [Rust integer hex literal].
365 ///
366 /// **Note:** This has the same lifetime as the original `text`,
367 /// so the scanner can continue to be used while this exists.
368 ///
369 /// # Example
370 ///
371 /// ```rust
372 /// use text_scanner::{ext::RustScannerExt, Scanner};
373 ///
374 /// let text = r#"
375 /// 0x0
376 /// 0xFF
377 ///
378 /// 0x_FF_FF_FF_FF_
379 /// "#;
380 ///
381 /// let hex_integers = [
382 /// (3..6, "0x0"),
383 /// (9..13, "0xFF"),
384 /// (17..32, "0x_FF_FF_FF_FF_"),
385 /// ];
386 ///
387 /// let mut scanner = Scanner::new(text);
388 /// for hex_integer in hex_integers {
389 /// scanner.skip_whitespace();
390 /// assert_eq!(scanner.scan_rust_int_hex(), Ok(hex_integer));
391 /// }
392 ///
393 /// # scanner.skip_whitespace();
394 /// # assert_eq!(scanner.remaining_text(), "");
395 /// ```
396 ///
397 /// [Rust integer hex literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
398 fn scan_rust_int_hex(&mut self) -> ScannerResult<'text, &'text str>;
399
400 /// Scans a single [Rust integer octal literal].
401 ///
402 /// **Note:** This has the same lifetime as the original `text`,
403 /// so the scanner can continue to be used while this exists.
404 ///
405 /// # Example
406 ///
407 /// ```rust
408 /// use text_scanner::{ext::RustScannerExt, Scanner};
409 ///
410 /// let text = r#"
411 /// 0o0
412 /// 0o100
413 ///
414 /// 0o_1_0_0_
415 /// "#;
416 ///
417 /// let oct_integers = [
418 /// (3..6, "0o0"),
419 /// (9..14, "0o100"),
420 /// (18..27, "0o_1_0_0_"),
421 /// ];
422 ///
423 /// let mut scanner = Scanner::new(text);
424 /// for oct_integer in oct_integers {
425 /// scanner.skip_whitespace();
426 /// assert_eq!(scanner.scan_rust_int_oct(), Ok(oct_integer));
427 /// }
428 ///
429 /// # scanner.skip_whitespace();
430 /// # assert_eq!(scanner.remaining_text(), "");
431 /// ```
432 ///
433 /// [Rust integer octal literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
434 fn scan_rust_int_oct(&mut self) -> ScannerResult<'text, &'text str>;
435
436 /// Scans a single [Rust integer binary literal].
437 ///
438 /// **Note:** This has the same lifetime as the original `text`,
439 /// so the scanner can continue to be used while this exists.
440 ///
441 /// # Example
442 ///
443 /// ```rust
444 /// use text_scanner::{ext::RustScannerExt, Scanner};
445 ///
446 /// let text = r#"
447 /// 0b0
448 /// 0b1
449 /// 0b10
450 /// 0b11
451 /// 0b100
452 ///
453 /// 0b_1_0_0_
454 /// "#;
455 ///
456 /// let bin_integers = [
457 /// (3..6, "0b0"),
458 /// (9..12, "0b1"),
459 /// (15..19, "0b10"),
460 /// (22..26, "0b11"),
461 /// (29..34, "0b100"),
462 /// (38..47, "0b_1_0_0_"),
463 /// ];
464 ///
465 /// let mut scanner = Scanner::new(text);
466 /// for bin_integer in bin_integers {
467 /// scanner.skip_whitespace();
468 /// assert_eq!(scanner.scan_rust_int_bin(), Ok(bin_integer));
469 /// }
470 ///
471 /// # scanner.skip_whitespace();
472 /// # assert_eq!(scanner.remaining_text(), "");
473 /// ```
474 ///
475 /// [Rust integer binary literal]: https://doc.rust-lang.org/reference/tokens.html#integer-literals
476 fn scan_rust_int_bin(&mut self) -> ScannerResult<'text, &'text str>;
477
478 /// Scans a single [Rust floating-point literal].
479 ///
480 /// **Note:** This has the same lifetime as the original `text`,
481 /// so the scanner can continue to be used while this exists.
482 ///
483 /// # Example
484 ///
485 /// ```rust
486 /// use text_scanner::{ext::RustScannerExt, Scanner};
487 ///
488 /// let text = r#"
489 /// 12.
490 /// 12.34
491 ///
492 /// 12.
493 /// 12.34
494 ///
495 /// 12.34E56
496 /// 12.34E+56
497 /// 12.34E-56
498 ///
499 /// 1_2_.
500 /// 1_2_.3_4_
501 ///
502 /// 1_2_.3_4_E_5_6_
503 /// 1_2_.3_4_E+_5_6_
504 /// 1_2_.3_4_E-_5_6_
505 /// "#;
506 ///
507 /// let floats = [
508 /// (3..6, "12."),
509 /// (9..14, "12.34"),
510 /// (18..21, "12."),
511 /// (24..29, "12.34"),
512 /// (33..41, "12.34E56"),
513 /// (44..53, "12.34E+56"),
514 /// (56..65, "12.34E-56"),
515 /// (69..74, "1_2_."),
516 /// (77..86, "1_2_.3_4_"),
517 /// (90..105, "1_2_.3_4_E_5_6_"),
518 /// (108..124, "1_2_.3_4_E+_5_6_"),
519 /// (127..143, "1_2_.3_4_E-_5_6_"),
520 /// ];
521 ///
522 /// let mut scanner = Scanner::new(text);
523 /// for float in floats {
524 /// scanner.skip_whitespace();
525 /// assert_eq!(scanner.scan_rust_float(), Ok(float));
526 /// }
527 ///
528 /// # scanner.skip_whitespace();
529 /// # assert_eq!(scanner.remaining_text(), "");
530 /// ```
531 ///
532 /// [Rust floating-point literal]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
533 fn scan_rust_float(&mut self) -> ScannerResult<'text, &'text str>;
534}
535
536impl<'text> RustScannerExt<'text> for Scanner<'text> {
537 // Reference: https://doc.rust-lang.org/reference/comments.html
538 fn scan_rust_line_comment(&mut self) -> ScannerResult<'text, &'text str> {
539 self.scan_with(|scanner| {
540 scanner.accept_str("//")?;
541 scanner.skip_until_char_any(&['\n', '\r']);
542 Ok(())
543 })
544 }
545
546 // Reference: https://doc.rust-lang.org/reference/comments.html
547 fn scan_rust_block_comment(&mut self) -> ScannerResult<'text, &'text str> {
548 self.scan_with(|scanner| {
549 scanner.accept_str("/*")?;
550 let mut open = 1;
551 loop {
552 scanner.skip_until_char_any(&['*', '/']);
553
554 match scanner.next() {
555 Ok((_r, '*')) => {
556 if let Ok((_r, '/')) = scanner.next() {
557 if open == 1 {
558 break;
559 }
560 open -= 1;
561 }
562 }
563 Ok((_r, '/')) => {
564 if let Ok((_r, '*')) = scanner.next() {
565 open += 1;
566 }
567 }
568 Ok((_r, _c)) => {}
569 Err(_) => break,
570 }
571 }
572 Ok(())
573 })
574 }
575
576 // Reference: https://doc.rust-lang.org/reference/identifiers.html
577 fn scan_rust_identifier(&mut self) -> ScannerResult<'text, &'text str> {
578 self.scan_with(|scanner| {
579 scanner.accept_if(|c| c.is_alphabetic() || (c == '_'))?;
580 scanner.skip_while(|c| c.is_alphanumeric() || (c == '_'));
581 Ok(())
582 })
583 }
584
585 // Reference: https://doc.rust-lang.org/reference/identifiers.html
586 fn scan_rust_raw_identifier(&mut self) -> ScannerResult<'text, &'text str> {
587 self.scan_with(|scanner| {
588 scanner.accept_str("r#")?;
589 scanner.scan_rust_identifier()?;
590 Ok(())
591 })
592 }
593
594 // Reference: https://doc.rust-lang.org/reference/tokens.html#character-literals
595 fn scan_rust_char(&mut self) -> ScannerResult<'text, &'text str> {
596 self.scan_with(|scanner| {
597 scanner.accept_char('\'')?;
598
599 let (_r, c) = scanner.next()?;
600 if c == '\\' {
601 // Skip the next character as it is escaped
602 // Note: Technically any character is not valid
603 _ = scanner.next();
604 }
605
606 scanner.accept_char('\'')?;
607 Ok(())
608 })
609 }
610
611 // Reference: https://doc.rust-lang.org/reference/tokens.html#string-literals
612 fn scan_rust_string(&mut self) -> ScannerResult<'text, &'text str> {
613 self.scan_with(|scanner| {
614 scanner.accept_char('"')?;
615
616 loop {
617 scanner.skip_until_char_any(&['"', '\\']);
618 match scanner.next() {
619 Ok((_r, '"')) => break,
620 Ok((_r, '\\')) => {
621 // Skip the next character as it is escaped
622 // Note: Technically any character is not valid
623 _ = scanner.next();
624 }
625 Ok(_) => unreachable!(),
626 Err(_) => break,
627 }
628 }
629
630 Ok(())
631 })
632 }
633
634 // Reference: https://doc.rust-lang.org/reference/tokens.html#raw-string-literals
635 fn scan_rust_raw_string(&mut self) -> ScannerResult<'text, &'text str> {
636 self.scan_with(|scanner| {
637 scanner.accept_char('r')?;
638 let hashes = scanner.skip_while_char('#').0.len();
639 scanner.accept_char('"')?;
640
641 'scan: loop {
642 scanner.skip_until_char('"');
643
644 if scanner.next().is_err() {
645 break;
646 }
647
648 if hashes > 0 {
649 for _ in 0..hashes {
650 if scanner.accept_char('#').is_err() {
651 continue 'scan;
652 }
653 }
654
655 break;
656 } else {
657 break;
658 }
659 }
660
661 Ok(())
662 })
663 }
664
665 // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
666 fn scan_rust_int_dec(&mut self) -> ScannerResult<'text, &'text str> {
667 self.scan_with(|scanner| {
668 scanner.accept_if_ext(char::is_ascii_digit)?;
669 scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
670 Ok(())
671 })
672 }
673
674 // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
675 fn scan_rust_int_hex(&mut self) -> ScannerResult<'text, &'text str> {
676 self.scan_with(|scanner| {
677 scanner.accept_str("0x")?;
678
679 scanner.skip_while_char('_');
680 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
681
682 scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
683
684 Ok(())
685 })
686 }
687
688 // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
689 fn scan_rust_int_oct(&mut self) -> ScannerResult<'text, &'text str> {
690 self.scan_with(|scanner| {
691 scanner.accept_str("0o")?;
692
693 scanner.skip_while_char('_');
694 scanner.accept_if(CharExt::is_ascii_octdigit)?;
695
696 scanner.skip_while(|c| CharExt::is_ascii_octdigit(c) || (c == '_'));
697
698 Ok(())
699 })
700 }
701
702 // Reference: https://doc.rust-lang.org/reference/tokens.html#integer-literals
703 fn scan_rust_int_bin(&mut self) -> ScannerResult<'text, &'text str> {
704 self.scan_with(|scanner| {
705 scanner.accept_str("0b")?;
706
707 scanner.skip_while_char('_');
708 scanner.accept_if(CharExt::is_ascii_bindigit)?;
709
710 scanner.skip_while(|c| c.is_ascii_bindigit() || (c == '_'));
711
712 Ok(())
713 })
714 }
715
716 // Reference: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals
717 fn scan_rust_float(&mut self) -> ScannerResult<'text, &'text str> {
718 self.scan_with(|scanner| {
719 scanner.scan_rust_int_dec()?;
720 scanner.accept_char('.')?;
721
722 if scanner.scan_rust_int_dec().is_ok() && scanner.accept_char_any(&['e', 'E']).is_ok() {
723 _ = scanner.accept_char_any(&['+', '-']);
724
725 scanner.skip_while_char('_');
726 scanner.accept_if_ext(char::is_ascii_digit)?;
727 scanner.skip_while(|c| c.is_ascii_digit() || (c == '_'));
728 }
729
730 Ok(())
731 })
732 }
733}
734
735#[cfg(test)]
736mod tests {
737 use super::*;
738
#[test]
fn test_line_comments() {
    // Every comment is indented by 12 spaces, and the last line consists of
    // 8 spaces, which is what the whitespace ranges below account for
    let code = concat!(
        "\n",
        "            // Line Comment\n",
        "            // Line Comment\r\n",
        "            //! Inner Line Doc Comment\n",
        "            /// Outer Line Doc Comment\n",
        "            //\n",
        "            //\t\n",
        "            ///\n",
        "        ",
    );

    // skipped whitespace, comment range, comment
    let expected = [
        (0..13, 13..28, "// Line Comment"),
        (28..41, 41..56, "// Line Comment"),
        (56..70, 70..96, "//! Inner Line Doc Comment"),
        (96..109, 109..135, "/// Outer Line Doc Comment"),
        (135..148, 148..150, "//"),
        (150..163, 163..166, "//\t"),
        (166..179, 179..182, "///"),
    ];

    let mut scanner = Scanner::new(code);
    for (whitespace, range, comment) in expected {
        assert_eq!(scanner.skip_whitespace().0, whitespace);
        assert_eq!(scanner.scan_rust_line_comment(), Ok((range, comment)));
    }

    assert_eq!(scanner.skip_whitespace().0, 182..191);
    assert_eq!(scanner.remaining_text(), "");
}
787
#[test]
fn test_block_comments() {
    // Every non-empty line is indented by 12 spaces, and the last line
    // consists of 8 spaces, which is what the ranges below account for
    let code = concat!(
        "\n",
        "            /* Single Line Block Comment */\n",
        "            /* Two Line\n",
        "            Block Comment */\n",
        "\n",
        "            /*\n",
        "\n",
        "            Multiline\n",
        "            Block\n",
        "            Comment\n",
        "\n",
        "            */\n",
        "\n",
        "            /*\n",
        "\n",
        "            /* Nested\n",
        "            // /* Block */\n",
        "            Comment */\n",
        "\n",
        "            */\n",
        "\n",
        "            /**/\n",
        "            /*\n",
        "            */\n",
        "            /**//*\n",
        "            *//**/\n",
        "\n",
        "            /* Unclosed Block Comment\n",
        "        ",
    );

    // skipped whitespace, comment range, comment
    let expected = [
        (0..13, 13..44, "/* Single Line Block Comment */"),
        (44..57, 57..97, "/* Two Line\n            Block Comment */"),
        (
            97..111,
            111..190,
            "/*\n\n            Multiline\n            Block\n            Comment\n\n            */",
        ),
        (
            190..204,
            204..295,
            concat!(
                "/*\n",
                "\n",
                "            /* Nested\n",
                "            // /* Block */\n",
                "            Comment */\n",
                "\n",
                "            */",
            ),
        ),
        (295..309, 309..313, "/**/"),
        (313..326, 326..343, "/*\n            */"),
        (343..356, 356..360, "/**/"),
        // The next two comments directly follow the previous comment
        (360..360, 360..377, "/*\n            */"),
        (377..377, 377..381, "/**/"),
        (381..395, 395..429, "/* Unclosed Block Comment\n        "),
    ];

    let mut scanner = Scanner::new(code);
    for (whitespace, range, comment) in expected {
        assert_eq!(scanner.skip_whitespace().0, whitespace);
        assert_eq!(scanner.scan_rust_block_comment(), Ok((range, comment)));
    }

    assert_eq!(scanner.skip_whitespace().0, 429..429);
    assert_eq!(scanner.remaining_text(), "");
}
878
#[test]
fn test_identifiers() {
    // input, scanned identifier (if any), text remaining after the scan
    const CASES: &[(&str, Option<&str>, &str)] = &[
        ("_", Some("_"), ""),
        ("x", Some("x"), ""),
        ("foo", Some("foo"), ""),
        ("_bar", Some("_bar"), ""),
        ("foo_bar_baz__", Some("foo_bar_baz__"), ""),
        ("foo-bar", Some("foo"), "-bar"),
        ("2foo", None, "2foo"),
        ("+foo", None, "+foo"),
    ];

    for &(input, ident, rest) in CASES {
        let mut scanner = Scanner::new(input);

        let scanned = scanner.scan_rust_identifier().ok().map(|(_range, text)| text);

        assert_eq!(scanned, ident);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
902
#[test]
fn test_raw_identifiers() {
    // input, scanned raw identifier (if any), text remaining after the scan
    const CASES: &[(&str, Option<&str>, &str)] = &[
        ("r#x", Some("r#x"), ""),
        ("r#foo", Some("r#foo"), ""),
        ("r#_foo", Some("r#_foo"), ""),
        ("r#foo_bar_baz__", Some("r#foo_bar_baz__"), ""),
        ("r#type", Some("r#type"), ""),
        ("r#struct", Some("r#struct"), ""),
        // Warning: Rust itself does not allow `r#_`. However, only the
        // format of raw identifiers is scanned, their validity is not
        // verified, so `r#_` is scanned nonetheless
        ("r#_", Some("r#_"), ""),
        ("r", None, "r"),
        ("r#", None, "r#"),
        ("r#2", None, "r#2"),
        ("r#2foo", None, "r#2foo"),
    ];

    for &(input, ident, rest) in CASES {
        let mut scanner = Scanner::new(input);

        let scanned = scanner
            .scan_rust_raw_identifier()
            .ok()
            .map(|(_range, text)| text);

        assert_eq!(scanned, ident);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
935
#[test]
fn test_strings() {
    // input, scanned string (if any), text remaining after the scan
    const CASES: &[(&str, Option<&str>, &str)] = &[
        ("\"\"", Some("\"\""), ""),
        ("\"Hello World\"", Some("\"Hello World\""), ""),
        ("\"Hello\nWorld\"", Some("\"Hello\nWorld\""), ""),
        ("\"Hello\\nWorld\"", Some("\"Hello\\nWorld\""), ""),
        (r#""Hello \" World""#, Some(r#""Hello \" World""#), ""),
        (r#""Hello \\\" World""#, Some(r#""Hello \\\" World""#), ""),
        // An unterminated string is scanned up to the end of the text
        ("\"No Closing Quote", Some("\"No Closing Quote"), ""),
        // The backslash is escaped, so the quote after it ends the string
        (r#""Hello \\" World""#, Some(r#""Hello \\""#), " World\""),
    ];

    for &(input, string, rest) in CASES {
        let mut scanner = Scanner::new(input);

        let scanned = scanner.scan_rust_string().ok().map(|(_range, text)| text);

        assert_eq!(scanned, string);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
959
#[test]
fn test_raw_strings() {
    // input, scanned raw string (if any), text remaining after the scan
    const CASES: &[(&str, Option<&str>, &str)] = &[
        ("r\"\"", Some("r\"\""), ""),
        ("r#\"\"#", Some("r#\"\"#"), ""),
        ("r#\"\n\"\n\"\"#", Some("r#\"\n\"\n\"\"#"), ""),
        ("r#\"Hello \" World\"#", Some("r#\"Hello \" World\"#"), ""),
        (
            "r#####\"Foo #\"# Bar ####\"#### Baz\"#####",
            Some("r#####\"Foo #\"# Bar ####\"#### Baz\"#####"),
            "",
        ),
        // Unterminated raw strings are scanned up to the end of the text
        (
            "r###\"Foo \"## Bar\" Baz",
            Some("r###\"Foo \"## Bar\" Baz"),
            "",
        ),
        ("r##\"\"#", Some("r##\"\"#"), ""),
        // Only as many `#` as the raw string was opened with are scanned
        ("r#\"\"##", Some("r#\"\"#"), "#"),
        ("r\"Hello \" World\"", Some("r\"Hello \""), " World\""),
    ];

    for &(input, raw_string, rest) in CASES {
        let mut scanner = Scanner::new(input);

        let scanned = scanner.scan_rust_raw_string().ok().map(|(_range, text)| text);

        assert_eq!(scanned, raw_string);
        assert_eq!(scanner.remaining_text(), rest);
    }
}
992}