1use crate::{ext::CScannerExt, CharExt, Scanner, ScannerResult};
2
/// Reserved keywords of the Java language, the lone underscore `_` included.
///
/// `true`, `false` and `null` are not listed here; they are literals rather
/// than keywords.
///
/// Entries are grouped by first letter purely for readability; the order of
/// the entries themselves is significant to anyone indexing into the slice.
#[rustfmt::skip]
pub const JAVA_RESERVED_KEYWORDS: &[&str] = &[
    "_",
    "abstract", "assert",
    "boolean", "break", "byte",
    "case", "catch", "char", "class", "const", "continue",
    "default", "do", "double",
    "else", "enum", "extends",
    "final", "finally", "float", "for",
    "if", "goto",
    "implements", "import", "instanceof", "int", "interface",
    "long",
    "native", "new",
    "package", "private", "protected", "public",
    "return",
    "short", "static", "strictfp", "super", "switch", "synchronized",
    "this", "throw", "throws", "transient", "try",
    "void", "volatile",
    "while",
];
13
/// Contextual keywords of the Java language.
///
/// Note that `non-sealed` contains a hyphen, so it can never be produced by an
/// identifier scan alone.
///
/// NOTE(review): newer Java language specifications also list `when`; confirm
/// which Java version this table is meant to track.
#[rustfmt::skip]
pub const JAVA_CONTEXTUAL_KEYWORDS: &[&str] = &[
    "exports", "module", "non-sealed", "open",
    "opens", "permits", "provides", "record",
    "requires", "sealed", "to", "transitive",
    "uses", "var", "with", "yield",
];
20
/// Operator tokens of the Java language.
///
/// Rows group the operators by kind (basic, comparison/logical/step,
/// arithmetic/bitwise/shift, compound assignment); the order of the entries
/// is unchanged.
#[rustfmt::skip]
pub const JAVA_OPERATORS: &[&str] = &[
    "=", ">", "<", "!", "~", "?", ":", "->",
    "==", ">=", "<=", "!=", "&&", "||", "++", "--",
    "+", "-", "*", "/", "&", "|", "^", "%", "<<", ">>", ">>>",
    "+=", "-=", "*=", "/=", "&=", "|=", "^=", "%=", "<<=", ">>=", ">>>=",
];
27
/// Separator (punctuation) tokens of the Java language.
#[rustfmt::skip]
pub const JAVA_SEPARATORS: &[&str] = &[
    "(", ")", "{", "}", "[", "]",
    ";", ",", ".", "...", "@", "::",
];
32
/// Extension methods for scanning Java tokens.
///
/// The trait is bounded by `crate::private::Sealed`. Every method returns a
/// `ScannerResult` whose `Ok` and `Err` payloads are both a `(range, text)`
/// pair.
pub trait JavaScannerExt<'text>: crate::private::Sealed {
    /// Scans a Java line comment.
    fn scan_java_line_comment(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java block comment.
    fn scan_java_block_comment(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a Java identifier.
    fn scan_java_identifier(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a Java keyword, either reserved or contextual.
    fn scan_java_keyword(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java reserved keyword, see [`JAVA_RESERVED_KEYWORDS`].
    fn scan_java_reserved_keyword(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java contextual keyword, see [`JAVA_CONTEXTUAL_KEYWORDS`].
    fn scan_java_contextual_keyword(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a Java operator, see [`JAVA_OPERATORS`].
    fn scan_java_operator(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java separator, see [`JAVA_SEPARATORS`].
    fn scan_java_separator(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans the Java null literal, i.e. `null`.
    fn scan_java_null_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java boolean literal, i.e. `true` or `false`.
    fn scan_java_boolean_literal(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a Java integer literal in any of the four supported bases.
    fn scan_java_int_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java decimal integer literal, e.g. `0`, `1996` or `2_147_483_648L`.
    fn scan_java_int_dec_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java hexadecimal integer literal, e.g. `0xDada_Cafe`.
    fn scan_java_int_hex_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java octal integer literal, e.g. `0372`.
    fn scan_java_int_oct_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java binary integer literal, e.g. `0b0111`.
    fn scan_java_int_bin_literal(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a Java floating-point literal, decimal or hexadecimal.
    fn scan_java_float_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java decimal floating-point literal, e.g. `3.14f` or `1e-9d`.
    fn scan_java_float_dec_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java hexadecimal floating-point literal, e.g. `0x1.0P-149f`.
    fn scan_java_float_hex_literal(&mut self) -> ScannerResult<'text, &'text str>;

    /// Scans a Java character literal, e.g. `'a'` or `'\u03a9'`.
    fn scan_java_char_literal(&mut self) -> ScannerResult<'text, &'text str>;
    /// Scans a Java string literal, e.g. `"This string"`.
    fn scan_java_string_literal(&mut self) -> ScannerResult<'text, &'text str>;
}
74
75impl<'text> JavaScannerExt<'text> for Scanner<'text> {
    /// Scans a Java line comment.
    ///
    /// Thin wrapper: all work is delegated to `scan_c_line_comment`, so Java
    /// line comments are treated exactly like the C flavour.
    #[inline]
    fn scan_java_line_comment(&mut self) -> ScannerResult<'text, &'text str> {
        self.scan_c_line_comment()
    }
81
    /// Scans a Java block comment.
    ///
    /// Thin wrapper: all work is delegated to `scan_c_block_comment`, so Java
    /// block comments are treated exactly like the C flavour.
    #[inline]
    fn scan_java_block_comment(&mut self) -> ScannerResult<'text, &'text str> {
        self.scan_c_block_comment()
    }
87
    /// Scans a Java identifier.
    ///
    /// Thin wrapper: all work is delegated to `scan_c_identifier`.
    ///
    /// NOTE(review): Java identifiers may also contain `$` and non-ASCII
    /// letters; whether `scan_c_identifier` accepts those is not visible
    /// here - confirm against its implementation.
    #[inline]
    fn scan_java_identifier(&mut self) -> ScannerResult<'text, &'text str> {
        self.scan_c_identifier()
    }
93
94 #[inline]
96 fn scan_java_keyword(&mut self) -> ScannerResult<'text, &'text str> {
97 self.scan_with(|scanner| {
98 let res = scanner.scan_java_contextual_keyword();
99 match res {
100 Ok(_) => Ok(()),
101 Err((r, s)) if s.is_java_reserved_keyword() => {
102 scanner.cursor = r.end;
103 Ok(())
104 }
105 Err(res) => Err(res),
106 }
107 })
108 }
109
110 #[inline]
112 fn scan_java_reserved_keyword(&mut self) -> ScannerResult<'text, &'text str> {
113 self.scan_with(|scanner| {
114 let (r, s) = scanner.scan_java_identifier()?;
115 if s.is_java_reserved_keyword() {
116 Ok(())
117 } else {
118 Err((r, s))
119 }
120 })
121 }
122
123 #[inline]
125 fn scan_java_contextual_keyword(&mut self) -> ScannerResult<'text, &'text str> {
126 self.scan_with(|scanner| {
127 let (r, s) = scanner.scan_java_identifier()?;
128 if s.is_java_contextual_keyword() {
129 Ok(())
130 } else if s == "non" {
131 scanner.accept_char('-')?;
132 scanner.accept_str("sealed")?;
133 Ok(())
134 } else {
135 Err((r, s))
136 }
137 })
138 }
139
140 fn scan_java_operator(&mut self) -> ScannerResult<'text, &'text str> {
142 self.scan_with(|scanner| {
143 let (r, c) = scanner.next()?;
144 match c {
145 '=' | '*' | '/' | '^' | '%' | '!' => {
148 _ = scanner.accept_char('=');
149 }
150 '+' => {
152 _ = scanner.accept_char_any(&['=', '+']);
153 }
154 '-' => {
156 _ = scanner.accept_char_any(&['=', '-', '>']);
157 }
158 '&' => {
160 _ = scanner.accept_char_any(&['=', '&']);
161 }
162 '|' => {
164 _ = scanner.accept_char_any(&['=', '|']);
165 }
166 '<' => {
169 _ = scanner.accept_char('<');
170 _ = scanner.accept_char('=');
171 }
172 '>' => {
175 _ = scanner.accept_char('>');
176 _ = scanner.accept_char('>');
177 _ = scanner.accept_char('=');
178 }
179 ':' | '?' | '~' => {}
181 _ => return Err(scanner.ranged_text(r)),
182 }
183 Ok(())
184 })
185 }
186
187 fn scan_java_separator(&mut self) -> ScannerResult<'text, &'text str> {
189 let (r, c) = self.peek()?;
190 let res = match c {
191 '(' | ')' | '{' | '}' | '[' | ']' | ';' | ',' | '@' => {
192 self.cursor = r.end;
193 Ok(r)
194 }
195 ':' => match self.peek_nth(1) {
196 Ok((last, ':')) => Ok(r.start..last.end),
197 _ => Err(r),
198 },
199 '.' => {
200 self.cursor = r.end;
201 match self.peek_str(2) {
202 Ok((last, "..")) => Ok(r.start..last.end),
203 _ => Ok(r),
204 }
205 }
206 _ => Err(r),
207 };
208 match res {
209 Ok(r) => {
210 self.cursor = r.end;
211 Ok(self.ranged_text(r))
212 }
213 Err(r) => Err(self.ranged_text(r)),
214 }
215 }
216
217 #[inline]
219 fn scan_java_null_literal(&mut self) -> ScannerResult<'text, &'text str> {
220 self.scan_with(|scanner| {
221 let (r, s) = scanner.scan_java_identifier()?;
222 if s.is_java_null_literal() {
223 Ok(())
224 } else {
225 Err((r, s))
226 }
227 })
228 }
229
230 #[inline]
232 fn scan_java_boolean_literal(&mut self) -> ScannerResult<'text, &'text str> {
233 self.scan_with(|scanner| {
234 let (r, s) = scanner.scan_java_identifier()?;
235 if s.is_java_boolean_literal() {
236 Ok(())
237 } else {
238 Err((r, s))
239 }
240 })
241 }
242
243 #[inline]
245 fn scan_java_int_literal(&mut self) -> ScannerResult<'text, &'text str> {
246 self.scan_java_int_dec_literal()
247 .or_else(|_| self.scan_java_int_hex_literal())
248 .or_else(|_| self.scan_java_int_oct_literal())
249 .or_else(|_| self.scan_java_int_bin_literal())
250 }
251
252 #[inline]
254 fn scan_java_int_dec_literal(&mut self) -> ScannerResult<'text, &'text str> {
255 self.scan_with(|scanner| {
256 match scanner.accept_if_ext(char::is_ascii_digit)? {
257 (r, '0') => {
258 if scanner.peek().map_or(false, |(_r, c)| match c {
259 c if c.is_ascii_digit() => true,
260 'x' | 'X' | 'b' | 'B' | 'f' | 'F' | 'd' | 'D' => true,
261 _ => false,
262 }) {
263 return Err((r, "0"));
264 }
265 }
266 _ => {
267 scanner.skip_while(|c| c.is_ascii_non_zero_digit() || (c == '_'));
268 }
269 }
270
271 _ = scanner.accept_char_any(&['l', 'L']);
272
273 Ok(())
274 })
275 }
276
277 #[inline]
279 fn scan_java_int_hex_literal(&mut self) -> ScannerResult<'text, &'text str> {
280 self.scan_with(|scanner| {
281 scanner.accept_char('0')?;
282 scanner.accept_char_any(&['x', 'X'])?;
283
284 scanner.skip_while_char('_');
285 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
286
287 scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
288
289 _ = scanner.accept_char_any(&['l', 'L']);
290
291 Ok(())
292 })
293 }
294
295 #[inline]
297 fn scan_java_int_oct_literal(&mut self) -> ScannerResult<'text, &'text str> {
298 self.scan_with(|scanner| {
299 scanner.accept_char('0')?;
300
301 scanner.skip_while_char('_');
302 scanner.accept_if(CharExt::is_ascii_octdigit)?;
303
304 scanner.skip_while(|c| CharExt::is_ascii_octdigit(c) || (c == '_'));
305
306 _ = scanner.accept_char_any(&['l', 'L']);
307
308 Ok(())
309 })
310 }
311
312 #[inline]
314 fn scan_java_int_bin_literal(&mut self) -> ScannerResult<'text, &'text str> {
315 self.scan_with(|scanner| {
316 scanner.accept_char('0')?;
317 scanner.accept_char_any(&['b', 'B'])?;
318
319 scanner.skip_while_char('_');
320 scanner.accept_if(CharExt::is_ascii_bindigit)?;
321
322 scanner.skip_while(|c| c.is_ascii_bindigit() || (c == '_'));
323
324 _ = scanner.accept_char_any(&['l', 'L']);
325
326 Ok(())
327 })
328 }
329
330 #[inline]
332 fn scan_java_float_literal(&mut self) -> ScannerResult<'text, &'text str> {
333 self.scan_java_float_dec_literal()
334 .or_else(|_| self.scan_java_float_hex_literal())
335 }
336
337 fn scan_java_float_dec_literal(&mut self) -> ScannerResult<'text, &'text str> {
339 self.scan_with(|scanner| {
340 if scanner.accept_char('.').is_ok() {
341 scanner.scan_digits_or_underscores()?;
342 } else {
343 scanner.scan_digits_or_underscores()?;
344 if scanner.accept_char('.').is_ok() {
345 _ = scanner.scan_digits_or_underscores();
346 }
347 }
348
349 if scanner.accept_char_any(&['e', 'E']).is_ok() {
350 _ = scanner.accept_char_any(&['+', '-']);
351
352 scanner.skip_while_char('_');
353 scanner.scan_digits_or_underscores()?;
354 }
355
356 _ = scanner.accept_char_any(&['f', 'F', 'd', 'D']);
357
358 Ok(())
359 })
360 }
361
362 fn scan_java_float_hex_literal(&mut self) -> ScannerResult<'text, &'text str> {
364 self.scan_with(|scanner| {
365 scanner.accept_char('0')?;
366 scanner.accept_char_any(&['x', 'X'])?;
367
368 scanner.skip_while_char('_');
369 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
370 scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
371
372 scanner.accept_char('.')?;
373
374 scanner.skip_while_char('_');
375 if scanner.accept_if_ext(char::is_ascii_hexdigit).is_ok() {
376 scanner.skip_while(|c| c.is_ascii_hexdigit() || (c == '_'));
377 }
378
379 scanner.accept_char_any(&['p', 'P'])?;
380
381 _ = scanner.accept_char_any(&['+', '-']);
382 scanner.skip_while_char('_');
383 scanner.scan_digits_or_underscores()?;
384
385 _ = scanner.accept_char_any(&['f', 'F', 'd', 'D']);
386
387 Ok(())
388 })
389 }
390
391 fn scan_java_char_literal(&mut self) -> ScannerResult<'text, &'text str> {
393 self.scan_with(|scanner| {
394 scanner.accept_char('\'')?;
395
396 let (_r, c) = scanner.next()?;
397 if c == '\\' {
398 let (_r, c) = scanner.next()?;
401
402 if CharExt::is_ascii_octdigit(c) {
403 _ = scanner.accept_if(CharExt::is_ascii_octdigit);
404 _ = scanner.accept_if(CharExt::is_ascii_octdigit);
405 } else if c == 'u' {
406 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
407 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
408 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
409 scanner.accept_if_ext(char::is_ascii_hexdigit)?;
410 }
411 }
412
413 scanner.accept_char('\'')?;
414 Ok(())
415 })
416 }
417
    /// Scans a Java string literal.
    ///
    /// Thin wrapper: all work is delegated to `scan_c_string`, so Java string
    /// literals are treated exactly like the C flavour.
    #[inline]
    fn scan_java_string_literal(&mut self) -> ScannerResult<'text, &'text str> {
        self.scan_c_string()
    }
424}
425
/// Classification helpers that test whether a whole string is a particular
/// kind of Java token.
pub trait JavaStrExt {
    /// Returns `true` if the string is a reserved or a contextual Java keyword.
    fn is_java_keyword(&self) -> bool;
    /// Returns `true` if the string is listed in [`JAVA_RESERVED_KEYWORDS`].
    fn is_java_reserved_keyword(&self) -> bool;
    /// Returns `true` if the string is listed in [`JAVA_CONTEXTUAL_KEYWORDS`].
    fn is_java_contextual_keyword(&self) -> bool;

    /// Returns `true` if the string is exactly `null`.
    fn is_java_null_literal(&self) -> bool;
    /// Returns `true` if the string is exactly `true` or `false`.
    fn is_java_boolean_literal(&self) -> bool;

    /// Returns `true` if the string is listed in [`JAVA_OPERATORS`].
    fn is_java_operator(&self) -> bool;
    /// Returns `true` if the string is listed in [`JAVA_SEPARATORS`].
    fn is_java_separator(&self) -> bool;
}
438
439impl JavaStrExt for str {
440 #[inline]
441 fn is_java_keyword(&self) -> bool {
442 self.is_java_contextual_keyword() || self.is_java_reserved_keyword()
443 }
444
445 #[inline]
446 fn is_java_reserved_keyword(&self) -> bool {
447 JAVA_RESERVED_KEYWORDS.contains(&self)
448 }
449
450 #[inline]
451 fn is_java_contextual_keyword(&self) -> bool {
452 JAVA_CONTEXTUAL_KEYWORDS.contains(&self)
453 }
454
455 #[inline]
456 fn is_java_null_literal(&self) -> bool {
457 self == "null"
458 }
459
460 #[inline]
461 fn is_java_boolean_literal(&self) -> bool {
462 (self == "true") || (self == "false")
463 }
464
465 #[inline]
466 fn is_java_operator(&self) -> bool {
467 JAVA_OPERATORS.contains(&self)
468 }
469
470 #[inline]
471 fn is_java_separator(&self) -> bool {
472 JAVA_SEPARATORS.contains(&self)
473 }
474}
475
/// Unit tests for the Java scanner extension and string classification.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{assert_invalid_cases, assert_valid_cases};

    /// Every reserved and contextual keyword scans as a keyword.
    #[test]
    fn test_java_keywords() {
        for &expected in JAVA_RESERVED_KEYWORDS
            .iter()
            .chain(JAVA_CONTEXTUAL_KEYWORDS)
        {
            let mut scanner = Scanner::new(expected);

            let actual = scanner.scan_java_keyword().map(|(_r, kw)| kw);
            assert_eq!(actual, Ok(expected));

            assert!(expected.is_java_keyword());
        }
    }

    /// Reserved keywords scan as reserved and are never contextual.
    #[test]
    fn test_java_reserved_keywords() {
        for &expected in JAVA_RESERVED_KEYWORDS {
            let mut scanner = Scanner::new(expected);

            let actual = scanner.scan_java_reserved_keyword().map(|(_r, kw)| kw);
            assert_eq!(actual, Ok(expected));

            let actual = actual.unwrap();
            assert!(actual.is_java_reserved_keyword());
            assert!(!actual.is_java_contextual_keyword());
        }
    }

    /// Contextual keywords scan as contextual and are never reserved.
    #[test]
    fn test_java_contextual_keywords() {
        for &expected in JAVA_CONTEXTUAL_KEYWORDS {
            let mut scanner = Scanner::new(expected);

            let actual = scanner.scan_java_contextual_keyword().map(|(_r, kw)| kw);
            assert_eq!(actual, Ok(expected));

            let actual = actual.unwrap();
            assert!(actual.is_java_contextual_keyword());
            assert!(!actual.is_java_reserved_keyword());
        }
    }

    /// Every operator scans in full and is not classified as a separator.
    #[test]
    fn test_java_operators() {
        for &expected in JAVA_OPERATORS {
            let mut scanner = Scanner::new(expected);

            let actual = scanner.scan_java_operator().map(|(_r, kw)| kw);
            assert_eq!(actual, Ok(expected));

            let actual = actual.unwrap();
            assert!(actual.is_java_operator());
            assert!(!actual.is_java_separator());
        }
    }

    /// Every separator scans in full and is not classified as an operator.
    #[test]
    fn test_java_separators() {
        for &expected in JAVA_SEPARATORS {
            let mut scanner = Scanner::new(expected);

            let actual = scanner.scan_java_separator().map(|(_r, kw)| kw);
            assert_eq!(actual, Ok(expected));

            let actual = actual.unwrap();
            assert!(actual.is_java_separator());
            assert!(!actual.is_java_operator());
        }
    }

    /// `null` is a literal, not a keyword.
    #[test]
    fn test_java_null_literals() {
        assert!("null".is_java_null_literal());
        assert!(!"null".is_java_boolean_literal());
        assert!(!"null".is_java_keyword());
    }

    /// `true` and `false` are literals, not keywords.
    #[test]
    fn test_java_boolean_literals() {
        assert!("true".is_java_boolean_literal());
        assert!(!"true".is_java_null_literal());
        assert!(!"true".is_java_keyword());

        assert!("false".is_java_boolean_literal());
        assert!(!"false".is_java_null_literal());
        assert!(!"false".is_java_keyword());
    }

    #[test]
    fn test_java_int_dec_literals() {
        let cases = ["0", "2", "0l", "0L", "1996", "2_147_483_648L", "2147483648"];

        assert_valid_cases!(scan_java_int_dec_literal, cases);
        assert_valid_cases!(scan_java_int_dec_literal, cases, "remaining");

        assert_valid_cases!(scan_java_int_literal, cases);
        assert_valid_cases!(scan_java_int_literal, cases, "remaining");
    }

    #[test]
    fn test_java_int_dec_literals_invalid() {
        let cases = ["00", "0000", "_0", "_10", "+1", "-123"];

        assert_invalid_cases!(scan_java_int_dec_literal, cases);
    }

    #[test]
    fn test_java_int_hex_literals() {
        let cases = [
            "0xDada_Cafe",
            "0x00_FF__00_FF",
            "0x100000000L",
            "0xC0B0L",
            "0x7fff_ffff",
            "0x8000_0000",
            "0xffff_ffff",
            "0x7fff_ffff_ffff_ffffL",
            "0x8000_0000_0000_0000L",
            "0xffff_ffff_ffff_ffffL",
        ];

        assert_valid_cases!(scan_java_int_hex_literal, cases);
        assert_valid_cases!(scan_java_int_hex_literal, cases, "remaining");

        assert_valid_cases!(scan_java_int_literal, cases);
        assert_valid_cases!(scan_java_int_literal, cases, "remaining");
    }

    #[test]
    fn test_java_int_oct_literals() {
        let cases = [
            "0372",
            "0777L",
            "0177_7777_7777",
            "0200_0000_0000",
            "0377_7777_7777",
            "07_7777_7777_7777_7777_7777L",
            "010_0000_0000_0000_0000_0000L",
            "017_7777_7777_7777_7777_7777L",
        ];

        assert_valid_cases!(scan_java_int_oct_literal, cases);
        assert_valid_cases!(scan_java_int_oct_literal, cases, "remaining");

        assert_valid_cases!(scan_java_int_literal, cases);
        assert_valid_cases!(scan_java_int_literal, cases, "remaining");
    }

    #[test]
    fn test_java_int_bin_literals() {
        let cases = [
            "0b0",
            "0B0",
            "0b1",
            "0B1",
            "0b0111_1111_1111_1111_1111_1111_1111_1111",
            "0b1000_0000_0000_0000_0000_0000_0000_0000",
            "0b1111_1111_1111_1111_1111_1111_1111_1111",
            "0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L",
            "0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L",
            "0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L",
        ];

        assert_valid_cases!(scan_java_int_bin_literal, cases);
        assert_valid_cases!(scan_java_int_bin_literal, cases, "remaining");

        assert_valid_cases!(scan_java_int_literal, cases);
        assert_valid_cases!(scan_java_int_literal, cases, "remaining");
    }

    #[test]
    fn test_java_float_literals() {
        let cases = [
            "1e1f",
            "2.f",
            ".3f",
            "0f",
            "3.14f",
            "6.022137e+23f",
            "1e1",
            "2.",
            ".3",
            "0.0",
            "3.14",
            "1e-9d",
            "1e137",
        ];

        assert_valid_cases!(scan_java_float_literal, cases);
        assert_valid_cases!(scan_java_float_literal, cases, "remaining");

        assert_valid_cases!(scan_java_float_dec_literal, cases);
        assert_valid_cases!(scan_java_float_dec_literal, cases, "remaining");
    }

    #[test]
    fn test_java_float_hex_literals() {
        let cases = [
            "0x1.fffffeP+127f",
            "0x0.000002P-126f",
            "0x1.0P-149f",
            "0x1.f_ffff_ffff_ffffP+1023",
            "0x0.0_0000_0000_0001P-1022",
        ];

        assert_valid_cases!(scan_java_float_hex_literal, cases);
        assert_valid_cases!(scan_java_float_hex_literal, cases, "remaining");
    }

    #[test]
    fn test_java_char_literals() {
        let cases = [
            "'a'",
            "'%'",
            "'\t'",
            "'\\\\'",
            "'\\''",
            "'\\u03a9'",
            "'\\uFFFF'",
            "'\\177'",
            "'™'",
        ];

        assert_valid_cases!(scan_java_char_literal, cases);
        assert_valid_cases!(scan_java_char_literal, cases, "remaining");
    }

    #[test]
    fn test_java_string_literals() {
        let cases = [
            "\"\"",
            "\"\\\"\"",
            "\"This string\"",
            "\"A\\u0000B\\u1111C\\uEEEED\\uFFFFE\"",
        ];

        assert_valid_cases!(scan_java_string_literal, cases);
        assert_valid_cases!(scan_java_string_literal, cases, "remaining");
    }
}