js_regex/validator.rs
1// Copyright (C) 2020 Quentin M. Kniep <hello@quentinkniep.com>
2// Distributed under terms of the MIT license.
3
4use std::collections::HashSet;
5use std::ops::{Deref, DerefMut};
6
7use crate::reader::Reader;
8use crate::unicode::*;
9
/// Reports whether `cp` is one of the RegExp `SyntaxCharacter`s:
/// `^ $ \ . * + ? ( ) [ ] { } |`.
fn is_syntax_character(cp: char) -> bool {
    match cp {
        '^' | '$' | '\\' | '.' | '*' | '+' | '?' => true,
        '(' | ')' | '[' | ']' | '{' | '}' | '|' => true,
        _ => false,
    }
}
26
/// Reports whether `cp` may appear in a Unicode property name:
/// an ASCII letter or `_`.
fn is_unicode_property_name_character(cp: char) -> bool {
    match cp {
        'a'..='z' | 'A'..='Z' | '_' => true,
        _ => false,
    }
}
30
31fn is_unicode_property_value_character(cp: char) -> bool {
32 is_unicode_property_name_character(cp) || cp.is_digit(10)
33}
34
35fn is_regexp_identifier_start(cp: char) -> bool {
36 is_id_start(cp) || cp == '$' || cp == '_'
37}
38
39fn is_regexp_identifier_part(cp: char) -> bool {
40 is_id_continue(cp) ||
41 cp == '$' ||
42 cp == '_' ||
43 cp == '\u{200c}' || // unicode zero-width non-joiner
44 cp == '\u{200d}' // unicode zero-width joiner
45}
46
47fn is_id_start(cp: char) -> bool {
48 if (cp as u32) < 0x41 {
49 false
50 } else if (cp as u32) < 0x5b {
51 true
52 } else if (cp as u32) < 0x61 {
53 false
54 } else if (cp as u32) < 0x7b {
55 true
56 } else {
57 is_large_id_start(cp)
58 }
59}
60
61fn is_id_continue(cp: char) -> bool {
62 if (cp as u32) < 0x30 {
63 false
64 } else if (cp as u32) < 0x3a {
65 true
66 } else if (cp as u32) < 0x41 {
67 false
68 } else if (cp as u32) < 0x5b {
69 true
70 } else if (cp as u32) == 0x5f {
71 true
72 } else if (cp as u32) < 0x61 {
73 false
74 } else if (cp as u32) < 0x7b {
75 true
76 } else {
77 is_large_id_start(cp) || is_large_id_continue(cp)
78 }
79}
80
/// Reports whether `cp` lies inside the Unicode code space
/// `U+0000..=U+10FFFF`.
///
/// Negative values are rejected as well; this matters because the validator
/// stores `-1` in `last_int_value` as a "character set" sentinel, and that
/// sentinel must never be mistaken for a code point.
fn is_valid_unicode(cp: i64) -> bool {
    (0..=0x10ffff).contains(&cp)
}
84
/// Reports whether `cp` is a UTF-16 lead (high) surrogate,
/// i.e. in `0xD800..=0xDBFF`.
fn is_lead_surrogate(cp: i64) -> bool {
    match cp {
        0xd800..=0xdbff => true,
        _ => false,
    }
}
88
/// Reports whether `cp` is a UTF-16 trail (low) surrogate,
/// i.e. in `0xDC00..=0xDFFF`.
fn is_trail_surrogate(cp: i64) -> bool {
    match cp {
        0xdc00..=0xdfff => true,
        _ => false,
    }
}
92
/// Combines a UTF-16 surrogate pair into the supplementary code point it
/// encodes.
///
/// `lead` supplies the high ten bits and `trail` the low ten bits; the result
/// is offset by `0x10000`. No range checking is done on the inputs.
fn combine_surrogate_pair(lead: i64, trail: i64) -> i64 {
    let high_bits = (lead - 0xd800) * 0x400;
    let low_bits = trail - 0xdc00;
    high_bits + low_bits + 0x10000
}
96
/// ECMAScript language edition the validator should conform to.
///
/// The derived `Ord` follows declaration order, so editions can be compared
/// with `>=` to gate features (for example, the `u` and `y` flags require at
/// least `ES2015` and the `s` flag at least `ES2018`). Keep the variants in
/// chronological order.
#[derive(Clone, Copy, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum EcmaVersion {
    ES5,
    ES2015,
    ES2016,
    ES2017,
    ES2018,
    ES2019,
    ES2020,
    ES2021,
}
108
/// Validator for ECMAScript regular expression patterns and flags.
///
/// The struct wraps a `Reader` (reachable through `Deref`/`DerefMut`) and a
/// set of scratch fields that the `consume_*` / `eat_*` methods use to hand
/// intermediate results to their callers.
#[derive(Debug)]
pub struct EcmaRegexValidator {
    // Cursor over the pattern being validated.
    reader: Reader,
    // Strict mode; currently set to the `u_flag` argument of `validate_pattern`.
    strict: bool,
    // Language edition used to gate syntax features.
    ecma_version: EcmaVersion,
    // Unicode mode (`u` flag requested and edition >= ES2015).
    u_flag: bool,
    // Named-groups mode; enables `\k<name>` handling in atom escapes.
    n_flag: bool,
    // Numeric result of the last `eat_*`/`consume_*` helper; `-1` marks a
    // character class escape (a character set rather than a single character).
    last_int_value: i64,
    // Lower bound recorded by `eat_braced_quantifier`.
    last_min_value: i64,
    // Upper bound recorded by `eat_braced_quantifier` (`i64::MAX` when open-ended).
    last_max_value: i64,
    // Identifier text recorded by `eat_regexp_identifier_name`.
    last_str_value: String,
    // NOTE(review): not written in the visible part of the file; presumably the
    // Unicode property key/value of `\p{key=value}` — confirm against the rest.
    last_key_value: String,
    last_val_value: String,
    // Whether the last assertion consumed by `consume_assertion` may take a quantifier.
    last_assertion_is_quantifiable: bool,
    // Number of capturing groups, computed at the start of `consume_pattern`.
    num_capturing_parens: u32,
    // Names of the capture groups defined so far in the current pass.
    group_names: HashSet<String>,
    // Names referenced through `\k<name>` in the current pass.
    backreference_names: HashSet<String>,
}
127
128impl Deref for EcmaRegexValidator {
129 type Target = Reader;
130
131 fn deref(&self) -> &Self::Target {
132 &self.reader
133 }
134}
135
136impl DerefMut for EcmaRegexValidator {
137 fn deref_mut(&mut self) -> &mut Self::Target {
138 &mut self.reader
139 }
140}
141
142impl EcmaRegexValidator {
143 pub fn new(ecma_version: EcmaVersion) -> Self {
144 EcmaRegexValidator {
145 reader: Reader::new(),
146 strict: false,
147 ecma_version,
148 u_flag: false,
149 n_flag: false,
150 last_int_value: 0,
151 last_min_value: 0,
152 last_max_value: 0,
153 last_str_value: "".to_string(),
154 last_key_value: "".to_string(),
155 last_val_value: "".to_string(),
156 last_assertion_is_quantifiable: false,
157 num_capturing_parens: 0,
158 group_names: HashSet::new(),
159 backreference_names: HashSet::new(),
160 }
161 }
162
163 /// Validates flags of a EcmaScript regular expression.
164 pub fn validate_flags(&self, flags: &str) -> Result<(), String> {
165 let mut existing_flags = HashSet::<char>::new();
166
167 for flag in flags.chars() {
168 if existing_flags.contains(&flag) {
169 return Err(format!("Duplicated flag {}", flag));
170 }
171 existing_flags.insert(flag);
172
173 if flag == 'g'
174 || flag == 'i'
175 || flag == 'm'
176 || (flag == 'u' && self.ecma_version >= EcmaVersion::ES2015)
177 || (flag == 'y' && self.ecma_version >= EcmaVersion::ES2015)
178 || (flag == 's' && self.ecma_version >= EcmaVersion::ES2018)
179 {
180 // do nothing
181 } else {
182 return Err(format!("Invalid flag {}", flag));
183 }
184 }
185 Ok(())
186 }
187
188 /// Validates the pattern of a EcmaScript regular expression.
189 pub fn validate_pattern(&mut self, source: &str, u_flag: bool) -> Result<(), String> {
190 self.strict = u_flag; // TODO: allow toggling strict independently of u flag
191 self.u_flag = u_flag && self.ecma_version >= EcmaVersion::ES2015;
192 self.n_flag = u_flag && self.ecma_version >= EcmaVersion::ES2018;
193 //self.reset(source, 0, source.len(), u_flag);
194 self.reset(source, 0, source.chars().count(), u_flag);
195 self.consume_pattern()?;
196
197 if !self.n_flag && self.ecma_version >= EcmaVersion::ES2018 && self.group_names.len() > 0 {
198 self.n_flag = true;
199 self.rewind(0);
200 self.consume_pattern()?;
201 }
202
203 return Ok(());
204 }
205
206 /// Validate the next characters as a RegExp `Pattern` production.
207 /// ```grammar
208 /// Pattern[U, N]::
209 /// Disjunction[?U, ?N]
210 /// ```
211 fn consume_pattern(&mut self) -> Result<(), String> {
212 self.num_capturing_parens = self.count_capturing_parens();
213 self.group_names.clear();
214 self.backreference_names.clear();
215
216 self.consume_disjunction()?;
217
218 if let Some(cp) = self.code_point_with_offset(0) {
219 if cp == ')' {
220 return Err("Unmatched ')'".to_string());
221 } else if cp == '\\' {
222 return Err("\\ at end of pattern".to_string());
223 } else if cp == ']' || cp == '}' {
224 return Err("Lone quantifier brackets".to_string());
225 }
226 return Err(format!("Unexpected character {}", cp));
227 }
228
229 for name in self.backreference_names.difference(&self.group_names) {
230 return Err(format!("Invalid named capture referenced: {}", name));
231 }
232 return Ok(());
233 }
234
235 /// Validate the next characters as a RegExp `Disjunction` production.
236 /// ```grammar
237 /// Disjunction[U, N]::
238 /// Alternative[?U, ?N]
239 /// Alternative[?U, ?N] `|` Disjunction[?U, ?N]
240 /// ```
241 fn consume_disjunction(&mut self) -> Result<(), String> {
242 self.consume_alternative()?;
243 while self.eat('|') {
244 self.consume_alternative()?;
245 }
246
247 if self.consume_quantifier(true)? {
248 return Err("Nothing to repeat".to_string());
249 } else if self.eat('{') {
250 return Err("Lone quantifier brackets".to_string());
251 }
252 return Ok(());
253 }
254
255 /// Validate the next characters as a RegExp `Alternative` production.
256 /// ```grammar
257 /// Alternative[U, N]::
258 /// ε
259 /// Alternative[?U, ?N] Term[?U, ?N]
260 /// ```
261 fn consume_alternative(&mut self) -> Result<(), String> {
262 while self.code_point_with_offset(0).is_some() && self.consume_term()? {
263 // do nothing
264 }
265 Ok(())
266 }
267
268 /// Validate the next characters as a RegExp `Term` production if possible.
269 /// ```grammar
270 /// Term[U, N]::
271 /// [strict] Assertion[+U, ?N]
272 /// [strict] Atom[+U, ?N]
273 /// [strict] Atom[+U, ?N] Quantifier
274 /// [annexB][+U] Assertion[+U, ?N]
275 /// [annexB][+U] Atom[+U, ?N]
276 /// [annexB][+U] Atom[+U, ?N] Quantifier
277 /// [annexB][~U] QuantifiableAssertion[?N] Quantifier
278 /// [annexB][~U] Assertion[~U, ?N]
279 /// [annexB][~U] ExtendedAtom[?N] Quantifier
280 /// [annexB][~U] ExtendedAtom[?N]
281 /// ```
282 /// Returns `true` if it consumed the next characters successfully.
283 fn consume_term(&mut self) -> Result<bool, String> {
284 if self.u_flag || self.strict {
285 return Ok(self.consume_assertion()?
286 || (self.consume_atom()? && self.consume_optional_quantifier()?));
287 }
288 return Ok((self.consume_assertion()?
289 && (!self.last_assertion_is_quantifiable || self.consume_optional_quantifier()?))
290 || (self.consume_extended_atom()? && self.consume_optional_quantifier()?));
291 }
292
293 fn consume_optional_quantifier(&mut self) -> Result<bool, String> {
294 self.consume_quantifier(false)?;
295 Ok(true)
296 }
297
298 /// Validate the next characters as a RegExp `Term` production if possible.
299 /// Set `self.last_assertion_is_quantifiable` if the consumed assertion was a
300 /// `QuantifiableAssertion` production.
301 /// ```grammar
302 /// Assertion[U, N]::
303 /// `^`
304 /// `$`
305 /// `\b`
306 /// `\B`
307 /// [strict] `(?=` Disjunction[+U, ?N] `)`
308 /// [strict] `(?!` Disjunction[+U, ?N] `)`
309 /// [annexB][+U] `(?=` Disjunction[+U, ?N] `)`
310 /// [annexB][+U] `(?!` Disjunction[+U, ?N] `)`
311 /// [annexB][~U] QuantifiableAssertion[?N]
312 /// `(?<=` Disjunction[?U, ?N] `)`
313 /// `(?<!` Disjunction[?U, ?N] `)`
314 /// QuantifiableAssertion[N]::
315 /// `(?=` Disjunction[~U, ?N] `)`
316 /// `(?!` Disjunction[~U, ?N] `)`
317 /// ```
318 /// Returns `true` if it consumed the next characters successfully.
319 fn consume_assertion(&mut self) -> Result<bool, String> {
320 let start = self.index();
321 self.last_assertion_is_quantifiable = false;
322
323 if self.eat('^') || self.eat('$') || self.eat2('\\', 'B') || self.eat2('\\', 'b') {
324 return Ok(true);
325 }
326
327 // Lookahead / Lookbehind
328 if self.eat2('(', '?') {
329 let lookbehind = self.ecma_version >= EcmaVersion::ES2018 && self.eat('<');
330 let mut flag = self.eat('=');
331 if !flag {
332 flag = self.eat('!');
333 }
334 if flag {
335 self.consume_disjunction()?;
336 if !self.eat(')') {
337 return Err("Unterminated group".to_string());
338 }
339 self.last_assertion_is_quantifiable = !lookbehind && !self.strict;
340 return Ok(true);
341 }
342 self.rewind(start);
343 }
344 Ok(false)
345 }
346
347 /// Validate the next characters as a RegExp `Quantifier` production if possible.
348 /// ```grammar
349 /// Quantifier::
350 /// QuantifierPrefix
351 /// QuantifierPrefix `?`
352 /// QuantifierPrefix::
353 /// `*`
354 /// `+`
355 /// `?`
356 /// `{` DecimalDigits `}`
357 /// `{` DecimalDigits `,}`
358 /// `{` DecimalDigits `,` DecimalDigits `}`
359 /// ```
360 /// Returns `true` if it consumed the next characters successfully.
361 fn consume_quantifier(&mut self, no_consume: bool) -> Result<bool, String> {
362 // QuantifierPrefix
363 if !self.eat('*')
364 && !self.eat('+')
365 && !self.eat('?')
366 && !self.eat_braced_quantifier(no_consume)?
367 {
368 return Ok(false);
369 }
370
371 self.eat('?');
372 return Ok(true);
373 }
374
375 /// Eats the next characters as the following alternatives if possible.
376 /// Sets `self.last_min_value` and `self.last_max_value` if it consumed the next characters
377 /// successfully.
378 /// ```grammar
379 /// `{` DecimalDigits `}`
380 /// `{` DecimalDigits `,}`
381 /// `{` DecimalDigits `,` DecimalDigits `}`
382 /// ```
383 /// Returns `true` if it consumed the next characters successfully.
384 fn eat_braced_quantifier(&mut self, no_error: bool) -> Result<bool, &str> {
385 let start = self.index();
386 if self.eat('{') {
387 self.last_min_value = 0;
388 self.last_max_value = i64::MAX;
389 if self.eat_decimal_digits() {
390 self.last_min_value = self.last_int_value;
391 self.last_max_value = self.last_int_value;
392 if self.eat(',') {
393 self.last_max_value = if self.eat_decimal_digits() {
394 self.last_int_value
395 } else {
396 i64::MAX
397 }
398 }
399 if self.eat('}') {
400 if !no_error && self.last_max_value < self.last_min_value {
401 return Err("numbers out of order in {} quantifier");
402 }
403 return Ok(true);
404 }
405 }
406 if !no_error && (self.u_flag || self.strict) {
407 return Err("Incomplete quantifier");
408 }
409 self.rewind(start);
410 }
411 return Ok(false);
412 }
413
414 /// Validate the next characters as a RegExp `Atom` production if possible.
415 /// ```grammar
416 /// Atom[U, N]::
417 /// PatternCharacter
418 /// `.`
419 /// `\\` AtomEscape[?U, ?N]
420 /// CharacterClass[?U]
421 /// `(?:` Disjunction[?U, ?N] )
422 /// `(` GroupSpecifier[?U] Disjunction[?U, ?N] `)`
423 /// ```
424 /// Returns `true` if it consumed the next characters successfully.
425 fn consume_atom(&mut self) -> Result<bool, String> {
426 Ok(self.consume_pattern_character()
427 || self.consume_dot()
428 || self.consume_reverse_solidus_atom_escape()?
429 || self.consume_character_class()?
430 || self.consume_uncapturing_group()?
431 || self.consume_capturing_group()?)
432 }
433
434 /// Validate the next characters as the following alternatives if possible.
435 /// ```grammar
436 /// `.`
437 /// ```
438 /// Returns `true` if it consumed the next characters successfully.
439 fn consume_dot(&mut self) -> bool {
440 if self.eat('.') {
441 return true;
442 }
443 return false;
444 }
445
446 /// Validate the next characters as the following alternatives if possible.
447 /// ```grammar
448 /// `\\` AtomEscape[?U, ?N]
449 /// ```
450 /// Returns `true` if it consumed the next characters successfully.
451 fn consume_reverse_solidus_atom_escape(&mut self) -> Result<bool, String> {
452 let start = self.index();
453 if self.eat('\\') {
454 if self.consume_atom_escape()? {
455 return Ok(true);
456 }
457 self.rewind(start);
458 }
459 return Ok(false);
460 }
461
462 /// Validate the next characters as the following alternatives if possible.
463 /// ```grammar
464 /// `(?:` Disjunction[?U, ?N] )
465 /// ```
466 /// Returns `true` if it consumed the next characters successfully.
467 fn consume_uncapturing_group(&mut self) -> Result<bool, String> {
468 if self.eat3('(', '?', ':') {
469 self.consume_disjunction()?;
470 if !self.eat(')') {
471 return Err("Unterminated group".to_string());
472 }
473 return Ok(true);
474 }
475 return Ok(false);
476 }
477
478 /// Validate the next characters as the following alternatives if possible.
479 /// ```grammar
480 /// `(` GroupSpecifier[?U] Disjunction[?U, ?N] `)`
481 /// ```
482 /// Returns `true` if it consumed the next characters successfully.
483 fn consume_capturing_group(&mut self) -> Result<bool, String> {
484 if !self.eat('(') {
485 return Ok(false);
486 }
487
488 if self.ecma_version >= EcmaVersion::ES2018 {
489 self.consume_group_specifier()?;
490 } else if self.code_point_with_offset(0) == Some('?') {
491 return Err("Invalid group".to_string());
492 }
493
494 self.consume_disjunction()?;
495 if !self.eat(')') {
496 return Err("Unterminated group".to_string());
497 }
498 Ok(true)
499 }
500
501 /// Validate the next characters as a RegExp `ExtendedAtom` production if possible.
502 /// ```grammar
503 /// ExtendedAtom[N]::
504 /// `.`
505 /// `\` AtomEscape[~U, ?N]
506 /// `\` [lookahead = c]
507 /// CharacterClass[~U]
508 /// `(?:` Disjunction[~U, ?N] `)`
509 /// `(` Disjunction[~U, ?N] `)`
510 /// InvalidBracedQuantifier
511 /// ExtendedPatternCharacter
512 /// ```
513 /// Returns `true` if it consumed the next characters successfully.
514 fn consume_extended_atom(&mut self) -> Result<bool, String> {
515 Ok(self.eat('.')
516 || self.consume_reverse_solidus_atom_escape()?
517 || self.consume_reverse_solidus_followed_by_c()
518 || self.consume_character_class()?
519 || self.consume_uncapturing_group()?
520 || self.consume_capturing_group()?
521 || self.consume_invalid_braced_quantifier()?
522 || self.consume_extended_pattern_character())
523 }
524
525 /// Validate the next characters as the following alternatives if possible.
526 /// ```grammar
527 /// `\` [lookahead = c]
528 /// ```
529 /// Returns `true` if it consumed the next characters successfully.
530 fn consume_reverse_solidus_followed_by_c(&mut self) -> bool {
531 if self.code_point_with_offset(0) == Some('\\')
532 && self.code_point_with_offset(1) == Some('c')
533 {
534 self.last_int_value = '\\' as i64;
535 self.advance();
536 return true;
537 }
538 return false;
539 }
540
541 /// Validate the next characters as a RegExp `InvalidBracedQuantifier`
542 /// production if possible.
543 /// ```grammar
544 /// InvalidBracedQuantifier::
545 /// `{` DecimalDigits `}`
546 /// `{` DecimalDigits `,}`
547 /// `{` DecimalDigits `,` DecimalDigits `}`
548 /// ```
549 /// Returns `true` if it consumed the next characters successfully.
550 fn consume_invalid_braced_quantifier(&mut self) -> Result<bool, &str> {
551 if self.eat_braced_quantifier(true)? {
552 return Err("Nothing to repeat");
553 }
554 Ok(false)
555 }
556
557 /// Validate the next characters as a RegExp `PatternCharacter` production if
558 /// possible.
559 /// ```grammar
560 /// PatternCharacter::
561 /// SourceCharacter but not SyntaxCharacter
562 /// ```
563 /// Returns `true` if it consumed the next characters successfully.
564 fn consume_pattern_character(&mut self) -> bool {
565 if let Some(cp) = self.code_point_with_offset(0) {
566 if !is_syntax_character(cp) {
567 self.advance();
568 return true;
569 }
570 }
571 return false;
572 }
573
574 /// Validate the next characters as a RegExp `ExtendedPatternCharacter`
575 /// production if possible.
576 /// ```grammar
577 /// ExtendedPatternCharacter::
578 /// SourceCharacter but not one of ^ $ \ . * + ? ( ) [ |
579 /// ```
580 /// Returns `true` if it consumed the next characters successfully.
581 fn consume_extended_pattern_character(&mut self) -> bool {
582 if let Some(cp) = self.code_point_with_offset(0) {
583 if cp != '^'
584 && cp != '$'
585 && cp != '\\'
586 && cp != '.'
587 && cp != '*'
588 && cp != '+'
589 && cp != '?'
590 && cp != '('
591 && cp != ')'
592 && cp != '['
593 && cp != '|'
594 {
595 self.advance();
596 return true;
597 }
598 }
599 return false;
600 }
601
602 /// Validate the next characters as a RegExp `GroupSpecifier` production.
603 /// Set `self.last_str_value` if the group name existed.
604 /// ```grammar
605 /// GroupSpecifier[U]::
606 /// ε
607 /// `?` GroupName[?U]
608 /// ```
609 /// Returns `true` if the group name existed.
610 fn consume_group_specifier(&mut self) -> Result<bool, String> {
611 if self.eat('?') {
612 if self.eat_group_name()? {
613 if !self.group_names.contains(&self.last_str_value) {
614 self.group_names.insert(self.last_str_value.clone());
615 return Ok(true);
616 }
617 return Err("Duplicate capture group name".to_string());
618 }
619 return Err("Invalid group".to_string());
620 }
621 return Ok(false);
622 }
623
624 /// Validate the next characters as a RegExp `AtomEscape` production if possible.
625 /// ```grammar
626 /// AtomEscape[U, N]::
627 /// [strict] DecimalEscape
628 /// [annexB][+U] DecimalEscape
629 /// [annexB][~U] DecimalEscape but only if the CapturingGroupNumber of DecimalEscape is <= NcapturingParens
630 /// CharacterClassEscape[?U]
631 /// [strict] CharacterEscape[?U]
632 /// [annexB] CharacterEscape[?U, ?N]
633 /// [+N] `k` GroupName[?U]
634 /// ```
635 /// Returns `Ok(true)` if it consumed the next characters successfully.
636 fn consume_atom_escape(&mut self) -> Result<bool, String> {
637 if self.consume_backreference()?
638 || self.consume_character_class_escape()?
639 || self.consume_character_escape()?
640 || (self.n_flag && self.consume_k_group_name()?)
641 {
642 return Ok(true);
643 }
644 if self.strict || self.u_flag {
645 return Err("Invalid escape".to_string());
646 }
647 return Ok(false);
648 }
649
650 /// Validate the next characters as the follwoing alternatives if possible.
651 /// ```grammar
652 /// [strict] DecimalEscape
653 /// [annexB][+U] DecimalEscape
654 /// [annexB][~U] DecimalEscape but only if the CapturingGroupNumber of DecimalEscape is <= NcapturingParens
655 /// ```
656 /// Returns `Ok(true)` if it consumed the next characters successfully.
657 fn consume_backreference(&mut self) -> Result<bool, &str> {
658 let start = self.index();
659 if self.eat_decimal_escape() {
660 if self.last_int_value <= self.num_capturing_parens as i64 {
661 return Ok(true);
662 } else if self.strict || self.u_flag {
663 return Err("Invalid escape");
664 }
665 self.rewind(start);
666 }
667 Ok(false)
668 }
669
670 /// Validate the next characters as a RegExp `DecimalEscape` production if possible.
671 /// Set `-1` to `self.last_int_value` as meaning of a character set if it ate the next
672 /// characters successfully.
673 /// ```grammar
674 /// CharacterClassEscape[U]::
675 /// `d`
676 /// `D`
677 /// `s`
678 /// `S`
679 /// `w`
680 /// `W`
681 /// [+U] `p{` UnicodePropertyValueExpression `}`
682 /// [+U] `P{` UnicodePropertyValueExpression `}`
683 /// ```
684 /// Returns `true` if it consumed the next characters successfully.
685 fn consume_character_class_escape(&mut self) -> Result<bool, String> {
686 if self.eat('d')
687 || self.eat('D')
688 || self.eat('s')
689 || self.eat('S')
690 || self.eat('w')
691 || self.eat('W')
692 {
693 self.last_int_value = -1;
694 return Ok(true);
695 }
696
697 if self.u_flag
698 && self.ecma_version >= EcmaVersion::ES2018
699 && (self.eat('p') || self.eat('P'))
700 {
701 self.last_int_value = -1;
702 if self.eat('{') && self.eat_unicode_property_value_expression()? && self.eat('}') {
703 return Ok(true);
704 }
705 return Err("Invalid property name".to_string());
706 }
707 Ok(false)
708 }
709
710 /// Validate the next characters as a RegExp `CharacterEscape` production if possible.
711 /// ```grammar
712 /// CharacterEscape[U, N]::
713 /// ControlEscape
714 /// `c` ControlLetter
715 /// `0` [lookahead ∉ DecimalDigit]
716 /// HexEscapeSequence
717 /// RegExpUnicodeEscapeSequence[?U]
718 /// [annexB][~U] LegacyOctalEscapeSequence
719 /// IdentityEscape[?U, ?N]
720 /// ```
721 /// Returns `true` if it consumed the next characters successfully.
722 fn consume_character_escape(&mut self) -> Result<bool, String> {
723 Ok(self.eat_control_escape()
724 || self.eat_c_control_letter()
725 || self.eat_zero()
726 || self.eat_hex_escape_sequence()?
727 || self.eat_regexp_unicode_escape_sequence(false)?
728 || (!self.strict && !self.u_flag && self.eat_legacy_octal_escape_sequence())
729 || self.eat_identity_escape())
730 }
731
732 /// Validate the next characters as the follwoing alternatives if possible.
733 /// ```grammar
734 /// `k` GroupName[?U]
735 /// ```
736 /// Returns `Ok(true)` if it consumed the next characters successfully.
737 fn consume_k_group_name(&mut self) -> Result<bool, String> {
738 if self.eat('k') {
739 if self.eat_group_name()? {
740 let group_name = self.last_str_value.clone();
741 self.backreference_names.insert(group_name);
742 return Ok(true);
743 }
744 return Err("Invalid named reference".to_string());
745 }
746 Ok(false)
747 }
748
749 /// Validate the next characters as a RegExp `CharacterClass` production if possible.
750 /// ```grammar
751 /// CharacterClass[U]::
752 /// `[` [lookahead ≠ ^] ClassRanges[?U] `]`
753 /// `[^` ClassRanges[?U] `]`
754 /// ```
755 /// Returns `true` if it consumed the next characters successfully.
756 fn consume_character_class(&mut self) -> Result<bool, String> {
757 if !self.eat('[') {
758 return Ok(false);
759 }
760 self.consume_class_ranges()?;
761 if !self.eat(']') {
762 return Err("Unterminated character class".to_string());
763 }
764 Ok(true)
765 }
766
767 /// Validate the next characters as a RegExp `ClassRanges` production.
768 /// ```grammar
769 /// ClassRanges[U]::
770 /// ε
771 /// NonemptyClassRanges[?U]
772 /// NonemptyClassRanges[U]::
773 /// ClassAtom[?U]
774 /// ClassAtom[?U] NonemptyClassRangesNoDash[?U]
775 /// ClassAtom[?U] `-` ClassAtom[?U] ClassRanges[?U]
776 /// NonemptyClassRangesNoDash[U]::
777 /// ClassAtom[?U]
778 /// ClassAtomNoDash[?U] NonemptyClassRangesNoDash[?U]
779 /// ClassAtomNoDash[?U] `-` ClassAtom[?U] ClassRanges[?U]
780 /// ```
781 fn consume_class_ranges(&mut self) -> Result<(), String> {
782 loop {
783 // Consume the first ClassAtom
784 if !self.consume_class_atom()? {
785 break;
786 }
787 let min = self.last_int_value;
788
789 // Consume `-`
790 if !self.eat('-') {
791 continue;
792 }
793
794 // Consume the second ClassAtom
795 if !self.consume_class_atom()? {
796 break;
797 }
798 let max = self.last_int_value;
799
800 // Validate
801 if min == -1 || max == -1 {
802 if self.strict {
803 return Err("Invalid character class".to_string());
804 }
805 continue;
806 }
807
808 println!("min: {}, max: {}", min, max);
809 if min > max {
810 return Err("Range out of order in character class".to_string());
811 }
812 }
813 Ok(())
814 }
815
816 /// Validate the next characters as a RegExp `ClassAtom` production if possible.
817 /// Set `self.last_int_value` if it consumed the next characters successfully.
818 /// ```grammar
819 /// ClassAtom[U, N]::
820 /// `-`
821 /// ClassAtomNoDash[?U, ?N]
822 /// ClassAtomNoDash[U, N]::
823 /// SourceCharacter but not one of \ ] -
824 /// `\` ClassEscape[?U, ?N]
825 /// [annexB] `\` [lookahead = c]
826 /// ```
827 /// Returns `Ok(true)` if it consumed the next characters successfully.
828 fn consume_class_atom(&mut self) -> Result<bool, String> {
829 let start = self.index();
830
831 if let Some(cp) = self.code_point_with_offset(0) {
832 if cp != '\\' && cp != ']' {
833 self.advance();
834 self.last_int_value = cp as i64;
835 return Ok(true);
836 }
837 }
838
839 if self.eat('\\') {
840 if self.consume_class_escape()? {
841 return Ok(true);
842 }
843 if !self.strict && self.code_point_with_offset(0) == Some('c') {
844 self.last_int_value = '\\' as i64;
845 return Ok(true);
846 }
847 if self.strict || self.u_flag {
848 return Err("Invalid escape".to_string());
849 }
850 self.rewind(start);
851 }
852 Ok(false)
853 }
854
855 /// Validate the next characters as a RegExp `ClassEscape` production if possible.
856 /// Set `self.last_int_value` if it consumed the next characters successfully.
857 /// ```grammar
858 /// ClassEscape[U, N]::
859 /// `b`
860 /// [+U] `-`
861 /// [annexB][~U] `c` ClassControlLetter
862 /// CharacterClassEscape[?U]
863 /// CharacterEscape[?U, ?N]
864 /// ClassControlLetter::
865 /// DecimalDigit
866 /// `_`
867 /// ```
868 /// Returns `Ok(true)` if it consumed the next characters successfully.
869 fn consume_class_escape(&mut self) -> Result<bool, String> {
870 if self.eat('b') {
871 self.last_int_value = 0x08; // backspace
872 return Ok(true);
873 }
874
875 // [+U] `-`
876 if self.u_flag && self.eat('-') {
877 self.last_int_value = '-' as i64;
878 return Ok(true);
879 }
880
881 // [annexB][~U] `c` ClassControlLetter
882 if !self.strict && !self.u_flag && self.code_point_with_offset(0) == Some('c') {
883 if let Some(cp) = self.code_point_with_offset(1) {
884 if cp.is_digit(10) || cp == '_' {
885 self.advance();
886 self.advance();
887 self.last_int_value = cp as i64 % 0x20;
888 return Ok(true);
889 }
890 }
891 }
892
893 Ok(self.consume_character_class_escape()? || self.consume_character_escape()?)
894 }
895
896 /// Eat the next characters as a RegExp `GroupName` production if possible.
897 /// Set `self.last_str_value` if the group name existed.
898 /// ```grammar
899 /// GroupName[U]::
900 /// `<` RegExpIdentifierName[?U] `>`
901 /// ```
902 /// Returns `true` if it ate the next characters successfully.
903 fn eat_group_name(&mut self) -> Result<bool, String> {
904 if self.eat('<') {
905 if self.eat_regexp_identifier_name()? && self.eat('>') {
906 return Ok(true);
907 }
908 return Err("Invalid capture group name".to_string());
909 }
910 return Ok(false);
911 }
912
913 /// Eat the next characters as a RegExp `RegExpIdentifierName` production if
914 /// possible.
915 /// Set `self.last_str_value` if the identifier name existed.
916 /// ```grammar
917 /// RegExpIdentifierName[U]::
918 /// RegExpIdentifierStart[?U]
919 /// RegExpIdentifierName[?U] RegExpIdentifierPart[?U]
920 /// ```
921 /// Returns `true` if it ate the next characters successfully.
922 fn eat_regexp_identifier_name(&mut self) -> Result<bool, String> {
923 if self.eat_regexp_identifier_start()? {
924 self.last_str_value = std::char::from_u32(self.last_int_value as u32)
925 .unwrap()
926 .to_string();
927 while self.eat_regexp_identifier_part()? {
928 self.last_str_value
929 .push(std::char::from_u32(self.last_int_value as u32).unwrap());
930 }
931 return Ok(true);
932 }
933 return Ok(false);
934 }
935
936 /// Eat the next characters as a RegExp `RegExpIdentifierStart` production if
937 /// possible.
938 /// Set `self.last_int_value` if the identifier start existed.
939 /// ```grammar
940 /// RegExpIdentifierStart[U] ::
941 /// UnicodeIDStart
942 /// `$`
943 /// `_`
944 /// `\` RegExpUnicodeEscapeSequence[+U]
945 /// [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate
946 /// ```
947 /// Returns `true` if it ate the next characters successfully.
948 fn eat_regexp_identifier_start(&mut self) -> Result<bool, String> {
949 let start = self.index();
950 let force_u_flag = !self.u_flag && self.ecma_version >= EcmaVersion::ES2020;
951
952 if let Some(mut cp) = self.code_point_with_offset(0) {
953 self.advance();
954 let cp1 = self.code_point_with_offset(0);
955 if cp == '\\' && self.eat_regexp_unicode_escape_sequence(force_u_flag)? {
956 cp = std::char::from_u32(self.last_int_value as u32).unwrap();
957 } else if force_u_flag
958 && is_lead_surrogate(cp as i64)
959 && cp1.is_some()
960 && is_trail_surrogate(cp1.unwrap() as i64)
961 {
962 cp = std::char::from_u32(
963 combine_surrogate_pair(cp as i64, cp1.unwrap() as i64) as u32,
964 )
965 .unwrap();
966 self.advance();
967 }
968
969 if is_regexp_identifier_start(cp) {
970 self.last_int_value = cp as i64;
971 return Ok(true);
972 }
973 }
974
975 if self.index() != start {
976 self.rewind(start);
977 }
978 return Ok(false);
979 }
980
981 /// Eat the next characters as a RegExp `RegExpIdentifierPart` production if
982 /// possible.
983 /// Set `self.last_int_value` if the identifier part existed.
984 /// ```grammar
985 /// RegExpIdentifierPart[U] ::
986 /// UnicodeIDContinue
987 /// `$`
988 /// `_`
989 /// `\` RegExpUnicodeEscapeSequence[+U]
990 /// [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate
991 /// <ZWNJ>
992 /// <ZWJ>
993 /// ```
994 /// Returns `true` if it ate the next characters successfully.
995 fn eat_regexp_identifier_part(&mut self) -> Result<bool, String> {
996 let start = self.index();
997 let force_u_flag = !self.u_flag && self.ecma_version >= EcmaVersion::ES2020;
998 let mut cp = self.code_point_with_offset(0);
999 self.advance();
1000 let cp1 = self.code_point_with_offset(0);
1001
1002 if cp == Some('\\') && self.eat_regexp_unicode_escape_sequence(force_u_flag)? {
1003 // TODO: convert unicode code point to char
1004 cp = std::char::from_u32(self.last_int_value as u32);
1005 } else if force_u_flag
1006 && is_lead_surrogate(cp.unwrap() as i64)
1007 && is_trail_surrogate(cp1.unwrap() as i64)
1008 {
1009 cp = std::char::from_u32(combine_surrogate_pair(
1010 cp.unwrap() as i64,
1011 cp1.unwrap() as i64,
1012 ) as u32);
1013 self.advance();
1014 }
1015
1016 if cp.is_some() && is_regexp_identifier_part(cp.unwrap()) {
1017 self.last_int_value = cp.unwrap() as i64;
1018 return Ok(true);
1019 }
1020
1021 if self.index() != start {
1022 self.rewind(start);
1023 }
1024 Ok(false)
1025 }
1026
1027 /// Eat the next characters as the follwoing alternatives if possible.
1028 /// Set `self.last_int_value` if it ate the next characters successfully.
1029 /// ```grammar
1030 /// `c` ControlLetter
1031 /// ```
1032 /// Returns `true` if it ate the next characters successfully.
1033 fn eat_c_control_letter(&mut self) -> bool {
1034 let start = self.index();
1035 if self.eat('c') {
1036 if self.eat_control_letter() {
1037 return true;
1038 }
1039 self.rewind(start);
1040 }
1041 false
1042 }
1043
1044 /// Eat the next characters as the follwoing alternatives if possible.
1045 /// Set `self.last_int_value` if it ate the next characters successfully.
1046 /// ```grammar
1047 /// `0` [lookahead ∉ DecimalDigit]
1048 /// ```
1049 /// Returns `true` if it ate the next characters successfully.
1050 fn eat_zero(&mut self) -> bool {
1051 if self.code_point_with_offset(0) != Some('0') {
1052 return false;
1053 }
1054 if let Some(cp) = self.code_point_with_offset(1) {
1055 if cp.is_digit(10) {
1056 return false;
1057 }
1058 }
1059 self.last_int_value = 0;
1060 self.advance();
1061 return true;
1062 }
1063
1064 /// Eat the next characters as a RegExp `ControlEscape` production if
1065 /// possible.
1066 /// Set `self.last_int_value` if it ate the next characters successfully.
1067 /// ```grammar
1068 /// ControlEscape:: one of
1069 /// f n r t v
1070 /// ```
1071 /// Returns `true` if it ate the next characters successfully.
1072 fn eat_control_escape(&mut self) -> bool {
1073 if self.eat('f') {
1074 self.last_int_value = 0x0c; // formfeed
1075 return true;
1076 }
1077 if self.eat('n') {
1078 self.last_int_value = 0x0a; // linefeed
1079 return true;
1080 }
1081 if self.eat('r') {
1082 self.last_int_value = 0x0d; // carriage return
1083 return true;
1084 }
1085 if self.eat('t') {
1086 self.last_int_value = 0x09; // character tabulation
1087 return true;
1088 }
1089 if self.eat('v') {
1090 self.last_int_value = 0x0b; // line tabulation
1091 return true;
1092 }
1093 false
1094 }
1095
1096 /// Eat the next characters as a RegExp `ControlLetter` production if possible.
1097 /// Set `self.last_int_value` if it ate the next characters successfully.
1098 /// ```grammar
1099 /// ControlLetter:: one of
1100 /// a b c d e f g h i j k l m n o p q r s t u v w x y z
1101 /// A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
1102 /// ```
1103 /// Returns `true` if it ate the next characters successfully.
1104 fn eat_control_letter(&mut self) -> bool {
1105 if let Some(cp) = self.code_point_with_offset(0) {
1106 if cp.is_ascii_alphabetic() {
1107 self.advance();
1108 self.last_int_value = cp as i64 % 0x20;
1109 return true;
1110 }
1111 }
1112 false
1113 }
1114
1115 /// Eat the next characters as a RegExp `RegExpUnicodeEscapeSequence`
1116 /// production if possible.
1117 /// Set `self.last_int_value` if it ate the next characters successfully.
1118 /// ```grammar
1119 /// RegExpUnicodeEscapeSequence[U]::
1120 /// [+U] `u` LeadSurrogate `\u` TrailSurrogate
1121 /// [+U] `u` LeadSurrogate
1122 /// [+U] `u` TrailSurrogate
1123 /// [+U] `u` NonSurrogate
1124 /// [~U] `u` Hex4Digits
1125 /// [+U] `u{` CodePoint `}`
1126 /// ```
1127 /// Returns `true` if it ate the next characters successfully.
1128 fn eat_regexp_unicode_escape_sequence(&mut self, force_u_flag: bool) -> Result<bool, &str> {
1129 let start = self.index();
1130 let u_flag = force_u_flag || self.u_flag;
1131
1132 if self.eat('u') {
1133 if (u_flag && self.eat_regexp_unicode_surrogate_pair_escape())
1134 || self.eat_fixed_hex_digits(4)
1135 || (u_flag && self.eat_regexp_unicode_codepoint_escape())
1136 {
1137 return Ok(true);
1138 }
1139 if self.strict || u_flag {
1140 return Err("Invalid unicode escape");
1141 }
1142 self.rewind(start);
1143 }
1144
1145 return Ok(false);
1146 }
1147
1148 /// Eat the next characters as the following alternatives if possible.
1149 /// Set `self.last_int_value` if it ate the next characters successfully.
1150 /// ```grammar
1151 /// LeadSurrogate `\u` TrailSurrogate
1152 /// ```
1153 /// Returns `true` if it ate the next characters successfully.
1154 fn eat_regexp_unicode_surrogate_pair_escape(&mut self) -> bool {
1155 let start = self.index();
1156
1157 if self.eat_fixed_hex_digits(4) {
1158 let lead = self.last_int_value;
1159 if is_lead_surrogate(lead)
1160 && self.eat('\\')
1161 && self.eat('u')
1162 && self.eat_fixed_hex_digits(4)
1163 {
1164 let trail = self.last_int_value;
1165 if is_trail_surrogate(trail) {
1166 self.last_int_value = combine_surrogate_pair(lead, trail);
1167 return true;
1168 }
1169 }
1170
1171 self.rewind(start);
1172 }
1173
1174 return false;
1175 }
1176
1177 /// Eat the next characters as the following alternatives if possible.
1178 /// Set `self.last_int_value` if it ate the next characters successfully.
1179 /// ```grammar
1180 /// `{` CodePoint `}`
1181 /// ```
1182 /// Returns `true` if it ate the next characters successfully.
1183 fn eat_regexp_unicode_codepoint_escape(&mut self) -> bool {
1184 let start = self.index();
1185
1186 if self.eat('{')
1187 && self.eat_hex_digits()
1188 && self.eat('}')
1189 && is_valid_unicode(self.last_int_value)
1190 {
1191 return true;
1192 }
1193
1194 self.rewind(start);
1195 return false;
1196 }
1197
1198 /// Eat the next characters as a RegExp `IdentityEscape` production if possible.
1199 /// Set `self.last_int_value` if it ate the next characters successfully.
1200 /// ```grammar
1201 /// IdentityEscape[U, N]::
1202 /// [+U] SyntaxCharacter
1203 /// [+U] `/`
1204 /// [strict][~U] SourceCharacter but not UnicodeIDContinue
1205 /// [annexB][~U] SourceCharacterIdentityEscape[?N]
1206 /// SourceCharacterIdentityEscape[N]::
1207 /// [~N] SourceCharacter but not c
1208 /// [+N] SourceCharacter but not one of c k
1209 /// ```
1210 /// Returns `true` if it ate the next characters successfully.
1211 fn eat_identity_escape(&mut self) -> bool {
1212 if let Some(cp) = self.code_point_with_offset(0) {
1213 if self.is_valid_identity_escape(cp) {
1214 self.last_int_value = cp as i64;
1215 self.advance();
1216 return true;
1217 }
1218 }
1219 return false;
1220 }
1221 fn is_valid_identity_escape(&self, cp: char) -> bool {
1222 if self.u_flag {
1223 return is_syntax_character(cp) || cp == '/';
1224 } else if self.strict {
1225 return !is_id_continue(cp);
1226 } else if self.n_flag {
1227 return !(cp == 'c' || cp == 'k');
1228 }
1229 return cp != 'c';
1230 }
1231
1232 /// Eat the next characters as a RegExp `DecimalEscape` production if possible.
1233 /// Set `self.last_int_value` if it ate the next characters successfully.
1234 /// ```grammar
1235 /// DecimalEscape::
1236 /// NonZeroDigit DecimalDigits(opt) [lookahead ∉ DecimalDigit]
1237 /// ```
1238 /// Returns `true` if it ate the next characters successfully.
1239 fn eat_decimal_escape(&mut self) -> bool {
1240 self.last_int_value = 0;
1241 if let Some(cp) = self.code_point_with_offset(0) {
1242 if cp.is_digit(10) {
1243 self.last_int_value = 10 * self.last_int_value + cp.to_digit(10).unwrap() as i64;
1244 self.advance();
1245 while let Some(cp) = self.code_point_with_offset(0) {
1246 if !cp.is_digit(10) {
1247 break;
1248 }
1249 self.last_int_value = 10 * self.last_int_value + cp.to_digit(10).unwrap() as i64;
1250 self.advance();
1251 }
1252 return true;
1253 }
1254 }
1255 return false;
1256 }
1257
1258 /// Eat the next characters as a RegExp `UnicodePropertyValueExpression` production if possible.
1259 /// Set `self.last_key_value` and `self.last_val_value` if it ate the next characters
1260 /// successfully.
1261 /// ```grammar
1262 /// UnicodePropertyValueExpression::
1263 /// UnicodePropertyName `=` UnicodePropertyValue
1264 /// LoneUnicodePropertyNameOrValue
1265 /// ```
1266 /// Returns `true` if it ate the next characters successfully.
1267 fn eat_unicode_property_value_expression(&mut self) -> Result<bool, &str> {
1268 let start = self.index();
1269
1270 // UnicodePropertyName `=` UnicodePropertyValue
1271 if self.eat_unicode_property_name() && self.eat('=') {
1272 self.last_key_value = self.last_str_value.clone();
1273 if self.eat_unicode_property_value() {
1274 self.last_val_value = self.last_str_value.clone();
1275 if is_valid_unicode_property(
1276 self.ecma_version,
1277 &self.last_key_value,
1278 &self.last_val_value,
1279 ) {
1280 return Ok(true);
1281 }
1282 return Err("Invalid property name");
1283 }
1284 }
1285 self.rewind(start);
1286
1287 // LoneUnicodePropertyNameOrValue
1288 if self.eat_lone_unicode_property_name_or_value() {
1289 let name_or_value = self.last_str_value.clone();
1290 if is_valid_unicode_property(self.ecma_version, "General_Category", &name_or_value) {
1291 self.last_key_value = "General_Category".to_string();
1292 self.last_val_value = name_or_value;
1293 return Ok(true);
1294 }
1295 if is_valid_lone_unicode_property(self.ecma_version, &name_or_value) {
1296 self.last_key_value = name_or_value;
1297 self.last_val_value = "".to_string();
1298 return Ok(true);
1299 }
1300 return Err("Invalid property name");
1301 }
1302 Ok(false)
1303 }
1304
1305 /// Eat the next characters as a RegExp `UnicodePropertyName` production if possible.
1306 /// Set `self.last_str_value` if it ate the next characters successfully.
1307 /// ```grammar
1308 /// UnicodePropertyName::
1309 /// UnicodePropertyNameCharacters
1310 /// ```
1311 /// Returns `true` if it ate the next characters successfully.
1312 fn eat_unicode_property_name(&mut self) -> bool {
1313 self.last_str_value = "".to_string();
1314 while let Some(cp) = self.code_point_with_offset(0) {
1315 if !is_unicode_property_name_character(cp) {
1316 break;
1317 }
1318 self.last_str_value.push(cp);
1319 self.advance();
1320 }
1321 self.last_str_value != ""
1322 }
1323
1324 /// Eat the next characters as a RegExp `UnicodePropertyValue` production if possible.
1325 /// Set `self.last_str_value` if it ate the next characters successfully.
1326 /// ```grammar
1327 /// UnicodePropertyValue::
1328 /// UnicodePropertyValueCharacters
1329 /// ```
1330 /// Returns `true` if it ate the next characters successfully.
1331 fn eat_unicode_property_value(&mut self) -> bool {
1332 self.last_str_value = "".to_string();
1333 while let Some(cp) = self.code_point_with_offset(0) {
1334 if !is_unicode_property_value_character(cp) {
1335 break;
1336 }
1337 self.last_str_value.push(cp);
1338 self.advance();
1339 }
1340 self.last_str_value != ""
1341 }
1342
    /// Eat the next characters as a RegExp `LoneUnicodePropertyNameOrValue`
    /// production if possible.
    /// Set `self.last_str_value` if it ate the next characters successfully.
    /// ```grammar
    /// LoneUnicodePropertyNameOrValue::
    ///      UnicodePropertyValueCharacters
    /// ```
    /// The production uses the same character set as `UnicodePropertyValue`,
    /// so this simply delegates to `eat_unicode_property_value`.
    /// Returns `true` if it ate the next characters successfully.
    fn eat_lone_unicode_property_name_or_value(&mut self) -> bool {
        self.eat_unicode_property_value()
    }
1353
1354 /// Eat the next characters as a `HexEscapeSequence` production if possible.
1355 /// Set `self.last_int_value` if it ate the next characters successfully.
1356 /// ```grammar
1357 /// HexEscapeSequence::
1358 /// `x` HexDigit HexDigit
1359 /// HexDigit:: one of
1360 /// 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
1361 /// ```
1362 /// Returns `true` if it ate the next characters successfully.
1363 fn eat_hex_escape_sequence(&mut self) -> Result<bool, &str> {
1364 let start = self.index();
1365 if self.eat('x') {
1366 if self.eat_fixed_hex_digits(2) {
1367 return Ok(true);
1368 }
1369 if self.u_flag || self.strict {
1370 return Err("Invalid escape");
1371 }
1372 self.rewind(start);
1373 }
1374 Ok(false)
1375 }
1376
1377 /// Eat the next characters as a `DecimalDigits` production if possible.
1378 /// Set `self.last_int_value` if it ate the next characters successfully.
1379 /// ```grammar
1380 /// DecimalDigits::
1381 /// DecimalDigit
1382 /// DecimalDigits DecimalDigit
1383 /// DecimalDigit:: one of
1384 /// 0 1 2 3 4 5 6 7 8 9
1385 /// ```
1386 /// Returns `true` if it ate the next characters successfully.
1387 fn eat_decimal_digits(&mut self) -> bool {
1388 let start = self.index();
1389
1390 self.last_int_value = 0;
1391 while let Some(cp) = self.code_point_with_offset(0) {
1392 if !cp.is_digit(10) {
1393 break;
1394 }
1395 self.last_int_value = 10 * self.last_int_value
1396 + self
1397 .code_point_with_offset(0)
1398 .unwrap()
1399 .to_digit(10)
1400 .unwrap() as i64;
1401 self.advance();
1402 }
1403
1404 return self.index() != start;
1405 }
1406
1407 /// Eat the next characters as a `HexDigits` production if possible.
1408 /// Set `self.last_int_value` if it ate the next characters successfully.
1409 /// ```grammar
1410 /// HexDigits::
1411 /// HexDigit
1412 /// HexDigits HexDigit
1413 /// HexDigit:: one of
1414 /// 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
1415 /// ```
1416 /// Returns `true` if it ate the next characters successfully.
1417 fn eat_hex_digits(&mut self) -> bool {
1418 let start = self.index();
1419 self.last_int_value = 0;
1420 while let Some(cp) = self.code_point_with_offset(0) {
1421 if !cp.is_digit(16) {
1422 break;
1423 }
1424 self.last_int_value = 16 * self.last_int_value + cp.to_digit(16).unwrap() as i64;
1425 self.advance();
1426 }
1427 return self.index() != start;
1428 }
1429
1430 /// Eat the next characters as a `HexDigits` production if possible.
1431 /// Set `self.last_int_value` if it ate the next characters successfully.
1432 /// ```grammar
1433 /// LegacyOctalEscapeSequence::
1434 /// OctalDigit [lookahead ∉ OctalDigit]
1435 /// ZeroToThree OctalDigit [lookahead ∉ OctalDigit]
1436 /// FourToSeven OctalDigit
1437 /// ZeroToThree OctalDigit OctalDigit
1438 /// OctalDigit:: one of
1439 /// 0 1 2 3 4 5 6 7
1440 /// ZeroToThree:: one of
1441 /// 0 1 2 3
1442 /// FourToSeven:: one of
1443 /// 4 5 6 7
1444 /// ```
1445 /// Returns `true` if it ate the next characters successfully.
1446 fn eat_legacy_octal_escape_sequence(&mut self) -> bool {
1447 if self.eat_octal_digit() {
1448 let n1 = self.last_int_value;
1449 if self.eat_octal_digit() {
1450 let n2 = self.last_int_value;
1451 if n1 <= 3 && self.eat_octal_digit() {
1452 self.last_int_value = n1 * 64 + n2 * 8 + self.last_int_value
1453 } else {
1454 self.last_int_value = n1 * 8 + n2;
1455 }
1456 } else {
1457 self.last_int_value = n1;
1458 }
1459 return true;
1460 }
1461 return false;
1462 }
1463
1464 /// Eat the next characters as a `OctalDigit` production if possible.
1465 /// Set `self.last_int_value` if it ate the next characters successfully.
1466 /// ```grammar
1467 /// OctalDigit:: one of
1468 /// 0 1 2 3 4 5 6 7
1469 /// ```
1470 /// Returns `true` if it ate the next characters successfully.
1471 fn eat_octal_digit(&mut self) -> bool {
1472 if let Some(cp) = self.code_point_with_offset(0) {
1473 if cp.is_digit(8) {
1474 self.advance();
1475 self.last_int_value = cp.to_digit(8).unwrap() as i64;
1476 return true;
1477 }
1478 }
1479 self.last_int_value = 0;
1480 return false;
1481 }
1482
1483 /// Eat the next characters as the given number of `HexDigit` productions if possible.
1484 /// Set `self.last_int_value` if it ate the next characters successfully.
1485 /// ```grammar
1486 /// HexDigit:: one of
1487 /// 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F
1488 /// ```
1489 /// Returns `true` if it ate the next characters successfully.
1490 fn eat_fixed_hex_digits(&mut self, length: i64) -> bool {
1491 let start = self.index();
1492 self.last_int_value = 0;
1493 for _ in 0..length {
1494 let cp = self.code_point_with_offset(0);
1495 if cp.is_none() || !cp.unwrap().is_digit(16) {
1496 self.rewind(start);
1497 return false;
1498 }
1499 self.last_int_value =
1500 16 * self.last_int_value + cp.unwrap().to_digit(16).unwrap() as i64;
1501 self.advance();
1502 }
1503 return true;
1504 }
1505
1506 fn count_capturing_parens(&mut self) -> u32 {
1507 let start = self.index();
1508 let mut in_class = false;
1509 let mut escaped = false;
1510 let mut count = 0;
1511
1512 while let Some(cp) = self.code_point_with_offset(0) {
1513 if escaped {
1514 escaped = false;
1515 } else if cp == '\\' {
1516 escaped = true;
1517 } else if cp == '[' {
1518 in_class = true;
1519 } else if cp == ']' {
1520 in_class = false;
1521 } else if cp == '('
1522 && !in_class
1523 && (self.code_point_with_offset(1) != Some('?')
1524 || (self.code_point_with_offset(2) == Some('<')
1525 && self.code_point_with_offset(3) != Some('=')
1526 && self.code_point_with_offset(3) != Some('!')))
1527 {
1528 count += 1
1529 }
1530 self.advance();
1531 }
1532
1533 self.rewind(start);
1534 count
1535 }
1536}
1537
#[cfg(test)]
mod tests {
    use super::*;

    /// `count_capturing_parens` counts plain groups (including nested ones)
    /// and ignores non-capturing `(?:...)` groups.
    #[test]
    fn count_capturing_parens_test() {
        // (pattern, expected number of capturing groups)
        let cases: [(&str, u32); 3] = [
            ("foo|(abc)de", 1),
            ("foo|(?:abc)de", 0),
            ("((foo)|(abc)de)", 3),
        ];

        let mut validator = EcmaRegexValidator::new(EcmaVersion::ES2018);
        for &(source, expected) in cases.iter() {
            validator.reset(source, 0, source.len(), false);
            assert_eq!(validator.count_capturing_parens(), expected);
        }
    }
}