1#![warn(dead_code)]
6pub mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind};
10use regex_syntax::{
11 ast::{
12 ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13 ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14 HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15 },
16 hir::{
17 self,
18 translate::{Translator, TranslatorBuilder},
19 },
20 utf8::Utf8Sequences,
21};
22use resharp_algebra::NodeId;
23
/// Shorthand for the algebra crate's regex builder, which the lowering
/// methods in this file receive as `tb`.
type TB<'s> = resharp_algebra::RegexBuilder;
25
/// Options that configure a [`ResharpParser`].
///
/// Every field is read by `ResharpParser::with_flags`.
pub struct PatternFlags {
    /// Passed to the translator builder's `unicode` setting.
    pub unicode: bool,
    /// Makes `get_class` use the `*_full` variants of the cached Unicode
    /// `\w` / `\d` / `\s` classes.
    pub full_unicode: bool,
    /// Passed to the translator builder's `case_insensitive` setting.
    pub case_insensitive: bool,
    /// Initial value of the parser's `dot_all` cell.
    pub dot_matches_new_line: bool,
    /// Initial value of the parser's `multiline` cell.
    pub multiline: bool,
    /// Initial whitespace-insensitive mode; while set, `bump_space` skips
    /// whitespace and `#` comments.
    pub ignore_whitespace: bool,
    /// Makes `get_class` build Perl classes from ASCII byte ranges.
    pub ascii_perl_classes: bool,
    /// Stored on the parser as `expanded_ast_limit`.
    /// NOTE(review): the enforcement site is outside this view.
    pub expanded_ast_limit: u64,
    /// Stored on the parser as `max_list_len`.
    /// NOTE(review): the enforcement site is outside this view.
    pub max_list_len: usize,
}
50
/// Limit on repetition counts.
/// NOTE(review): the place where this is enforced is outside this view.
const REPETITION_COUNT_LIMIT: u32 = 2_000;
/// Value used for `PatternFlags::expanded_ast_limit` by `PatternFlags::default`.
pub const DEFAULT_EXPANDED_AST_LIMIT: u64 = 50_000;
/// Value used for `PatternFlags::max_list_len` by `PatternFlags::default`.
pub const DEFAULT_MAX_LIST_LEN: usize = 4_000;
56
impl Default for PatternFlags {
    /// Returns the default flag set: `unicode` and `multiline` on, every
    /// other boolean off, and both size limits set to their `DEFAULT_*`
    /// constants.
    fn default() -> Self {
        Self {
            unicode: true,
            full_unicode: false,
            case_insensitive: false,
            dot_matches_new_line: false,
            multiline: true,
            ignore_whitespace: false,
            ascii_perl_classes: false,
            expanded_ast_limit: DEFAULT_EXPANDED_AST_LIMIT,
            max_list_len: DEFAULT_MAX_LIST_LEN,
        }
    }
}
72
/// Classification of the character that can sit at the edge of an AST node,
/// as computed by `word_char_kind` and `concat_neighbor_kind`.
#[derive(Clone, Copy, PartialEq, Debug)]
enum WordCharKind {
    /// The edge character is a word character.
    Word,
    /// The edge character is not a word character.
    NonWord,
    /// A nullable repetition whose body is `Word`.
    MaybeWord,
    /// A nullable repetition whose body is `NonWord`.
    MaybeNonWord,
    /// Nothing could be determined syntactically.
    Unknown,
    /// There is no neighbour: the index falls outside the concatenation.
    Edge,
}
82
/// Returns `true` when `b` is an ASCII letter, an ASCII digit or `_`.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' | b'_')
}
86
/// A single parsed item that is not yet committed to being either a
/// standalone AST node or a member of a character class.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal character.
    Literal(Literal),
    /// An assertion; rejected inside character classes.
    Assertion(ast::Assertion),
    /// Becomes `Ast::dot`; only the span is kept.
    Dot(Span),
    /// Becomes `Ast::top`; only the span is kept.
    Top(Span),
    /// A Perl class.
    Perl(ClassPerl),
    /// A Unicode class.
    Unicode(ClassUnicode),
}
96
97impl Primitive {
98 fn span(&self) -> &Span {
99 match *self {
100 Primitive::Literal(ref x) => &x.span,
101 Primitive::Assertion(ref x) => &x.span,
102 Primitive::Dot(ref span) => span,
103 Primitive::Top(ref span) => span,
104 Primitive::Perl(ref x) => &x.span,
105 Primitive::Unicode(ref x) => &x.span,
106 }
107 }
108
109 fn into_ast(self) -> Ast {
110 match self {
111 Primitive::Literal(lit) => Ast::literal(lit),
112 Primitive::Assertion(assert) => Ast::assertion(assert),
113 Primitive::Dot(span) => Ast::dot(span),
114 Primitive::Top(span) => Ast::top(span),
115 Primitive::Perl(cls) => Ast::class_perl(cls),
116 Primitive::Unicode(cls) => Ast::class_unicode(cls),
117 }
118 }
119
120 fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
121 use self::Primitive::*;
122 use regex_syntax::ast::ClassSetItem;
123
124 match self {
125 Literal(lit) => Ok(ClassSetItem::Literal(lit)),
126 Perl(cls) => Ok(ClassSetItem::Perl(cls)),
127 Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
128 x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
129 }
130 }
131
132 fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
133 use self::Primitive::*;
134
135 match self {
136 Literal(lit) => Ok(lit),
137 x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
138 }
139 }
140}
141
/// A value that is one of two alternatives.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Either<Left, Right> {
    /// The first alternative.
    Left(Left),
    /// The second alternative.
    Right(Right),
}
147
/// An error produced while parsing or translating a pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// What went wrong.
    pub kind: ErrorKind,
    /// Owned copy of the pattern text, filled in by `ResharpParser::error`.
    pattern: String,
    /// Where in the pattern the error was reported.
    pub span: Span,
}
158
159impl std::fmt::Display for ParseError {
160 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
161 write!(f, "{:?}: {:?}", self.kind, self.span)
162 }
163}
164impl std::error::Error for ParseError {}
165
/// Result alias whose error type is fixed to [`ParseError`].
type Result<T> = core::result::Result<T, ParseError>;
167
/// One frame of the parser's group stack (`stack_group`).
#[derive(Clone, Debug)]
enum GroupState {
    /// Pushed when a group is opened (`push_group` / `push_compl_group`).
    Group {
        /// The concatenation that was being built before the group opened.
        concat: Concat,
        /// The group under construction.
        group: ast::Group,
        /// Whitespace-insensitive setting in effect before the group;
        /// restored by `pop_group`.
        ignore_whitespace: bool,
    },
    /// Branches collected so far for a `|` alternation.
    Alternation(ast::Alternation),
    /// Branches collected so far for a `&` intersection.
    Intersection(ast::Intersection),
}
182
/// One frame of the parser's character-class stack (`stack_class`).
#[derive(Clone, Debug)]
enum ClassState {
    /// Pushed by `push_class_open` when a bracketed class is opened.
    Open {
        /// The enclosing union, saved while the nested class is parsed.
        union: regex_syntax::ast::ClassSetUnion,
        /// The bracketed class under construction.
        set: regex_syntax::ast::ClassBracketed,
    },
    /// Pushed by `push_class_op`: a binary set operation awaiting its
    /// right-hand side.
    Op {
        /// Which binary operation is pending.
        kind: regex_syntax::ast::ClassSetBinaryOpKind,
        /// The already-parsed left-hand operand.
        lhs: regex_syntax::ast::ClassSet,
    },
}
200
/// Parser state for a single pattern string.
///
/// Mutable parsing state lives in `Cell`/`RefCell` fields, so most parsing
/// helpers take `&self`.
pub struct ResharpParser<'s> {
    /// Cache of `(negated, kind, node)` entries filled by `get_class`.
    perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
    /// Cache of Unicode word/digit/space class nodes used by `get_class`.
    unicode_classes: resharp_algebra::UnicodeClassCache,
    /// Translator built by `with_flags` from the unicode and
    /// case-insensitive flags, with `utf8(false)`.
    pub translator: regex_syntax::hir::translate::Translator,
    /// The pattern being parsed.
    pub pattern: &'s str,
    /// Current position in `pattern`.
    pos: Cell<Position>,
    /// Last capture index handed out by `next_capture_index`.
    capture_index: Cell<u32>,
    /// Initialised to `false`. NOTE(review): usage is outside this view.
    octal: bool,
    /// Initialised to `false`. NOTE(review): usage is outside this view.
    empty_min_range: bool,
    /// Whether whitespace and `#` comments are currently skipped.
    ignore_whitespace: Cell<bool>,
    /// Initialised from `PatternFlags::dot_matches_new_line`.
    dot_all: Cell<bool>,
    /// Initialised from `PatternFlags::multiline`.
    multiline: Cell<bool>,
    /// `unicode || full_unicode || ascii_perl_classes` from the flags.
    global_unicode: bool,
    /// Copy of `PatternFlags::full_unicode`.
    global_full_unicode: bool,
    /// Copy of `PatternFlags::ascii_perl_classes`.
    global_ascii_perl: bool,
    /// Copy of `PatternFlags::case_insensitive`.
    global_case_insensitive: bool,
    /// Copy of `PatternFlags::expanded_ast_limit`.
    expanded_ast_limit: u64,
    /// Copy of `PatternFlags::max_list_len`.
    max_list_len: usize,
    /// Comments collected by `bump_space`.
    comments: RefCell<Vec<ast::Comment>>,
    /// Stack of open groups and pending alternations/intersections.
    stack_group: RefCell<Vec<GroupState>>,
    /// Stack of open character classes and pending set operations.
    stack_class: RefCell<Vec<ClassState>>,
    /// Capture names seen so far, kept sorted by name by `add_capture_name`.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// Reusable string buffer. NOTE(review): usage is outside this view.
    scratch: RefCell<String>,
}
226
227fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
228 result.map_err(|e| {
229 if e.kind == from {
230 ParseError {
231 kind: to,
232 pattern: e.pattern,
233 span: e.span,
234 }
235 } else {
236 e
237 }
238 })
239}
240
/// Returns `true` if `c` may appear in a capture group name.
///
/// The first character must be `_` or alphabetic; later characters may also
/// be `.`, `[`, `]` or any alphanumeric character.
fn is_capture_char(c: char, first: bool) -> bool {
    if c == '_' {
        return true;
    }
    match first {
        true => c.is_alphabetic(),
        false => matches!(c, '.' | '[' | ']') || c.is_alphanumeric(),
    }
}
248
/// Returns `true` if `c` is one of the characters that `escape_into`
/// prefixes with a backslash.
pub fn is_meta_character(c: char) -> bool {
    // Every character that carries syntactic meaning in a pattern.
    const META: &str = r"\.+*?()|[]{}^$#&-~_";
    META.contains(c)
}
272
273pub fn escape(text: &str) -> String {
275 let mut buf = String::new();
276 escape_into(text, &mut buf);
277 buf
278}
279
280pub fn escape_into(text: &str, buf: &mut String) {
282 buf.reserve(text.len());
283 for c in text.chars() {
284 if is_meta_character(c) {
285 buf.push('\\');
286 }
287 buf.push(c);
288 }
289}
290
291pub fn is_escapeable_character(c: char) -> bool {
292 if is_meta_character(c) {
293 return true;
294 }
295 if !c.is_ascii() {
296 return false;
297 }
298 match c {
299 '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
300 '<' | '>' => false,
301 _ => true,
302 }
303}
304
/// Returns `true` if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
308
309impl<'s> ResharpParser<'s> {
310 fn default_translator_builder(&self) -> TranslatorBuilder {
311 let mut trb = TranslatorBuilder::new();
312 trb.unicode(self.global_unicode);
313 trb.utf8(false);
314 trb.case_insensitive(self.global_case_insensitive);
315 trb
316 }
317
318 pub fn new(pattern: &'s str) -> Self {
319 Self::with_flags(pattern, &PatternFlags::default())
320 }
321
322 pub fn with_flags(pattern: &'s str, flags: &PatternFlags) -> Self {
323 let mut trb = TranslatorBuilder::new();
324 trb.unicode(flags.unicode);
325 trb.utf8(false);
326 trb.case_insensitive(flags.case_insensitive);
327 Self {
328 translator: trb.build(),
329 pattern,
330 perl_classes: vec![],
331 unicode_classes: resharp_algebra::UnicodeClassCache::default(),
332 pos: Cell::new(Position::new(0, 0, 0)),
333 capture_index: Cell::new(0),
334 octal: false,
335 empty_min_range: false,
336 ignore_whitespace: Cell::new(flags.ignore_whitespace),
337 dot_all: Cell::new(flags.dot_matches_new_line),
338 multiline: Cell::new(flags.multiline),
339 global_unicode: flags.unicode || flags.full_unicode || flags.ascii_perl_classes,
340 global_full_unicode: flags.full_unicode,
341 global_ascii_perl: flags.ascii_perl_classes,
342 global_case_insensitive: flags.case_insensitive,
343 expanded_ast_limit: flags.expanded_ast_limit,
344 max_list_len: flags.max_list_len,
345 comments: RefCell::new(vec![]),
346 stack_group: RefCell::new(vec![]),
347 stack_class: RefCell::new(vec![]),
348 capture_names: RefCell::new(vec![]),
349 scratch: RefCell::new(String::new()),
350 }
351 }
352
353 fn parser(&'_ self) -> &'_ ResharpParser<'_> {
354 self
355 }
356
357 fn pattern(&self) -> &str {
358 self.pattern
359 }
360
361 fn error(&self, span: Span, kind: ast::ErrorKind) -> ParseError {
362 ParseError {
363 kind,
364 pattern: self.pattern().to_string(),
365 span,
366 }
367 }
368
369 fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ParseError {
370 self.error(
371 Span::splat(self.pos()),
372 ast::ErrorKind::UnsupportedResharpRegex,
373 )
374 }
375
376 fn offset(&self) -> usize {
377 self.parser().pos.get().offset
378 }
379
380 fn line(&self) -> usize {
381 self.parser().pos.get().line
382 }
383
384 fn column(&self) -> usize {
385 self.parser().pos.get().column
386 }
387
388 fn next_capture_index(&self, span: Span) -> Result<u32> {
389 let current = self.parser().capture_index.get();
390 let i = current
391 .checked_add(1)
392 .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
393 self.parser().capture_index.set(i);
394 Ok(i)
395 }
396
397 fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
398 let mut names = self.parser().capture_names.borrow_mut();
399 match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
400 Err(i) => {
401 names.insert(i, cap.clone());
402 Ok(())
403 }
404 Ok(i) => Err(self.error(
405 cap.span,
406 ast::ErrorKind::GroupNameDuplicate {
407 original: names[i].span,
408 },
409 )),
410 }
411 }
412
413 fn ignore_whitespace(&self) -> bool {
414 self.parser().ignore_whitespace.get()
415 }
416
417 fn char(&self) -> char {
418 self.char_at(self.offset())
419 }
420
421 fn char_at(&self, i: usize) -> char {
422 self.pattern()[i..]
423 .chars()
424 .next()
425 .unwrap_or_else(|| panic!("expected char at offset {}", i))
426 }
427
428 fn bump(&self) -> bool {
429 if self.is_eof() {
430 return false;
431 }
432 let Position {
433 mut offset,
434 mut line,
435 mut column,
436 } = self.pos();
437 if self.char() == '\n' {
438 line = line.checked_add(1).unwrap();
439 column = 1;
440 } else {
441 column = column.checked_add(1).unwrap();
442 }
443 offset += self.char().len_utf8();
444 self.parser().pos.set(Position {
445 offset,
446 line,
447 column,
448 });
449 self.pattern()[self.offset()..].chars().next().is_some()
450 }
451
452 fn bump_if(&self, prefix: &str) -> bool {
453 if self.pattern()[self.offset()..].starts_with(prefix) {
454 for _ in 0..prefix.chars().count() {
455 self.bump();
456 }
457 true
458 } else {
459 false
460 }
461 }
462
463 fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
464 if self.bump_if("?=") {
465 return Some((true, true));
466 }
467 if self.bump_if("?!") {
468 return Some((true, false));
469 }
470 if self.bump_if("?<=") {
471 return Some((false, true));
472 }
473 if self.bump_if("?<!") {
474 return Some((false, false));
475 }
476 None
477 }
478
479 fn bump_and_bump_space(&self) -> bool {
480 if !self.bump() {
481 return false;
482 }
483 self.bump_space();
484 !self.is_eof()
485 }
486
487 fn bump_space(&self) {
488 if !self.ignore_whitespace() {
489 return;
490 }
491 while !self.is_eof() {
492 if self.char().is_whitespace() {
493 self.bump();
494 } else if self.char() == '#' {
495 let start = self.pos();
496 let mut comment_text = String::new();
497 self.bump();
498 while !self.is_eof() {
499 let c = self.char();
500 self.bump();
501 if c == '\n' {
502 break;
503 }
504 comment_text.push(c);
505 }
506 let comment = ast::Comment {
507 span: Span::new(start, self.pos()),
508 comment: comment_text,
509 };
510 self.parser().comments.borrow_mut().push(comment);
511 } else {
512 break;
513 }
514 }
515 }
516
517 fn peek(&self) -> Option<char> {
518 if self.is_eof() {
519 return None;
520 }
521 self.pattern()[self.offset() + self.char().len_utf8()..]
522 .chars()
523 .next()
524 }
525
526 fn peek_space(&self) -> Option<char> {
529 if !self.ignore_whitespace() {
530 return self.peek();
531 }
532 if self.is_eof() {
533 return None;
534 }
535 let mut start = self.offset() + self.char().len_utf8();
536 let mut in_comment = false;
537 for (i, c) in self.pattern()[start..].char_indices() {
538 if c.is_whitespace() {
539 continue;
540 } else if !in_comment && c == '#' {
541 in_comment = true;
542 } else if in_comment && c == '\n' {
543 in_comment = false;
544 } else {
545 start += i;
546 break;
547 }
548 }
549 self.pattern()[start..].chars().next()
550 }
551
552 fn is_eof(&self) -> bool {
553 self.offset() == self.pattern().len()
554 }
555
556 fn pos(&self) -> Position {
557 self.parser().pos.get()
558 }
559
560 fn span(&self) -> Span {
561 Span::splat(self.pos())
562 }
563
564 fn span_char(&self) -> Span {
565 let mut next = Position {
566 offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
567 line: self.line(),
568 column: self.column().checked_add(1).unwrap(),
569 };
570 if self.char() == '\n' {
571 next.line += 1;
572 next.column = 1;
573 }
574 Span::new(self.pos(), next)
575 }
576
577 #[inline(never)]
578 fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
579 assert_eq!(self.char(), '|');
580 concat.span.end = self.pos();
581 self.push_or_add_alternation(concat);
582 self.bump();
583 Ok(ast::Concat {
584 span: self.span(),
585 asts: vec![],
586 })
587 }
588
589 fn push_or_add_alternation(&self, concat: Concat) {
590 use self::GroupState::*;
591
592 let mut stack = self.parser().stack_group.borrow_mut();
593 if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
594 alts.asts.push(concat.into_ast());
595 return;
596 }
597 stack.push(Alternation(ast::Alternation {
598 span: Span::new(concat.span.start, self.pos()),
599 asts: vec![concat.into_ast()],
600 }));
601 }
602
603 #[inline(never)]
604 fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
605 assert_eq!(self.char(), '&');
606 concat.span.end = self.pos();
607 self.push_or_add_intersect(concat);
608 self.bump();
609 Ok(Concat {
610 span: self.span(),
611 asts: vec![],
612 })
613 }
614
615 fn push_or_add_intersect(&self, concat: Concat) {
616 use self::GroupState::*;
617
618 let mut stack = self.parser().stack_group.borrow_mut();
619 if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
620 alts.asts.push(concat.into_ast());
621 return;
622 }
623 stack.push(Intersection(ast::Intersection {
624 span: Span::new(concat.span.start, self.pos()),
625 asts: vec![concat.into_ast()],
626 }));
627 }
628
629 #[inline(never)]
630 fn push_group(&self, mut concat: Concat) -> Result<Concat> {
631 assert_eq!(self.char(), '(');
632 match self.parse_group()? {
633 Either::Left(set) => {
634 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
635 if let Some(v) = ignore {
636 self.parser().ignore_whitespace.set(v);
637 }
638
639 concat.asts.push(Ast::flags(set));
640 Ok(concat)
641 }
642 Either::Right(group) => {
643 let old_ignore_whitespace = self.ignore_whitespace();
644 let new_ignore_whitespace = group
645 .flags()
646 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
647 .unwrap_or(old_ignore_whitespace);
648 self.parser()
649 .stack_group
650 .borrow_mut()
651 .push(GroupState::Group {
652 concat,
653 group,
654 ignore_whitespace: old_ignore_whitespace,
655 });
656 self.parser().ignore_whitespace.set(new_ignore_whitespace);
657 Ok(Concat {
658 span: self.span(),
659 asts: vec![],
660 })
661 }
662 }
663 }
664
665 #[inline(never)]
666 fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
667 assert_eq!(self.char(), '~');
668 self.bump();
669 if self.is_eof() || self.char() != '(' {
670 return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
671 }
672 let open_span = self.span_char();
673 self.bump();
674 let group = ast::Group {
675 span: open_span,
676 kind: ast::GroupKind::Complement,
677 ast: Box::new(Ast::empty(self.span())),
678 };
679
680 let old_ignore_whitespace = self.ignore_whitespace();
681 let new_ignore_whitespace = group
682 .flags()
683 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
684 .unwrap_or(old_ignore_whitespace);
685 self.parser()
686 .stack_group
687 .borrow_mut()
688 .push(GroupState::Group {
689 concat,
690 group,
691 ignore_whitespace: old_ignore_whitespace,
692 });
693 self.parser().ignore_whitespace.set(new_ignore_whitespace);
694 Ok(Concat {
695 span: self.span(),
696 asts: vec![],
697 })
698 }
699
700 #[inline(never)]
701 fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
702 use self::GroupState::*;
703 assert_eq!(self.char(), ')');
704 let mut stack = self.parser().stack_group.borrow_mut();
705 let topstack = stack.pop();
706
707 let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
708 Some(Group {
709 concat,
710 group,
711 ignore_whitespace,
712 }) => (concat, group, ignore_whitespace, None),
713 Some(Alternation(alt)) => match stack.pop() {
714 Some(Group {
715 concat,
716 group,
717 ignore_whitespace,
718 }) => (
719 concat,
720 group,
721 ignore_whitespace,
722 Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
723 ),
724 None | Some(Alternation(_)) | Some(Intersection(_)) => {
725 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
726 }
727 },
728 Some(Intersection(int)) => match stack.pop() {
729 Some(Group {
730 concat,
731 group,
732 ignore_whitespace,
733 }) => (
734 concat,
735 group,
736 ignore_whitespace,
737 Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
738 ),
739 None | Some(Alternation(_)) | Some(Intersection(_)) => {
740 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
741 }
742 },
743
744 None => {
745 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
746 }
747 };
748 self.parser().ignore_whitespace.set(ignore_whitespace);
749 group_concat.span.end = self.pos();
750 self.bump();
751 group.span.end = self.pos();
752 match alt {
753 Some(Either::Left(mut alt)) => {
754 alt.span.end = group_concat.span.end;
755 alt.asts.push(group_concat.into_ast());
756 group.ast = Box::new(alt.into_ast());
757 }
758 Some(Either::Right(mut int)) => {
759 int.span.end = group_concat.span.end;
760 int.asts.push(group_concat.into_ast());
761 group.ast = Box::new(int.into_ast());
762 }
763 None => {
764 group.ast = Box::new(group_concat.into_ast());
765 }
766 }
767
768 if group.kind == GroupKind::Complement {
769 let complement = ast::Complement {
770 span: self.span(),
771 ast: group.ast,
772 };
773 prior_concat.asts.push(Ast::complement(complement));
774 }
775 else {
777 prior_concat.asts.push(Ast::group(group));
778 }
779 Ok(prior_concat)
780 }
781
782 #[inline(never)]
783 fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
784 concat.span.end = self.pos();
785 let mut stack = self.parser().stack_group.borrow_mut();
786 let ast = match stack.pop() {
787 None => Ok(concat.into_ast()),
788 Some(GroupState::Alternation(mut alt)) => {
789 alt.span.end = self.pos();
790 alt.asts.push(concat.into_ast());
791 Ok(Ast::alternation(alt))
792 }
793 Some(GroupState::Intersection(mut int)) => {
794 int.span.end = self.pos();
795 int.asts.push(concat.into_ast());
796
797 Ok(Ast::intersection(int))
798 }
799 Some(GroupState::Group { group, .. }) => {
800 return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
801 }
802 };
803 match stack.pop() {
805 None => ast,
806 Some(GroupState::Alternation(alt)) => {
807 Err(self.error(alt.span, ast::ErrorKind::UnsupportedResharpRegex))
808 }
809 Some(GroupState::Intersection(int)) => {
810 Err(self.error(int.span, ast::ErrorKind::UnsupportedResharpRegex))
811 }
812 Some(GroupState::Group { group, .. }) => {
813 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
814 }
815 }
816 }
817
818 #[inline(never)]
819 fn push_class_open(
820 &self,
821 parent_union: regex_syntax::ast::ClassSetUnion,
822 ) -> Result<regex_syntax::ast::ClassSetUnion> {
823 assert_eq!(self.char(), '[');
824
825 let (nested_set, nested_union) = self.parse_set_class_open()?;
826 self.parser()
827 .stack_class
828 .borrow_mut()
829 .push(ClassState::Open {
830 union: parent_union,
831 set: nested_set,
832 });
833 Ok(nested_union)
834 }
835
836 #[inline(never)]
837 fn pop_class(
838 &self,
839 nested_union: regex_syntax::ast::ClassSetUnion,
840 ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
841 assert_eq!(self.char(), ']');
842
843 let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
844 let prevset = self.pop_class_op(item);
845 let mut stack = self.parser().stack_class.borrow_mut();
846 match stack.pop() {
847 None => panic!("unexpected empty character class stack"),
848 Some(ClassState::Op { .. }) => panic!("unexpected ClassState::Op"),
849 Some(ClassState::Open { mut union, mut set }) => {
850 self.bump();
851 set.span.end = self.pos();
852 set.kind = prevset;
853 if stack.is_empty() {
854 Ok(Either::Right(set))
855 } else {
856 union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
857 Ok(Either::Left(union))
858 }
859 }
860 }
861 }
862
863 #[inline(never)]
864 fn unclosed_class_error(&self) -> ParseError {
865 for state in self.parser().stack_class.borrow().iter().rev() {
866 if let ClassState::Open { ref set, .. } = *state {
867 return self.error(set.span, ast::ErrorKind::ClassUnclosed);
868 }
869 }
870 panic!("no open character class found")
871 }
872
873 #[inline(never)]
874 fn push_class_op(
875 &self,
876 next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
877 next_union: regex_syntax::ast::ClassSetUnion,
878 ) -> regex_syntax::ast::ClassSetUnion {
879 let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
880 let new_lhs = self.pop_class_op(item);
881 self.parser().stack_class.borrow_mut().push(ClassState::Op {
882 kind: next_kind,
883 lhs: new_lhs,
884 });
885 regex_syntax::ast::ClassSetUnion {
886 span: self.span(),
887 items: vec![],
888 }
889 }
890
891 #[inline(never)]
892 fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
893 let mut stack = self.parser().stack_class.borrow_mut();
894 let (kind, lhs) = match stack.pop() {
895 Some(ClassState::Op { kind, lhs }) => (kind, lhs),
896 Some(state @ ClassState::Open { .. }) => {
897 stack.push(state);
898 return rhs;
899 }
900 None => unreachable!(),
901 };
902 let span = Span::new(lhs.span().start, rhs.span().end);
903 regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
904 span,
905 kind,
906 lhs: Box::new(lhs),
907 rhs: Box::new(rhs),
908 })
909 }
910
911 fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
912 match hir.kind() {
913 hir::HirKind::Empty => Ok(NodeId::EPS),
914 hir::HirKind::Literal(l) => {
915 if l.0.len() == 1 {
916 let node = tb.mk_u8(l.0[0]);
917 Ok(node)
918 } else {
919 let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
920 let conc = tb.mk_concats(ws.iter().copied());
921 Ok(conc)
922 }
923 }
924 hir::HirKind::Class(class) => match class {
925 hir::Class::Unicode(class_unicode) => {
926 let ranges = class_unicode.ranges();
927 if ranges.len() == 1
928 && ranges[0].start() == '\u{0}'
929 && ranges[0].end() == '\u{10FFFF}'
930 {
931 return Ok(tb.mk_range_u8(0, 255));
932 }
933 let mut nodes = Vec::new();
934 for range in ranges {
935 for seq in Utf8Sequences::new(range.start(), range.end()) {
936 let sl = seq.as_slice();
937 let bytes: Vec<_> = sl.iter().map(|s| (s.start, s.end)).collect();
938 let node = match bytes.len() {
939 1 => tb.mk_range_u8(bytes[0].0, bytes[0].1),
940 n => {
941 let last = tb.mk_range_u8(bytes[n - 1].0, bytes[n - 1].1);
942 let mut conc = last;
943 for i in (0..n - 1).rev() {
944 let b = tb.mk_range_u8(bytes[i].0, bytes[i].1);
945 conc = tb.mk_concat(b, conc);
946 }
947 conc
948 }
949 };
950 nodes.push(node);
951 }
952 }
953 let merged = tb.mk_unions(nodes.into_iter());
954 Ok(merged)
955 }
956 hir::Class::Bytes(class_bytes) => {
957 let ranges = class_bytes.ranges();
958 let mut result = NodeId::BOT;
959 for range in ranges {
960 let start = range.start();
961 let end = range.end();
962 let node = tb.mk_range_u8(start, end);
963 result = tb.mk_union(result, node);
964 }
965 Ok(result)
966 }
967 },
968 hir::HirKind::Look(_) => Err(self.error(
969 Span::splat(self.pos()),
970 ast::ErrorKind::UnsupportedResharpRegex,
971 )),
972 hir::HirKind::Repetition(_) => Err(self.error(
973 Span::splat(self.pos()),
974 ast::ErrorKind::UnsupportedResharpRegex,
975 )),
976 hir::HirKind::Capture(_) => Err(self.error(
977 Span::splat(self.pos()),
978 ast::ErrorKind::UnsupportedResharpRegex,
979 )),
980 hir::HirKind::Concat(body) => {
981 let mut result = NodeId::EPS;
982 for child in body {
983 let node = self.hir_to_node_id(child, tb)?;
984 result = tb.mk_concat(result, node);
985 }
986 Ok(result)
987 }
988 hir::HirKind::Alternation(_) => Err(self.error(
989 Span::splat(self.pos()),
990 ast::ErrorKind::UnsupportedResharpRegex,
991 )),
992 }
993 }
994
995 fn translate_ast_to_hir(
996 &mut self,
997 orig_ast: ®ex_syntax::ast::Ast,
998 tb: &mut TB<'s>,
999 ) -> Result<NodeId> {
1000 match self.translator.translate("", orig_ast) {
1001 Err(_) => Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1002 Ok(hir) => self.hir_to_node_id(&hir, tb),
1003 }
1004 }
1005
1006 fn translator_to_node_id(
1007 &mut self,
1008 orig_ast: ®ex_syntax::ast::Ast,
1009 translator: &mut Option<Translator>,
1010 tb: &mut TB<'s>,
1011 ) -> Result<NodeId> {
1012 match translator {
1013 Some(tr) => {
1014 let hir = tr
1015 .translate("", orig_ast)
1016 .map_err(|e| self.unsupported_error(e))?;
1017 self.hir_to_node_id(&hir, tb)
1018 }
1019 None => self.translate_ast_to_hir(orig_ast, tb),
1020 }
1021 }
1022
1023 fn get_class(
1024 &mut self,
1025 negated: bool,
1026 kind: regex_syntax::ast::ClassPerlKind,
1027 tb: &mut TB<'s>,
1028 ) -> Result<NodeId> {
1029 let w = self
1030 .perl_classes
1031 .iter()
1032 .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1033 match w {
1034 Some((_, _, value)) => Ok(*value),
1035 None => {
1036 let translated = if self.global_ascii_perl {
1037 let pos = match kind {
1038 regex_syntax::ast::ClassPerlKind::Word => {
1039 let az = tb.mk_range_u8(b'a', b'z');
1040 let big = tb.mk_range_u8(b'A', b'Z');
1041 let dig = tb.mk_range_u8(b'0', b'9');
1042 let us = tb.mk_u8(b'_');
1043 tb.mk_unions([az, big, dig, us].into_iter())
1044 }
1045 regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1046 regex_syntax::ast::ClassPerlKind::Space => {
1047 let sp = tb.mk_u8(b' ');
1048 let tab = tb.mk_u8(b'\t');
1049 let nl = tb.mk_u8(b'\n');
1050 let cr = tb.mk_u8(b'\r');
1051 let ff = tb.mk_u8(0x0C);
1052 let vt = tb.mk_u8(0x0B);
1053 tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1054 }
1055 };
1056 if negated {
1057 resharp_algebra::neg_class(tb, pos)
1058 } else {
1059 pos
1060 }
1061 } else if self.global_unicode {
1062 match kind {
1063 regex_syntax::ast::ClassPerlKind::Word => {
1064 if self.global_full_unicode {
1065 self.unicode_classes.ensure_word_full(tb);
1066 } else {
1067 self.unicode_classes.ensure_word(tb);
1068 }
1069 if negated {
1070 self.unicode_classes.non_word
1071 } else {
1072 self.unicode_classes.word
1073 }
1074 }
1075 regex_syntax::ast::ClassPerlKind::Digit => {
1076 if self.global_full_unicode {
1077 self.unicode_classes.ensure_digit_full(tb);
1078 } else {
1079 self.unicode_classes.ensure_digit(tb);
1080 }
1081 if negated {
1082 self.unicode_classes.non_digit
1083 } else {
1084 self.unicode_classes.digit
1085 }
1086 }
1087 regex_syntax::ast::ClassPerlKind::Space => {
1088 if self.global_full_unicode {
1089 self.unicode_classes.ensure_space_full(tb);
1090 } else {
1091 self.unicode_classes.ensure_space(tb);
1092 }
1093 if negated {
1094 self.unicode_classes.non_space
1095 } else {
1096 self.unicode_classes.space
1097 }
1098 }
1099 }
1100 } else {
1101 let pos = match kind {
1102 regex_syntax::ast::ClassPerlKind::Word => {
1103 let az = tb.mk_range_u8(b'a', b'z');
1104 let big = tb.mk_range_u8(b'A', b'Z');
1105 let dig = tb.mk_range_u8(b'0', b'9');
1106 let us = tb.mk_u8(b'_');
1107 tb.mk_unions([az, big, dig, us].into_iter())
1108 }
1109 regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1110 regex_syntax::ast::ClassPerlKind::Space => {
1111 let sp = tb.mk_u8(b' ');
1112 let tab = tb.mk_u8(b'\t');
1113 let nl = tb.mk_u8(b'\n');
1114 let cr = tb.mk_u8(b'\r');
1115 let ff = tb.mk_u8(0x0C);
1116 let vt = tb.mk_u8(0x0B);
1117 tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1118 }
1119 };
1120 if negated {
1121 tb.mk_compl(pos)
1122 } else {
1123 pos
1124 }
1125 };
1126 self.perl_classes.push((negated, kind, translated));
1127 Ok(translated)
1128 }
1129 }
1130 }
1131
1132 fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1133 use WordCharKind::*;
1134 match ast {
1135 Ast::Literal(lit) => {
1136 if is_word_byte(lit.c as u8) {
1137 Word
1138 } else {
1139 NonWord
1140 }
1141 }
1142 Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1143 (®ex_syntax::ast::ClassPerlKind::Word, false) => Word,
1144 (®ex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1145 (®ex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1146 (®ex_syntax::ast::ClassPerlKind::Space, true) => Unknown,
1147 (®ex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1148 (®ex_syntax::ast::ClassPerlKind::Digit, true) => Unknown,
1149 },
1150 Ast::Dot(_) | Ast::Top(_) => Unknown,
1151 Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1152 Ast::Concat(c) if !c.asts.is_empty() => {
1153 let edge = if left { c.asts.len() - 1 } else { 0 };
1154 let kind = Self::word_char_kind(&c.asts[edge], left);
1155 match kind {
1156 MaybeWord => {
1157 let dir: isize = if left { -1 } else { 1 };
1158 match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1159 Word => Word,
1160 _ => MaybeWord,
1161 }
1162 }
1163 MaybeNonWord => {
1164 let dir: isize = if left { -1 } else { 1 };
1165 match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1166 NonWord => NonWord,
1167 _ => MaybeNonWord,
1168 }
1169 }
1170 other => other,
1171 }
1172 }
1173 Ast::Alternation(alt) if !alt.asts.is_empty() => {
1174 let first = Self::word_char_kind(&alt.asts[0], left);
1175 if alt.asts[1..]
1176 .iter()
1177 .all(|a| Self::word_char_kind(a, left) == first)
1178 {
1179 first
1180 } else {
1181 Unknown
1182 }
1183 }
1184 Ast::Repetition(r) => {
1185 let inner = Self::word_char_kind(&r.ast, left);
1186 let nullable = matches!(
1187 &r.op.kind,
1188 ast::RepetitionKind::ZeroOrMore
1189 | ast::RepetitionKind::ZeroOrOne
1190 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1191 );
1192 if nullable {
1193 match inner {
1194 Word => MaybeWord,
1195 NonWord => MaybeNonWord,
1196 _ => Unknown,
1197 }
1198 } else {
1199 inner
1200 }
1201 }
1202 Ast::Lookaround(la) => Self::word_char_kind(&la.ast, left),
1203 _ => Unknown,
1204 }
1205 }
1206
1207 fn edge_class_ast(ast: &Ast, left: bool) -> Option<&Ast> {
1209 match ast {
1210 Ast::Literal(_)
1211 | Ast::ClassPerl(_)
1212 | Ast::ClassBracketed(_)
1213 | Ast::ClassUnicode(_)
1214 | Ast::Dot(_)
1215 | Ast::Top(_) => Some(ast),
1216 Ast::Group(g) => Self::edge_class_ast(&g.ast, left),
1217 Ast::Concat(c) if !c.asts.is_empty() => {
1218 Self::edge_class_ast(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1219 }
1220 Ast::Repetition(r) => {
1221 let nullable = matches!(
1222 &r.op.kind,
1223 ast::RepetitionKind::ZeroOrMore
1224 | ast::RepetitionKind::ZeroOrOne
1225 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1226 );
1227 if nullable {
1228 None
1229 } else {
1230 Self::edge_class_ast(&r.ast, left)
1231 }
1232 }
1233 _ => None,
1234 }
1235 }
1236
1237 fn resolve_word_kind(
1238 &mut self,
1239 asts: &[Ast],
1240 idx: usize,
1241 dir: isize,
1242 translator: &mut Option<Translator>,
1243 tb: &mut TB<'s>,
1244 word_id: NodeId,
1245 not_word_id: NodeId,
1246 ) -> Result<WordCharKind> {
1247 use WordCharKind::*;
1248 let fast = Self::concat_neighbor_kind(asts, idx, dir);
1249 if fast != Unknown {
1250 return Ok(fast);
1251 }
1252 let neighbor_idx = (idx as isize + dir) as usize;
1253 let node = if let Some(edge) = Self::edge_class_ast(&asts[neighbor_idx], dir < 0) {
1254 self.ast_to_node_id(edge, translator, tb)?
1255 } else {
1256 let neighbor_node = self.ast_to_node_id(&asts[neighbor_idx], translator, tb)?;
1258 let mut neighbor_node = tb
1259 .try_elim_lookarounds(neighbor_node)
1260 .ok_or_else(|| self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))?;
1261 if dir < 0 {
1262 neighbor_node = tb.reverse(neighbor_node).or_else(|_| {
1263 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1264 })?;
1265 }
1266 let word_prefix = tb.mk_concat(word_id, NodeId::TS);
1267 let non_word_prefix = tb.mk_concat(not_word_id, NodeId::TS);
1268 return if tb.subsumes(word_prefix, neighbor_node) == Some(true) {
1269 Ok(Word)
1270 } else if tb.subsumes(non_word_prefix, neighbor_node) == Some(true) {
1271 Ok(NonWord)
1272 } else {
1273 Ok(Unknown)
1274 };
1275 };
1276 if tb.subsumes(word_id, node) == Some(true) {
1277 Ok(Word)
1278 } else if tb.subsumes(not_word_id, node) == Some(true) {
1279 Ok(NonWord)
1280 } else {
1281 Ok(Unknown)
1282 }
1283 }
1284
1285 fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1286 use WordCharKind::*;
1287 let next = idx as isize + dir;
1288 if next < 0 || next >= asts.len() as isize {
1289 return Edge;
1290 }
1291 let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1292 match kind {
1293 MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1294 Word => Word,
1295 _ => Unknown,
1296 },
1297 MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1298 NonWord => NonWord,
1299 _ => Unknown,
1300 },
1301 other => other,
1302 }
1303 }
1304
1305 fn rewrite_word_boundary_in_concat(
1306 &mut self,
1307 asts: &[Ast],
1308 idx: usize,
1309 translator: &mut Option<Translator>,
1310 tb: &mut TB<'s>,
1311 ) -> Result<(NodeId, usize)> {
1312 use WordCharKind::*;
1313 let (word_id, not_word_id) = if self.global_full_unicode {
1314 self.unicode_classes.ensure_word_full(tb);
1315 (self.unicode_classes.word, self.unicode_classes.non_word)
1316 } else if self.global_unicode && !self.global_ascii_perl {
1317 self.unicode_classes.ensure_word(tb);
1318 (self.unicode_classes.word, self.unicode_classes.non_word)
1319 } else {
1320 let az = tb.mk_range_u8(b'a', b'z');
1321 let big = tb.mk_range_u8(b'A', b'Z');
1322 let dig = tb.mk_range_u8(b'0', b'9');
1323 let us = tb.mk_u8(b'_');
1324 let w = tb.mk_unions([az, big, dig, us].into_iter());
1325 (w, tb.mk_compl(w))
1326 };
1327 let left = self.resolve_word_kind(asts, idx, -1, translator, tb, word_id, not_word_id)?;
1328 let right = self.resolve_word_kind(asts, idx, 1, translator, tb, word_id, not_word_id)?;
1329 match (left, right) {
1330 (NonWord, Word) | (Word, NonWord) => Ok((NodeId::EPS, idx + 1)),
1331 (Word, _) => {
1332 let neg = tb.mk_neg_lookahead(word_id, 0);
1333 Ok((neg, idx + 1))
1334 }
1335 (NonWord, _) => {
1336 let tail = tb.mk_concat(word_id, NodeId::TS);
1337 self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1338 }
1339 (_, Word) => Ok((tb.mk_neg_lookbehind(word_id), idx + 1)),
1340 (_, NonWord) => Ok((tb.mk_lookbehind(word_id, NodeId::MISSING), idx + 1)),
1341 _ => Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)),
1345 }
1346 }
1347
1348 fn merge_boundary_with_following_lookaheads(
1349 &mut self,
1350 asts: &[Ast],
1351 wb_idx: usize,
1352 boundary_tail: NodeId,
1353 translator: &mut Option<Translator>,
1354 tb: &mut TB<'s>,
1355 ) -> Result<(NodeId, usize)> {
1356 let mut next = wb_idx + 1;
1357 let mut la_bodies = vec![boundary_tail];
1358 while next < asts.len() {
1359 match &asts[next] {
1360 Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1361 let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1362 la_bodies.push(tb.mk_concat(body, NodeId::TS));
1363 next += 1;
1364 }
1365 _ => break,
1366 }
1367 }
1368 let merged = tb.mk_inters(la_bodies.into_iter());
1369 Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1370 }
1371
1372 fn ast_to_node_id(
1373 &mut self,
1374 ast: &Ast,
1375 translator: &mut Option<Translator>,
1376 tb: &mut TB<'s>,
1377 ) -> Result<NodeId> {
1378 match ast {
1379 Ast::Empty(_) => Ok(NodeId::EPS),
1380 Ast::Flags(f) => {
1381 if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1382 return Err(self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex));
1383 }
1384 let mut translator_builder = self.default_translator_builder();
1385 if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1386 translator_builder.case_insensitive(state);
1387 }
1388 if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1389 translator_builder.unicode(state);
1390 }
1391 if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1392 self.dot_all.set(state);
1393 }
1394 if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
1395 self.multiline.set(state);
1396 }
1397 let concat_translator = Some(translator_builder.build());
1398 *translator = concat_translator;
1399 Ok(NodeId::EPS)
1400 }
1401 Ast::Literal(l) => {
1402 let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
1403 self.translator_to_node_id(&ast_lit, translator, tb)
1404 }
1405 Ast::Top(_) => Ok(NodeId::TOP),
1406 Ast::Dot(_) => {
1407 let codepoint_dot = self.global_ascii_perl || self.global_full_unicode;
1408 let hirv = match (codepoint_dot, self.dot_all.get()) {
1409 (true, true) => hir::Hir::dot(hir::Dot::AnyChar),
1410 (true, false) => hir::Hir::dot(hir::Dot::AnyCharExceptLF),
1411 (false, true) => return Ok(NodeId::TOP),
1412 (false, false) => hir::Hir::dot(hir::Dot::AnyByteExceptLF),
1413 };
1414 self.hir_to_node_id(&hirv, tb)
1415 }
1416 Ast::Assertion(a) => match &a.kind {
1417 ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
1418 ast::AssertionKind::EndText => Ok(NodeId::END),
1419 ast::AssertionKind::WordBoundary => {
1420 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1421 }
1422 ast::AssertionKind::NotWordBoundary => {
1423 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1424 }
1425 ast::AssertionKind::StartLine => {
1426 if !self.multiline.get() {
1427 return Ok(NodeId::BEGIN);
1428 }
1429 let left = NodeId::BEGIN;
1430 let right = tb.mk_u8(b'\n');
1431 let union = tb.mk_union(left, right);
1432 Ok(tb.mk_lookbehind(union, NodeId::MISSING))
1433 }
1434 ast::AssertionKind::EndLine => {
1435 if !self.multiline.get() {
1436 return Ok(NodeId::END);
1437 }
1438 let left = NodeId::END;
1439 let right = tb.mk_u8(b'\n');
1440 let union = tb.mk_union(left, right);
1441 Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
1442 }
1443 ast::AssertionKind::WordBoundaryStart => {
1444 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1445 }
1446 ast::AssertionKind::WordBoundaryEnd => {
1447 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1448 }
1449 ast::AssertionKind::WordBoundaryStartAngle => {
1450 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1451 }
1452 ast::AssertionKind::WordBoundaryEndAngle => {
1453 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1454 }
1455 ast::AssertionKind::WordBoundaryStartHalf => {
1456 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1457 }
1458 ast::AssertionKind::WordBoundaryEndHalf => {
1459 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1460 }
1461 },
1462 Ast::ClassUnicode(c) => {
1463 let tmp = regex_syntax::ast::ClassUnicode {
1464 span: c.span,
1465 negated: c.negated,
1466 kind: c.kind.clone(),
1467 };
1468 if !c.negated {
1469 if let regex_syntax::ast::ClassUnicodeKind::Named(s) = &c.kind {
1470 match s.as_str() {
1471 "ascii" => return Ok(tb.mk_range_u8(0, 127)),
1473 "utf8" => {
1475 let ascii = tb.mk_range_u8(0, 127);
1476 let beta = tb.mk_range_u8(128, 0xBF);
1477 let c0 = tb.mk_range_u8(0xC0, 0xDF);
1478 let c0s = tb.mk_concats([c0, beta].into_iter());
1479 let e0 = tb.mk_range_u8(0xE0, 0xEF);
1480 let e0s = tb.mk_concats([e0, beta, beta].into_iter());
1481 let f0 = tb.mk_range_u8(0xF0, 0xF7);
1482 let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
1483 let merged = tb.mk_unions([ascii, c0s, e0s, f0s].into_iter());
1484 return Ok(tb.mk_star(merged));
1485 }
1486 "hex" => {
1487 let nums = tb.mk_range_u8(b'0', b'9');
1488 let lets = tb.mk_range_u8(b'a', b'f');
1489 let lets2 = tb.mk_range_u8(b'A', b'F');
1490 let merged = tb.mk_unions([nums, lets, lets2].into_iter());
1491 return Ok(merged);
1492 }
1493 _ => {}
1494 }
1495 };
1496 }
1497
1498 let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
1499 self.translator_to_node_id(&orig_ast, translator, tb)
1500 }
1501 Ast::ClassPerl(c) => self.get_class(c.negated, c.kind.clone(), tb),
1502 Ast::ClassBracketed(c) => match &c.kind {
1503 regex_syntax::ast::ClassSet::Item(item) => {
1504 if !c.negated && is_universal_perl_pair(item) {
1505 return Ok(NodeId::TOP);
1506 }
1507 let tmp = regex_syntax::ast::ClassBracketed {
1508 span: c.span,
1509 negated: c.negated,
1510 kind: c.kind.clone(),
1511 };
1512 let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
1513 self.translator_to_node_id(&orig_ast, translator, tb)
1514 }
1515 regex_syntax::ast::ClassSet::BinaryOp(_) => {
1516 Err(self.error(c.span, ast::ErrorKind::UnsupportedResharpRegex))
1517 }
1518 },
1519 Ast::Repetition(r) => {
1520 let body = self.ast_to_node_id(&r.ast, translator, tb);
1521 match body {
1522 Ok(body) => match &r.op.kind {
1523 ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
1524 ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
1525 ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
1526 ast::RepetitionKind::Range(r) => match r {
1527 ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
1528 ast::RepetitionRange::AtLeast(n) => {
1529 let rep = tb.mk_repeat(body, *n, *n);
1530 let st = tb.mk_star(body);
1531 Ok(tb.mk_concat(rep, st))
1532 }
1533
1534 ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
1535 },
1536 },
1537 Err(_) => body,
1538 }
1539 }
1540 Ast::Lookaround(g) => {
1541 let body = self.ast_to_node_id(&g.ast, translator, tb)?;
1542 match g.kind {
1543 ast::LookaroundKind::PositiveLookahead
1544 | ast::LookaroundKind::NegativeLookahead
1545 if body.contains_lookbehind(tb) =>
1546 {
1547 Err(self.error(g.span, ast::ErrorKind::UnsupportedResharpRegex))
1548 }
1549 ast::LookaroundKind::PositiveLookahead => {
1550 Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
1551 }
1552 ast::LookaroundKind::PositiveLookbehind => {
1553 Ok(tb.mk_lookbehind(body, NodeId::MISSING))
1554 }
1555 ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
1556 ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
1557 }
1558 }
1559 Ast::Group(g) => {
1560 if let ast::GroupKind::NonCapturing(ref flags) = g.kind {
1561 if !flags.items.is_empty() {
1562 let mut translator_builder = self.default_translator_builder();
1563 if let Some(state) = flags.flag_state(ast::Flag::CaseInsensitive) {
1564 translator_builder.case_insensitive(state);
1565 }
1566 if let Some(state) = flags.flag_state(ast::Flag::Unicode) {
1567 translator_builder.unicode(state);
1568 }
1569 let saved_dot_all = self.dot_all.get();
1570 if let Some(state) = flags.flag_state(ast::Flag::DotMatchesNewLine) {
1571 self.dot_all.set(state);
1572 }
1573 let saved_multiline = self.multiline.get();
1574 if let Some(state) = flags.flag_state(ast::Flag::MultiLine) {
1575 self.multiline.set(state);
1576 }
1577 let mut scoped = Some(translator_builder.build());
1578 let result = self.ast_to_node_id(&g.ast, &mut scoped, tb);
1579 self.dot_all.set(saved_dot_all);
1580 self.multiline.set(saved_multiline);
1581 return result;
1582 }
1583 }
1584 self.ast_to_node_id(&g.ast, translator, tb)
1585 }
1586 Ast::Alternation(a) => {
1587 let mut children = vec![];
1588 for ast in &a.asts {
1589 match self.ast_to_node_id(ast, translator, tb) {
1590 Ok(node_id) => children.push(node_id),
1591 Err(err) => return Err(err),
1592 }
1593 }
1594 Ok(tb.mk_unions(children.iter().copied()))
1595 }
1596 Ast::Concat(c) => {
1597 let mut concat_translator: Option<Translator> = None;
1598 let mut children = vec![];
1599 let mut i = 0;
1600 while i < c.asts.len() {
1601 let ast = &c.asts[i];
1602 match ast {
1603 Ast::Flags(f) => {
1604 if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1605 return Err(
1606 self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex)
1607 );
1608 }
1609 let mut translator_builder = self.default_translator_builder();
1610 if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1611 translator_builder.case_insensitive(state);
1612 }
1613 if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1614 translator_builder.unicode(state);
1615 }
1616 if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1617 self.dot_all.set(state);
1618 }
1619 if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
1620 self.multiline.set(state);
1621 }
1622 concat_translator = Some(translator_builder.build());
1623 *translator = concat_translator.clone();
1624 i += 1;
1625 continue;
1626 }
1627 Ast::Assertion(a) if a.kind == ast::AssertionKind::WordBoundary => {
1628 let node =
1629 self.rewrite_word_boundary_in_concat(&c.asts, i, translator, tb)?;
1630 children.push(node.0);
1631 i = node.1; continue;
1633 }
1634 _ => {}
1635 }
1636 match concat_translator {
1637 Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
1638 Ok(node_id) => children.push(node_id),
1639 Err(err) => return Err(err),
1640 },
1641 None => match self.ast_to_node_id(ast, translator, tb) {
1642 Ok(node_id) => children.push(node_id),
1643 Err(err) => return Err(err),
1644 },
1645 }
1646 i += 1;
1647 }
1648 Ok(tb.mk_concats(children.iter().cloned()))
1649 }
1650 Ast::Intersection(intersection) => {
1651 let mut children = vec![];
1652 for ast in &intersection.asts {
1653 match self.ast_to_node_id(ast, translator, tb) {
1654 Ok(node_id) => children.push(node_id),
1655 Err(err) => return Err(err),
1656 }
1657 }
1658 Ok(tb.mk_inters(children.into_iter()))
1659 }
1660 Ast::Complement(complement) => {
1661 let body = self.ast_to_node_id(&complement.ast, translator, tb);
1662 body.map(|x| tb.mk_compl(x))
1663 }
1664 }
1665 }
1666
1667 fn parse_inner(&mut self) -> Result<Ast> {
1668 let mut concat = Concat {
1669 span: self.span(),
1670 asts: vec![],
1671 };
1672 loop {
1673 self.bump_space();
1674 if self.is_eof() {
1675 break;
1676 }
1677 match self.char() {
1678 '(' => concat = self.push_group(concat)?,
1679 ')' => concat = self.pop_group(concat)?,
1680 '|' => concat = self.push_alternate(concat)?,
1681 '&' => concat = self.push_intersect(concat)?,
1682 '~' => concat = self.push_compl_group(concat)?,
1683 '[' => {
1684 let class = self.parse_set_class()?;
1685 concat.asts.push(Ast::class_bracketed(class));
1686 }
1687 '?' => {
1688 concat =
1689 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
1690 }
1691 '*' => {
1692 concat =
1693 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
1694 }
1695 '+' => {
1696 concat =
1697 self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
1698 }
1699 '{' => {
1700 concat = self.parse_counted_repetition(concat)?;
1701 }
1702 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1703 }
1704 }
1705 let ast = self.pop_group_end(concat)?;
1706 if expanded_ast_size(&ast, self.expanded_ast_limit) >= self.expanded_ast_limit
1707 || max_concat_length(&ast) >= self.max_list_len
1708 {
1709 return Err(self.error(*ast.span(), ast::ErrorKind::UnsupportedResharpRegex));
1710 }
1711 Ok(ast)
1712 }
1713
1714 fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1715 let ast = self.parse_inner()?;
1716 self.ast_to_node_id(&ast, &mut None, tb)
1717 }
1718
1719 #[inline(never)]
1720 fn parse_uncounted_repetition(
1721 &self,
1722 mut concat: ast::Concat,
1723 kind: ast::RepetitionKind,
1724 ) -> Result<ast::Concat> {
1725 let op_start = self.pos();
1727 let ast = match concat.asts.pop() {
1728 Some(ast) => ast,
1729 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1730 };
1731 match ast {
1732 Ast::Empty(_) | Ast::Flags(_) => {
1733 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1734 }
1735 _ => {}
1736 }
1737 if self.bump() && self.char() == '?' {
1738 return Err(self.error(
1739 Span::new(op_start, self.pos()),
1740 ast::ErrorKind::UnsupportedLazyQuantifier,
1741 ));
1742 }
1743 concat.asts.push(Ast::repetition(ast::Repetition {
1744 span: ast.span().with_end(self.pos()),
1745 op: ast::RepetitionOp {
1746 span: Span::new(op_start, self.pos()),
1747 kind,
1748 },
1749 greedy: true,
1750 ast: Box::new(ast),
1751 }));
1752 Ok(concat)
1753 }
1754
1755 #[inline(never)]
1756 fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
1757 assert!(self.char() == '{');
1758 let start = self.pos();
1759 let ast = match concat.asts.pop() {
1760 Some(ast) => ast,
1761 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1762 };
1763 match ast {
1764 Ast::Empty(_) | Ast::Flags(_) => {
1765 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1766 }
1767 _ => {}
1768 }
1769 if !self.bump_and_bump_space() {
1770 return Err(self.error(
1771 Span::new(start, self.pos()),
1772 ast::ErrorKind::RepetitionCountUnclosed,
1773 ));
1774 }
1775 let count_start = specialize_err(
1776 self.parse_decimal(),
1777 ast::ErrorKind::DecimalEmpty,
1778 ast::ErrorKind::RepetitionCountDecimalEmpty,
1779 );
1780 if self.is_eof() {
1781 return Err(self.error(
1782 Span::new(start, self.pos()),
1783 ast::ErrorKind::RepetitionCountUnclosed,
1784 ));
1785 }
1786 let range = if self.char() == ',' {
1787 if !self.bump_and_bump_space() {
1788 return Err(self.error(
1789 Span::new(start, self.pos()),
1790 ast::ErrorKind::RepetitionCountUnclosed,
1791 ));
1792 }
1793 if self.char() != '}' {
1794 let count_start = match count_start {
1795 Ok(c) => c,
1796 Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
1797 if self.parser().empty_min_range {
1798 0
1799 } else {
1800 return Err(err);
1801 }
1802 }
1803 err => err?,
1804 };
1805 let count_end = specialize_err(
1806 self.parse_decimal(),
1807 ast::ErrorKind::DecimalEmpty,
1808 ast::ErrorKind::RepetitionCountDecimalEmpty,
1809 )?;
1810 ast::RepetitionRange::Bounded(count_start, count_end)
1811 } else {
1812 ast::RepetitionRange::AtLeast(count_start?)
1813 }
1814 } else {
1815 ast::RepetitionRange::Exactly(count_start?)
1816 };
1817
1818 if self.is_eof() || self.char() != '}' {
1819 return Err(self.error(
1820 Span::new(start, self.pos()),
1821 ast::ErrorKind::RepetitionCountUnclosed,
1822 ));
1823 }
1824
1825 if self.bump_and_bump_space() && self.char() == '?' {
1826 return Err(self.error(
1827 Span::new(start, self.pos()),
1828 ast::ErrorKind::UnsupportedLazyQuantifier,
1829 ));
1830 }
1831
1832 let op_span = Span::new(start, self.pos());
1833 if !range.is_valid() {
1834 return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
1835 }
1836
1837 let over_limit = match &range {
1838 ast::RepetitionRange::Exactly(n) => *n > REPETITION_COUNT_LIMIT,
1839 ast::RepetitionRange::AtLeast(n) => *n > REPETITION_COUNT_LIMIT,
1840 ast::RepetitionRange::Bounded(n, m) => {
1841 *n > REPETITION_COUNT_LIMIT || *m > REPETITION_COUNT_LIMIT
1842 }
1843 };
1844 if over_limit {
1845 return Err(self.error(op_span, ast::ErrorKind::UnsupportedResharpRegex));
1846 }
1847 concat.asts.push(Ast::repetition(ast::Repetition {
1848 span: ast.span().with_end(self.pos()),
1849 op: ast::RepetitionOp {
1850 span: op_span,
1851 kind: ast::RepetitionKind::Range(range),
1852 },
1853 greedy: true,
1854 ast: Box::new(ast),
1855 }));
1856 Ok(concat)
1857 }
1858
1859 #[inline(never)]
1860 fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1861 assert_eq!(self.char(), '(');
1862 let open_span = self.span_char();
1863 self.bump();
1864 self.bump_space();
1865 if let Some((ahead, pos)) = self.is_lookaround_prefix() {
1866 let kind = match (pos, ahead) {
1867 (true, true) => LookaroundKind::PositiveLookahead,
1868 (true, false) => LookaroundKind::PositiveLookbehind,
1869 (false, true) => LookaroundKind::NegativeLookahead,
1870 (false, false) => LookaroundKind::NegativeLookbehind,
1871 };
1872 return Ok(Either::Right(ast::Group {
1873 span: open_span,
1874 kind: ast::GroupKind::Lookaround(kind),
1875 ast: Box::new(Ast::empty(self.span())),
1876 }));
1877 }
1878 let inner_span = self.span();
1879 let mut starts_with_p = true;
1880 if self.bump_if("?P<") || {
1881 starts_with_p = false;
1882 self.bump_if("?<")
1883 } {
1884 let capture_index = self.next_capture_index(open_span)?;
1885 let name = self.parse_capture_name(capture_index)?;
1886 Ok(Either::Right(ast::Group {
1887 span: open_span,
1888 kind: ast::GroupKind::CaptureName {
1889 starts_with_p,
1890 name,
1891 },
1892 ast: Box::new(Ast::empty(self.span())),
1893 }))
1894 } else if self.bump_if("?") {
1895 if self.is_eof() {
1896 return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
1897 }
1898 let flags = self.parse_flags()?;
1899 let char_end = self.char();
1900 self.bump();
1901 if char_end == ')' {
1902 if flags.items.is_empty() {
1905 return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
1906 }
1907 Ok(Either::Left(ast::SetFlags {
1908 span: Span {
1909 end: self.pos(),
1910 ..open_span
1911 },
1912 flags,
1913 }))
1914 } else {
1915 assert_eq!(char_end, ':');
1916 Ok(Either::Right(ast::Group {
1917 span: open_span,
1918 kind: ast::GroupKind::NonCapturing(flags),
1919 ast: Box::new(Ast::empty(self.span())),
1920 }))
1921 }
1922 } else {
1923 let capture_index = self.next_capture_index(open_span)?;
1924 Ok(Either::Right(ast::Group {
1925 span: open_span,
1926 kind: ast::GroupKind::CaptureIndex(capture_index),
1927 ast: Box::new(Ast::empty(self.span())),
1928 }))
1929 }
1930 }
1931
1932 #[inline(never)]
1933 fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
1934 if self.is_eof() {
1935 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1936 }
1937 let start = self.pos();
1938 loop {
1939 if self.char() == '>' {
1940 break;
1941 }
1942 if !is_capture_char(self.char(), self.pos() == start) {
1943 return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
1944 }
1945 if !self.bump() {
1946 break;
1947 }
1948 }
1949 let end = self.pos();
1950 if self.is_eof() {
1951 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1952 }
1953 assert_eq!(self.char(), '>');
1954 self.bump();
1955 let name = &self.pattern()[start.offset..end.offset];
1956 if name.is_empty() {
1957 return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
1958 }
1959 let capname = ast::CaptureName {
1960 span: Span::new(start, end),
1961 name: name.to_string(),
1962 index: capture_index,
1963 };
1964 self.add_capture_name(&capname)?;
1965 Ok(capname)
1966 }
1967
1968 #[inline(never)]
1969 fn parse_flags(&self) -> Result<ast::Flags> {
1970 let mut flags = ast::Flags {
1971 span: self.span(),
1972 items: vec![],
1973 };
1974 let mut last_was_negation = None;
1975 while self.char() != ':' && self.char() != ')' {
1976 if self.char() == '-' {
1977 last_was_negation = Some(self.span_char());
1978 let item = ast::FlagsItem {
1979 span: self.span_char(),
1980 kind: ast::FlagsItemKind::Negation,
1981 };
1982 if let Some(i) = flags.add_item(item) {
1983 return Err(self.error(
1984 self.span_char(),
1985 ast::ErrorKind::FlagRepeatedNegation {
1986 original: flags.items[i].span,
1987 },
1988 ));
1989 }
1990 } else {
1991 last_was_negation = None;
1992 let item = ast::FlagsItem {
1993 span: self.span_char(),
1994 kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1995 };
1996 if let Some(i) = flags.add_item(item) {
1997 return Err(self.error(
1998 self.span_char(),
1999 ast::ErrorKind::FlagDuplicate {
2000 original: flags.items[i].span,
2001 },
2002 ));
2003 }
2004 }
2005 if !self.bump() {
2006 return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
2007 }
2008 }
2009 if let Some(span) = last_was_negation {
2010 return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
2011 }
2012 flags.span.end = self.pos();
2013 Ok(flags)
2014 }
2015
2016 #[inline(never)]
2017 fn parse_flag(&self) -> Result<ast::Flag> {
2018 match self.char() {
2019 'i' => Ok(ast::Flag::CaseInsensitive),
2020 'm' => Ok(ast::Flag::MultiLine),
2021 's' => Ok(ast::Flag::DotMatchesNewLine),
2022 'U' => Ok(ast::Flag::SwapGreed),
2023 'u' => Ok(ast::Flag::Unicode),
2024 'R' => Ok(ast::Flag::CRLF),
2025 'x' => Ok(ast::Flag::IgnoreWhitespace),
2026 _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
2027 }
2028 }
2029
2030 fn parse_primitive(&self) -> Result<Primitive> {
2031 match self.char() {
2032 '\\' => self.parse_escape(),
2033 '_' => {
2034 let ast = Primitive::Top(self.span_char());
2035 self.bump();
2036 Ok(ast)
2037 }
2038 '.' => {
2039 let ast = Primitive::Dot(self.span_char());
2040 self.bump();
2041 Ok(ast)
2042 }
2043 '^' => {
2044 let ast = Primitive::Assertion(ast::Assertion {
2045 span: self.span_char(),
2046 kind: ast::AssertionKind::StartLine,
2047 });
2048 self.bump();
2049 Ok(ast)
2050 }
2051 '$' => {
2052 let ast = Primitive::Assertion(ast::Assertion {
2053 span: self.span_char(),
2054 kind: ast::AssertionKind::EndLine,
2055 });
2056 self.bump();
2057 Ok(ast)
2058 }
2059 c => {
2060 let ast = Primitive::Literal(Literal {
2061 span: self.span_char(),
2062 kind: LiteralKind::Verbatim,
2063 c,
2064 });
2065 self.bump();
2066 Ok(ast)
2067 }
2068 }
2069 }
2070
2071 #[inline(never)]
2072 fn parse_escape(&self) -> Result<Primitive> {
2073 assert_eq!(self.char(), '\\');
2074 let start = self.pos();
2075 if !self.bump() {
2076 return Err(self.error(
2077 Span::new(start, self.pos()),
2078 ast::ErrorKind::EscapeUnexpectedEof,
2079 ));
2080 }
2081 let c = self.char();
2082 match c {
2084 '0'..='9' => {
2085 if !self.parser().octal {
2086 return Err(self.error(
2087 Span::new(start, self.span_char().end),
2088 ast::ErrorKind::UnsupportedBackreference,
2089 ));
2090 }
2091 let mut lit = self.parse_octal();
2092 lit.span.start = start;
2093 return Ok(Primitive::Literal(lit));
2094 }
2095 'x' | 'u' | 'U' => {
2102 let mut lit = self.parse_hex()?;
2103 lit.span.start = start;
2104 return Ok(Primitive::Literal(lit));
2105 }
2106 'p' | 'P' => {
2107 let mut cls = self.parse_unicode_class()?;
2108 cls.span.start = start;
2109 return Ok(Primitive::Unicode(cls));
2110 }
2111 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
2112 let mut cls = self.parse_perl_class();
2113 cls.span.start = start;
2114 return Ok(Primitive::Perl(cls));
2115 }
2116 _ => {}
2117 }
2118
2119 self.bump();
2121 let span = Span::new(start, self.pos());
2122 if is_meta_character(c) {
2123 return Ok(Primitive::Literal(Literal {
2124 span,
2125 kind: LiteralKind::Meta,
2126 c,
2127 }));
2128 }
2129 if is_escapeable_character(c) {
2130 return Ok(Primitive::Literal(Literal {
2131 span,
2132 kind: LiteralKind::Superfluous,
2133 c,
2134 }));
2135 }
2136 let special = |kind, c| {
2137 Ok(Primitive::Literal(Literal {
2138 span,
2139 kind: LiteralKind::Special(kind),
2140 c,
2141 }))
2142 };
2143 match c {
2144 'a' => special(SpecialLiteralKind::Bell, '\x07'),
2145 'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
2146 't' => special(SpecialLiteralKind::Tab, '\t'),
2147 'n' => special(SpecialLiteralKind::LineFeed, '\n'),
2148 'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
2149 'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
2150 'A' => Ok(Primitive::Assertion(ast::Assertion {
2151 span,
2152 kind: ast::AssertionKind::StartText,
2153 })),
2154 'z' => Ok(Primitive::Assertion(ast::Assertion {
2155 span,
2156 kind: ast::AssertionKind::EndText,
2157 })),
2158 'b' => {
2159 let mut wb = ast::Assertion {
2160 span,
2161 kind: ast::AssertionKind::WordBoundary,
2162 };
2163 if !self.is_eof() && self.char() == '{' {
2166 if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
2167 wb.kind = kind;
2168 wb.span.end = self.pos();
2169 }
2170 }
2171 Ok(Primitive::Assertion(wb))
2172 }
2173 'B' => Ok(Primitive::Assertion(ast::Assertion {
2174 span,
2175 kind: ast::AssertionKind::NotWordBoundary,
2176 })),
2177 '<' => Ok(Primitive::Assertion(ast::Assertion {
2178 span,
2179 kind: ast::AssertionKind::WordBoundaryStartAngle,
2180 })),
2181 '>' => Ok(Primitive::Assertion(ast::Assertion {
2182 span,
2183 kind: ast::AssertionKind::WordBoundaryEndAngle,
2184 })),
2185 _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2186 }
2187 }
2188
2189 fn maybe_parse_special_word_boundary(
2190 &self,
2191 wb_start: Position,
2192 ) -> Result<Option<ast::AssertionKind>> {
2193 assert_eq!(self.char(), '{');
2194
2195 let is_valid_char = |c| matches!(c, 'A'..='Z' | 'a'..='z' | '-');
2196 let start = self.pos();
2197 if !self.bump_and_bump_space() {
2198 return Err(self.error(
2199 Span::new(wb_start, self.pos()),
2200 ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2201 ));
2202 }
2203 let start_contents = self.pos();
2204 if !is_valid_char(self.char()) {
2205 self.parser().pos.set(start);
2206 return Ok(None);
2207 }
2208
2209 let mut scratch = self.parser().scratch.borrow_mut();
2211 scratch.clear();
2212 while !self.is_eof() && is_valid_char(self.char()) {
2213 scratch.push(self.char());
2214 self.bump_and_bump_space();
2215 }
2216 if self.is_eof() || self.char() != '}' {
2217 return Err(self.error(
2218 Span::new(start, self.pos()),
2219 ast::ErrorKind::SpecialWordBoundaryUnclosed,
2220 ));
2221 }
2222 let end = self.pos();
2223 self.bump();
2224 let kind = match scratch.as_str() {
2225 "start" => ast::AssertionKind::WordBoundaryStart,
2226 "end" => ast::AssertionKind::WordBoundaryEnd,
2227 "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2228 "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2229 _ => {
2230 return Err(self.error(
2231 Span::new(start_contents, end),
2232 ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2233 ))
2234 }
2235 };
2236 Ok(Some(kind))
2237 }
2238
2239 #[inline(never)]
2240 fn parse_octal(&self) -> Literal {
2241 assert!(self.parser().octal);
2242 assert!('0' <= self.char() && self.char() <= '7');
2243 let start = self.pos();
2244 while self.bump()
2246 && '0' <= self.char()
2247 && self.char() <= '7'
2248 && self.pos().offset - start.offset <= 2
2249 {}
2250 let end = self.pos();
2251 let octal = &self.pattern()[start.offset..end.offset];
2252 let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2255 let c = char::from_u32(codepoint).expect("Unicode scalar value");
2258 Literal {
2259 span: Span::new(start, end),
2260 kind: LiteralKind::Octal,
2261 c,
2262 }
2263 }
2264
2265 #[inline(never)]
2266 fn parse_hex(&self) -> Result<Literal> {
2267 assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2268
2269 let hex_kind = match self.char() {
2270 'x' => HexLiteralKind::X,
2271 'u' => HexLiteralKind::UnicodeShort,
2272 _ => HexLiteralKind::UnicodeLong,
2273 };
2274 if !self.bump_and_bump_space() {
2275 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2276 }
2277 if self.char() == '{' {
2278 self.parse_hex_brace(hex_kind)
2279 } else {
2280 self.parse_hex_digits(hex_kind)
2281 }
2282 }
2283
2284 #[inline(never)]
2285 fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2286 let mut scratch = self.parser().scratch.borrow_mut();
2287 scratch.clear();
2288
2289 let start = self.pos();
2290 for i in 0..kind.digits() {
2291 if i > 0 && !self.bump_and_bump_space() {
2292 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2293 }
2294 if !is_hex(self.char()) {
2295 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2296 }
2297 scratch.push(self.char());
2298 }
2299 self.bump_and_bump_space();
2300 let end = self.pos();
2301 let hex = scratch.as_str();
2302 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2303 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2304 Some(c) => Ok(Literal {
2305 span: Span::new(start, end),
2306 kind: LiteralKind::HexFixed(kind),
2307 c,
2308 }),
2309 }
2310 }
2311
2312 #[inline(never)]
2313 fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2314 let mut scratch = self.parser().scratch.borrow_mut();
2315 scratch.clear();
2316
2317 let brace_pos = self.pos();
2318 let start = self.span_char().end;
2319 while self.bump_and_bump_space() && self.char() != '}' {
2320 if !is_hex(self.char()) {
2321 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2322 }
2323 scratch.push(self.char());
2324 }
2325 if self.is_eof() {
2326 return Err(self.error(
2327 Span::new(brace_pos, self.pos()),
2328 ast::ErrorKind::EscapeUnexpectedEof,
2329 ));
2330 }
2331 let end = self.pos();
2332 let hex = scratch.as_str();
2333 assert_eq!(self.char(), '}');
2334 self.bump_and_bump_space();
2335
2336 if hex.is_empty() {
2337 return Err(self.error(
2338 Span::new(brace_pos, self.pos()),
2339 ast::ErrorKind::EscapeHexEmpty,
2340 ));
2341 }
2342 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2343 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2344 Some(c) => Ok(Literal {
2345 span: Span::new(start, self.pos()),
2346 kind: LiteralKind::HexBrace(kind),
2347 c,
2348 }),
2349 }
2350 }
2351
2352 fn parse_decimal(&self) -> Result<u32> {
2353 let mut scratch = self.parser().scratch.borrow_mut();
2354 scratch.clear();
2355
2356 while !self.is_eof() && self.char().is_whitespace() {
2357 self.bump();
2358 }
2359 let start = self.pos();
2360 while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2361 scratch.push(self.char());
2362 self.bump_and_bump_space();
2363 }
2364 let span = Span::new(start, self.pos());
2365 while !self.is_eof() && self.char().is_whitespace() {
2366 self.bump_and_bump_space();
2367 }
2368 let digits = scratch.as_str();
2369 if digits.is_empty() {
2370 return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2371 }
2372 match digits.parse::<u32>().ok() {
2373 Some(n) => Ok(n),
2374 None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2375 }
2376 }
2377
2378 #[inline(never)]
2379 fn parse_set_class(&self) -> Result<ClassBracketed> {
2380 assert_eq!(self.char(), '[');
2381
2382 let mut union = ClassSetUnion {
2383 span: self.span(),
2384 items: vec![],
2385 };
2386 loop {
2387 self.bump_space();
2388 if self.is_eof() {
2389 return Err(self.unclosed_class_error());
2390 }
2391 match self.char() {
2392 '[' => {
2393 if !self.parser().stack_class.borrow().is_empty() {
2394 if let Some(cls) = self.maybe_parse_ascii_class() {
2395 union.push(ClassSetItem::Ascii(cls));
2396 continue;
2397 }
2398 }
2399 union = self.push_class_open(union)?;
2400 }
2401 ']' => match self.pop_class(union)? {
2402 Either::Left(nested_union) => {
2403 union = nested_union;
2404 }
2405 Either::Right(class) => return Ok(class),
2406 },
2407 '&' if self.peek() == Some('&') => {
2408 assert!(self.bump_if("&&"));
2409 union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
2410 }
2411 '-' if self.peek() == Some('-') => {
2412 assert!(self.bump_if("--"));
2413 union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
2414 }
2415 '~' if self.peek() == Some('~') => {
2416 assert!(self.bump_if("~~"));
2417 union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
2418 }
2419 _ => {
2420 union.push(self.parse_set_class_range()?);
2421 }
2422 }
2423 }
2424 }
2425
2426 #[inline(never)]
2427 fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2428 let prim1 = self.parse_set_class_item()?;
2429 self.bump_space();
2430 if self.is_eof() {
2431 return Err(self.unclosed_class_error());
2432 }
2433 if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2434 return prim1.into_class_set_item(self);
2435 }
2436 if !self.bump_and_bump_space() {
2437 return Err(self.unclosed_class_error());
2438 }
2439 let prim2 = self.parse_set_class_item()?;
2440 let range = ClassSetRange {
2441 span: Span::new(prim1.span().start, prim2.span().end),
2442 start: prim1.into_class_literal(self)?,
2443 end: prim2.into_class_literal(self)?,
2444 };
2445 if !range.is_valid() {
2446 return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2447 }
2448 Ok(ClassSetItem::Range(range))
2449 }
2450
2451 #[inline(never)]
2452 fn parse_set_class_item(&self) -> Result<Primitive> {
2453 if self.char() == '\\' {
2454 self.parse_escape()
2455 } else {
2456 let x = Primitive::Literal(Literal {
2457 span: self.span_char(),
2458 kind: LiteralKind::Verbatim,
2459 c: self.char(),
2460 });
2461 self.bump();
2462 Ok(x)
2463 }
2464 }
2465
2466 #[inline(never)]
2467 fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2468 assert_eq!(self.char(), '[');
2469 let start = self.pos();
2470 if !self.bump_and_bump_space() {
2471 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2472 }
2473
2474 let negated = if self.char() != '^' {
2475 false
2476 } else {
2477 if !self.bump_and_bump_space() {
2478 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2479 }
2480 true
2481 };
2482 let mut union = ClassSetUnion {
2484 span: self.span(),
2485 items: vec![],
2486 };
2487 while self.char() == '-' {
2488 union.push(ClassSetItem::Literal(Literal {
2489 span: self.span_char(),
2490 kind: LiteralKind::Verbatim,
2491 c: '-',
2492 }));
2493 if !self.bump_and_bump_space() {
2494 return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
2495 }
2496 }
2497 if union.items.is_empty() && self.char() == ']' {
2500 union.push(ClassSetItem::Literal(Literal {
2501 span: self.span_char(),
2502 kind: LiteralKind::Verbatim,
2503 c: ']',
2504 }));
2505 if !self.bump_and_bump_space() {
2506 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2507 }
2508 }
2509 let set = ClassBracketed {
2510 span: Span::new(start, self.pos()),
2511 negated,
2512 kind: ClassSet::union(ClassSetUnion {
2513 span: Span::new(union.span.start, union.span.start),
2514 items: vec![],
2515 }),
2516 };
2517 Ok((set, union))
2518 }
2519
2520 #[inline(never)]
2521 fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
2522 assert_eq!(self.char(), '[');
2523 let start = self.pos();
2525 let mut negated = false;
2526 if !self.bump() || self.char() != ':' {
2527 self.parser().pos.set(start);
2528 return None;
2529 }
2530 if !self.bump() {
2531 self.parser().pos.set(start);
2532 return None;
2533 }
2534 if self.char() == '^' {
2535 negated = true;
2536 if !self.bump() {
2537 self.parser().pos.set(start);
2538 return None;
2539 }
2540 }
2541 let name_start = self.offset();
2542 while self.char() != ':' && self.bump() {}
2543 if self.is_eof() {
2544 self.parser().pos.set(start);
2545 return None;
2546 }
2547 let name = &self.pattern()[name_start..self.offset()];
2548 if !self.bump_if(":]") {
2549 self.parser().pos.set(start);
2550 return None;
2551 }
2552 let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
2553 Some(kind) => kind,
2554 None => {
2555 self.parser().pos.set(start);
2556 return None;
2557 }
2558 };
2559 Some(ClassAscii {
2560 span: Span::new(start, self.pos()),
2561 kind,
2562 negated,
2563 })
2564 }
2565
2566 #[inline(never)]
2567 fn parse_unicode_class(&self) -> Result<ClassUnicode> {
2568 assert!(self.char() == 'p' || self.char() == 'P');
2569
2570 let mut scratch = self.parser().scratch.borrow_mut();
2571 scratch.clear();
2572
2573 let negated = self.char() == 'P';
2574 if !self.bump_and_bump_space() {
2575 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2576 }
2577 let (start, kind) = if self.char() == '{' {
2578 let start = self.span_char().end;
2579 while self.bump_and_bump_space() && self.char() != '}' {
2580 scratch.push(self.char());
2581 }
2582 if self.is_eof() {
2583 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2584 }
2585 assert_eq!(self.char(), '}');
2586 self.bump();
2587
2588 let name = scratch.as_str();
2589 if let Some(i) = name.find("!=") {
2590 (
2591 start,
2592 ClassUnicodeKind::NamedValue {
2593 op: ClassUnicodeOpKind::NotEqual,
2594 name: name[..i].to_string(),
2595 value: name[i + 2..].to_string(),
2596 },
2597 )
2598 } else if let Some(i) = name.find(':') {
2599 (
2600 start,
2601 ClassUnicodeKind::NamedValue {
2602 op: ClassUnicodeOpKind::Colon,
2603 name: name[..i].to_string(),
2604 value: name[i + 1..].to_string(),
2605 },
2606 )
2607 } else if let Some(i) = name.find('=') {
2608 (
2609 start,
2610 ClassUnicodeKind::NamedValue {
2611 op: ClassUnicodeOpKind::Equal,
2612 name: name[..i].to_string(),
2613 value: name[i + 1..].to_string(),
2614 },
2615 )
2616 } else {
2617 (start, ClassUnicodeKind::Named(name.to_string()))
2618 }
2619 } else {
2620 let start = self.pos();
2621 let c = self.char();
2622 if c == '\\' {
2623 return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
2624 }
2625 self.bump_and_bump_space();
2626 let kind = ClassUnicodeKind::OneLetter(c);
2627 (start, kind)
2628 };
2629 Ok(ClassUnicode {
2630 span: Span::new(start, self.pos()),
2631 negated,
2632 kind,
2633 })
2634 }
2635
2636 #[inline(never)]
2637 fn parse_perl_class(&self) -> ClassPerl {
2638 let c = self.char();
2639 let span = self.span_char();
2640 self.bump();
2641 let (negated, kind) = match c {
2642 'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
2643 'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
2644 's' => (false, regex_syntax::ast::ClassPerlKind::Space),
2645 'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
2646 'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
2647 'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
2648 c => panic!("expected valid Perl class but got '{}'", c),
2649 };
2650 ClassPerl {
2651 span,
2652 kind,
2653 negated,
2654 }
2655 }
2656}
2657
2658fn is_universal_perl_pair(item: ®ex_syntax::ast::ClassSetItem) -> bool {
2661 use regex_syntax::ast::ClassSetItem;
2662 let items = match item {
2663 ClassSetItem::Union(u) => &u.items,
2664 _ => return false,
2665 };
2666 if items.len() != 2 {
2667 return false;
2668 }
2669 match (&items[0], &items[1]) {
2670 (ClassSetItem::Perl(a), ClassSetItem::Perl(b)) => {
2671 let is_all = a.kind == b.kind && a.negated != b.negated;
2672 is_all
2673 }
2674 _ => false,
2675 }
2676}
2677
2678pub fn max_concat_length(ast: &ast::Ast) -> usize {
2679 match ast {
2680 ast::Ast::Empty(_)
2681 | ast::Ast::Flags(_)
2682 | ast::Ast::Literal(_)
2683 | ast::Ast::Dot(_)
2684 | ast::Ast::Top(_)
2685 | ast::Ast::Assertion(_)
2686 | ast::Ast::ClassUnicode(_)
2687 | ast::Ast::ClassPerl(_)
2688 | ast::Ast::ClassBracketed(_) => 0,
2689 ast::Ast::Group(g) => max_concat_length(&g.ast),
2690 ast::Ast::Complement(c) => max_concat_length(&c.ast),
2691 ast::Ast::Lookaround(l) => max_concat_length(&l.ast),
2692 ast::Ast::Repetition(r) => max_concat_length(&r.ast),
2693 ast::Ast::Concat(c) => c
2694 .asts
2695 .len()
2696 .max(c.asts.iter().map(max_concat_length).max().unwrap_or(0)),
2697 ast::Ast::Alternation(a) => a.asts.iter().map(max_concat_length).max().unwrap_or(0),
2698 ast::Ast::Intersection(i) => i.asts.iter().map(max_concat_length).max().unwrap_or(0),
2699 }
2700}
2701
2702pub fn expanded_ast_size(ast: &ast::Ast, limit: u64) -> u64 {
2703 fn go(ast: &ast::Ast, limit: u64) -> u64 {
2704 match ast {
2705 ast::Ast::Empty(_) | ast::Ast::Flags(_) => 1,
2706 ast::Ast::Literal(_) | ast::Ast::Dot(_) | ast::Ast::Top(_) => 1,
2707 ast::Ast::Assertion(_) => 1,
2708 ast::Ast::ClassUnicode(_) | ast::Ast::ClassPerl(_) | ast::Ast::ClassBracketed(_) => 1,
2709 ast::Ast::Group(g) => go(&g.ast, limit).saturating_add(1).min(limit),
2710 ast::Ast::Complement(c) => go(&c.ast, limit).saturating_add(1).min(limit),
2711 ast::Ast::Lookaround(l) => go(&l.ast, limit).saturating_add(1).min(limit),
2712 ast::Ast::Concat(c) => sum_children(&c.asts, limit),
2713 ast::Ast::Alternation(a) => sum_children(&a.asts, limit),
2714 ast::Ast::Intersection(i) => sum_children(&i.asts, limit),
2715 ast::Ast::Repetition(r) => {
2716 let body = go(&r.ast, limit);
2717 let factor: u64 = match &r.op.kind {
2718 ast::RepetitionKind::ZeroOrOne => 2,
2719 ast::RepetitionKind::ZeroOrMore | ast::RepetitionKind::OneOrMore => 2,
2720 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(n)) => {
2721 (*n as u64).max(1)
2722 }
2723 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(n)) => {
2724 (*n as u64).max(1).saturating_add(1)
2725 }
2726 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(_, m)) => {
2727 (*m as u64).max(1)
2728 }
2729 };
2730 body.saturating_mul(factor).min(limit)
2731 }
2732 }
2733 }
2734 fn sum_children(children: &[ast::Ast], limit: u64) -> u64 {
2735 let mut total: u64 = 0;
2736 for c in children {
2737 total = total.saturating_add(go(c, limit));
2738 if total >= limit {
2739 return limit;
2740 }
2741 }
2742 total
2743 }
2744 go(ast, limit)
2745}
2746
2747pub fn parse_ast<'s>(tb: &mut TB<'s>, pattern: &'s str) -> std::result::Result<NodeId, ParseError> {
2748 let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
2749 p.parse(tb)
2750}
2751
2752pub fn parse_ast_with<'s>(
2753 tb: &mut TB<'s>,
2754 pattern: &'s str,
2755 flags: &PatternFlags,
2756) -> std::result::Result<NodeId, ParseError> {
2757 let mut p: ResharpParser<'s> = ResharpParser::with_flags(pattern, flags);
2758 p.parse(tb)
2759}
2760
2761pub fn parse_to_ast(pattern: &str) -> std::result::Result<ast::Ast, ParseError> {
2763 let mut p: ResharpParser = ResharpParser::new(pattern);
2764 p.parse_inner()
2765}