1#![warn(dead_code)]
6pub mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind};
10use regex_syntax::{
11 ast::{
12 ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13 ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14 HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15 },
16 hir::{
17 self,
18 translate::{Translator, TranslatorBuilder},
19 },
20 utf8::Utf8Sequences,
21};
22use resharp_algebra::NodeId;
23
/// Shorthand for the algebra crate's regex builder, through which this file
/// constructs nodes (`mk_u8`, `mk_concat`, `mk_union`, ...).
type TB<'s> = resharp_algebra::RegexBuilder;
25
/// Options controlling how a pattern is parsed and translated.
///
/// See the `Default` impl for the out-of-the-box values.
pub struct PatternFlags {
    /// Forwarded to the HIR translator's `unicode` setting.
    pub unicode: bool,
    /// Makes Perl classes (`\w`, `\d`, `\s`) use the `*_full` variants of the
    /// Unicode class cache.
    pub full_unicode: bool,
    /// Forwarded to the HIR translator's `case_insensitive` setting.
    pub case_insensitive: bool,
    /// Initial value of the parser's `dot_all` state.
    pub dot_matches_new_line: bool,
    /// Initial value of the parser's `multiline` state.
    pub multiline: bool,
    /// Initial value of the parser's whitespace-insensitive state, in which
    /// whitespace and `#` comments are skipped.
    pub ignore_whitespace: bool,
    /// Builds Perl classes from fixed ASCII byte ranges instead of Unicode tables.
    pub ascii_perl_classes: bool,
    /// Copied onto the parser; presumably caps the size of the expanded AST —
    /// enforcement is not visible in this part of the file.
    pub expanded_ast_limit: u64,
    /// Copied onto the parser; presumably caps list lengths — enforcement is
    /// not visible in this part of the file.
    pub max_list_len: usize,
    /// Copied onto the parser; presumably caps repetition counts — enforcement
    /// is not visible in this part of the file.
    pub max_repeat: u32,
}
52
/// Default for [`PatternFlags::max_repeat`].
pub const DEFAULT_MAX_REPEAT: u32 = 500;
/// Default for [`PatternFlags::expanded_ast_limit`].
pub const DEFAULT_EXPANDED_AST_LIMIT: u64 = 50_000;
/// Default for [`PatternFlags::max_list_len`].
pub const DEFAULT_MAX_LIST_LEN: usize = 4_000;
58
59impl Default for PatternFlags {
60 fn default() -> Self {
61 Self {
62 unicode: true,
63 full_unicode: false,
64 case_insensitive: false,
65 dot_matches_new_line: false,
66 multiline: true,
67 ignore_whitespace: false,
68 ascii_perl_classes: false,
69 expanded_ast_limit: DEFAULT_EXPANDED_AST_LIMIT,
70 max_list_len: DEFAULT_MAX_LIST_LEN,
71 max_repeat: DEFAULT_MAX_REPEAT,
72 }
73 }
74}
75
/// Classification of what sits next to a word-boundary position, as computed
/// by `word_char_kind` / `concat_neighbor_kind`.
#[derive(Clone, Copy, PartialEq, Debug)]
enum WordCharKind {
    /// The neighbour is a word character.
    Word,
    /// The neighbour is a non-word character.
    NonWord,
    /// A nullable repetition of word characters: may contribute a word
    /// character or nothing.
    MaybeWord,
    /// A nullable repetition of non-word characters.
    MaybeNonWord,
    /// Could not be decided syntactically.
    Unknown,
    /// There is no neighbour: the index falls outside the concatenation.
    Edge,
}
85
/// Returns `true` for ASCII letters, ASCII digits and `_`.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_')
}
89
/// A single-unit piece of syntax that can be turned into an [`Ast`] node or,
/// for some variants, into a character-class item.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal character.
    Literal(Literal),
    /// An assertion.
    Assertion(ast::Assertion),
    /// Becomes `Ast::dot`; carries only its span.
    Dot(Span),
    /// Becomes `Ast::top`; carries only its span.
    Top(Span),
    /// A Perl character class.
    Perl(ClassPerl),
    /// A Unicode character class.
    Unicode(ClassUnicode),
}
99
100impl Primitive {
101 fn span(&self) -> &Span {
102 match *self {
103 Primitive::Literal(ref x) => &x.span,
104 Primitive::Assertion(ref x) => &x.span,
105 Primitive::Dot(ref span) => span,
106 Primitive::Top(ref span) => span,
107 Primitive::Perl(ref x) => &x.span,
108 Primitive::Unicode(ref x) => &x.span,
109 }
110 }
111
112 fn into_ast(self) -> Ast {
113 match self {
114 Primitive::Literal(lit) => Ast::literal(lit),
115 Primitive::Assertion(assert) => Ast::assertion(assert),
116 Primitive::Dot(span) => Ast::dot(span),
117 Primitive::Top(span) => Ast::top(span),
118 Primitive::Perl(cls) => Ast::class_perl(cls),
119 Primitive::Unicode(cls) => Ast::class_unicode(cls),
120 }
121 }
122
123 fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
124 use self::Primitive::*;
125 use regex_syntax::ast::ClassSetItem;
126
127 match self {
128 Literal(lit) => Ok(ClassSetItem::Literal(lit)),
129 Perl(cls) => Ok(ClassSetItem::Perl(cls)),
130 Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
131 x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
132 }
133 }
134
135 fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
136 use self::Primitive::*;
137
138 match self {
139 Literal(lit) => Ok(lit),
140 x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
141 }
142 }
143}
144
/// A value that is one of two alternatives.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Either<Left, Right> {
    /// The first alternative.
    Left(Left),
    /// The second alternative.
    Right(Right),
}
150
/// An error raised while parsing or translating a pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// What went wrong.
    pub kind: ErrorKind,
    /// Owned copy of the pattern that was being parsed.
    pattern: String,
    /// Where in the pattern the error was detected.
    pub span: Span,
}
161
162impl std::fmt::Display for ParseError {
163 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
164 write!(f, "{:?}: {:?}", self.kind, self.span)
165 }
166}
167impl std::error::Error for ParseError {}
168
/// Result type used throughout this file, with [`ParseError`] as the error.
type Result<T> = core::result::Result<T, ParseError>;
170
/// One entry of the parser's group stack.
#[derive(Clone, Debug)]
enum GroupState {
    /// An opened group whose closing `)` has not been seen yet.
    Group {
        /// The concatenation that was being built before the group opened;
        /// the finished group is appended to it when the group is popped.
        concat: Concat,
        /// The group itself; its inner AST is filled in when it is popped.
        group: ast::Group,
        /// Whitespace-insensitive setting in effect before the group opened;
        /// restored when the group is popped.
        ignore_whitespace: bool,
    },
    /// Branches of an alternation collected so far.
    Alternation(ast::Alternation),
    /// Operands of an intersection collected so far.
    Intersection(ast::Intersection),
}
185
/// One entry of the parser's character-class stack.
#[derive(Clone, Debug)]
enum ClassState {
    /// A bracketed class that has been opened but not yet closed.
    Open {
        /// The union of the enclosing class, saved while the nested class is parsed.
        union: regex_syntax::ast::ClassSetUnion,
        /// The bracketed class being parsed; its kind and end are set on close.
        set: regex_syntax::ast::ClassBracketed,
    },
    /// A binary set operation whose right-hand side is still being parsed.
    Op {
        /// The operator.
        kind: regex_syntax::ast::ClassSetBinaryOpKind,
        /// The already-parsed left-hand side.
        lhs: regex_syntax::ast::ClassSet,
    },
}
203
/// Parser and translator state for a single pattern.
///
/// Position and stack fields use `Cell`/`RefCell` so the parsing methods can
/// take `&self`.
pub struct ResharpParser<'s> {
    /// Cache of already-built Perl classes, looked up by `(negated, kind)`.
    perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
    /// Cache of Unicode word/digit/space class nodes.
    unicode_classes: resharp_algebra::UnicodeClassCache,
    /// HIR translator built from the flags passed at construction.
    pub translator: regex_syntax::hir::translate::Translator,
    /// The pattern being parsed.
    pub pattern: &'s str,
    /// Current position in `pattern`.
    pos: Cell<Position>,
    /// Index most recently handed out by `next_capture_index`.
    capture_index: Cell<u32>,
    /// Set to `false` at construction; not otherwise used in this part of the file.
    octal: bool,
    /// Set to `false` at construction; not otherwise used in this part of the file.
    empty_min_range: bool,
    /// Whether whitespace and `#` comments are currently skipped.
    ignore_whitespace: Cell<bool>,
    /// Initialised from `PatternFlags::dot_matches_new_line`.
    dot_all: Cell<bool>,
    /// Initialised from `PatternFlags::multiline`.
    multiline: Cell<bool>,
    /// True when any of the `unicode`, `full_unicode` or `ascii_perl_classes` flags was set.
    global_unicode: bool,
    /// Copy of `PatternFlags::full_unicode`.
    global_full_unicode: bool,
    /// Copy of `PatternFlags::ascii_perl_classes`.
    global_ascii_perl: bool,
    /// Copy of `PatternFlags::case_insensitive`.
    global_case_insensitive: bool,
    /// Copy of `PatternFlags::expanded_ast_limit`.
    expanded_ast_limit: u64,
    /// Copy of `PatternFlags::max_list_len`.
    max_list_len: usize,
    /// Copy of `PatternFlags::max_repeat`.
    max_repeat: u32,
    /// Comments collected while skipping whitespace.
    comments: RefCell<Vec<ast::Comment>>,
    /// Stack of open groups, alternations and intersections.
    stack_group: RefCell<Vec<GroupState>>,
    /// Stack of open character classes and pending set operations.
    stack_class: RefCell<Vec<ClassState>>,
    /// Capture names seen so far, kept sorted by name for duplicate detection.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// Reusable string buffer; its users are outside this part of the file.
    scratch: RefCell<String>,
}
230
231fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
232 result.map_err(|e| {
233 if e.kind == from {
234 ParseError {
235 kind: to,
236 pattern: e.pattern,
237 span: e.span,
238 }
239 } else {
240 e
241 }
242 })
243}
244
/// Returns whether `c` may appear in a capture group name.
///
/// The first character must be `_` or alphabetic; later characters may also
/// be digits, `.`, `[` or `]`.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
252
/// Returns `true` if `c` has special meaning in the pattern syntax and must
/// be escaped to match literally.
pub fn is_meta_character(c: char) -> bool {
    match c {
        // Escape, wildcard and repetition operators.
        '\\' | '.' | '+' | '*' | '?' => true,
        // Grouping, class and counted-repetition delimiters.
        '(' | ')' | '[' | ']' | '{' | '}' => true,
        // Remaining operators and anchors.
        '|' | '^' | '$' | '#' | '&' | '-' | '~' | '_' => true,
        _ => false,
    }
}
276
277pub fn escape(text: &str) -> String {
279 let mut buf = String::new();
280 escape_into(text, &mut buf);
281 buf
282}
283
284pub fn escape_into(text: &str, buf: &mut String) {
286 buf.reserve(text.len());
287 for c in text.chars() {
288 if is_meta_character(c) {
289 buf.push('\\');
290 }
291 buf.push(c);
292 }
293}
294
295pub fn is_escapeable_character(c: char) -> bool {
296 if is_meta_character(c) {
297 return true;
298 }
299 if !c.is_ascii() {
300 return false;
301 }
302 match c {
303 '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
304 '<' | '>' => false,
305 _ => true,
306 }
307}
308
/// Returns `true` if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
312
313impl<'s> ResharpParser<'s> {
314 fn default_translator_builder(&self) -> TranslatorBuilder {
315 let mut trb = TranslatorBuilder::new();
316 trb.unicode(self.global_unicode);
317 trb.utf8(false);
318 trb.case_insensitive(self.global_case_insensitive);
319 trb
320 }
321
322 pub fn new(pattern: &'s str) -> Self {
323 Self::with_flags(pattern, &PatternFlags::default())
324 }
325
326 pub fn with_flags(pattern: &'s str, flags: &PatternFlags) -> Self {
327 let mut trb = TranslatorBuilder::new();
328 trb.unicode(flags.unicode);
329 trb.utf8(false);
330 trb.case_insensitive(flags.case_insensitive);
331 Self {
332 translator: trb.build(),
333 pattern,
334 perl_classes: vec![],
335 unicode_classes: resharp_algebra::UnicodeClassCache::default(),
336 pos: Cell::new(Position::new(0, 0, 0)),
337 capture_index: Cell::new(0),
338 octal: false,
339 empty_min_range: false,
340 ignore_whitespace: Cell::new(flags.ignore_whitespace),
341 dot_all: Cell::new(flags.dot_matches_new_line),
342 multiline: Cell::new(flags.multiline),
343 global_unicode: flags.unicode || flags.full_unicode || flags.ascii_perl_classes,
344 global_full_unicode: flags.full_unicode,
345 global_ascii_perl: flags.ascii_perl_classes,
346 global_case_insensitive: flags.case_insensitive,
347 expanded_ast_limit: flags.expanded_ast_limit,
348 max_list_len: flags.max_list_len,
349 max_repeat: flags.max_repeat,
350 comments: RefCell::new(vec![]),
351 stack_group: RefCell::new(vec![]),
352 stack_class: RefCell::new(vec![]),
353 capture_names: RefCell::new(vec![]),
354 scratch: RefCell::new(String::new()),
355 }
356 }
357
358 fn parser(&'_ self) -> &'_ ResharpParser<'_> {
359 self
360 }
361
362 fn pattern(&self) -> &str {
363 self.pattern
364 }
365
366 fn error(&self, span: Span, kind: ast::ErrorKind) -> ParseError {
367 ParseError {
368 kind,
369 pattern: self.pattern().to_string(),
370 span,
371 }
372 }
373
374 fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ParseError {
375 self.error(
376 Span::splat(self.pos()),
377 ast::ErrorKind::UnsupportedResharpRegex,
378 )
379 }
380
381 fn offset(&self) -> usize {
382 self.parser().pos.get().offset
383 }
384
385 fn line(&self) -> usize {
386 self.parser().pos.get().line
387 }
388
389 fn column(&self) -> usize {
390 self.parser().pos.get().column
391 }
392
393 fn next_capture_index(&self, span: Span) -> Result<u32> {
394 let current = self.parser().capture_index.get();
395 let i = current
396 .checked_add(1)
397 .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
398 self.parser().capture_index.set(i);
399 Ok(i)
400 }
401
402 fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
403 let mut names = self.parser().capture_names.borrow_mut();
404 match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
405 Err(i) => {
406 names.insert(i, cap.clone());
407 Ok(())
408 }
409 Ok(i) => Err(self.error(
410 cap.span,
411 ast::ErrorKind::GroupNameDuplicate {
412 original: names[i].span,
413 },
414 )),
415 }
416 }
417
418 fn ignore_whitespace(&self) -> bool {
419 self.parser().ignore_whitespace.get()
420 }
421
422 fn char(&self) -> char {
423 self.char_at(self.offset())
424 }
425
426 fn char_at(&self, i: usize) -> char {
427 self.pattern()[i..]
428 .chars()
429 .next()
430 .unwrap_or_else(|| panic!("expected char at offset {}", i))
431 }
432
433 fn bump(&self) -> bool {
434 if self.is_eof() {
435 return false;
436 }
437 let Position {
438 mut offset,
439 mut line,
440 mut column,
441 } = self.pos();
442 if self.char() == '\n' {
443 line = line.checked_add(1).unwrap();
444 column = 1;
445 } else {
446 column = column.checked_add(1).unwrap();
447 }
448 offset += self.char().len_utf8();
449 self.parser().pos.set(Position {
450 offset,
451 line,
452 column,
453 });
454 self.pattern()[self.offset()..].chars().next().is_some()
455 }
456
457 fn bump_if(&self, prefix: &str) -> bool {
458 if self.pattern()[self.offset()..].starts_with(prefix) {
459 for _ in 0..prefix.chars().count() {
460 self.bump();
461 }
462 true
463 } else {
464 false
465 }
466 }
467
468 fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
469 if self.bump_if("?=") {
470 return Some((true, true));
471 }
472 if self.bump_if("?!") {
473 return Some((true, false));
474 }
475 if self.bump_if("?<=") {
476 return Some((false, true));
477 }
478 if self.bump_if("?<!") {
479 return Some((false, false));
480 }
481 None
482 }
483
484 fn bump_and_bump_space(&self) -> bool {
485 if !self.bump() {
486 return false;
487 }
488 self.bump_space();
489 !self.is_eof()
490 }
491
492 fn bump_space(&self) {
493 if !self.ignore_whitespace() {
494 return;
495 }
496 while !self.is_eof() {
497 if self.char().is_whitespace() {
498 self.bump();
499 } else if self.char() == '#' {
500 let start = self.pos();
501 let mut comment_text = String::new();
502 self.bump();
503 while !self.is_eof() {
504 let c = self.char();
505 self.bump();
506 if c == '\n' {
507 break;
508 }
509 comment_text.push(c);
510 }
511 let comment = ast::Comment {
512 span: Span::new(start, self.pos()),
513 comment: comment_text,
514 };
515 self.parser().comments.borrow_mut().push(comment);
516 } else {
517 break;
518 }
519 }
520 }
521
522 fn peek(&self) -> Option<char> {
523 if self.is_eof() {
524 return None;
525 }
526 self.pattern()[self.offset() + self.char().len_utf8()..]
527 .chars()
528 .next()
529 }
530
531 fn peek_space(&self) -> Option<char> {
534 if !self.ignore_whitespace() {
535 return self.peek();
536 }
537 if self.is_eof() {
538 return None;
539 }
540 let mut start = self.offset() + self.char().len_utf8();
541 let mut in_comment = false;
542 for (i, c) in self.pattern()[start..].char_indices() {
543 if c.is_whitespace() {
544 continue;
545 } else if !in_comment && c == '#' {
546 in_comment = true;
547 } else if in_comment && c == '\n' {
548 in_comment = false;
549 } else {
550 start += i;
551 break;
552 }
553 }
554 self.pattern()[start..].chars().next()
555 }
556
557 fn is_eof(&self) -> bool {
558 self.offset() == self.pattern().len()
559 }
560
561 fn pos(&self) -> Position {
562 self.parser().pos.get()
563 }
564
565 fn span(&self) -> Span {
566 Span::splat(self.pos())
567 }
568
569 fn span_char(&self) -> Span {
570 let mut next = Position {
571 offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
572 line: self.line(),
573 column: self.column().checked_add(1).unwrap(),
574 };
575 if self.char() == '\n' {
576 next.line += 1;
577 next.column = 1;
578 }
579 Span::new(self.pos(), next)
580 }
581
582 #[inline(never)]
583 fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
584 assert_eq!(self.char(), '|');
585 concat.span.end = self.pos();
586 self.push_or_add_alternation(concat);
587 self.bump();
588 Ok(ast::Concat {
589 span: self.span(),
590 asts: vec![],
591 })
592 }
593
594 fn push_or_add_alternation(&self, concat: Concat) {
595 use self::GroupState::*;
596
597 let mut stack = self.parser().stack_group.borrow_mut();
598 if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
599 alts.asts.push(concat.into_ast());
600 return;
601 }
602 stack.push(Alternation(ast::Alternation {
603 span: Span::new(concat.span.start, self.pos()),
604 asts: vec![concat.into_ast()],
605 }));
606 }
607
608 #[inline(never)]
609 fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
610 assert_eq!(self.char(), '&');
611 concat.span.end = self.pos();
612 self.push_or_add_intersect(concat);
613 self.bump();
614 Ok(Concat {
615 span: self.span(),
616 asts: vec![],
617 })
618 }
619
620 fn push_or_add_intersect(&self, concat: Concat) {
621 use self::GroupState::*;
622
623 let mut stack = self.parser().stack_group.borrow_mut();
624 if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
625 alts.asts.push(concat.into_ast());
626 return;
627 }
628 stack.push(Intersection(ast::Intersection {
629 span: Span::new(concat.span.start, self.pos()),
630 asts: vec![concat.into_ast()],
631 }));
632 }
633
634 #[inline(never)]
635 fn push_group(&self, mut concat: Concat) -> Result<Concat> {
636 assert_eq!(self.char(), '(');
637 match self.parse_group()? {
638 Either::Left(set) => {
639 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
640 if let Some(v) = ignore {
641 self.parser().ignore_whitespace.set(v);
642 }
643
644 concat.asts.push(Ast::flags(set));
645 Ok(concat)
646 }
647 Either::Right(group) => {
648 let old_ignore_whitespace = self.ignore_whitespace();
649 let new_ignore_whitespace = group
650 .flags()
651 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
652 .unwrap_or(old_ignore_whitespace);
653 self.parser()
654 .stack_group
655 .borrow_mut()
656 .push(GroupState::Group {
657 concat,
658 group,
659 ignore_whitespace: old_ignore_whitespace,
660 });
661 self.parser().ignore_whitespace.set(new_ignore_whitespace);
662 Ok(Concat {
663 span: self.span(),
664 asts: vec![],
665 })
666 }
667 }
668 }
669
670 #[inline(never)]
671 fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
672 assert_eq!(self.char(), '~');
673 self.bump();
674 if self.is_eof() || self.char() != '(' {
675 return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
676 }
677 let open_span = self.span_char();
678 self.bump();
679 let group = ast::Group {
680 span: open_span,
681 kind: ast::GroupKind::Complement,
682 ast: Box::new(Ast::empty(self.span())),
683 };
684
685 let old_ignore_whitespace = self.ignore_whitespace();
686 let new_ignore_whitespace = group
687 .flags()
688 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
689 .unwrap_or(old_ignore_whitespace);
690 self.parser()
691 .stack_group
692 .borrow_mut()
693 .push(GroupState::Group {
694 concat,
695 group,
696 ignore_whitespace: old_ignore_whitespace,
697 });
698 self.parser().ignore_whitespace.set(new_ignore_whitespace);
699 Ok(Concat {
700 span: self.span(),
701 asts: vec![],
702 })
703 }
704
705 #[inline(never)]
706 fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
707 use self::GroupState::*;
708 assert_eq!(self.char(), ')');
709 let mut stack = self.parser().stack_group.borrow_mut();
710 let topstack = stack.pop();
711
712 let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
713 Some(Group {
714 concat,
715 group,
716 ignore_whitespace,
717 }) => (concat, group, ignore_whitespace, None),
718 Some(Alternation(alt)) => match stack.pop() {
719 Some(Group {
720 concat,
721 group,
722 ignore_whitespace,
723 }) => (
724 concat,
725 group,
726 ignore_whitespace,
727 Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
728 ),
729 None | Some(Alternation(_)) | Some(Intersection(_)) => {
730 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
731 }
732 },
733 Some(Intersection(int)) => match stack.pop() {
734 Some(Group {
735 concat,
736 group,
737 ignore_whitespace,
738 }) => (
739 concat,
740 group,
741 ignore_whitespace,
742 Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
743 ),
744 None | Some(Alternation(_)) | Some(Intersection(_)) => {
745 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
746 }
747 },
748
749 None => {
750 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
751 }
752 };
753 self.parser().ignore_whitespace.set(ignore_whitespace);
754 group_concat.span.end = self.pos();
755 self.bump();
756 group.span.end = self.pos();
757 match alt {
758 Some(Either::Left(mut alt)) => {
759 alt.span.end = group_concat.span.end;
760 alt.asts.push(group_concat.into_ast());
761 group.ast = Box::new(alt.into_ast());
762 }
763 Some(Either::Right(mut int)) => {
764 int.span.end = group_concat.span.end;
765 int.asts.push(group_concat.into_ast());
766 group.ast = Box::new(int.into_ast());
767 }
768 None => {
769 group.ast = Box::new(group_concat.into_ast());
770 }
771 }
772
773 if group.kind == GroupKind::Complement {
774 let complement = ast::Complement {
775 span: self.span(),
776 ast: group.ast,
777 };
778 prior_concat.asts.push(Ast::complement(complement));
779 }
780 else {
782 prior_concat.asts.push(Ast::group(group));
783 }
784 Ok(prior_concat)
785 }
786
787 #[inline(never)]
788 fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
789 concat.span.end = self.pos();
790 let mut stack = self.parser().stack_group.borrow_mut();
791 let ast = match stack.pop() {
792 None => Ok(concat.into_ast()),
793 Some(GroupState::Alternation(mut alt)) => {
794 alt.span.end = self.pos();
795 alt.asts.push(concat.into_ast());
796 Ok(Ast::alternation(alt))
797 }
798 Some(GroupState::Intersection(mut int)) => {
799 int.span.end = self.pos();
800 int.asts.push(concat.into_ast());
801
802 Ok(Ast::intersection(int))
803 }
804 Some(GroupState::Group { group, .. }) => {
805 return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
806 }
807 };
808 match stack.pop() {
810 None => ast,
811 Some(GroupState::Alternation(alt)) => {
812 Err(self.error(alt.span, ast::ErrorKind::UnsupportedResharpRegex))
813 }
814 Some(GroupState::Intersection(int)) => {
815 Err(self.error(int.span, ast::ErrorKind::UnsupportedResharpRegex))
816 }
817 Some(GroupState::Group { group, .. }) => {
818 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
819 }
820 }
821 }
822
823 #[inline(never)]
824 fn push_class_open(
825 &self,
826 parent_union: regex_syntax::ast::ClassSetUnion,
827 ) -> Result<regex_syntax::ast::ClassSetUnion> {
828 assert_eq!(self.char(), '[');
829
830 let (nested_set, nested_union) = self.parse_set_class_open()?;
831 self.parser()
832 .stack_class
833 .borrow_mut()
834 .push(ClassState::Open {
835 union: parent_union,
836 set: nested_set,
837 });
838 Ok(nested_union)
839 }
840
841 #[inline(never)]
842 fn pop_class(
843 &self,
844 nested_union: regex_syntax::ast::ClassSetUnion,
845 ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
846 assert_eq!(self.char(), ']');
847
848 let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
849 let prevset = self.pop_class_op(item);
850 let mut stack = self.parser().stack_class.borrow_mut();
851 match stack.pop() {
852 None => panic!("unexpected empty character class stack"),
853 Some(ClassState::Op { .. }) => panic!("unexpected ClassState::Op"),
854 Some(ClassState::Open { mut union, mut set }) => {
855 self.bump();
856 set.span.end = self.pos();
857 set.kind = prevset;
858 if stack.is_empty() {
859 Ok(Either::Right(set))
860 } else {
861 union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
862 Ok(Either::Left(union))
863 }
864 }
865 }
866 }
867
868 #[inline(never)]
869 fn unclosed_class_error(&self) -> ParseError {
870 for state in self.parser().stack_class.borrow().iter().rev() {
871 if let ClassState::Open { ref set, .. } = *state {
872 return self.error(set.span, ast::ErrorKind::ClassUnclosed);
873 }
874 }
875 panic!("no open character class found")
876 }
877
878 #[inline(never)]
879 fn push_class_op(
880 &self,
881 next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
882 next_union: regex_syntax::ast::ClassSetUnion,
883 ) -> regex_syntax::ast::ClassSetUnion {
884 let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
885 let new_lhs = self.pop_class_op(item);
886 self.parser().stack_class.borrow_mut().push(ClassState::Op {
887 kind: next_kind,
888 lhs: new_lhs,
889 });
890 regex_syntax::ast::ClassSetUnion {
891 span: self.span(),
892 items: vec![],
893 }
894 }
895
896 #[inline(never)]
897 fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
898 let mut stack = self.parser().stack_class.borrow_mut();
899 let (kind, lhs) = match stack.pop() {
900 Some(ClassState::Op { kind, lhs }) => (kind, lhs),
901 Some(state @ ClassState::Open { .. }) => {
902 stack.push(state);
903 return rhs;
904 }
905 None => unreachable!(),
906 };
907 let span = Span::new(lhs.span().start, rhs.span().end);
908 regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
909 span,
910 kind,
911 lhs: Box::new(lhs),
912 rhs: Box::new(rhs),
913 })
914 }
915
916 fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
917 match hir.kind() {
918 hir::HirKind::Empty => Ok(NodeId::EPS),
919 hir::HirKind::Literal(l) => {
920 if l.0.len() == 1 {
921 let node = tb.mk_u8(l.0[0]);
922 Ok(node)
923 } else {
924 let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
925 let conc = tb.mk_concats(ws.iter().copied());
926 Ok(conc)
927 }
928 }
929 hir::HirKind::Class(class) => match class {
930 hir::Class::Unicode(class_unicode) => {
931 let ranges = class_unicode.ranges();
932 if ranges.len() == 1
933 && ranges[0].start() == '\u{0}'
934 && ranges[0].end() == '\u{10FFFF}'
935 {
936 return Ok(tb.mk_range_u8(0, 255));
937 }
938 let mut nodes = Vec::new();
939 for range in ranges {
940 for seq in Utf8Sequences::new(range.start(), range.end()) {
941 let sl = seq.as_slice();
942 let bytes: Vec<_> = sl.iter().map(|s| (s.start, s.end)).collect();
943 let node = match bytes.len() {
944 1 => tb.mk_range_u8(bytes[0].0, bytes[0].1),
945 n => {
946 let last = tb.mk_range_u8(bytes[n - 1].0, bytes[n - 1].1);
947 let mut conc = last;
948 for i in (0..n - 1).rev() {
949 let b = tb.mk_range_u8(bytes[i].0, bytes[i].1);
950 conc = tb.mk_concat(b, conc);
951 }
952 conc
953 }
954 };
955 nodes.push(node);
956 }
957 }
958 let merged = tb.mk_unions(nodes.into_iter());
959 Ok(merged)
960 }
961 hir::Class::Bytes(class_bytes) => {
962 let ranges = class_bytes.ranges();
963 let mut result = NodeId::BOT;
964 for range in ranges {
965 let start = range.start();
966 let end = range.end();
967 let node = tb.mk_range_u8(start, end);
968 result = tb.mk_union(result, node);
969 }
970 Ok(result)
971 }
972 },
973 hir::HirKind::Look(_) => Err(self.error(
974 Span::splat(self.pos()),
975 ast::ErrorKind::UnsupportedResharpRegex,
976 )),
977 hir::HirKind::Repetition(_) => Err(self.error(
978 Span::splat(self.pos()),
979 ast::ErrorKind::UnsupportedResharpRegex,
980 )),
981 hir::HirKind::Capture(_) => Err(self.error(
982 Span::splat(self.pos()),
983 ast::ErrorKind::UnsupportedResharpRegex,
984 )),
985 hir::HirKind::Concat(body) => {
986 let mut result = NodeId::EPS;
987 for child in body {
988 let node = self.hir_to_node_id(child, tb)?;
989 result = tb.mk_concat(result, node);
990 }
991 Ok(result)
992 }
993 hir::HirKind::Alternation(_) => Err(self.error(
994 Span::splat(self.pos()),
995 ast::ErrorKind::UnsupportedResharpRegex,
996 )),
997 }
998 }
999
1000 fn translate_ast_to_hir(
1001 &mut self,
1002 orig_ast: ®ex_syntax::ast::Ast,
1003 tb: &mut TB<'s>,
1004 ) -> Result<NodeId> {
1005 match self.translator.translate("", orig_ast) {
1006 Err(_) => Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1007 Ok(hir) => self.hir_to_node_id(&hir, tb),
1008 }
1009 }
1010
1011 fn translator_to_node_id(
1012 &mut self,
1013 orig_ast: ®ex_syntax::ast::Ast,
1014 translator: &mut Option<Translator>,
1015 tb: &mut TB<'s>,
1016 ) -> Result<NodeId> {
1017 match translator {
1018 Some(tr) => {
1019 let hir = tr
1020 .translate("", orig_ast)
1021 .map_err(|e| self.unsupported_error(e))?;
1022 self.hir_to_node_id(&hir, tb)
1023 }
1024 None => self.translate_ast_to_hir(orig_ast, tb),
1025 }
1026 }
1027
1028 fn get_class(
1029 &mut self,
1030 negated: bool,
1031 kind: regex_syntax::ast::ClassPerlKind,
1032 tb: &mut TB<'s>,
1033 ) -> Result<NodeId> {
1034 let w = self
1035 .perl_classes
1036 .iter()
1037 .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1038 match w {
1039 Some((_, _, value)) => Ok(*value),
1040 None => {
1041 let translated = if self.global_ascii_perl {
1042 let pos = match kind {
1043 regex_syntax::ast::ClassPerlKind::Word => {
1044 let az = tb.mk_range_u8(b'a', b'z');
1045 let big = tb.mk_range_u8(b'A', b'Z');
1046 let dig = tb.mk_range_u8(b'0', b'9');
1047 let us = tb.mk_u8(b'_');
1048 tb.mk_unions([az, big, dig, us].into_iter())
1049 }
1050 regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1051 regex_syntax::ast::ClassPerlKind::Space => {
1052 let sp = tb.mk_u8(b' ');
1053 let tab = tb.mk_u8(b'\t');
1054 let nl = tb.mk_u8(b'\n');
1055 let cr = tb.mk_u8(b'\r');
1056 let ff = tb.mk_u8(0x0C);
1057 let vt = tb.mk_u8(0x0B);
1058 tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1059 }
1060 };
1061 if negated {
1062 resharp_algebra::neg_class(tb, pos)
1063 } else {
1064 pos
1065 }
1066 } else if self.global_unicode {
1067 match kind {
1068 regex_syntax::ast::ClassPerlKind::Word => {
1069 if self.global_full_unicode {
1070 self.unicode_classes.ensure_word_full(tb);
1071 } else {
1072 self.unicode_classes.ensure_word(tb);
1073 }
1074 if negated {
1075 self.unicode_classes.non_word
1076 } else {
1077 self.unicode_classes.word
1078 }
1079 }
1080 regex_syntax::ast::ClassPerlKind::Digit => {
1081 if self.global_full_unicode {
1082 self.unicode_classes.ensure_digit_full(tb);
1083 } else {
1084 self.unicode_classes.ensure_digit(tb);
1085 }
1086 if negated {
1087 self.unicode_classes.non_digit
1088 } else {
1089 self.unicode_classes.digit
1090 }
1091 }
1092 regex_syntax::ast::ClassPerlKind::Space => {
1093 if self.global_full_unicode {
1094 self.unicode_classes.ensure_space_full(tb);
1095 } else {
1096 self.unicode_classes.ensure_space(tb);
1097 }
1098 if negated {
1099 self.unicode_classes.non_space
1100 } else {
1101 self.unicode_classes.space
1102 }
1103 }
1104 }
1105 } else {
1106 let pos = match kind {
1107 regex_syntax::ast::ClassPerlKind::Word => {
1108 let az = tb.mk_range_u8(b'a', b'z');
1109 let big = tb.mk_range_u8(b'A', b'Z');
1110 let dig = tb.mk_range_u8(b'0', b'9');
1111 let us = tb.mk_u8(b'_');
1112 tb.mk_unions([az, big, dig, us].into_iter())
1113 }
1114 regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1115 regex_syntax::ast::ClassPerlKind::Space => {
1116 let sp = tb.mk_u8(b' ');
1117 let tab = tb.mk_u8(b'\t');
1118 let nl = tb.mk_u8(b'\n');
1119 let cr = tb.mk_u8(b'\r');
1120 let ff = tb.mk_u8(0x0C);
1121 let vt = tb.mk_u8(0x0B);
1122 tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1123 }
1124 };
1125 if negated {
1126 tb.mk_compl(pos)
1127 } else {
1128 pos
1129 }
1130 };
1131 self.perl_classes.push((negated, kind, translated));
1132 Ok(translated)
1133 }
1134 }
1135 }
1136
1137 fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1138 use WordCharKind::*;
1139 match ast {
1140 Ast::Literal(lit) => {
1141 if is_word_byte(lit.c as u8) {
1142 Word
1143 } else {
1144 NonWord
1145 }
1146 }
1147 Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1148 (®ex_syntax::ast::ClassPerlKind::Word, false) => Word,
1149 (®ex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1150 (®ex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1151 (®ex_syntax::ast::ClassPerlKind::Space, true) => Unknown,
1152 (®ex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1153 (®ex_syntax::ast::ClassPerlKind::Digit, true) => Unknown,
1154 },
1155 Ast::Dot(_) | Ast::Top(_) => Unknown,
1156 Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1157 Ast::Concat(c) if !c.asts.is_empty() => {
1158 let edge = if left { c.asts.len() - 1 } else { 0 };
1159 let kind = Self::word_char_kind(&c.asts[edge], left);
1160 match kind {
1161 MaybeWord => {
1162 let dir: isize = if left { -1 } else { 1 };
1163 match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1164 Word => Word,
1165 _ => MaybeWord,
1166 }
1167 }
1168 MaybeNonWord => {
1169 let dir: isize = if left { -1 } else { 1 };
1170 match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1171 NonWord => NonWord,
1172 _ => MaybeNonWord,
1173 }
1174 }
1175 other => other,
1176 }
1177 }
1178 Ast::Alternation(alt) if !alt.asts.is_empty() => {
1179 let first = Self::word_char_kind(&alt.asts[0], left);
1180 if alt.asts[1..]
1181 .iter()
1182 .all(|a| Self::word_char_kind(a, left) == first)
1183 {
1184 first
1185 } else {
1186 Unknown
1187 }
1188 }
1189 Ast::Repetition(r) => {
1190 let inner = Self::word_char_kind(&r.ast, left);
1191 let nullable = matches!(
1192 &r.op.kind,
1193 ast::RepetitionKind::ZeroOrMore
1194 | ast::RepetitionKind::ZeroOrOne
1195 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1196 );
1197 if nullable {
1198 match inner {
1199 Word => MaybeWord,
1200 NonWord => MaybeNonWord,
1201 _ => Unknown,
1202 }
1203 } else {
1204 inner
1205 }
1206 }
1207 Ast::Lookaround(la) => Self::word_char_kind(&la.ast, left),
1208 _ => Unknown,
1209 }
1210 }
1211
1212 fn edge_class_ast(ast: &Ast, left: bool) -> Option<&Ast> {
1214 match ast {
1215 Ast::Literal(_)
1216 | Ast::ClassPerl(_)
1217 | Ast::ClassBracketed(_)
1218 | Ast::ClassUnicode(_)
1219 | Ast::Dot(_)
1220 | Ast::Top(_) => Some(ast),
1221 Ast::Group(g) => Self::edge_class_ast(&g.ast, left),
1222 Ast::Concat(c) if !c.asts.is_empty() => {
1223 Self::edge_class_ast(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1224 }
1225 Ast::Repetition(r) => {
1226 let nullable = matches!(
1227 &r.op.kind,
1228 ast::RepetitionKind::ZeroOrMore
1229 | ast::RepetitionKind::ZeroOrOne
1230 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1231 );
1232 if nullable {
1233 None
1234 } else {
1235 Self::edge_class_ast(&r.ast, left)
1236 }
1237 }
1238 _ => None,
1239 }
1240 }
1241
1242 fn resolve_word_kind(
1243 &mut self,
1244 asts: &[Ast],
1245 idx: usize,
1246 dir: isize,
1247 translator: &mut Option<Translator>,
1248 tb: &mut TB<'s>,
1249 word_id: NodeId,
1250 not_word_id: NodeId,
1251 ) -> Result<WordCharKind> {
1252 use WordCharKind::*;
1253 let fast = Self::concat_neighbor_kind(asts, idx, dir);
1254 if fast != Unknown {
1255 return Ok(fast);
1256 }
1257 let neighbor_idx = (idx as isize + dir) as usize;
1258 let node = if let Some(edge) = Self::edge_class_ast(&asts[neighbor_idx], dir < 0) {
1259 self.ast_to_node_id(edge, translator, tb)?
1260 } else {
1261 let neighbor_node = self.ast_to_node_id(&asts[neighbor_idx], translator, tb)?;
1263 let mut neighbor_node = tb
1264 .try_elim_lookarounds(neighbor_node)
1265 .ok_or_else(|| self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))?;
1266 if dir < 0 {
1267 neighbor_node = tb.reverse(neighbor_node).or_else(|_| {
1268 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1269 })?;
1270 }
1271 let word_prefix = tb.mk_concat(word_id, NodeId::TS);
1272 let non_word_prefix = tb.mk_concat(not_word_id, NodeId::TS);
1273 return if tb.subsumes(word_prefix, neighbor_node) == Some(true) {
1274 Ok(Word)
1275 } else if tb.subsumes(non_word_prefix, neighbor_node) == Some(true) {
1276 Ok(NonWord)
1277 } else {
1278 Ok(Unknown)
1279 };
1280 };
1281 if tb.subsumes(word_id, node) == Some(true) {
1282 Ok(Word)
1283 } else if tb.subsumes(not_word_id, node) == Some(true) {
1284 Ok(NonWord)
1285 } else {
1286 Ok(Unknown)
1287 }
1288 }
1289
1290 fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1291 use WordCharKind::*;
1292 let next = idx as isize + dir;
1293 if next < 0 || next >= asts.len() as isize {
1294 return Edge;
1295 }
1296 let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1297 match kind {
1298 MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1299 Word => Word,
1300 _ => Unknown,
1301 },
1302 MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1303 NonWord => NonWord,
1304 _ => Unknown,
1305 },
1306 other => other,
1307 }
1308 }
1309
1310 fn rewrite_word_boundary_in_concat(
1311 &mut self,
1312 asts: &[Ast],
1313 idx: usize,
1314 translator: &mut Option<Translator>,
1315 tb: &mut TB<'s>,
1316 ) -> Result<(NodeId, usize)> {
1317 use WordCharKind::*;
1318 let (word_id, not_word_id) = if self.global_full_unicode {
1319 self.unicode_classes.ensure_word_full(tb);
1320 (self.unicode_classes.word, self.unicode_classes.non_word)
1321 } else if self.global_unicode && !self.global_ascii_perl {
1322 self.unicode_classes.ensure_word(tb);
1323 (self.unicode_classes.word, self.unicode_classes.non_word)
1324 } else {
1325 let az = tb.mk_range_u8(b'a', b'z');
1326 let big = tb.mk_range_u8(b'A', b'Z');
1327 let dig = tb.mk_range_u8(b'0', b'9');
1328 let us = tb.mk_u8(b'_');
1329 let w = tb.mk_unions([az, big, dig, us].into_iter());
1330 (w, tb.mk_compl(w))
1331 };
1332 let left = self.resolve_word_kind(asts, idx, -1, translator, tb, word_id, not_word_id)?;
1333 let right = self.resolve_word_kind(asts, idx, 1, translator, tb, word_id, not_word_id)?;
1334 match (left, right) {
1335 (NonWord, Word) | (Word, NonWord) => Ok((NodeId::EPS, idx + 1)),
1336 (Word, _) => {
1337 let neg = tb.mk_neg_lookahead(word_id, 0);
1338 Ok((neg, idx + 1))
1339 }
1340 (NonWord, _) => {
1341 let tail = tb.mk_concat(word_id, NodeId::TS);
1342 self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1343 }
1344 (_, Word) => Ok((tb.mk_neg_lookbehind(word_id), idx + 1)),
1345 (_, NonWord) => Ok((tb.mk_lookbehind(word_id, NodeId::MISSING), idx + 1)),
1346 _ => Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)),
1350 }
1351 }
1352
1353 fn merge_boundary_with_following_lookaheads(
1354 &mut self,
1355 asts: &[Ast],
1356 wb_idx: usize,
1357 boundary_tail: NodeId,
1358 translator: &mut Option<Translator>,
1359 tb: &mut TB<'s>,
1360 ) -> Result<(NodeId, usize)> {
1361 let mut next = wb_idx + 1;
1362 let mut la_bodies = vec![boundary_tail];
1363 while next < asts.len() {
1364 match &asts[next] {
1365 Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1366 let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1367 la_bodies.push(tb.mk_concat(body, NodeId::TS));
1368 next += 1;
1369 }
1370 _ => break,
1371 }
1372 }
1373 let merged = tb.mk_inters(la_bodies.into_iter());
1374 Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1375 }
1376
1377 fn ast_to_node_id(
1378 &mut self,
1379 ast: &Ast,
1380 translator: &mut Option<Translator>,
1381 tb: &mut TB<'s>,
1382 ) -> Result<NodeId> {
1383 match ast {
1384 Ast::Empty(_) => Ok(NodeId::EPS),
1385 Ast::Flags(f) => {
1386 if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1387 return Err(self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex));
1388 }
1389 let mut translator_builder = self.default_translator_builder();
1390 if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1391 translator_builder.case_insensitive(state);
1392 }
1393 if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1394 translator_builder.unicode(state);
1395 }
1396 if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1397 self.dot_all.set(state);
1398 }
1399 if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
1400 self.multiline.set(state);
1401 }
1402 let concat_translator = Some(translator_builder.build());
1403 *translator = concat_translator;
1404 Ok(NodeId::EPS)
1405 }
1406 Ast::Literal(l) => {
1407 let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
1408 self.translator_to_node_id(&ast_lit, translator, tb)
1409 }
1410 Ast::Top(_) => Ok(NodeId::TOP),
1411 Ast::Dot(_) => {
1412 let codepoint_dot = self.global_ascii_perl || self.global_full_unicode;
1413 let hirv = match (codepoint_dot, self.dot_all.get()) {
1414 (true, true) => hir::Hir::dot(hir::Dot::AnyChar),
1415 (true, false) => hir::Hir::dot(hir::Dot::AnyCharExceptLF),
1416 (false, true) => return Ok(NodeId::TOP),
1417 (false, false) => hir::Hir::dot(hir::Dot::AnyByteExceptLF),
1418 };
1419 self.hir_to_node_id(&hirv, tb)
1420 }
1421 Ast::Assertion(a) => match &a.kind {
1422 ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
1423 ast::AssertionKind::EndText => Ok(NodeId::END),
1424 ast::AssertionKind::WordBoundary => {
1425 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1426 }
1427 ast::AssertionKind::NotWordBoundary => {
1428 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1429 }
1430 ast::AssertionKind::StartLine => {
1431 if !self.multiline.get() {
1432 return Ok(NodeId::BEGIN);
1433 }
1434 let left = NodeId::BEGIN;
1435 let right = tb.mk_u8(b'\n');
1436 let union = tb.mk_union(left, right);
1437 Ok(tb.mk_lookbehind(union, NodeId::MISSING))
1438 }
1439 ast::AssertionKind::EndLine => {
1440 if !self.multiline.get() {
1441 return Ok(NodeId::END);
1442 }
1443 let left = NodeId::END;
1444 let right = tb.mk_u8(b'\n');
1445 let union = tb.mk_union(left, right);
1446 Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
1447 }
1448 ast::AssertionKind::WordBoundaryStart => {
1449 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1450 }
1451 ast::AssertionKind::WordBoundaryEnd => {
1452 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1453 }
1454 ast::AssertionKind::WordBoundaryStartAngle => {
1455 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1456 }
1457 ast::AssertionKind::WordBoundaryEndAngle => {
1458 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1459 }
1460 ast::AssertionKind::WordBoundaryStartHalf => {
1461 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1462 }
1463 ast::AssertionKind::WordBoundaryEndHalf => {
1464 Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1465 }
1466 },
1467 Ast::ClassUnicode(c) => {
1468 let tmp = regex_syntax::ast::ClassUnicode {
1469 span: c.span,
1470 negated: c.negated,
1471 kind: c.kind.clone(),
1472 };
1473 if !c.negated {
1474 if let regex_syntax::ast::ClassUnicodeKind::Named(s) = &c.kind {
1475 match s.as_str() {
1476 "ascii" => return Ok(tb.mk_range_u8(0, 127)),
1478 "utf8" => {
1480 let ascii = tb.mk_range_u8(0, 127);
1481 let beta = tb.mk_range_u8(128, 0xBF);
1482 let c0 = tb.mk_range_u8(0xC0, 0xDF);
1483 let c0s = tb.mk_concats([c0, beta].into_iter());
1484 let e0 = tb.mk_range_u8(0xE0, 0xEF);
1485 let e0s = tb.mk_concats([e0, beta, beta].into_iter());
1486 let f0 = tb.mk_range_u8(0xF0, 0xF7);
1487 let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
1488 return Ok(tb.mk_unions([ascii, c0s, e0s, f0s].into_iter()));
1489 }
1490 "hex" => {
1491 let nums = tb.mk_range_u8(b'0', b'9');
1492 let lets = tb.mk_range_u8(b'a', b'f');
1493 let lets2 = tb.mk_range_u8(b'A', b'F');
1494 let merged = tb.mk_unions([nums, lets, lets2].into_iter());
1495 return Ok(merged);
1496 }
1497 _ => {}
1498 }
1499 };
1500 }
1501
1502 let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
1503 self.translator_to_node_id(&orig_ast, translator, tb)
1504 }
1505 Ast::ClassPerl(c) => self.get_class(c.negated, c.kind.clone(), tb),
1506 Ast::ClassBracketed(c) => match &c.kind {
1507 regex_syntax::ast::ClassSet::Item(item) => {
1508 if !c.negated && is_universal_perl_pair(item) {
1509 return Ok(NodeId::TOP);
1510 }
1511 let tmp = regex_syntax::ast::ClassBracketed {
1512 span: c.span,
1513 negated: c.negated,
1514 kind: c.kind.clone(),
1515 };
1516 let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
1517 self.translator_to_node_id(&orig_ast, translator, tb)
1518 }
1519 regex_syntax::ast::ClassSet::BinaryOp(_) => {
1520 Err(self.error(c.span, ast::ErrorKind::UnsupportedResharpRegex))
1521 }
1522 },
1523 Ast::Repetition(r) => {
1524 let body = self.ast_to_node_id(&r.ast, translator, tb);
1525 match body {
1526 Ok(body) => match &r.op.kind {
1527 ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
1528 ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
1529 ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
1530 ast::RepetitionKind::Range(r) => match r {
1531 ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
1532 ast::RepetitionRange::AtLeast(n) => {
1533 let rep = tb.mk_repeat(body, *n, *n);
1534 let st = tb.mk_star(body);
1535 Ok(tb.mk_concat(rep, st))
1536 }
1537
1538 ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
1539 },
1540 },
1541 Err(_) => body,
1542 }
1543 }
1544 Ast::Lookaround(g) => {
1545 let body = self.ast_to_node_id(&g.ast, translator, tb)?;
1546 match g.kind {
1547 ast::LookaroundKind::PositiveLookahead
1548 | ast::LookaroundKind::NegativeLookahead
1549 if body.contains_lookbehind(tb) =>
1550 {
1551 Err(self.error(g.span, ast::ErrorKind::UnsupportedResharpRegex))
1552 }
1553 ast::LookaroundKind::PositiveLookahead => {
1554 Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
1555 }
1556 ast::LookaroundKind::PositiveLookbehind => {
1557 Ok(tb.mk_lookbehind(body, NodeId::MISSING))
1558 }
1559 ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
1560 ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
1561 }
1562 }
1563 Ast::Group(g) => {
1564 if let ast::GroupKind::NonCapturing(ref flags) = g.kind {
1565 if !flags.items.is_empty() {
1566 let mut translator_builder = self.default_translator_builder();
1567 if let Some(state) = flags.flag_state(ast::Flag::CaseInsensitive) {
1568 translator_builder.case_insensitive(state);
1569 }
1570 if let Some(state) = flags.flag_state(ast::Flag::Unicode) {
1571 translator_builder.unicode(state);
1572 }
1573 let saved_dot_all = self.dot_all.get();
1574 if let Some(state) = flags.flag_state(ast::Flag::DotMatchesNewLine) {
1575 self.dot_all.set(state);
1576 }
1577 let saved_multiline = self.multiline.get();
1578 if let Some(state) = flags.flag_state(ast::Flag::MultiLine) {
1579 self.multiline.set(state);
1580 }
1581 let mut scoped = Some(translator_builder.build());
1582 let result = self.ast_to_node_id(&g.ast, &mut scoped, tb);
1583 self.dot_all.set(saved_dot_all);
1584 self.multiline.set(saved_multiline);
1585 return result;
1586 }
1587 }
1588 self.ast_to_node_id(&g.ast, translator, tb)
1589 }
1590 Ast::Alternation(a) => {
1591 let mut children = vec![];
1592 for ast in &a.asts {
1593 match self.ast_to_node_id(ast, translator, tb) {
1594 Ok(node_id) => children.push(node_id),
1595 Err(err) => return Err(err),
1596 }
1597 }
1598 Ok(tb.mk_unions(children.iter().copied()))
1599 }
1600 Ast::Concat(c) => {
1601 let mut concat_translator: Option<Translator> = None;
1602 let mut children = vec![];
1603 let mut i = 0;
1604 while i < c.asts.len() {
1605 let ast = &c.asts[i];
1606 match ast {
1607 Ast::Flags(f) => {
1608 if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1609 return Err(
1610 self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex)
1611 );
1612 }
1613 let mut translator_builder = self.default_translator_builder();
1614 if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1615 translator_builder.case_insensitive(state);
1616 }
1617 if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1618 translator_builder.unicode(state);
1619 }
1620 if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1621 self.dot_all.set(state);
1622 }
1623 if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
1624 self.multiline.set(state);
1625 }
1626 concat_translator = Some(translator_builder.build());
1627 *translator = concat_translator.clone();
1628 i += 1;
1629 continue;
1630 }
1631 Ast::Assertion(a) if a.kind == ast::AssertionKind::WordBoundary => {
1632 let node =
1633 self.rewrite_word_boundary_in_concat(&c.asts, i, translator, tb)?;
1634 children.push(node.0);
1635 i = node.1; continue;
1637 }
1638 _ => {}
1639 }
1640 match concat_translator {
1641 Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
1642 Ok(node_id) => children.push(node_id),
1643 Err(err) => return Err(err),
1644 },
1645 None => match self.ast_to_node_id(ast, translator, tb) {
1646 Ok(node_id) => children.push(node_id),
1647 Err(err) => return Err(err),
1648 },
1649 }
1650 i += 1;
1651 }
1652 Ok(tb.mk_concats(children.iter().cloned()))
1653 }
1654 Ast::Intersection(intersection) => {
1655 let mut children = vec![];
1656 for ast in &intersection.asts {
1657 match self.ast_to_node_id(ast, translator, tb) {
1658 Ok(node_id) => children.push(node_id),
1659 Err(err) => return Err(err),
1660 }
1661 }
1662 Ok(tb.mk_inters(children.into_iter()))
1663 }
1664 Ast::Complement(complement) => {
1665 let body = self.ast_to_node_id(&complement.ast, translator, tb);
1666 body.map(|x| tb.mk_compl(x))
1667 }
1668 }
1669 }
1670
1671 fn parse_inner(&mut self) -> Result<Ast> {
1672 let mut concat = Concat {
1673 span: self.span(),
1674 asts: vec![],
1675 };
1676 loop {
1677 self.bump_space();
1678 if self.is_eof() {
1679 break;
1680 }
1681 match self.char() {
1682 '(' => concat = self.push_group(concat)?,
1683 ')' => concat = self.pop_group(concat)?,
1684 '|' => concat = self.push_alternate(concat)?,
1685 '&' => concat = self.push_intersect(concat)?,
1686 '~' => concat = self.push_compl_group(concat)?,
1687 '[' => {
1688 let class = self.parse_set_class()?;
1689 concat.asts.push(Ast::class_bracketed(class));
1690 }
1691 '?' => {
1692 concat =
1693 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
1694 }
1695 '*' => {
1696 concat =
1697 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
1698 }
1699 '+' => {
1700 concat =
1701 self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
1702 }
1703 '{' => {
1704 concat = self.parse_counted_repetition(concat)?;
1705 }
1706 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1707 }
1708 }
1709 let ast = self.pop_group_end(concat)?;
1710 if expanded_ast_size(&ast, self.expanded_ast_limit) >= self.expanded_ast_limit
1711 || max_concat_length(&ast) >= self.max_list_len
1712 {
1713 return Err(self.error(*ast.span(), ast::ErrorKind::UnsupportedResharpRegex));
1714 }
1715 Ok(ast)
1716 }
1717
1718 fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1719 let ast = self.parse_inner()?;
1720 self.ast_to_node_id(&ast, &mut None, tb)
1721 }
1722
1723 #[inline(never)]
1724 fn parse_uncounted_repetition(
1725 &self,
1726 mut concat: ast::Concat,
1727 kind: ast::RepetitionKind,
1728 ) -> Result<ast::Concat> {
1729 let op_start = self.pos();
1731 let ast = match concat.asts.pop() {
1732 Some(ast) => ast,
1733 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1734 };
1735 match ast {
1736 Ast::Empty(_) | Ast::Flags(_) => {
1737 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1738 }
1739 _ => {}
1740 }
1741 if self.bump() && self.char() == '?' {
1742 return Err(self.error(
1743 Span::new(op_start, self.pos()),
1744 ast::ErrorKind::UnsupportedLazyQuantifier,
1745 ));
1746 }
1747 concat.asts.push(Ast::repetition(ast::Repetition {
1748 span: ast.span().with_end(self.pos()),
1749 op: ast::RepetitionOp {
1750 span: Span::new(op_start, self.pos()),
1751 kind,
1752 },
1753 greedy: true,
1754 ast: Box::new(ast),
1755 }));
1756 Ok(concat)
1757 }
1758
1759 #[inline(never)]
1760 fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
1761 assert!(self.char() == '{');
1762 let start = self.pos();
1763 let ast = match concat.asts.pop() {
1764 Some(ast) => ast,
1765 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1766 };
1767 match ast {
1768 Ast::Empty(_) | Ast::Flags(_) => {
1769 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1770 }
1771 _ => {}
1772 }
1773 if !self.bump_and_bump_space() {
1774 return Err(self.error(
1775 Span::new(start, self.pos()),
1776 ast::ErrorKind::RepetitionCountUnclosed,
1777 ));
1778 }
1779 let count_start = specialize_err(
1780 self.parse_decimal(),
1781 ast::ErrorKind::DecimalEmpty,
1782 ast::ErrorKind::RepetitionCountDecimalEmpty,
1783 );
1784 if self.is_eof() {
1785 return Err(self.error(
1786 Span::new(start, self.pos()),
1787 ast::ErrorKind::RepetitionCountUnclosed,
1788 ));
1789 }
1790 let range = if self.char() == ',' {
1791 if !self.bump_and_bump_space() {
1792 return Err(self.error(
1793 Span::new(start, self.pos()),
1794 ast::ErrorKind::RepetitionCountUnclosed,
1795 ));
1796 }
1797 if self.char() != '}' {
1798 let count_start = match count_start {
1799 Ok(c) => c,
1800 Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
1801 if self.parser().empty_min_range {
1802 0
1803 } else {
1804 return Err(err);
1805 }
1806 }
1807 err => err?,
1808 };
1809 let count_end = specialize_err(
1810 self.parse_decimal(),
1811 ast::ErrorKind::DecimalEmpty,
1812 ast::ErrorKind::RepetitionCountDecimalEmpty,
1813 )?;
1814 ast::RepetitionRange::Bounded(count_start, count_end)
1815 } else {
1816 ast::RepetitionRange::AtLeast(count_start?)
1817 }
1818 } else {
1819 ast::RepetitionRange::Exactly(count_start?)
1820 };
1821
1822 if self.is_eof() || self.char() != '}' {
1823 return Err(self.error(
1824 Span::new(start, self.pos()),
1825 ast::ErrorKind::RepetitionCountUnclosed,
1826 ));
1827 }
1828
1829 if self.bump_and_bump_space() && self.char() == '?' {
1830 return Err(self.error(
1831 Span::new(start, self.pos()),
1832 ast::ErrorKind::UnsupportedLazyQuantifier,
1833 ));
1834 }
1835
1836 let op_span = Span::new(start, self.pos());
1837 if !range.is_valid() {
1838 return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
1839 }
1840
1841 let over_limit = match &range {
1842 ast::RepetitionRange::Exactly(n) => *n > self.max_repeat,
1843 ast::RepetitionRange::AtLeast(n) => *n > self.max_repeat,
1844 ast::RepetitionRange::Bounded(n, m) => {
1845 *n > self.max_repeat || *m > self.max_repeat
1846 }
1847 };
1848 if over_limit {
1849 return Err(self.error(op_span, ast::ErrorKind::UnsupportedResharpRegex));
1850 }
1851 concat.asts.push(Ast::repetition(ast::Repetition {
1852 span: ast.span().with_end(self.pos()),
1853 op: ast::RepetitionOp {
1854 span: op_span,
1855 kind: ast::RepetitionKind::Range(range),
1856 },
1857 greedy: true,
1858 ast: Box::new(ast),
1859 }));
1860 Ok(concat)
1861 }
1862
1863 #[inline(never)]
1864 fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1865 assert_eq!(self.char(), '(');
1866 let open_span = self.span_char();
1867 self.bump();
1868 self.bump_space();
1869 if let Some((ahead, pos)) = self.is_lookaround_prefix() {
1870 let kind = match (pos, ahead) {
1871 (true, true) => LookaroundKind::PositiveLookahead,
1872 (true, false) => LookaroundKind::PositiveLookbehind,
1873 (false, true) => LookaroundKind::NegativeLookahead,
1874 (false, false) => LookaroundKind::NegativeLookbehind,
1875 };
1876 return Ok(Either::Right(ast::Group {
1877 span: open_span,
1878 kind: ast::GroupKind::Lookaround(kind),
1879 ast: Box::new(Ast::empty(self.span())),
1880 }));
1881 }
1882 let inner_span = self.span();
1883 let mut starts_with_p = true;
1884 if self.bump_if("?P<") || {
1885 starts_with_p = false;
1886 self.bump_if("?<")
1887 } {
1888 let capture_index = self.next_capture_index(open_span)?;
1889 let name = self.parse_capture_name(capture_index)?;
1890 Ok(Either::Right(ast::Group {
1891 span: open_span,
1892 kind: ast::GroupKind::CaptureName {
1893 starts_with_p,
1894 name,
1895 },
1896 ast: Box::new(Ast::empty(self.span())),
1897 }))
1898 } else if self.bump_if("?") {
1899 if self.is_eof() {
1900 return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
1901 }
1902 let flags = self.parse_flags()?;
1903 let char_end = self.char();
1904 self.bump();
1905 if char_end == ')' {
1906 if flags.items.is_empty() {
1909 return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
1910 }
1911 Ok(Either::Left(ast::SetFlags {
1912 span: Span {
1913 end: self.pos(),
1914 ..open_span
1915 },
1916 flags,
1917 }))
1918 } else {
1919 assert_eq!(char_end, ':');
1920 Ok(Either::Right(ast::Group {
1921 span: open_span,
1922 kind: ast::GroupKind::NonCapturing(flags),
1923 ast: Box::new(Ast::empty(self.span())),
1924 }))
1925 }
1926 } else {
1927 let capture_index = self.next_capture_index(open_span)?;
1928 Ok(Either::Right(ast::Group {
1929 span: open_span,
1930 kind: ast::GroupKind::CaptureIndex(capture_index),
1931 ast: Box::new(Ast::empty(self.span())),
1932 }))
1933 }
1934 }
1935
1936 #[inline(never)]
1937 fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
1938 if self.is_eof() {
1939 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1940 }
1941 let start = self.pos();
1942 loop {
1943 if self.char() == '>' {
1944 break;
1945 }
1946 if !is_capture_char(self.char(), self.pos() == start) {
1947 return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
1948 }
1949 if !self.bump() {
1950 break;
1951 }
1952 }
1953 let end = self.pos();
1954 if self.is_eof() {
1955 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1956 }
1957 assert_eq!(self.char(), '>');
1958 self.bump();
1959 let name = &self.pattern()[start.offset..end.offset];
1960 if name.is_empty() {
1961 return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
1962 }
1963 let capname = ast::CaptureName {
1964 span: Span::new(start, end),
1965 name: name.to_string(),
1966 index: capture_index,
1967 };
1968 self.add_capture_name(&capname)?;
1969 Ok(capname)
1970 }
1971
1972 #[inline(never)]
1973 fn parse_flags(&self) -> Result<ast::Flags> {
1974 let mut flags = ast::Flags {
1975 span: self.span(),
1976 items: vec![],
1977 };
1978 let mut last_was_negation = None;
1979 while self.char() != ':' && self.char() != ')' {
1980 if self.char() == '-' {
1981 last_was_negation = Some(self.span_char());
1982 let item = ast::FlagsItem {
1983 span: self.span_char(),
1984 kind: ast::FlagsItemKind::Negation,
1985 };
1986 if let Some(i) = flags.add_item(item) {
1987 return Err(self.error(
1988 self.span_char(),
1989 ast::ErrorKind::FlagRepeatedNegation {
1990 original: flags.items[i].span,
1991 },
1992 ));
1993 }
1994 } else {
1995 last_was_negation = None;
1996 let item = ast::FlagsItem {
1997 span: self.span_char(),
1998 kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1999 };
2000 if let Some(i) = flags.add_item(item) {
2001 return Err(self.error(
2002 self.span_char(),
2003 ast::ErrorKind::FlagDuplicate {
2004 original: flags.items[i].span,
2005 },
2006 ));
2007 }
2008 }
2009 if !self.bump() {
2010 return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
2011 }
2012 }
2013 if let Some(span) = last_was_negation {
2014 return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
2015 }
2016 flags.span.end = self.pos();
2017 Ok(flags)
2018 }
2019
2020 #[inline(never)]
2021 fn parse_flag(&self) -> Result<ast::Flag> {
2022 match self.char() {
2023 'i' => Ok(ast::Flag::CaseInsensitive),
2024 'm' => Ok(ast::Flag::MultiLine),
2025 's' => Ok(ast::Flag::DotMatchesNewLine),
2026 'U' => Ok(ast::Flag::SwapGreed),
2027 'u' => Ok(ast::Flag::Unicode),
2028 'R' => Ok(ast::Flag::CRLF),
2029 'x' => Ok(ast::Flag::IgnoreWhitespace),
2030 _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
2031 }
2032 }
2033
2034 fn parse_primitive(&self) -> Result<Primitive> {
2035 match self.char() {
2036 '\\' => self.parse_escape(),
2037 '_' => {
2038 let ast = Primitive::Top(self.span_char());
2039 self.bump();
2040 Ok(ast)
2041 }
2042 '.' => {
2043 let ast = Primitive::Dot(self.span_char());
2044 self.bump();
2045 Ok(ast)
2046 }
2047 '^' => {
2048 let ast = Primitive::Assertion(ast::Assertion {
2049 span: self.span_char(),
2050 kind: ast::AssertionKind::StartLine,
2051 });
2052 self.bump();
2053 Ok(ast)
2054 }
2055 '$' => {
2056 let ast = Primitive::Assertion(ast::Assertion {
2057 span: self.span_char(),
2058 kind: ast::AssertionKind::EndLine,
2059 });
2060 self.bump();
2061 Ok(ast)
2062 }
2063 c => {
2064 let ast = Primitive::Literal(Literal {
2065 span: self.span_char(),
2066 kind: LiteralKind::Verbatim,
2067 c,
2068 });
2069 self.bump();
2070 Ok(ast)
2071 }
2072 }
2073 }
2074
2075 #[inline(never)]
2076 fn parse_escape(&self) -> Result<Primitive> {
2077 assert_eq!(self.char(), '\\');
2078 let start = self.pos();
2079 if !self.bump() {
2080 return Err(self.error(
2081 Span::new(start, self.pos()),
2082 ast::ErrorKind::EscapeUnexpectedEof,
2083 ));
2084 }
2085 let c = self.char();
2086 match c {
2088 '0'..='9' => {
2089 if !self.parser().octal {
2090 return Err(self.error(
2091 Span::new(start, self.span_char().end),
2092 ast::ErrorKind::UnsupportedBackreference,
2093 ));
2094 }
2095 let mut lit = self.parse_octal();
2096 lit.span.start = start;
2097 return Ok(Primitive::Literal(lit));
2098 }
2099 'x' | 'u' | 'U' => {
2106 let mut lit = self.parse_hex()?;
2107 lit.span.start = start;
2108 return Ok(Primitive::Literal(lit));
2109 }
2110 'p' | 'P' => {
2111 let mut cls = self.parse_unicode_class()?;
2112 cls.span.start = start;
2113 return Ok(Primitive::Unicode(cls));
2114 }
2115 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
2116 let mut cls = self.parse_perl_class();
2117 cls.span.start = start;
2118 return Ok(Primitive::Perl(cls));
2119 }
2120 _ => {}
2121 }
2122
2123 self.bump();
2125 let span = Span::new(start, self.pos());
2126 if is_meta_character(c) {
2127 return Ok(Primitive::Literal(Literal {
2128 span,
2129 kind: LiteralKind::Meta,
2130 c,
2131 }));
2132 }
2133 if is_escapeable_character(c) {
2134 return Ok(Primitive::Literal(Literal {
2135 span,
2136 kind: LiteralKind::Superfluous,
2137 c,
2138 }));
2139 }
2140 let special = |kind, c| {
2141 Ok(Primitive::Literal(Literal {
2142 span,
2143 kind: LiteralKind::Special(kind),
2144 c,
2145 }))
2146 };
2147 match c {
2148 'a' => special(SpecialLiteralKind::Bell, '\x07'),
2149 'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
2150 't' => special(SpecialLiteralKind::Tab, '\t'),
2151 'n' => special(SpecialLiteralKind::LineFeed, '\n'),
2152 'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
2153 'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
2154 'A' => Ok(Primitive::Assertion(ast::Assertion {
2155 span,
2156 kind: ast::AssertionKind::StartText,
2157 })),
2158 'z' => Ok(Primitive::Assertion(ast::Assertion {
2159 span,
2160 kind: ast::AssertionKind::EndText,
2161 })),
2162 'b' => {
2163 let mut wb = ast::Assertion {
2164 span,
2165 kind: ast::AssertionKind::WordBoundary,
2166 };
2167 if !self.is_eof() && self.char() == '{' {
2170 if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
2171 wb.kind = kind;
2172 wb.span.end = self.pos();
2173 }
2174 }
2175 Ok(Primitive::Assertion(wb))
2176 }
2177 'B' => Ok(Primitive::Assertion(ast::Assertion {
2178 span,
2179 kind: ast::AssertionKind::NotWordBoundary,
2180 })),
2181 '<' => Ok(Primitive::Assertion(ast::Assertion {
2182 span,
2183 kind: ast::AssertionKind::WordBoundaryStartAngle,
2184 })),
2185 '>' => Ok(Primitive::Assertion(ast::Assertion {
2186 span,
2187 kind: ast::AssertionKind::WordBoundaryEndAngle,
2188 })),
2189 _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2190 }
2191 }
2192
2193 fn maybe_parse_special_word_boundary(
2194 &self,
2195 wb_start: Position,
2196 ) -> Result<Option<ast::AssertionKind>> {
2197 assert_eq!(self.char(), '{');
2198
2199 let is_valid_char = |c| matches!(c, 'A'..='Z' | 'a'..='z' | '-');
2200 let start = self.pos();
2201 if !self.bump_and_bump_space() {
2202 return Err(self.error(
2203 Span::new(wb_start, self.pos()),
2204 ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2205 ));
2206 }
2207 let start_contents = self.pos();
2208 if !is_valid_char(self.char()) {
2209 self.parser().pos.set(start);
2210 return Ok(None);
2211 }
2212
2213 let mut scratch = self.parser().scratch.borrow_mut();
2215 scratch.clear();
2216 while !self.is_eof() && is_valid_char(self.char()) {
2217 scratch.push(self.char());
2218 self.bump_and_bump_space();
2219 }
2220 if self.is_eof() || self.char() != '}' {
2221 return Err(self.error(
2222 Span::new(start, self.pos()),
2223 ast::ErrorKind::SpecialWordBoundaryUnclosed,
2224 ));
2225 }
2226 let end = self.pos();
2227 self.bump();
2228 let kind = match scratch.as_str() {
2229 "start" => ast::AssertionKind::WordBoundaryStart,
2230 "end" => ast::AssertionKind::WordBoundaryEnd,
2231 "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2232 "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2233 _ => {
2234 return Err(self.error(
2235 Span::new(start_contents, end),
2236 ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2237 ))
2238 }
2239 };
2240 Ok(Some(kind))
2241 }
2242
2243 #[inline(never)]
2244 fn parse_octal(&self) -> Literal {
2245 assert!(self.parser().octal);
2246 assert!('0' <= self.char() && self.char() <= '7');
2247 let start = self.pos();
2248 while self.bump()
2250 && '0' <= self.char()
2251 && self.char() <= '7'
2252 && self.pos().offset - start.offset <= 2
2253 {}
2254 let end = self.pos();
2255 let octal = &self.pattern()[start.offset..end.offset];
2256 let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2259 let c = char::from_u32(codepoint).expect("Unicode scalar value");
2262 Literal {
2263 span: Span::new(start, end),
2264 kind: LiteralKind::Octal,
2265 c,
2266 }
2267 }
2268
2269 #[inline(never)]
2270 fn parse_hex(&self) -> Result<Literal> {
2271 assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2272
2273 let hex_kind = match self.char() {
2274 'x' => HexLiteralKind::X,
2275 'u' => HexLiteralKind::UnicodeShort,
2276 _ => HexLiteralKind::UnicodeLong,
2277 };
2278 if !self.bump_and_bump_space() {
2279 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2280 }
2281 if self.char() == '{' {
2282 self.parse_hex_brace(hex_kind)
2283 } else {
2284 self.parse_hex_digits(hex_kind)
2285 }
2286 }
2287
2288 #[inline(never)]
2289 fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2290 let mut scratch = self.parser().scratch.borrow_mut();
2291 scratch.clear();
2292
2293 let start = self.pos();
2294 for i in 0..kind.digits() {
2295 if i > 0 && !self.bump_and_bump_space() {
2296 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2297 }
2298 if !is_hex(self.char()) {
2299 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2300 }
2301 scratch.push(self.char());
2302 }
2303 self.bump_and_bump_space();
2304 let end = self.pos();
2305 let hex = scratch.as_str();
2306 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2307 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2308 Some(c) => Ok(Literal {
2309 span: Span::new(start, end),
2310 kind: LiteralKind::HexFixed(kind),
2311 c,
2312 }),
2313 }
2314 }
2315
2316 #[inline(never)]
2317 fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2318 let mut scratch = self.parser().scratch.borrow_mut();
2319 scratch.clear();
2320
2321 let brace_pos = self.pos();
2322 let start = self.span_char().end;
2323 while self.bump_and_bump_space() && self.char() != '}' {
2324 if !is_hex(self.char()) {
2325 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2326 }
2327 scratch.push(self.char());
2328 }
2329 if self.is_eof() {
2330 return Err(self.error(
2331 Span::new(brace_pos, self.pos()),
2332 ast::ErrorKind::EscapeUnexpectedEof,
2333 ));
2334 }
2335 let end = self.pos();
2336 let hex = scratch.as_str();
2337 assert_eq!(self.char(), '}');
2338 self.bump_and_bump_space();
2339
2340 if hex.is_empty() {
2341 return Err(self.error(
2342 Span::new(brace_pos, self.pos()),
2343 ast::ErrorKind::EscapeHexEmpty,
2344 ));
2345 }
2346 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2347 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2348 Some(c) => Ok(Literal {
2349 span: Span::new(start, self.pos()),
2350 kind: LiteralKind::HexBrace(kind),
2351 c,
2352 }),
2353 }
2354 }
2355
2356 fn parse_decimal(&self) -> Result<u32> {
2357 let mut scratch = self.parser().scratch.borrow_mut();
2358 scratch.clear();
2359
2360 while !self.is_eof() && self.char().is_whitespace() {
2361 self.bump();
2362 }
2363 let start = self.pos();
2364 while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2365 scratch.push(self.char());
2366 self.bump_and_bump_space();
2367 }
2368 let span = Span::new(start, self.pos());
2369 while !self.is_eof() && self.char().is_whitespace() {
2370 self.bump_and_bump_space();
2371 }
2372 let digits = scratch.as_str();
2373 if digits.is_empty() {
2374 return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2375 }
2376 match digits.parse::<u32>().ok() {
2377 Some(n) => Ok(n),
2378 None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2379 }
2380 }
2381
2382 #[inline(never)]
2383 fn parse_set_class(&self) -> Result<ClassBracketed> {
2384 assert_eq!(self.char(), '[');
2385
2386 let mut union = ClassSetUnion {
2387 span: self.span(),
2388 items: vec![],
2389 };
2390 loop {
2391 self.bump_space();
2392 if self.is_eof() {
2393 return Err(self.unclosed_class_error());
2394 }
2395 match self.char() {
2396 '[' => {
2397 if !self.parser().stack_class.borrow().is_empty() {
2398 if let Some(cls) = self.maybe_parse_ascii_class() {
2399 union.push(ClassSetItem::Ascii(cls));
2400 continue;
2401 }
2402 }
2403 union = self.push_class_open(union)?;
2404 }
2405 ']' => match self.pop_class(union)? {
2406 Either::Left(nested_union) => {
2407 union = nested_union;
2408 }
2409 Either::Right(class) => return Ok(class),
2410 },
2411 '&' if self.peek() == Some('&') => {
2412 assert!(self.bump_if("&&"));
2413 union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
2414 }
2415 '-' if self.peek() == Some('-') => {
2416 assert!(self.bump_if("--"));
2417 union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
2418 }
2419 '~' if self.peek() == Some('~') => {
2420 assert!(self.bump_if("~~"));
2421 union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
2422 }
2423 _ => {
2424 union.push(self.parse_set_class_range()?);
2425 }
2426 }
2427 }
2428 }
2429
2430 #[inline(never)]
2431 fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2432 let prim1 = self.parse_set_class_item()?;
2433 self.bump_space();
2434 if self.is_eof() {
2435 return Err(self.unclosed_class_error());
2436 }
2437 if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2438 return prim1.into_class_set_item(self);
2439 }
2440 if !self.bump_and_bump_space() {
2441 return Err(self.unclosed_class_error());
2442 }
2443 let prim2 = self.parse_set_class_item()?;
2444 let range = ClassSetRange {
2445 span: Span::new(prim1.span().start, prim2.span().end),
2446 start: prim1.into_class_literal(self)?,
2447 end: prim2.into_class_literal(self)?,
2448 };
2449 if !range.is_valid() {
2450 return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2451 }
2452 Ok(ClassSetItem::Range(range))
2453 }
2454
2455 #[inline(never)]
2456 fn parse_set_class_item(&self) -> Result<Primitive> {
2457 if self.char() == '\\' {
2458 self.parse_escape()
2459 } else {
2460 let x = Primitive::Literal(Literal {
2461 span: self.span_char(),
2462 kind: LiteralKind::Verbatim,
2463 c: self.char(),
2464 });
2465 self.bump();
2466 Ok(x)
2467 }
2468 }
2469
2470 #[inline(never)]
2471 fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2472 assert_eq!(self.char(), '[');
2473 let start = self.pos();
2474 if !self.bump_and_bump_space() {
2475 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2476 }
2477
2478 let negated = if self.char() != '^' {
2479 false
2480 } else {
2481 if !self.bump_and_bump_space() {
2482 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2483 }
2484 true
2485 };
2486 let mut union = ClassSetUnion {
2488 span: self.span(),
2489 items: vec![],
2490 };
2491 while self.char() == '-' {
2492 union.push(ClassSetItem::Literal(Literal {
2493 span: self.span_char(),
2494 kind: LiteralKind::Verbatim,
2495 c: '-',
2496 }));
2497 if !self.bump_and_bump_space() {
2498 return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
2499 }
2500 }
2501 if union.items.is_empty() && self.char() == ']' {
2504 union.push(ClassSetItem::Literal(Literal {
2505 span: self.span_char(),
2506 kind: LiteralKind::Verbatim,
2507 c: ']',
2508 }));
2509 if !self.bump_and_bump_space() {
2510 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2511 }
2512 }
2513 let set = ClassBracketed {
2514 span: Span::new(start, self.pos()),
2515 negated,
2516 kind: ClassSet::union(ClassSetUnion {
2517 span: Span::new(union.span.start, union.span.start),
2518 items: vec![],
2519 }),
2520 };
2521 Ok((set, union))
2522 }
2523
2524 #[inline(never)]
2525 fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
2526 assert_eq!(self.char(), '[');
2527 let start = self.pos();
2529 let mut negated = false;
2530 if !self.bump() || self.char() != ':' {
2531 self.parser().pos.set(start);
2532 return None;
2533 }
2534 if !self.bump() {
2535 self.parser().pos.set(start);
2536 return None;
2537 }
2538 if self.char() == '^' {
2539 negated = true;
2540 if !self.bump() {
2541 self.parser().pos.set(start);
2542 return None;
2543 }
2544 }
2545 let name_start = self.offset();
2546 while self.char() != ':' && self.bump() {}
2547 if self.is_eof() {
2548 self.parser().pos.set(start);
2549 return None;
2550 }
2551 let name = &self.pattern()[name_start..self.offset()];
2552 if !self.bump_if(":]") {
2553 self.parser().pos.set(start);
2554 return None;
2555 }
2556 let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
2557 Some(kind) => kind,
2558 None => {
2559 self.parser().pos.set(start);
2560 return None;
2561 }
2562 };
2563 Some(ClassAscii {
2564 span: Span::new(start, self.pos()),
2565 kind,
2566 negated,
2567 })
2568 }
2569
2570 #[inline(never)]
2571 fn parse_unicode_class(&self) -> Result<ClassUnicode> {
2572 assert!(self.char() == 'p' || self.char() == 'P');
2573
2574 let mut scratch = self.parser().scratch.borrow_mut();
2575 scratch.clear();
2576
2577 let negated = self.char() == 'P';
2578 if !self.bump_and_bump_space() {
2579 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2580 }
2581 let (start, kind) = if self.char() == '{' {
2582 let start = self.span_char().end;
2583 while self.bump_and_bump_space() && self.char() != '}' {
2584 scratch.push(self.char());
2585 }
2586 if self.is_eof() {
2587 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2588 }
2589 assert_eq!(self.char(), '}');
2590 self.bump();
2591
2592 let name = scratch.as_str();
2593 if let Some(i) = name.find("!=") {
2594 (
2595 start,
2596 ClassUnicodeKind::NamedValue {
2597 op: ClassUnicodeOpKind::NotEqual,
2598 name: name[..i].to_string(),
2599 value: name[i + 2..].to_string(),
2600 },
2601 )
2602 } else if let Some(i) = name.find(':') {
2603 (
2604 start,
2605 ClassUnicodeKind::NamedValue {
2606 op: ClassUnicodeOpKind::Colon,
2607 name: name[..i].to_string(),
2608 value: name[i + 1..].to_string(),
2609 },
2610 )
2611 } else if let Some(i) = name.find('=') {
2612 (
2613 start,
2614 ClassUnicodeKind::NamedValue {
2615 op: ClassUnicodeOpKind::Equal,
2616 name: name[..i].to_string(),
2617 value: name[i + 1..].to_string(),
2618 },
2619 )
2620 } else {
2621 (start, ClassUnicodeKind::Named(name.to_string()))
2622 }
2623 } else {
2624 let start = self.pos();
2625 let c = self.char();
2626 if c == '\\' {
2627 return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
2628 }
2629 self.bump_and_bump_space();
2630 let kind = ClassUnicodeKind::OneLetter(c);
2631 (start, kind)
2632 };
2633 Ok(ClassUnicode {
2634 span: Span::new(start, self.pos()),
2635 negated,
2636 kind,
2637 })
2638 }
2639
2640 #[inline(never)]
2641 fn parse_perl_class(&self) -> ClassPerl {
2642 let c = self.char();
2643 let span = self.span_char();
2644 self.bump();
2645 let (negated, kind) = match c {
2646 'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
2647 'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
2648 's' => (false, regex_syntax::ast::ClassPerlKind::Space),
2649 'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
2650 'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
2651 'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
2652 c => panic!("expected valid Perl class but got '{}'", c),
2653 };
2654 ClassPerl {
2655 span,
2656 kind,
2657 negated,
2658 }
2659 }
2660}
2661
2662fn is_universal_perl_pair(item: ®ex_syntax::ast::ClassSetItem) -> bool {
2665 use regex_syntax::ast::ClassSetItem;
2666 let items = match item {
2667 ClassSetItem::Union(u) => &u.items,
2668 _ => return false,
2669 };
2670 if items.len() != 2 {
2671 return false;
2672 }
2673 match (&items[0], &items[1]) {
2674 (ClassSetItem::Perl(a), ClassSetItem::Perl(b)) => {
2675 let is_all = a.kind == b.kind && a.negated != b.negated;
2676 is_all
2677 }
2678 _ => false,
2679 }
2680}
2681
2682pub fn max_concat_length(ast: &ast::Ast) -> usize {
2683 match ast {
2684 ast::Ast::Empty(_)
2685 | ast::Ast::Flags(_)
2686 | ast::Ast::Literal(_)
2687 | ast::Ast::Dot(_)
2688 | ast::Ast::Top(_)
2689 | ast::Ast::Assertion(_)
2690 | ast::Ast::ClassUnicode(_)
2691 | ast::Ast::ClassPerl(_)
2692 | ast::Ast::ClassBracketed(_) => 0,
2693 ast::Ast::Group(g) => max_concat_length(&g.ast),
2694 ast::Ast::Complement(c) => max_concat_length(&c.ast),
2695 ast::Ast::Lookaround(l) => max_concat_length(&l.ast),
2696 ast::Ast::Repetition(r) => max_concat_length(&r.ast),
2697 ast::Ast::Concat(c) => c
2698 .asts
2699 .len()
2700 .max(c.asts.iter().map(max_concat_length).max().unwrap_or(0)),
2701 ast::Ast::Alternation(a) => a.asts.iter().map(max_concat_length).max().unwrap_or(0),
2702 ast::Ast::Intersection(i) => i.asts.iter().map(max_concat_length).max().unwrap_or(0),
2703 }
2704}
2705
2706pub fn expanded_ast_size(ast: &ast::Ast, limit: u64) -> u64 {
2707 fn go(ast: &ast::Ast, limit: u64) -> u64 {
2708 match ast {
2709 ast::Ast::Empty(_) | ast::Ast::Flags(_) => 1,
2710 ast::Ast::Literal(_) | ast::Ast::Dot(_) | ast::Ast::Top(_) => 1,
2711 ast::Ast::Assertion(_) => 1,
2712 ast::Ast::ClassUnicode(_) | ast::Ast::ClassPerl(_) | ast::Ast::ClassBracketed(_) => 1,
2713 ast::Ast::Group(g) => go(&g.ast, limit).saturating_add(1).min(limit),
2714 ast::Ast::Complement(c) => go(&c.ast, limit).saturating_add(1).min(limit),
2715 ast::Ast::Lookaround(l) => go(&l.ast, limit).saturating_add(1).min(limit),
2716 ast::Ast::Concat(c) => sum_children(&c.asts, limit),
2717 ast::Ast::Alternation(a) => sum_children(&a.asts, limit),
2718 ast::Ast::Intersection(i) => sum_children(&i.asts, limit),
2719 ast::Ast::Repetition(r) => {
2720 let body = go(&r.ast, limit);
2721 let factor: u64 = match &r.op.kind {
2722 ast::RepetitionKind::ZeroOrOne => 2,
2723 ast::RepetitionKind::ZeroOrMore | ast::RepetitionKind::OneOrMore => 2,
2724 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(n)) => {
2725 (*n as u64).max(1)
2726 }
2727 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(n)) => {
2728 (*n as u64).max(1).saturating_add(1)
2729 }
2730 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(_, m)) => {
2731 (*m as u64).max(1)
2732 }
2733 };
2734 body.saturating_mul(factor).min(limit)
2735 }
2736 }
2737 }
2738 fn sum_children(children: &[ast::Ast], limit: u64) -> u64 {
2739 let mut total: u64 = 0;
2740 for c in children {
2741 total = total.saturating_add(go(c, limit));
2742 if total >= limit {
2743 return limit;
2744 }
2745 }
2746 total
2747 }
2748 go(ast, limit)
2749}
2750
2751pub fn parse_ast<'s>(tb: &mut TB<'s>, pattern: &'s str) -> std::result::Result<NodeId, ParseError> {
2752 let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
2753 p.parse(tb)
2754}
2755
2756pub fn parse_ast_with<'s>(
2757 tb: &mut TB<'s>,
2758 pattern: &'s str,
2759 flags: &PatternFlags,
2760) -> std::result::Result<NodeId, ParseError> {
2761 let mut p: ResharpParser<'s> = ResharpParser::with_flags(pattern, flags);
2762 p.parse(tb)
2763}
2764
2765pub fn parse_to_ast(pattern: &str) -> std::result::Result<ast::Ast, ParseError> {
2767 let mut p: ResharpParser = ResharpParser::new(pattern);
2768 p.parse_inner()
2769}