1#![warn(dead_code)]
6pub mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind};
10use regex_syntax::{
11 ast::{
12 ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13 ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14 HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15 },
16 hir::{
17 self,
18 translate::{Translator, TranslatorBuilder},
19 },
20 utf8::Utf8Sequences,
21};
22use resharp_algebra::NodeId;
23
/// Shorthand for the regex node builder from `resharp_algebra`.
///
/// The `'s` lifetime parameter does not appear in the aliased type.
type TB<'s> = resharp_algebra::RegexBuilder;
25
/// Options controlling how a pattern is parsed and translated.
///
/// See the `Default` implementation for the default values.
pub struct PatternFlags {
    /// Enables Unicode mode on the `regex_syntax` translator.
    pub unicode: bool,
    /// Selects the "full" variants of the cached Unicode `\w`, `\d` and `\s`
    /// classes when Perl classes are resolved.
    pub full_unicode: bool,
    /// Enables case-insensitive translation.
    pub case_insensitive: bool,
    /// Initial value of the parser's dot-all state.
    pub dot_matches_new_line: bool,
    /// Initial value of the parser's multiline state.
    pub multiline: bool,
    /// Initial value of the whitespace-insensitive mode, in which whitespace
    /// and `#` comments in the pattern are skipped.
    pub ignore_whitespace: bool,
    /// Builds `\w`, `\d` and `\s` from ASCII ranges only.
    pub ascii_perl_classes: bool,
    /// NOTE(review): presumably an upper bound on the size of the expanded
    /// AST; the enforcement site is outside this view.
    pub expanded_ast_limit: u64,
    /// NOTE(review): presumably an upper bound on list lengths; the
    /// enforcement site is outside this view.
    pub max_list_len: usize,
    /// NOTE(review): presumably the largest accepted repetition count; the
    /// enforcement site is outside this view.
    pub max_repeat: u32,
}
52
/// Default for [`PatternFlags::max_repeat`].
pub const DEFAULT_MAX_REPEAT: u32 = 500;
/// Default for [`PatternFlags::expanded_ast_limit`].
pub const DEFAULT_EXPANDED_AST_LIMIT: u64 = 50_000;
/// Default for [`PatternFlags::max_list_len`].
pub const DEFAULT_MAX_LIST_LEN: usize = 4_000;
58
59impl Default for PatternFlags {
60 fn default() -> Self {
61 Self {
62 unicode: true,
63 full_unicode: false,
64 case_insensitive: false,
65 dot_matches_new_line: false,
66 multiline: true,
67 ignore_whitespace: false,
68 ascii_perl_classes: false,
69 expanded_ast_limit: DEFAULT_EXPANDED_AST_LIMIT,
70 max_list_len: DEFAULT_MAX_LIST_LEN,
71 max_repeat: DEFAULT_MAX_REPEAT,
72 }
73 }
74}
75
/// Classification of the character found at the edge of a sub-expression,
/// as computed by the word-boundary analysis.
#[derive(Clone, Copy, PartialEq, Debug)]
enum WordCharKind {
    /// The edge character is a word character.
    Word,
    /// The edge character is not a word character.
    NonWord,
    /// A nullable repetition of word characters: a word character if the
    /// repetition matches anything at all.
    MaybeWord,
    /// A nullable repetition of non-word characters.
    MaybeNonWord,
    /// Could not be decided by the syntactic check.
    Unknown,
    /// There is no neighbour in the requested direction.
    Edge,
}
85
/// Returns `true` for ASCII letters, ASCII digits and `_`.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
89
/// A single parsed atom, before it is turned into an [`Ast`] node or a
/// character-class item.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal character.
    Literal(Literal),
    /// An assertion.
    Assertion(ast::Assertion),
    /// Converted with `Ast::dot`; carries only its span.
    Dot(Span),
    /// Converted with `Ast::top`; carries only its span.
    Top(Span),
    /// A Perl class (word, digit or space kind).
    Perl(ClassPerl),
    /// A Unicode class.
    Unicode(ClassUnicode),
}
99
100impl Primitive {
101 fn span(&self) -> &Span {
102 match *self {
103 Primitive::Literal(ref x) => &x.span,
104 Primitive::Assertion(ref x) => &x.span,
105 Primitive::Dot(ref span) => span,
106 Primitive::Top(ref span) => span,
107 Primitive::Perl(ref x) => &x.span,
108 Primitive::Unicode(ref x) => &x.span,
109 }
110 }
111
112 fn into_ast(self) -> Ast {
113 match self {
114 Primitive::Literal(lit) => Ast::literal(lit),
115 Primitive::Assertion(assert) => Ast::assertion(assert),
116 Primitive::Dot(span) => Ast::dot(span),
117 Primitive::Top(span) => Ast::top(span),
118 Primitive::Perl(cls) => Ast::class_perl(cls),
119 Primitive::Unicode(cls) => Ast::class_unicode(cls),
120 }
121 }
122
123 fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
124 use self::Primitive::*;
125 use regex_syntax::ast::ClassSetItem;
126
127 match self {
128 Literal(lit) => Ok(ClassSetItem::Literal(lit)),
129 Perl(cls) => Ok(ClassSetItem::Perl(cls)),
130 Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
131 x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
132 }
133 }
134
135 fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
136 use self::Primitive::*;
137
138 match self {
139 Literal(lit) => Ok(lit),
140 x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
141 }
142 }
143}
144
/// A value that is one of two alternatives.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Either<Left, Right> {
    /// The first alternative.
    Left(Left),
    /// The second alternative.
    Right(Right),
}
150
/// An error produced while parsing a pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseError {
    /// What went wrong.
    pub kind: ErrorKind,
    /// Copy of the full pattern that was being parsed.
    pattern: String,
    /// Location in the pattern that the error refers to.
    pub span: Span,
}
161
162impl std::fmt::Display for ParseError {
163 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
164 write!(f, "{:?}: {:?}", self.kind, self.span)
165 }
166}
167impl std::error::Error for ParseError {}
168
/// Result alias whose error type is always [`ParseError`].
type Result<T> = core::result::Result<T, ParseError>;
170
/// One entry of the parser's group stack.
#[derive(Clone, Debug)]
enum GroupState {
    /// An opened group whose closing `)` has not been seen yet.
    Group {
        /// The concatenation that was being built before the group opened.
        concat: Concat,
        /// The group itself; its inner AST is filled in when it is closed.
        group: ast::Group,
        /// Whitespace-insensitive setting to restore when the group closes.
        ignore_whitespace: bool,
    },
    /// The branches of a `|` alternation collected so far.
    Alternation(ast::Alternation),
    /// The branches of a `&` intersection collected so far.
    Intersection(ast::Intersection),
}
185
186#[derive(Clone, Debug)]
187enum ClassState {
188 Open {
190 union: regex_syntax::ast::ClassSetUnion,
192 set: regex_syntax::ast::ClassBracketed,
193 },
194 Op {
197 kind: regex_syntax::ast::ClassSetBinaryOpKind,
199 lhs: regex_syntax::ast::ClassSet,
201 },
202}
203
/// Parser state for a single pattern.
///
/// Most of the state lives in `Cell`/`RefCell` so the parsing helpers can
/// work through a shared reference.
pub struct ResharpParser<'s> {
    /// Cache of already built Perl classes, keyed by (negated, kind).
    perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
    /// Cache of Unicode word/digit/space classes.
    unicode_classes: resharp_algebra::UnicodeClassCache,
    /// Translator used to turn `regex_syntax` ASTs into HIR.
    pub translator: regex_syntax::hir::translate::Translator,
    /// The pattern being parsed.
    pub pattern: &'s str,
    /// Current position in the pattern.
    pos: Cell<Position>,
    /// Index handed out to the most recently numbered capture group.
    capture_index: Cell<u32>,
    /// Initialised to `false`; NOTE(review): presumably toggles octal escapes, its use is outside this view.
    octal: bool,
    /// Initialised to `false`; NOTE(review): presumably allows `{,n}` repetitions, its use is outside this view.
    empty_min_range: bool,
    /// Whether whitespace and `#` comments are currently skipped.
    ignore_whitespace: Cell<bool>,
    /// Current dot-all state, seeded from `PatternFlags::dot_matches_new_line`.
    dot_all: Cell<bool>,
    /// Current multiline state, seeded from `PatternFlags::multiline`.
    multiline: Cell<bool>,
    /// True when any of `unicode`, `full_unicode` or `ascii_perl_classes` was set.
    global_unicode: bool,
    /// Copy of `PatternFlags::full_unicode`.
    global_full_unicode: bool,
    /// Copy of `PatternFlags::ascii_perl_classes`.
    global_ascii_perl: bool,
    /// Copy of `PatternFlags::case_insensitive`.
    global_case_insensitive: bool,
    /// Copy of `PatternFlags::expanded_ast_limit`.
    expanded_ast_limit: u64,
    /// Copy of `PatternFlags::max_list_len`.
    max_list_len: usize,
    /// Copy of `PatternFlags::max_repeat`.
    max_repeat: u32,
    /// Comments collected while skipping whitespace.
    comments: RefCell<Vec<ast::Comment>>,
    /// Stack of open groups, alternations and intersections.
    stack_group: RefCell<Vec<GroupState>>,
    /// Stack of open character classes and pending set operators.
    stack_class: RefCell<Vec<ClassState>>,
    /// Capture names seen so far, kept sorted by name.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// Reusable string buffer; its use is outside this view.
    scratch: RefCell<String>,
}
230
231fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
232 result.map_err(|e| {
233 if e.kind == from {
234 ParseError {
235 kind: to,
236 pattern: e.pattern,
237 span: e.span,
238 }
239 } else {
240 e
241 }
242 })
243}
244
/// Returns whether `c` may appear in a capture group name.
///
/// The first character must be `_` or alphabetic; later characters may also
/// be `.`, `[`, `]` or any alphanumeric character.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
252
/// Returns `true` if `c` has a special meaning in the pattern syntax and is
/// therefore escaped by [`escape`].
pub fn is_meta_character(c: char) -> bool {
    // Every character that `escape_into` prefixes with a backslash.
    const META_CHARACTERS: &str = "\\.+*?()|[]{}^$#&-~_";
    META_CHARACTERS.contains(c)
}
276
277pub fn escape(text: &str) -> String {
279 let mut buf = String::new();
280 escape_into(text, &mut buf);
281 buf
282}
283
284pub fn escape_into(text: &str, buf: &mut String) {
286 buf.reserve(text.len());
287 for c in text.chars() {
288 if is_meta_character(c) {
289 buf.push('\\');
290 }
291 buf.push(c);
292 }
293}
294
295pub fn is_escapeable_character(c: char) -> bool {
296 if is_meta_character(c) {
297 return true;
298 }
299 if !c.is_ascii() {
300 return false;
301 }
302 match c {
303 '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
304 '<' | '>' => false,
305 _ => true,
306 }
307}
308
/// Returns `true` if `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
312
313impl<'s> ResharpParser<'s> {
314 fn default_translator_builder(&self) -> TranslatorBuilder {
315 let mut trb = TranslatorBuilder::new();
316 trb.unicode(self.global_unicode);
317 trb.utf8(false);
318 trb.case_insensitive(self.global_case_insensitive);
319 trb
320 }
321
322 pub fn new(pattern: &'s str) -> Self {
323 Self::with_flags(pattern, &PatternFlags::default())
324 }
325
326 pub fn with_flags(pattern: &'s str, flags: &PatternFlags) -> Self {
327 let mut trb = TranslatorBuilder::new();
328 trb.unicode(flags.unicode);
329 trb.utf8(false);
330 trb.case_insensitive(flags.case_insensitive);
331 Self {
332 translator: trb.build(),
333 pattern,
334 perl_classes: vec![],
335 unicode_classes: resharp_algebra::UnicodeClassCache::default(),
336 pos: Cell::new(Position::new(0, 0, 0)),
337 capture_index: Cell::new(0),
338 octal: false,
339 empty_min_range: false,
340 ignore_whitespace: Cell::new(flags.ignore_whitespace),
341 dot_all: Cell::new(flags.dot_matches_new_line),
342 multiline: Cell::new(flags.multiline),
343 global_unicode: flags.unicode || flags.full_unicode || flags.ascii_perl_classes,
344 global_full_unicode: flags.full_unicode,
345 global_ascii_perl: flags.ascii_perl_classes,
346 global_case_insensitive: flags.case_insensitive,
347 expanded_ast_limit: flags.expanded_ast_limit,
348 max_list_len: flags.max_list_len,
349 max_repeat: flags.max_repeat,
350 comments: RefCell::new(vec![]),
351 stack_group: RefCell::new(vec![]),
352 stack_class: RefCell::new(vec![]),
353 capture_names: RefCell::new(vec![]),
354 scratch: RefCell::new(String::new()),
355 }
356 }
357
/// Returns the parser itself.
fn parser(&'_ self) -> &'_ ResharpParser<'_> {
    self
}
361
/// Returns the pattern being parsed.
fn pattern(&self) -> &str {
    self.pattern
}
365
366 fn error(&self, span: Span, kind: ast::ErrorKind) -> ParseError {
367 ParseError {
368 kind,
369 pattern: self.pattern().to_string(),
370 span,
371 }
372 }
373
374 fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ParseError {
375 self.error(
376 Span::splat(self.pos()),
377 ast::ErrorKind::UnsupportedResharpRegex,
378 )
379 }
380
381 fn offset(&self) -> usize {
382 self.parser().pos.get().offset
383 }
384
385 fn line(&self) -> usize {
386 self.parser().pos.get().line
387 }
388
389 fn column(&self) -> usize {
390 self.parser().pos.get().column
391 }
392
393 fn next_capture_index(&self, span: Span) -> Result<u32> {
394 let current = self.parser().capture_index.get();
395 let i = current
396 .checked_add(1)
397 .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
398 self.parser().capture_index.set(i);
399 Ok(i)
400 }
401
402 fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
403 let mut names = self.parser().capture_names.borrow_mut();
404 match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
405 Err(i) => {
406 names.insert(i, cap.clone());
407 Ok(())
408 }
409 Ok(i) => Err(self.error(
410 cap.span,
411 ast::ErrorKind::GroupNameDuplicate {
412 original: names[i].span,
413 },
414 )),
415 }
416 }
417
418 fn ignore_whitespace(&self) -> bool {
419 self.parser().ignore_whitespace.get()
420 }
421
422 fn char(&self) -> char {
423 self.char_at(self.offset())
424 }
425
426 fn char_at(&self, i: usize) -> char {
427 self.pattern()[i..]
428 .chars()
429 .next()
430 .unwrap_or_else(|| panic!("expected char at offset {}", i))
431 }
432
433 fn bump(&self) -> bool {
434 if self.is_eof() {
435 return false;
436 }
437 let Position {
438 mut offset,
439 mut line,
440 mut column,
441 } = self.pos();
442 if self.char() == '\n' {
443 line = line.checked_add(1).unwrap();
444 column = 1;
445 } else {
446 column = column.checked_add(1).unwrap();
447 }
448 offset += self.char().len_utf8();
449 self.parser().pos.set(Position {
450 offset,
451 line,
452 column,
453 });
454 self.pattern()[self.offset()..].chars().next().is_some()
455 }
456
457 fn bump_if(&self, prefix: &str) -> bool {
458 if self.pattern()[self.offset()..].starts_with(prefix) {
459 for _ in 0..prefix.chars().count() {
460 self.bump();
461 }
462 true
463 } else {
464 false
465 }
466 }
467
468 fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
469 if self.bump_if("?=") {
470 return Some((true, true));
471 }
472 if self.bump_if("?!") {
473 return Some((true, false));
474 }
475 if self.bump_if("?<=") {
476 return Some((false, true));
477 }
478 if self.bump_if("?<!") {
479 return Some((false, false));
480 }
481 None
482 }
483
484 fn bump_and_bump_space(&self) -> bool {
485 if !self.bump() {
486 return false;
487 }
488 self.bump_space();
489 !self.is_eof()
490 }
491
492 fn bump_space(&self) {
493 if !self.ignore_whitespace() {
494 return;
495 }
496 while !self.is_eof() {
497 if self.char().is_whitespace() {
498 self.bump();
499 } else if self.char() == '#' {
500 let start = self.pos();
501 let mut comment_text = String::new();
502 self.bump();
503 while !self.is_eof() {
504 let c = self.char();
505 self.bump();
506 if c == '\n' {
507 break;
508 }
509 comment_text.push(c);
510 }
511 let comment = ast::Comment {
512 span: Span::new(start, self.pos()),
513 comment: comment_text,
514 };
515 self.parser().comments.borrow_mut().push(comment);
516 } else {
517 break;
518 }
519 }
520 }
521
522 fn peek(&self) -> Option<char> {
523 if self.is_eof() {
524 return None;
525 }
526 self.pattern()[self.offset() + self.char().len_utf8()..]
527 .chars()
528 .next()
529 }
530
531 fn peek_space(&self) -> Option<char> {
534 if !self.ignore_whitespace() {
535 return self.peek();
536 }
537 if self.is_eof() {
538 return None;
539 }
540 let mut start = self.offset() + self.char().len_utf8();
541 let mut in_comment = false;
542 for (i, c) in self.pattern()[start..].char_indices() {
543 if c.is_whitespace() {
544 continue;
545 } else if !in_comment && c == '#' {
546 in_comment = true;
547 } else if in_comment && c == '\n' {
548 in_comment = false;
549 } else {
550 start += i;
551 break;
552 }
553 }
554 self.pattern()[start..].chars().next()
555 }
556
557 fn is_eof(&self) -> bool {
558 self.offset() == self.pattern().len()
559 }
560
561 fn pos(&self) -> Position {
562 self.parser().pos.get()
563 }
564
565 fn span(&self) -> Span {
566 Span::splat(self.pos())
567 }
568
569 fn span_char(&self) -> Span {
570 let mut next = Position {
571 offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
572 line: self.line(),
573 column: self.column().checked_add(1).unwrap(),
574 };
575 if self.char() == '\n' {
576 next.line += 1;
577 next.column = 1;
578 }
579 Span::new(self.pos(), next)
580 }
581
582 #[inline(never)]
583 fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
584 assert_eq!(self.char(), '|');
585 concat.span.end = self.pos();
586 self.push_or_add_alternation(concat);
587 self.bump();
588 Ok(ast::Concat {
589 span: self.span(),
590 asts: vec![],
591 })
592 }
593
594 fn push_or_add_alternation(&self, concat: Concat) {
595 use self::GroupState::*;
596
597 let mut stack = self.parser().stack_group.borrow_mut();
598 if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
599 alts.asts.push(concat.into_ast());
600 return;
601 }
602 stack.push(Alternation(ast::Alternation {
603 span: Span::new(concat.span.start, self.pos()),
604 asts: vec![concat.into_ast()],
605 }));
606 }
607
608 #[inline(never)]
609 fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
610 assert_eq!(self.char(), '&');
611 concat.span.end = self.pos();
612 self.push_or_add_intersect(concat);
613 self.bump();
614 Ok(Concat {
615 span: self.span(),
616 asts: vec![],
617 })
618 }
619
620 fn push_or_add_intersect(&self, concat: Concat) {
621 use self::GroupState::*;
622
623 let mut stack = self.parser().stack_group.borrow_mut();
624 if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
625 alts.asts.push(concat.into_ast());
626 return;
627 }
628 stack.push(Intersection(ast::Intersection {
629 span: Span::new(concat.span.start, self.pos()),
630 asts: vec![concat.into_ast()],
631 }));
632 }
633
634 #[inline(never)]
635 fn push_group(&self, mut concat: Concat) -> Result<Concat> {
636 assert_eq!(self.char(), '(');
637 match self.parse_group()? {
638 Either::Left(set) => {
639 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
640 if let Some(v) = ignore {
641 self.parser().ignore_whitespace.set(v);
642 }
643
644 concat.asts.push(Ast::flags(set));
645 Ok(concat)
646 }
647 Either::Right(group) => {
648 let old_ignore_whitespace = self.ignore_whitespace();
649 let new_ignore_whitespace = group
650 .flags()
651 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
652 .unwrap_or(old_ignore_whitespace);
653 self.parser()
654 .stack_group
655 .borrow_mut()
656 .push(GroupState::Group {
657 concat,
658 group,
659 ignore_whitespace: old_ignore_whitespace,
660 });
661 self.parser().ignore_whitespace.set(new_ignore_whitespace);
662 Ok(Concat {
663 span: self.span(),
664 asts: vec![],
665 })
666 }
667 }
668 }
669
670 #[inline(never)]
671 fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
672 assert_eq!(self.char(), '~');
673 self.bump();
674 if self.is_eof() || self.char() != '(' {
675 return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
676 }
677 let open_span = self.span_char();
678 self.bump();
679 let group = ast::Group {
680 span: open_span,
681 kind: ast::GroupKind::Complement,
682 ast: Box::new(Ast::empty(self.span())),
683 };
684
685 let old_ignore_whitespace = self.ignore_whitespace();
686 let new_ignore_whitespace = group
687 .flags()
688 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
689 .unwrap_or(old_ignore_whitespace);
690 self.parser()
691 .stack_group
692 .borrow_mut()
693 .push(GroupState::Group {
694 concat,
695 group,
696 ignore_whitespace: old_ignore_whitespace,
697 });
698 self.parser().ignore_whitespace.set(new_ignore_whitespace);
699 Ok(Concat {
700 span: self.span(),
701 asts: vec![],
702 })
703 }
704
705 #[inline(never)]
706 fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
707 use self::GroupState::*;
708 assert_eq!(self.char(), ')');
709 let mut stack = self.parser().stack_group.borrow_mut();
710 let topstack = stack.pop();
711
712 let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
713 Some(Group {
714 concat,
715 group,
716 ignore_whitespace,
717 }) => (concat, group, ignore_whitespace, None),
718 Some(Alternation(alt)) => match stack.pop() {
719 Some(Group {
720 concat,
721 group,
722 ignore_whitespace,
723 }) => (
724 concat,
725 group,
726 ignore_whitespace,
727 Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
728 ),
729 None | Some(Alternation(_)) | Some(Intersection(_)) => {
730 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
731 }
732 },
733 Some(Intersection(int)) => match stack.pop() {
734 Some(Group {
735 concat,
736 group,
737 ignore_whitespace,
738 }) => (
739 concat,
740 group,
741 ignore_whitespace,
742 Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
743 ),
744 None | Some(Alternation(_)) | Some(Intersection(_)) => {
745 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
746 }
747 },
748
749 None => {
750 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
751 }
752 };
753 self.parser().ignore_whitespace.set(ignore_whitespace);
754 group_concat.span.end = self.pos();
755 self.bump();
756 group.span.end = self.pos();
757 match alt {
758 Some(Either::Left(mut alt)) => {
759 alt.span.end = group_concat.span.end;
760 alt.asts.push(group_concat.into_ast());
761 group.ast = Box::new(alt.into_ast());
762 }
763 Some(Either::Right(mut int)) => {
764 int.span.end = group_concat.span.end;
765 int.asts.push(group_concat.into_ast());
766 group.ast = Box::new(int.into_ast());
767 }
768 None => {
769 group.ast = Box::new(group_concat.into_ast());
770 }
771 }
772
773 if group.kind == GroupKind::Complement {
774 let complement = ast::Complement {
775 span: self.span(),
776 ast: group.ast,
777 };
778 prior_concat.asts.push(Ast::complement(complement));
779 }
780 else {
782 prior_concat.asts.push(Ast::group(group));
783 }
784 Ok(prior_concat)
785 }
786
787 #[inline(never)]
788 fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
789 concat.span.end = self.pos();
790 let mut stack = self.parser().stack_group.borrow_mut();
791 let ast = match stack.pop() {
792 None => Ok(concat.into_ast()),
793 Some(GroupState::Alternation(mut alt)) => {
794 alt.span.end = self.pos();
795 alt.asts.push(concat.into_ast());
796 Ok(Ast::alternation(alt))
797 }
798 Some(GroupState::Intersection(mut int)) => {
799 int.span.end = self.pos();
800 int.asts.push(concat.into_ast());
801
802 Ok(Ast::intersection(int))
803 }
804 Some(GroupState::Group { group, .. }) => {
805 return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
806 }
807 };
808 match stack.pop() {
810 None => ast,
811 Some(GroupState::Alternation(alt)) => {
812 Err(self.error(alt.span, ast::ErrorKind::UnsupportedResharpRegex))
813 }
814 Some(GroupState::Intersection(int)) => {
815 Err(self.error(int.span, ast::ErrorKind::UnsupportedResharpRegex))
816 }
817 Some(GroupState::Group { group, .. }) => {
818 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
819 }
820 }
821 }
822
823 #[inline(never)]
824 fn push_class_open(
825 &self,
826 parent_union: regex_syntax::ast::ClassSetUnion,
827 ) -> Result<regex_syntax::ast::ClassSetUnion> {
828 assert_eq!(self.char(), '[');
829
830 let (nested_set, nested_union) = self.parse_set_class_open()?;
831 self.parser()
832 .stack_class
833 .borrow_mut()
834 .push(ClassState::Open {
835 union: parent_union,
836 set: nested_set,
837 });
838 Ok(nested_union)
839 }
840
841 #[inline(never)]
842 fn pop_class(
843 &self,
844 nested_union: regex_syntax::ast::ClassSetUnion,
845 ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
846 assert_eq!(self.char(), ']');
847
848 let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
849 let prevset = self.pop_class_op(item);
850 let mut stack = self.parser().stack_class.borrow_mut();
851 match stack.pop() {
852 None => panic!("unexpected empty character class stack"),
853 Some(ClassState::Op { .. }) => panic!("unexpected ClassState::Op"),
854 Some(ClassState::Open { mut union, mut set }) => {
855 self.bump();
856 set.span.end = self.pos();
857 set.kind = prevset;
858 if stack.is_empty() {
859 Ok(Either::Right(set))
860 } else {
861 union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
862 Ok(Either::Left(union))
863 }
864 }
865 }
866 }
867
868 #[inline(never)]
869 fn unclosed_class_error(&self) -> ParseError {
870 for state in self.parser().stack_class.borrow().iter().rev() {
871 if let ClassState::Open { ref set, .. } = *state {
872 return self.error(set.span, ast::ErrorKind::ClassUnclosed);
873 }
874 }
875 panic!("no open character class found")
876 }
877
878 #[inline(never)]
879 fn push_class_op(
880 &self,
881 next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
882 next_union: regex_syntax::ast::ClassSetUnion,
883 ) -> regex_syntax::ast::ClassSetUnion {
884 let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
885 let new_lhs = self.pop_class_op(item);
886 self.parser().stack_class.borrow_mut().push(ClassState::Op {
887 kind: next_kind,
888 lhs: new_lhs,
889 });
890 regex_syntax::ast::ClassSetUnion {
891 span: self.span(),
892 items: vec![],
893 }
894 }
895
896 #[inline(never)]
897 fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
898 let mut stack = self.parser().stack_class.borrow_mut();
899 let (kind, lhs) = match stack.pop() {
900 Some(ClassState::Op { kind, lhs }) => (kind, lhs),
901 Some(state @ ClassState::Open { .. }) => {
902 stack.push(state);
903 return rhs;
904 }
905 None => unreachable!(),
906 };
907 let span = Span::new(lhs.span().start, rhs.span().end);
908 regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
909 span,
910 kind,
911 lhs: Box::new(lhs),
912 rhs: Box::new(rhs),
913 })
914 }
915
916 fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
917 match hir.kind() {
918 hir::HirKind::Empty => Ok(NodeId::EPS),
919 hir::HirKind::Literal(l) => {
920 if l.0.len() == 1 {
921 let node = tb.mk_u8(l.0[0]);
922 Ok(node)
923 } else {
924 let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
925 let conc = tb.mk_concats(ws.iter().copied());
926 Ok(conc)
927 }
928 }
929 hir::HirKind::Class(class) => match class {
930 hir::Class::Unicode(class_unicode) => {
931 let ranges = class_unicode.ranges();
932 if ranges.len() == 1
933 && ranges[0].start() == '\u{0}'
934 && ranges[0].end() == '\u{10FFFF}'
935 {
936 return Ok(tb.mk_range_u8(0, 255));
937 }
938 let mut nodes = Vec::new();
939 for range in ranges {
940 for seq in Utf8Sequences::new(range.start(), range.end()) {
941 let sl = seq.as_slice();
942 let bytes: Vec<_> = sl.iter().map(|s| (s.start, s.end)).collect();
943 let node = match bytes.len() {
944 1 => tb.mk_range_u8(bytes[0].0, bytes[0].1),
945 n => {
946 let last = tb.mk_range_u8(bytes[n - 1].0, bytes[n - 1].1);
947 let mut conc = last;
948 for i in (0..n - 1).rev() {
949 let b = tb.mk_range_u8(bytes[i].0, bytes[i].1);
950 conc = tb.mk_concat(b, conc);
951 }
952 conc
953 }
954 };
955 nodes.push(node);
956 }
957 }
958 let merged = tb.mk_unions(nodes.into_iter());
959 Ok(merged)
960 }
961 hir::Class::Bytes(class_bytes) => {
962 let ranges = class_bytes.ranges();
963 let mut result = NodeId::BOT;
964 for range in ranges {
965 let start = range.start();
966 let end = range.end();
967 let node = tb.mk_range_u8(start, end);
968 result = tb.mk_union(result, node);
969 }
970 Ok(result)
971 }
972 },
973 hir::HirKind::Look(_) => Err(self.error(
974 Span::splat(self.pos()),
975 ast::ErrorKind::UnsupportedResharpRegex,
976 )),
977 hir::HirKind::Repetition(_) => Err(self.error(
978 Span::splat(self.pos()),
979 ast::ErrorKind::UnsupportedResharpRegex,
980 )),
981 hir::HirKind::Capture(_) => Err(self.error(
982 Span::splat(self.pos()),
983 ast::ErrorKind::UnsupportedResharpRegex,
984 )),
985 hir::HirKind::Concat(body) => {
986 let mut result = NodeId::EPS;
987 for child in body {
988 let node = self.hir_to_node_id(child, tb)?;
989 result = tb.mk_concat(result, node);
990 }
991 Ok(result)
992 }
993 hir::HirKind::Alternation(_) => Err(self.error(
994 Span::splat(self.pos()),
995 ast::ErrorKind::UnsupportedResharpRegex,
996 )),
997 }
998 }
999
1000 fn translate_ast_to_hir(
1001 &mut self,
1002 orig_ast: ®ex_syntax::ast::Ast,
1003 tb: &mut TB<'s>,
1004 ) -> Result<NodeId> {
1005 match self.translator.translate("", orig_ast) {
1006 Err(_) => Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1007 Ok(hir) => self.hir_to_node_id(&hir, tb),
1008 }
1009 }
1010
1011 fn translator_to_node_id(
1012 &mut self,
1013 orig_ast: ®ex_syntax::ast::Ast,
1014 translator: &mut Option<Translator>,
1015 tb: &mut TB<'s>,
1016 ) -> Result<NodeId> {
1017 match translator {
1018 Some(tr) => {
1019 let hir = tr
1020 .translate("", orig_ast)
1021 .map_err(|e| self.unsupported_error(e))?;
1022 self.hir_to_node_id(&hir, tb)
1023 }
1024 None => self.translate_ast_to_hir(orig_ast, tb),
1025 }
1026 }
1027
1028 fn get_class(
1029 &mut self,
1030 negated: bool,
1031 kind: regex_syntax::ast::ClassPerlKind,
1032 tb: &mut TB<'s>,
1033 ) -> Result<NodeId> {
1034 let w = self
1035 .perl_classes
1036 .iter()
1037 .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1038 match w {
1039 Some((_, _, value)) => Ok(*value),
1040 None => {
1041 let translated = if self.global_ascii_perl {
1042 let pos = match kind {
1043 regex_syntax::ast::ClassPerlKind::Word => {
1044 let az = tb.mk_range_u8(b'a', b'z');
1045 let big = tb.mk_range_u8(b'A', b'Z');
1046 let dig = tb.mk_range_u8(b'0', b'9');
1047 let us = tb.mk_u8(b'_');
1048 tb.mk_unions([az, big, dig, us].into_iter())
1049 }
1050 regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1051 regex_syntax::ast::ClassPerlKind::Space => {
1052 let sp = tb.mk_u8(b' ');
1053 let tab = tb.mk_u8(b'\t');
1054 let nl = tb.mk_u8(b'\n');
1055 let cr = tb.mk_u8(b'\r');
1056 let ff = tb.mk_u8(0x0C);
1057 let vt = tb.mk_u8(0x0B);
1058 tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1059 }
1060 };
1061 if negated {
1062 resharp_algebra::neg_class(tb, pos)
1063 } else {
1064 pos
1065 }
1066 } else if self.global_unicode {
1067 match kind {
1068 regex_syntax::ast::ClassPerlKind::Word => {
1069 if self.global_full_unicode {
1070 self.unicode_classes.ensure_word_full(tb);
1071 } else {
1072 self.unicode_classes.ensure_word(tb);
1073 }
1074 if negated {
1075 self.unicode_classes.non_word
1076 } else {
1077 self.unicode_classes.word
1078 }
1079 }
1080 regex_syntax::ast::ClassPerlKind::Digit => {
1081 if self.global_full_unicode {
1082 self.unicode_classes.ensure_digit_full(tb);
1083 } else {
1084 self.unicode_classes.ensure_digit(tb);
1085 }
1086 if negated {
1087 self.unicode_classes.non_digit
1088 } else {
1089 self.unicode_classes.digit
1090 }
1091 }
1092 regex_syntax::ast::ClassPerlKind::Space => {
1093 if self.global_full_unicode {
1094 self.unicode_classes.ensure_space_full(tb);
1095 } else {
1096 self.unicode_classes.ensure_space(tb);
1097 }
1098 if negated {
1099 self.unicode_classes.non_space
1100 } else {
1101 self.unicode_classes.space
1102 }
1103 }
1104 }
1105 } else {
1106 let pos = match kind {
1107 regex_syntax::ast::ClassPerlKind::Word => {
1108 let az = tb.mk_range_u8(b'a', b'z');
1109 let big = tb.mk_range_u8(b'A', b'Z');
1110 let dig = tb.mk_range_u8(b'0', b'9');
1111 let us = tb.mk_u8(b'_');
1112 tb.mk_unions([az, big, dig, us].into_iter())
1113 }
1114 regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1115 regex_syntax::ast::ClassPerlKind::Space => {
1116 let sp = tb.mk_u8(b' ');
1117 let tab = tb.mk_u8(b'\t');
1118 let nl = tb.mk_u8(b'\n');
1119 let cr = tb.mk_u8(b'\r');
1120 let ff = tb.mk_u8(0x0C);
1121 let vt = tb.mk_u8(0x0B);
1122 tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1123 }
1124 };
1125 if negated {
1126 tb.mk_compl(pos)
1127 } else {
1128 pos
1129 }
1130 };
1131 self.perl_classes.push((negated, kind, translated));
1132 Ok(translated)
1133 }
1134 }
1135 }
1136
1137 fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1138 use WordCharKind::*;
1139 match ast {
1140 Ast::Literal(lit) => {
1141 if is_word_byte(lit.c as u8) {
1142 Word
1143 } else {
1144 NonWord
1145 }
1146 }
1147 Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1148 (®ex_syntax::ast::ClassPerlKind::Word, false) => Word,
1149 (®ex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1150 (®ex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1151 (®ex_syntax::ast::ClassPerlKind::Space, true) => Unknown,
1152 (®ex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1153 (®ex_syntax::ast::ClassPerlKind::Digit, true) => Unknown,
1154 },
1155 Ast::Dot(_) | Ast::Top(_) => Unknown,
1156 Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1157 Ast::Concat(c) if !c.asts.is_empty() => {
1158 let edge = if left { c.asts.len() - 1 } else { 0 };
1159 let kind = Self::word_char_kind(&c.asts[edge], left);
1160 match kind {
1161 MaybeWord => {
1162 let dir: isize = if left { -1 } else { 1 };
1163 match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1164 Word => Word,
1165 _ => MaybeWord,
1166 }
1167 }
1168 MaybeNonWord => {
1169 let dir: isize = if left { -1 } else { 1 };
1170 match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1171 NonWord => NonWord,
1172 _ => MaybeNonWord,
1173 }
1174 }
1175 other => other,
1176 }
1177 }
1178 Ast::Alternation(alt) if !alt.asts.is_empty() => {
1179 let first = Self::word_char_kind(&alt.asts[0], left);
1180 if alt.asts[1..]
1181 .iter()
1182 .all(|a| Self::word_char_kind(a, left) == first)
1183 {
1184 first
1185 } else {
1186 Unknown
1187 }
1188 }
1189 Ast::Repetition(r) => {
1190 let inner = Self::word_char_kind(&r.ast, left);
1191 let nullable = matches!(
1192 &r.op.kind,
1193 ast::RepetitionKind::ZeroOrMore
1194 | ast::RepetitionKind::ZeroOrOne
1195 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1196 );
1197 if nullable {
1198 match inner {
1199 Word => MaybeWord,
1200 NonWord => MaybeNonWord,
1201 _ => Unknown,
1202 }
1203 } else {
1204 inner
1205 }
1206 }
1207 Ast::Lookaround(la) => Self::word_char_kind(&la.ast, left),
1208 _ => Unknown,
1209 }
1210 }
1211
1212 fn edge_class_ast(ast: &Ast, left: bool) -> Option<&Ast> {
1214 match ast {
1215 Ast::Literal(_)
1216 | Ast::ClassPerl(_)
1217 | Ast::ClassBracketed(_)
1218 | Ast::ClassUnicode(_)
1219 | Ast::Dot(_)
1220 | Ast::Top(_) => Some(ast),
1221 Ast::Group(g) => Self::edge_class_ast(&g.ast, left),
1222 Ast::Concat(c) if !c.asts.is_empty() => {
1223 Self::edge_class_ast(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1224 }
1225 Ast::Repetition(r) => {
1226 let nullable = matches!(
1227 &r.op.kind,
1228 ast::RepetitionKind::ZeroOrMore
1229 | ast::RepetitionKind::ZeroOrOne
1230 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1231 );
1232 if nullable {
1233 None
1234 } else {
1235 Self::edge_class_ast(&r.ast, left)
1236 }
1237 }
1238 _ => None,
1239 }
1240 }
1241
1242 fn resolve_word_kind(
1243 &mut self,
1244 asts: &[Ast],
1245 idx: usize,
1246 dir: isize,
1247 translator: &mut Option<Translator>,
1248 tb: &mut TB<'s>,
1249 word_id: NodeId,
1250 not_word_id: NodeId,
1251 ) -> Result<WordCharKind> {
1252 use WordCharKind::*;
1253 let fast = Self::concat_neighbor_kind(asts, idx, dir);
1254 if fast != Unknown {
1255 return Ok(fast);
1256 }
1257 let neighbor_idx = (idx as isize + dir) as usize;
1258 let node = if let Some(edge) = Self::edge_class_ast(&asts[neighbor_idx], dir < 0) {
1259 self.ast_to_node_id(edge, translator, tb)?
1260 } else {
1261 let neighbor_node = self.ast_to_node_id(&asts[neighbor_idx], translator, tb)?;
1263 let mut neighbor_node = tb
1264 .try_elim_lookarounds(neighbor_node)
1265 .ok_or_else(|| self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))?;
1266 if dir < 0 {
1267 neighbor_node = tb.reverse(neighbor_node).or_else(|_| {
1268 Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1269 })?;
1270 }
1271 let word_prefix = tb.mk_concat(word_id, NodeId::TS);
1272 let non_word_prefix = tb.mk_concat(not_word_id, NodeId::TS);
1273 return if tb.subsumes(word_prefix, neighbor_node) == Some(true) {
1274 Ok(Word)
1275 } else if tb.subsumes(non_word_prefix, neighbor_node) == Some(true) {
1276 Ok(NonWord)
1277 } else {
1278 Ok(Unknown)
1279 };
1280 };
1281 if tb.subsumes(word_id, node) == Some(true) {
1282 Ok(Word)
1283 } else if tb.subsumes(not_word_id, node) == Some(true) {
1284 Ok(NonWord)
1285 } else {
1286 Ok(Unknown)
1287 }
1288 }
1289
1290 fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1291 use WordCharKind::*;
1292 let next = idx as isize + dir;
1293 if next < 0 || next >= asts.len() as isize {
1294 return Edge;
1295 }
1296 let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1297 match kind {
1298 MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1299 Word => Word,
1300 _ => Unknown,
1301 },
1302 MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1303 NonWord => NonWord,
1304 _ => Unknown,
1305 },
1306 other => other,
1307 }
1308 }
1309
1310 fn rewrite_word_boundary_in_concat(
1311 &mut self,
1312 asts: &[Ast],
1313 idx: usize,
1314 translator: &mut Option<Translator>,
1315 tb: &mut TB<'s>,
1316 ) -> Result<(NodeId, usize)> {
1317 use WordCharKind::*;
1318 let (word_id, not_word_id) = if self.global_full_unicode {
1319 self.unicode_classes.ensure_word_full(tb);
1320 (self.unicode_classes.word, self.unicode_classes.non_word)
1321 } else if self.global_unicode && !self.global_ascii_perl {
1322 self.unicode_classes.ensure_word(tb);
1323 (self.unicode_classes.word, self.unicode_classes.non_word)
1324 } else {
1325 let az = tb.mk_range_u8(b'a', b'z');
1326 let big = tb.mk_range_u8(b'A', b'Z');
1327 let dig = tb.mk_range_u8(b'0', b'9');
1328 let us = tb.mk_u8(b'_');
1329 let w = tb.mk_unions([az, big, dig, us].into_iter());
1330 (w, tb.mk_compl(w))
1331 };
1332 let left = self.resolve_word_kind(asts, idx, -1, translator, tb, word_id, not_word_id)?;
1333 let right = self.resolve_word_kind(asts, idx, 1, translator, tb, word_id, not_word_id)?;
1334 match (left, right) {
1335 (NonWord, Word) | (Word, NonWord) => Ok((NodeId::EPS, idx + 1)),
1336 (Word, _) => {
1337 let neg = tb.mk_neg_lookahead(word_id, 0);
1338 Ok((neg, idx + 1))
1339 }
1340 (NonWord, _) => {
1341 let tail = tb.mk_concat(word_id, NodeId::TS);
1342 self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1343 }
1344 (_, Word) => Ok((tb.mk_neg_lookbehind(word_id), idx + 1)),
1345 (_, NonWord) => Ok((tb.mk_lookbehind(word_id, NodeId::MISSING), idx + 1)),
1346 _ => Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)),
1350 }
1351 }
1352
1353 fn merge_boundary_with_following_lookaheads(
1354 &mut self,
1355 asts: &[Ast],
1356 wb_idx: usize,
1357 boundary_tail: NodeId,
1358 translator: &mut Option<Translator>,
1359 tb: &mut TB<'s>,
1360 ) -> Result<(NodeId, usize)> {
1361 let mut next = wb_idx + 1;
1362 let mut la_bodies = vec![boundary_tail];
1363 while next < asts.len() {
1364 match &asts[next] {
1365 Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1366 let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1367 la_bodies.push(tb.mk_concat(body, NodeId::TS));
1368 next += 1;
1369 }
1370 _ => break,
1371 }
1372 }
1373 let merged = tb.mk_inters(la_bodies.into_iter());
1374 Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1375 }
1376
    /// Recursively translates `ast` into a node of the regex builder `tb`.
    ///
    /// `translator` is the currently active `regex_syntax` translator (or
    /// `None` for the default); inline flag nodes replace it in place.
    /// The `dot_all` and `multiline` cells on `self` are updated by inline
    /// flags as well and are consulted when translating `.`, `^` and `$`.
    ///
    /// # Errors
    ///
    /// Returns `UnsupportedResharpRegex` for constructs this translation
    /// does not handle (swap-greed flag, word-boundary assertions reached
    /// outside the concat rewrite, bracketed-class binary operators,
    /// lookaheads whose body contains a lookbehind) and propagates any
    /// error from nested translation.
    fn ast_to_node_id(
        &mut self,
        ast: &Ast,
        translator: &mut Option<Translator>,
        tb: &mut TB<'s>,
    ) -> Result<NodeId> {
        match ast {
            Ast::Empty(_) => Ok(NodeId::EPS),
            // A bare flags item: rebuild the translator and update the
            // dot-all / multiline state; it contributes nothing to the match.
            Ast::Flags(f) => {
                if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
                    return Err(self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex));
                }
                let mut translator_builder = self.default_translator_builder();
                if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
                    translator_builder.case_insensitive(state);
                }
                if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
                    translator_builder.unicode(state);
                }
                if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
                    self.dot_all.set(state);
                }
                if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
                    self.multiline.set(state);
                }
                let concat_translator = Some(translator_builder.build());
                *translator = concat_translator;
                Ok(NodeId::EPS)
            }
            // Literals are delegated to the regex_syntax translator.
            Ast::Literal(l) => {
                let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
                self.translator_to_node_id(&ast_lit, translator, tb)
            }
            Ast::Top(_) => Ok(NodeId::TOP),
            // `.`: the HIR dot variant depends on the global ascii-perl /
            // full-unicode settings and on the current dot-all state.
            Ast::Dot(_) => {
                let codepoint_dot = self.global_ascii_perl || self.global_full_unicode;
                let hirv = match (codepoint_dot, self.dot_all.get()) {
                    (true, true) => hir::Hir::dot(hir::Dot::AnyChar),
                    (true, false) => hir::Hir::dot(hir::Dot::AnyCharExceptLF),
                    (false, true) => return Ok(NodeId::TOP),
                    (false, false) => hir::Hir::dot(hir::Dot::AnyByteExceptLF),
                };
                self.hir_to_node_id(&hirv, tb)
            }
            Ast::Assertion(a) => match &a.kind {
                ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
                ast::AssertionKind::EndText => Ok(NodeId::END),
                // `\b` is only handled by the rewrite in the `Ast::Concat`
                // arm below; reaching it here is reported as unsupported.
                ast::AssertionKind::WordBoundary => {
                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
                }
                ast::AssertionKind::NotWordBoundary => {
                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
                }
                // `^`: start of text, or (in multiline mode) a lookbehind
                // for start-of-text or `\n`.
                ast::AssertionKind::StartLine => {
                    if !self.multiline.get() {
                        return Ok(NodeId::BEGIN);
                    }
                    let left = NodeId::BEGIN;
                    let right = tb.mk_u8(b'\n');
                    let union = tb.mk_union(left, right);
                    Ok(tb.mk_lookbehind(union, NodeId::MISSING))
                }
                // `$`: end of text, or (in multiline mode) a lookahead for
                // end-of-text or `\n`.
                ast::AssertionKind::EndLine => {
                    if !self.multiline.get() {
                        return Ok(NodeId::END);
                    }
                    let left = NodeId::END;
                    let right = tb.mk_u8(b'\n');
                    let union = tb.mk_union(left, right);
                    Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
                }
                // The special word-boundary forms are all rejected.
                ast::AssertionKind::WordBoundaryStart => {
                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
                ast::AssertionKind::WordBoundaryEnd => {
                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
                ast::AssertionKind::WordBoundaryStartAngle => {
                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
                ast::AssertionKind::WordBoundaryEndAngle => {
                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
                ast::AssertionKind::WordBoundaryStartHalf => {
                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
                ast::AssertionKind::WordBoundaryEndHalf => {
                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
            },
            Ast::ClassUnicode(c) => {
                let tmp = regex_syntax::ast::ClassUnicode {
                    span: c.span,
                    negated: c.negated,
                    kind: c.kind.clone(),
                };
                // A few non-negated named classes are built directly from
                // byte ranges instead of going through the translator.
                if !c.negated {
                    if let regex_syntax::ast::ClassUnicodeKind::Named(s) = &c.kind {
                        match s.as_str() {
                            "ascii" => return Ok(tb.mk_range_u8(0, 127)),
                            "utf8" => {
                                // Star over: an ASCII byte, or a lead byte
                                // in C0..DF / E0..EF / F0..F7 followed by
                                // one / two / three bytes in 80..BF.
                                let ascii = tb.mk_range_u8(0, 127);
                                let beta = tb.mk_range_u8(128, 0xBF);
                                let c0 = tb.mk_range_u8(0xC0, 0xDF);
                                let c0s = tb.mk_concats([c0, beta].into_iter());
                                let e0 = tb.mk_range_u8(0xE0, 0xEF);
                                let e0s = tb.mk_concats([e0, beta, beta].into_iter());
                                let f0 = tb.mk_range_u8(0xF0, 0xF7);
                                let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
                                let merged = tb.mk_unions([ascii, c0s, e0s, f0s].into_iter());
                                return Ok(tb.mk_star(merged));
                            }
                            "hex" => {
                                // [0-9a-fA-F]
                                let nums = tb.mk_range_u8(b'0', b'9');
                                let lets = tb.mk_range_u8(b'a', b'f');
                                let lets2 = tb.mk_range_u8(b'A', b'F');
                                let merged = tb.mk_unions([nums, lets, lets2].into_iter());
                                return Ok(merged);
                            }
                            _ => {}
                        }
                    };
                }

                // Everything else goes through the regex_syntax translator.
                let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
                self.translator_to_node_id(&orig_ast, translator, tb)
            }
            Ast::ClassPerl(c) => self.get_class(c.negated, c.kind.clone(), tb),
            Ast::ClassBracketed(c) => match &c.kind {
                regex_syntax::ast::ClassSet::Item(item) => {
                    // A non-negated class for which `is_universal_perl_pair`
                    // holds is translated directly to `TOP`.
                    if !c.negated && is_universal_perl_pair(item) {
                        return Ok(NodeId::TOP);
                    }
                    let tmp = regex_syntax::ast::ClassBracketed {
                        span: c.span,
                        negated: c.negated,
                        kind: c.kind.clone(),
                    };
                    let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
                    self.translator_to_node_id(&orig_ast, translator, tb)
                }
                // Set operators inside a bracketed class are not supported.
                regex_syntax::ast::ClassSet::BinaryOp(_) => {
                    Err(self.error(c.span, ast::ErrorKind::UnsupportedResharpRegex))
                }
            },
            Ast::Repetition(r) => {
                let body = self.ast_to_node_id(&r.ast, translator, tb);
                match body {
                    Ok(body) => match &r.op.kind {
                        ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
                        ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
                        ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
                        ast::RepetitionKind::Range(r) => match r {
                            ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
                            // `x{n,}` is built as `x{n}` followed by `x*`.
                            ast::RepetitionRange::AtLeast(n) => {
                                let rep = tb.mk_repeat(body, *n, *n);
                                let st = tb.mk_star(body);
                                Ok(tb.mk_concat(rep, st))
                            }

                            ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
                        },
                    },
                    // Pass the translation error through unchanged.
                    Err(_) => body,
                }
            }
            Ast::Lookaround(g) => {
                let body = self.ast_to_node_id(&g.ast, translator, tb)?;
                match g.kind {
                    // A lookahead whose body contains a lookbehind is rejected.
                    ast::LookaroundKind::PositiveLookahead
                    | ast::LookaroundKind::NegativeLookahead
                        if body.contains_lookbehind(tb) =>
                    {
                        Err(self.error(g.span, ast::ErrorKind::UnsupportedResharpRegex))
                    }
                    ast::LookaroundKind::PositiveLookahead => {
                        Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
                    }
                    ast::LookaroundKind::PositiveLookbehind => {
                        Ok(tb.mk_lookbehind(body, NodeId::MISSING))
                    }
                    ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
                    ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
                }
            }
            Ast::Group(g) => {
                // `(?flags:...)`: translate the body with a scoped
                // translator and restore dot-all / multiline afterwards.
                if let ast::GroupKind::NonCapturing(ref flags) = g.kind {
                    if !flags.items.is_empty() {
                        let mut translator_builder = self.default_translator_builder();
                        if let Some(state) = flags.flag_state(ast::Flag::CaseInsensitive) {
                            translator_builder.case_insensitive(state);
                        }
                        if let Some(state) = flags.flag_state(ast::Flag::Unicode) {
                            translator_builder.unicode(state);
                        }
                        let saved_dot_all = self.dot_all.get();
                        if let Some(state) = flags.flag_state(ast::Flag::DotMatchesNewLine) {
                            self.dot_all.set(state);
                        }
                        let saved_multiline = self.multiline.get();
                        if let Some(state) = flags.flag_state(ast::Flag::MultiLine) {
                            self.multiline.set(state);
                        }
                        let mut scoped = Some(translator_builder.build());
                        let result = self.ast_to_node_id(&g.ast, &mut scoped, tb);
                        // Restore even when the body failed to translate.
                        self.dot_all.set(saved_dot_all);
                        self.multiline.set(saved_multiline);
                        return result;
                    }
                }
                // Any other group is transparent: translate its body.
                self.ast_to_node_id(&g.ast, translator, tb)
            }
            Ast::Alternation(a) => {
                let mut children = vec![];
                for ast in &a.asts {
                    match self.ast_to_node_id(ast, translator, tb) {
                        Ok(node_id) => children.push(node_id),
                        Err(err) => return Err(err),
                    }
                }
                Ok(tb.mk_unions(children.iter().copied()))
            }
            Ast::Concat(c) => {
                // Translator installed by an inline flags item inside this
                // concatenation, if any.
                let mut concat_translator: Option<Translator> = None;
                let mut children = vec![];
                let mut i = 0;
                while i < c.asts.len() {
                    let ast = &c.asts[i];
                    match ast {
                        // Inline flags: same handling as the `Ast::Flags`
                        // arm above, but the new translator is also kept
                        // locally for the remaining elements.
                        Ast::Flags(f) => {
                            if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
                                return Err(
                                    self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex)
                                );
                            }
                            let mut translator_builder = self.default_translator_builder();
                            if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
                                translator_builder.case_insensitive(state);
                            }
                            if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
                                translator_builder.unicode(state);
                            }
                            if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
                                self.dot_all.set(state);
                            }
                            if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
                                self.multiline.set(state);
                            }
                            concat_translator = Some(translator_builder.build());
                            *translator = concat_translator.clone();
                            i += 1;
                            continue;
                        }
                        // `\b` is rewritten using its neighbours; the
                        // rewrite may consume following elements, so it
                        // returns the index to resume from.
                        Ast::Assertion(a) if a.kind == ast::AssertionKind::WordBoundary => {
                            let node =
                                self.rewrite_word_boundary_in_concat(&c.asts, i, translator, tb)?;
                            children.push(node.0);
                            i = node.1;
                            continue;
                        }
                        _ => {}
                    }
                    // Prefer the concat-local translator once one exists.
                    match concat_translator {
                        Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
                            Ok(node_id) => children.push(node_id),
                            Err(err) => return Err(err),
                        },
                        None => match self.ast_to_node_id(ast, translator, tb) {
                            Ok(node_id) => children.push(node_id),
                            Err(err) => return Err(err),
                        },
                    }
                    i += 1;
                }
                Ok(tb.mk_concats(children.iter().cloned()))
            }
            Ast::Intersection(intersection) => {
                let mut children = vec![];
                for ast in &intersection.asts {
                    match self.ast_to_node_id(ast, translator, tb) {
                        Ok(node_id) => children.push(node_id),
                        Err(err) => return Err(err),
                    }
                }
                Ok(tb.mk_inters(children.into_iter()))
            }
            Ast::Complement(complement) => {
                let body = self.ast_to_node_id(&complement.ast, translator, tb);
                body.map(|x| tb.mk_compl(x))
            }
        }
    }
1671
1672 fn parse_inner(&mut self) -> Result<Ast> {
1673 let mut concat = Concat {
1674 span: self.span(),
1675 asts: vec![],
1676 };
1677 loop {
1678 self.bump_space();
1679 if self.is_eof() {
1680 break;
1681 }
1682 match self.char() {
1683 '(' => concat = self.push_group(concat)?,
1684 ')' => concat = self.pop_group(concat)?,
1685 '|' => concat = self.push_alternate(concat)?,
1686 '&' => concat = self.push_intersect(concat)?,
1687 '~' => concat = self.push_compl_group(concat)?,
1688 '[' => {
1689 let class = self.parse_set_class()?;
1690 concat.asts.push(Ast::class_bracketed(class));
1691 }
1692 '?' => {
1693 concat =
1694 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
1695 }
1696 '*' => {
1697 concat =
1698 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
1699 }
1700 '+' => {
1701 concat =
1702 self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
1703 }
1704 '{' => {
1705 concat = self.parse_counted_repetition(concat)?;
1706 }
1707 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1708 }
1709 }
1710 let ast = self.pop_group_end(concat)?;
1711 if expanded_ast_size(&ast, self.expanded_ast_limit) >= self.expanded_ast_limit
1712 || max_concat_length(&ast) >= self.max_list_len
1713 {
1714 return Err(self.error(*ast.span(), ast::ErrorKind::UnsupportedResharpRegex));
1715 }
1716 Ok(ast)
1717 }
1718
1719 fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1720 let ast = self.parse_inner()?;
1721 self.ast_to_node_id(&ast, &mut None, tb)
1722 }
1723
1724 #[inline(never)]
1725 fn parse_uncounted_repetition(
1726 &self,
1727 mut concat: ast::Concat,
1728 kind: ast::RepetitionKind,
1729 ) -> Result<ast::Concat> {
1730 let op_start = self.pos();
1732 let ast = match concat.asts.pop() {
1733 Some(ast) => ast,
1734 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1735 };
1736 match ast {
1737 Ast::Empty(_) | Ast::Flags(_) => {
1738 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1739 }
1740 _ => {}
1741 }
1742 if self.bump() && self.char() == '?' {
1743 return Err(self.error(
1744 Span::new(op_start, self.pos()),
1745 ast::ErrorKind::UnsupportedLazyQuantifier,
1746 ));
1747 }
1748 concat.asts.push(Ast::repetition(ast::Repetition {
1749 span: ast.span().with_end(self.pos()),
1750 op: ast::RepetitionOp {
1751 span: Span::new(op_start, self.pos()),
1752 kind,
1753 },
1754 greedy: true,
1755 ast: Box::new(ast),
1756 }));
1757 Ok(concat)
1758 }
1759
1760 #[inline(never)]
1761 fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
1762 assert!(self.char() == '{');
1763 let start = self.pos();
1764 let ast = match concat.asts.pop() {
1765 Some(ast) => ast,
1766 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1767 };
1768 match ast {
1769 Ast::Empty(_) | Ast::Flags(_) => {
1770 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1771 }
1772 _ => {}
1773 }
1774 if !self.bump_and_bump_space() {
1775 return Err(self.error(
1776 Span::new(start, self.pos()),
1777 ast::ErrorKind::RepetitionCountUnclosed,
1778 ));
1779 }
1780 let count_start = specialize_err(
1781 self.parse_decimal(),
1782 ast::ErrorKind::DecimalEmpty,
1783 ast::ErrorKind::RepetitionCountDecimalEmpty,
1784 );
1785 if self.is_eof() {
1786 return Err(self.error(
1787 Span::new(start, self.pos()),
1788 ast::ErrorKind::RepetitionCountUnclosed,
1789 ));
1790 }
1791 let range = if self.char() == ',' {
1792 if !self.bump_and_bump_space() {
1793 return Err(self.error(
1794 Span::new(start, self.pos()),
1795 ast::ErrorKind::RepetitionCountUnclosed,
1796 ));
1797 }
1798 if self.char() != '}' {
1799 let count_start = match count_start {
1800 Ok(c) => c,
1801 Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
1802 if self.parser().empty_min_range {
1803 0
1804 } else {
1805 return Err(err);
1806 }
1807 }
1808 err => err?,
1809 };
1810 let count_end = specialize_err(
1811 self.parse_decimal(),
1812 ast::ErrorKind::DecimalEmpty,
1813 ast::ErrorKind::RepetitionCountDecimalEmpty,
1814 )?;
1815 ast::RepetitionRange::Bounded(count_start, count_end)
1816 } else {
1817 ast::RepetitionRange::AtLeast(count_start?)
1818 }
1819 } else {
1820 ast::RepetitionRange::Exactly(count_start?)
1821 };
1822
1823 if self.is_eof() || self.char() != '}' {
1824 return Err(self.error(
1825 Span::new(start, self.pos()),
1826 ast::ErrorKind::RepetitionCountUnclosed,
1827 ));
1828 }
1829
1830 if self.bump_and_bump_space() && self.char() == '?' {
1831 return Err(self.error(
1832 Span::new(start, self.pos()),
1833 ast::ErrorKind::UnsupportedLazyQuantifier,
1834 ));
1835 }
1836
1837 let op_span = Span::new(start, self.pos());
1838 if !range.is_valid() {
1839 return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
1840 }
1841
1842 let over_limit = match &range {
1843 ast::RepetitionRange::Exactly(n) => *n > self.max_repeat,
1844 ast::RepetitionRange::AtLeast(n) => *n > self.max_repeat,
1845 ast::RepetitionRange::Bounded(n, m) => {
1846 *n > self.max_repeat || *m > self.max_repeat
1847 }
1848 };
1849 if over_limit {
1850 return Err(self.error(op_span, ast::ErrorKind::UnsupportedResharpRegex));
1851 }
1852 concat.asts.push(Ast::repetition(ast::Repetition {
1853 span: ast.span().with_end(self.pos()),
1854 op: ast::RepetitionOp {
1855 span: op_span,
1856 kind: ast::RepetitionKind::Range(range),
1857 },
1858 greedy: true,
1859 ast: Box::new(ast),
1860 }));
1861 Ok(concat)
1862 }
1863
1864 #[inline(never)]
1865 fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1866 assert_eq!(self.char(), '(');
1867 let open_span = self.span_char();
1868 self.bump();
1869 self.bump_space();
1870 if let Some((ahead, pos)) = self.is_lookaround_prefix() {
1871 let kind = match (pos, ahead) {
1872 (true, true) => LookaroundKind::PositiveLookahead,
1873 (true, false) => LookaroundKind::PositiveLookbehind,
1874 (false, true) => LookaroundKind::NegativeLookahead,
1875 (false, false) => LookaroundKind::NegativeLookbehind,
1876 };
1877 return Ok(Either::Right(ast::Group {
1878 span: open_span,
1879 kind: ast::GroupKind::Lookaround(kind),
1880 ast: Box::new(Ast::empty(self.span())),
1881 }));
1882 }
1883 let inner_span = self.span();
1884 let mut starts_with_p = true;
1885 if self.bump_if("?P<") || {
1886 starts_with_p = false;
1887 self.bump_if("?<")
1888 } {
1889 let capture_index = self.next_capture_index(open_span)?;
1890 let name = self.parse_capture_name(capture_index)?;
1891 Ok(Either::Right(ast::Group {
1892 span: open_span,
1893 kind: ast::GroupKind::CaptureName {
1894 starts_with_p,
1895 name,
1896 },
1897 ast: Box::new(Ast::empty(self.span())),
1898 }))
1899 } else if self.bump_if("?") {
1900 if self.is_eof() {
1901 return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
1902 }
1903 let flags = self.parse_flags()?;
1904 let char_end = self.char();
1905 self.bump();
1906 if char_end == ')' {
1907 if flags.items.is_empty() {
1910 return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
1911 }
1912 Ok(Either::Left(ast::SetFlags {
1913 span: Span {
1914 end: self.pos(),
1915 ..open_span
1916 },
1917 flags,
1918 }))
1919 } else {
1920 assert_eq!(char_end, ':');
1921 Ok(Either::Right(ast::Group {
1922 span: open_span,
1923 kind: ast::GroupKind::NonCapturing(flags),
1924 ast: Box::new(Ast::empty(self.span())),
1925 }))
1926 }
1927 } else {
1928 let capture_index = self.next_capture_index(open_span)?;
1929 Ok(Either::Right(ast::Group {
1930 span: open_span,
1931 kind: ast::GroupKind::CaptureIndex(capture_index),
1932 ast: Box::new(Ast::empty(self.span())),
1933 }))
1934 }
1935 }
1936
1937 #[inline(never)]
1938 fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
1939 if self.is_eof() {
1940 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1941 }
1942 let start = self.pos();
1943 loop {
1944 if self.char() == '>' {
1945 break;
1946 }
1947 if !is_capture_char(self.char(), self.pos() == start) {
1948 return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
1949 }
1950 if !self.bump() {
1951 break;
1952 }
1953 }
1954 let end = self.pos();
1955 if self.is_eof() {
1956 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1957 }
1958 assert_eq!(self.char(), '>');
1959 self.bump();
1960 let name = &self.pattern()[start.offset..end.offset];
1961 if name.is_empty() {
1962 return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
1963 }
1964 let capname = ast::CaptureName {
1965 span: Span::new(start, end),
1966 name: name.to_string(),
1967 index: capture_index,
1968 };
1969 self.add_capture_name(&capname)?;
1970 Ok(capname)
1971 }
1972
1973 #[inline(never)]
1974 fn parse_flags(&self) -> Result<ast::Flags> {
1975 let mut flags = ast::Flags {
1976 span: self.span(),
1977 items: vec![],
1978 };
1979 let mut last_was_negation = None;
1980 while self.char() != ':' && self.char() != ')' {
1981 if self.char() == '-' {
1982 last_was_negation = Some(self.span_char());
1983 let item = ast::FlagsItem {
1984 span: self.span_char(),
1985 kind: ast::FlagsItemKind::Negation,
1986 };
1987 if let Some(i) = flags.add_item(item) {
1988 return Err(self.error(
1989 self.span_char(),
1990 ast::ErrorKind::FlagRepeatedNegation {
1991 original: flags.items[i].span,
1992 },
1993 ));
1994 }
1995 } else {
1996 last_was_negation = None;
1997 let item = ast::FlagsItem {
1998 span: self.span_char(),
1999 kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
2000 };
2001 if let Some(i) = flags.add_item(item) {
2002 return Err(self.error(
2003 self.span_char(),
2004 ast::ErrorKind::FlagDuplicate {
2005 original: flags.items[i].span,
2006 },
2007 ));
2008 }
2009 }
2010 if !self.bump() {
2011 return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
2012 }
2013 }
2014 if let Some(span) = last_was_negation {
2015 return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
2016 }
2017 flags.span.end = self.pos();
2018 Ok(flags)
2019 }
2020
2021 #[inline(never)]
2022 fn parse_flag(&self) -> Result<ast::Flag> {
2023 match self.char() {
2024 'i' => Ok(ast::Flag::CaseInsensitive),
2025 'm' => Ok(ast::Flag::MultiLine),
2026 's' => Ok(ast::Flag::DotMatchesNewLine),
2027 'U' => Ok(ast::Flag::SwapGreed),
2028 'u' => Ok(ast::Flag::Unicode),
2029 'R' => Ok(ast::Flag::CRLF),
2030 'x' => Ok(ast::Flag::IgnoreWhitespace),
2031 _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
2032 }
2033 }
2034
2035 fn parse_primitive(&self) -> Result<Primitive> {
2036 match self.char() {
2037 '\\' => self.parse_escape(),
2038 '_' => {
2039 let ast = Primitive::Top(self.span_char());
2040 self.bump();
2041 Ok(ast)
2042 }
2043 '.' => {
2044 let ast = Primitive::Dot(self.span_char());
2045 self.bump();
2046 Ok(ast)
2047 }
2048 '^' => {
2049 let ast = Primitive::Assertion(ast::Assertion {
2050 span: self.span_char(),
2051 kind: ast::AssertionKind::StartLine,
2052 });
2053 self.bump();
2054 Ok(ast)
2055 }
2056 '$' => {
2057 let ast = Primitive::Assertion(ast::Assertion {
2058 span: self.span_char(),
2059 kind: ast::AssertionKind::EndLine,
2060 });
2061 self.bump();
2062 Ok(ast)
2063 }
2064 c => {
2065 let ast = Primitive::Literal(Literal {
2066 span: self.span_char(),
2067 kind: LiteralKind::Verbatim,
2068 c,
2069 });
2070 self.bump();
2071 Ok(ast)
2072 }
2073 }
2074 }
2075
2076 #[inline(never)]
2077 fn parse_escape(&self) -> Result<Primitive> {
2078 assert_eq!(self.char(), '\\');
2079 let start = self.pos();
2080 if !self.bump() {
2081 return Err(self.error(
2082 Span::new(start, self.pos()),
2083 ast::ErrorKind::EscapeUnexpectedEof,
2084 ));
2085 }
2086 let c = self.char();
2087 match c {
2089 '0'..='9' => {
2090 if !self.parser().octal {
2091 return Err(self.error(
2092 Span::new(start, self.span_char().end),
2093 ast::ErrorKind::UnsupportedBackreference,
2094 ));
2095 }
2096 let mut lit = self.parse_octal();
2097 lit.span.start = start;
2098 return Ok(Primitive::Literal(lit));
2099 }
2100 'x' | 'u' | 'U' => {
2107 let mut lit = self.parse_hex()?;
2108 lit.span.start = start;
2109 return Ok(Primitive::Literal(lit));
2110 }
2111 'p' | 'P' => {
2112 let mut cls = self.parse_unicode_class()?;
2113 cls.span.start = start;
2114 return Ok(Primitive::Unicode(cls));
2115 }
2116 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
2117 let mut cls = self.parse_perl_class();
2118 cls.span.start = start;
2119 return Ok(Primitive::Perl(cls));
2120 }
2121 _ => {}
2122 }
2123
2124 self.bump();
2126 let span = Span::new(start, self.pos());
2127 if is_meta_character(c) {
2128 return Ok(Primitive::Literal(Literal {
2129 span,
2130 kind: LiteralKind::Meta,
2131 c,
2132 }));
2133 }
2134 if is_escapeable_character(c) {
2135 return Ok(Primitive::Literal(Literal {
2136 span,
2137 kind: LiteralKind::Superfluous,
2138 c,
2139 }));
2140 }
2141 let special = |kind, c| {
2142 Ok(Primitive::Literal(Literal {
2143 span,
2144 kind: LiteralKind::Special(kind),
2145 c,
2146 }))
2147 };
2148 match c {
2149 'a' => special(SpecialLiteralKind::Bell, '\x07'),
2150 'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
2151 't' => special(SpecialLiteralKind::Tab, '\t'),
2152 'n' => special(SpecialLiteralKind::LineFeed, '\n'),
2153 'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
2154 'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
2155 'A' => Ok(Primitive::Assertion(ast::Assertion {
2156 span,
2157 kind: ast::AssertionKind::StartText,
2158 })),
2159 'z' => Ok(Primitive::Assertion(ast::Assertion {
2160 span,
2161 kind: ast::AssertionKind::EndText,
2162 })),
2163 'b' => {
2164 let mut wb = ast::Assertion {
2165 span,
2166 kind: ast::AssertionKind::WordBoundary,
2167 };
2168 if !self.is_eof() && self.char() == '{' {
2171 if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
2172 wb.kind = kind;
2173 wb.span.end = self.pos();
2174 }
2175 }
2176 Ok(Primitive::Assertion(wb))
2177 }
2178 'B' => Ok(Primitive::Assertion(ast::Assertion {
2179 span,
2180 kind: ast::AssertionKind::NotWordBoundary,
2181 })),
2182 '<' => Ok(Primitive::Assertion(ast::Assertion {
2183 span,
2184 kind: ast::AssertionKind::WordBoundaryStartAngle,
2185 })),
2186 '>' => Ok(Primitive::Assertion(ast::Assertion {
2187 span,
2188 kind: ast::AssertionKind::WordBoundaryEndAngle,
2189 })),
2190 _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2191 }
2192 }
2193
2194 fn maybe_parse_special_word_boundary(
2195 &self,
2196 wb_start: Position,
2197 ) -> Result<Option<ast::AssertionKind>> {
2198 assert_eq!(self.char(), '{');
2199
2200 let is_valid_char = |c| matches!(c, 'A'..='Z' | 'a'..='z' | '-');
2201 let start = self.pos();
2202 if !self.bump_and_bump_space() {
2203 return Err(self.error(
2204 Span::new(wb_start, self.pos()),
2205 ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2206 ));
2207 }
2208 let start_contents = self.pos();
2209 if !is_valid_char(self.char()) {
2210 self.parser().pos.set(start);
2211 return Ok(None);
2212 }
2213
2214 let mut scratch = self.parser().scratch.borrow_mut();
2216 scratch.clear();
2217 while !self.is_eof() && is_valid_char(self.char()) {
2218 scratch.push(self.char());
2219 self.bump_and_bump_space();
2220 }
2221 if self.is_eof() || self.char() != '}' {
2222 return Err(self.error(
2223 Span::new(start, self.pos()),
2224 ast::ErrorKind::SpecialWordBoundaryUnclosed,
2225 ));
2226 }
2227 let end = self.pos();
2228 self.bump();
2229 let kind = match scratch.as_str() {
2230 "start" => ast::AssertionKind::WordBoundaryStart,
2231 "end" => ast::AssertionKind::WordBoundaryEnd,
2232 "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2233 "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2234 _ => {
2235 return Err(self.error(
2236 Span::new(start_contents, end),
2237 ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2238 ))
2239 }
2240 };
2241 Ok(Some(kind))
2242 }
2243
2244 #[inline(never)]
2245 fn parse_octal(&self) -> Literal {
2246 assert!(self.parser().octal);
2247 assert!('0' <= self.char() && self.char() <= '7');
2248 let start = self.pos();
2249 while self.bump()
2251 && '0' <= self.char()
2252 && self.char() <= '7'
2253 && self.pos().offset - start.offset <= 2
2254 {}
2255 let end = self.pos();
2256 let octal = &self.pattern()[start.offset..end.offset];
2257 let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2260 let c = char::from_u32(codepoint).expect("Unicode scalar value");
2263 Literal {
2264 span: Span::new(start, end),
2265 kind: LiteralKind::Octal,
2266 c,
2267 }
2268 }
2269
2270 #[inline(never)]
2271 fn parse_hex(&self) -> Result<Literal> {
2272 assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2273
2274 let hex_kind = match self.char() {
2275 'x' => HexLiteralKind::X,
2276 'u' => HexLiteralKind::UnicodeShort,
2277 _ => HexLiteralKind::UnicodeLong,
2278 };
2279 if !self.bump_and_bump_space() {
2280 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2281 }
2282 if self.char() == '{' {
2283 self.parse_hex_brace(hex_kind)
2284 } else {
2285 self.parse_hex_digits(hex_kind)
2286 }
2287 }
2288
2289 #[inline(never)]
2290 fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2291 let mut scratch = self.parser().scratch.borrow_mut();
2292 scratch.clear();
2293
2294 let start = self.pos();
2295 for i in 0..kind.digits() {
2296 if i > 0 && !self.bump_and_bump_space() {
2297 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2298 }
2299 if !is_hex(self.char()) {
2300 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2301 }
2302 scratch.push(self.char());
2303 }
2304 self.bump_and_bump_space();
2305 let end = self.pos();
2306 let hex = scratch.as_str();
2307 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2308 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2309 Some(c) => Ok(Literal {
2310 span: Span::new(start, end),
2311 kind: LiteralKind::HexFixed(kind),
2312 c,
2313 }),
2314 }
2315 }
2316
2317 #[inline(never)]
2318 fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2319 let mut scratch = self.parser().scratch.borrow_mut();
2320 scratch.clear();
2321
2322 let brace_pos = self.pos();
2323 let start = self.span_char().end;
2324 while self.bump_and_bump_space() && self.char() != '}' {
2325 if !is_hex(self.char()) {
2326 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2327 }
2328 scratch.push(self.char());
2329 }
2330 if self.is_eof() {
2331 return Err(self.error(
2332 Span::new(brace_pos, self.pos()),
2333 ast::ErrorKind::EscapeUnexpectedEof,
2334 ));
2335 }
2336 let end = self.pos();
2337 let hex = scratch.as_str();
2338 assert_eq!(self.char(), '}');
2339 self.bump_and_bump_space();
2340
2341 if hex.is_empty() {
2342 return Err(self.error(
2343 Span::new(brace_pos, self.pos()),
2344 ast::ErrorKind::EscapeHexEmpty,
2345 ));
2346 }
2347 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2348 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2349 Some(c) => Ok(Literal {
2350 span: Span::new(start, self.pos()),
2351 kind: LiteralKind::HexBrace(kind),
2352 c,
2353 }),
2354 }
2355 }
2356
2357 fn parse_decimal(&self) -> Result<u32> {
2358 let mut scratch = self.parser().scratch.borrow_mut();
2359 scratch.clear();
2360
2361 while !self.is_eof() && self.char().is_whitespace() {
2362 self.bump();
2363 }
2364 let start = self.pos();
2365 while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2366 scratch.push(self.char());
2367 self.bump_and_bump_space();
2368 }
2369 let span = Span::new(start, self.pos());
2370 while !self.is_eof() && self.char().is_whitespace() {
2371 self.bump_and_bump_space();
2372 }
2373 let digits = scratch.as_str();
2374 if digits.is_empty() {
2375 return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2376 }
2377 match digits.parse::<u32>().ok() {
2378 Some(n) => Ok(n),
2379 None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2380 }
2381 }
2382
/// Parses a bracketed character class, including nested classes and set
/// operators, and returns the finished class.
///
/// The parser must be positioned on the opening `[` (asserted). Items are
/// accumulated into a running union; the loop only ends when `pop_class`
/// yields a finished class (`Either::Right`) or an error occurs.
///
/// # Errors
///
/// Returns the unclosed-class error if the pattern ends inside the class,
/// and propagates errors from `push_class_open`, `pop_class` and
/// `parse_set_class_range`.
#[inline(never)]
fn parse_set_class(&self) -> Result<ClassBracketed> {
    assert_eq!(self.char(), '[');

    let mut union = ClassSetUnion {
        span: self.span(),
        items: vec![],
    };
    loop {
        self.bump_space();
        if self.is_eof() {
            return Err(self.unclosed_class_error());
        }
        match self.char() {
            '[' => {
                // Only when a class is already open (non-empty class stack)
                // is `[` first tried as an ASCII class such as `[:alpha:]`.
                // If that attempt yields `None`, fall through and open a
                // nested class instead.
                if !self.parser().stack_class.borrow().is_empty() {
                    if let Some(cls) = self.maybe_parse_ascii_class() {
                        union.push(ClassSetItem::Ascii(cls));
                        continue;
                    }
                }
                union = self.push_class_open(union)?;
            }
            // `pop_class` either hands back the union to keep filling
            // (`Left`) or the completed class (`Right`).
            ']' => match self.pop_class(union)? {
                Either::Left(nested_union) => {
                    union = nested_union;
                }
                Either::Right(class) => return Ok(class),
            },
            // Two-character set operators; a single `&`, `-` or `~` falls
            // through to the default arm below.
            '&' if self.peek() == Some('&') => {
                assert!(self.bump_if("&&"));
                union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
            }
            '-' if self.peek() == Some('-') => {
                assert!(self.bump_if("--"));
                union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
            }
            '~' if self.peek() == Some('~') => {
                assert!(self.bump_if("~~"));
                union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
            }
            // Anything else is a single item or a range.
            _ => {
                union.push(self.parse_set_class_range()?);
            }
        }
    }
}
2430
2431 #[inline(never)]
2432 fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2433 let prim1 = self.parse_set_class_item()?;
2434 self.bump_space();
2435 if self.is_eof() {
2436 return Err(self.unclosed_class_error());
2437 }
2438 if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2439 return prim1.into_class_set_item(self);
2440 }
2441 if !self.bump_and_bump_space() {
2442 return Err(self.unclosed_class_error());
2443 }
2444 let prim2 = self.parse_set_class_item()?;
2445 let range = ClassSetRange {
2446 span: Span::new(prim1.span().start, prim2.span().end),
2447 start: prim1.into_class_literal(self)?,
2448 end: prim2.into_class_literal(self)?,
2449 };
2450 if !range.is_valid() {
2451 return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2452 }
2453 Ok(ClassSetItem::Range(range))
2454 }
2455
2456 #[inline(never)]
2457 fn parse_set_class_item(&self) -> Result<Primitive> {
2458 if self.char() == '\\' {
2459 self.parse_escape()
2460 } else {
2461 let x = Primitive::Literal(Literal {
2462 span: self.span_char(),
2463 kind: LiteralKind::Verbatim,
2464 c: self.char(),
2465 });
2466 self.bump();
2467 Ok(x)
2468 }
2469 }
2470
2471 #[inline(never)]
2472 fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2473 assert_eq!(self.char(), '[');
2474 let start = self.pos();
2475 if !self.bump_and_bump_space() {
2476 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2477 }
2478
2479 let negated = if self.char() != '^' {
2480 false
2481 } else {
2482 if !self.bump_and_bump_space() {
2483 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2484 }
2485 true
2486 };
2487 let mut union = ClassSetUnion {
2489 span: self.span(),
2490 items: vec![],
2491 };
2492 while self.char() == '-' {
2493 union.push(ClassSetItem::Literal(Literal {
2494 span: self.span_char(),
2495 kind: LiteralKind::Verbatim,
2496 c: '-',
2497 }));
2498 if !self.bump_and_bump_space() {
2499 return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
2500 }
2501 }
2502 if union.items.is_empty() && self.char() == ']' {
2505 union.push(ClassSetItem::Literal(Literal {
2506 span: self.span_char(),
2507 kind: LiteralKind::Verbatim,
2508 c: ']',
2509 }));
2510 if !self.bump_and_bump_space() {
2511 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2512 }
2513 }
2514 let set = ClassBracketed {
2515 span: Span::new(start, self.pos()),
2516 negated,
2517 kind: ClassSet::union(ClassSetUnion {
2518 span: Span::new(union.span.start, union.span.start),
2519 items: vec![],
2520 }),
2521 };
2522 Ok((set, union))
2523 }
2524
2525 #[inline(never)]
2526 fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
2527 assert_eq!(self.char(), '[');
2528 let start = self.pos();
2530 let mut negated = false;
2531 if !self.bump() || self.char() != ':' {
2532 self.parser().pos.set(start);
2533 return None;
2534 }
2535 if !self.bump() {
2536 self.parser().pos.set(start);
2537 return None;
2538 }
2539 if self.char() == '^' {
2540 negated = true;
2541 if !self.bump() {
2542 self.parser().pos.set(start);
2543 return None;
2544 }
2545 }
2546 let name_start = self.offset();
2547 while self.char() != ':' && self.bump() {}
2548 if self.is_eof() {
2549 self.parser().pos.set(start);
2550 return None;
2551 }
2552 let name = &self.pattern()[name_start..self.offset()];
2553 if !self.bump_if(":]") {
2554 self.parser().pos.set(start);
2555 return None;
2556 }
2557 let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
2558 Some(kind) => kind,
2559 None => {
2560 self.parser().pos.set(start);
2561 return None;
2562 }
2563 };
2564 Some(ClassAscii {
2565 span: Span::new(start, self.pos()),
2566 kind,
2567 negated,
2568 })
2569 }
2570
2571 #[inline(never)]
2572 fn parse_unicode_class(&self) -> Result<ClassUnicode> {
2573 assert!(self.char() == 'p' || self.char() == 'P');
2574
2575 let mut scratch = self.parser().scratch.borrow_mut();
2576 scratch.clear();
2577
2578 let negated = self.char() == 'P';
2579 if !self.bump_and_bump_space() {
2580 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2581 }
2582 let (start, kind) = if self.char() == '{' {
2583 let start = self.span_char().end;
2584 while self.bump_and_bump_space() && self.char() != '}' {
2585 scratch.push(self.char());
2586 }
2587 if self.is_eof() {
2588 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2589 }
2590 assert_eq!(self.char(), '}');
2591 self.bump();
2592
2593 let name = scratch.as_str();
2594 if let Some(i) = name.find("!=") {
2595 (
2596 start,
2597 ClassUnicodeKind::NamedValue {
2598 op: ClassUnicodeOpKind::NotEqual,
2599 name: name[..i].to_string(),
2600 value: name[i + 2..].to_string(),
2601 },
2602 )
2603 } else if let Some(i) = name.find(':') {
2604 (
2605 start,
2606 ClassUnicodeKind::NamedValue {
2607 op: ClassUnicodeOpKind::Colon,
2608 name: name[..i].to_string(),
2609 value: name[i + 1..].to_string(),
2610 },
2611 )
2612 } else if let Some(i) = name.find('=') {
2613 (
2614 start,
2615 ClassUnicodeKind::NamedValue {
2616 op: ClassUnicodeOpKind::Equal,
2617 name: name[..i].to_string(),
2618 value: name[i + 1..].to_string(),
2619 },
2620 )
2621 } else {
2622 (start, ClassUnicodeKind::Named(name.to_string()))
2623 }
2624 } else {
2625 let start = self.pos();
2626 let c = self.char();
2627 if c == '\\' {
2628 return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
2629 }
2630 self.bump_and_bump_space();
2631 let kind = ClassUnicodeKind::OneLetter(c);
2632 (start, kind)
2633 };
2634 Ok(ClassUnicode {
2635 span: Span::new(start, self.pos()),
2636 negated,
2637 kind,
2638 })
2639 }
2640
2641 #[inline(never)]
2642 fn parse_perl_class(&self) -> ClassPerl {
2643 let c = self.char();
2644 let span = self.span_char();
2645 self.bump();
2646 let (negated, kind) = match c {
2647 'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
2648 'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
2649 's' => (false, regex_syntax::ast::ClassPerlKind::Space),
2650 'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
2651 'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
2652 'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
2653 c => panic!("expected valid Perl class but got '{}'", c),
2654 };
2655 ClassPerl {
2656 span,
2657 kind,
2658 negated,
2659 }
2660 }
2661}
2662
2663fn is_universal_perl_pair(item: ®ex_syntax::ast::ClassSetItem) -> bool {
2666 use regex_syntax::ast::ClassSetItem;
2667 let items = match item {
2668 ClassSetItem::Union(u) => &u.items,
2669 _ => return false,
2670 };
2671 if items.len() != 2 {
2672 return false;
2673 }
2674 match (&items[0], &items[1]) {
2675 (ClassSetItem::Perl(a), ClassSetItem::Perl(b)) => {
2676 let is_all = a.kind == b.kind && a.negated != b.negated;
2677 is_all
2678 }
2679 _ => false,
2680 }
2681}
2682
2683pub fn max_concat_length(ast: &ast::Ast) -> usize {
2684 match ast {
2685 ast::Ast::Empty(_)
2686 | ast::Ast::Flags(_)
2687 | ast::Ast::Literal(_)
2688 | ast::Ast::Dot(_)
2689 | ast::Ast::Top(_)
2690 | ast::Ast::Assertion(_)
2691 | ast::Ast::ClassUnicode(_)
2692 | ast::Ast::ClassPerl(_)
2693 | ast::Ast::ClassBracketed(_) => 0,
2694 ast::Ast::Group(g) => max_concat_length(&g.ast),
2695 ast::Ast::Complement(c) => max_concat_length(&c.ast),
2696 ast::Ast::Lookaround(l) => max_concat_length(&l.ast),
2697 ast::Ast::Repetition(r) => max_concat_length(&r.ast),
2698 ast::Ast::Concat(c) => c
2699 .asts
2700 .len()
2701 .max(c.asts.iter().map(max_concat_length).max().unwrap_or(0)),
2702 ast::Ast::Alternation(a) => a.asts.iter().map(max_concat_length).max().unwrap_or(0),
2703 ast::Ast::Intersection(i) => i.asts.iter().map(max_concat_length).max().unwrap_or(0),
2704 }
2705}
2706
2707pub fn expanded_ast_size(ast: &ast::Ast, limit: u64) -> u64 {
2708 fn go(ast: &ast::Ast, limit: u64) -> u64 {
2709 match ast {
2710 ast::Ast::Empty(_) | ast::Ast::Flags(_) => 1,
2711 ast::Ast::Literal(_) | ast::Ast::Dot(_) | ast::Ast::Top(_) => 1,
2712 ast::Ast::Assertion(_) => 1,
2713 ast::Ast::ClassUnicode(_) | ast::Ast::ClassPerl(_) | ast::Ast::ClassBracketed(_) => 1,
2714 ast::Ast::Group(g) => go(&g.ast, limit).saturating_add(1).min(limit),
2715 ast::Ast::Complement(c) => go(&c.ast, limit).saturating_add(1).min(limit),
2716 ast::Ast::Lookaround(l) => go(&l.ast, limit).saturating_add(1).min(limit),
2717 ast::Ast::Concat(c) => sum_children(&c.asts, limit),
2718 ast::Ast::Alternation(a) => sum_children(&a.asts, limit),
2719 ast::Ast::Intersection(i) => sum_children(&i.asts, limit),
2720 ast::Ast::Repetition(r) => {
2721 let body = go(&r.ast, limit);
2722 let factor: u64 = match &r.op.kind {
2723 ast::RepetitionKind::ZeroOrOne => 2,
2724 ast::RepetitionKind::ZeroOrMore | ast::RepetitionKind::OneOrMore => 2,
2725 ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(n)) => {
2726 (*n as u64).max(1)
2727 }
2728 ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(n)) => {
2729 (*n as u64).max(1).saturating_add(1)
2730 }
2731 ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(_, m)) => {
2732 (*m as u64).max(1)
2733 }
2734 };
2735 body.saturating_mul(factor).min(limit)
2736 }
2737 }
2738 }
2739 fn sum_children(children: &[ast::Ast], limit: u64) -> u64 {
2740 let mut total: u64 = 0;
2741 for c in children {
2742 total = total.saturating_add(go(c, limit));
2743 if total >= limit {
2744 return limit;
2745 }
2746 }
2747 total
2748 }
2749 go(ast, limit)
2750}
2751
2752pub fn parse_ast<'s>(tb: &mut TB<'s>, pattern: &'s str) -> std::result::Result<NodeId, ParseError> {
2753 let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
2754 p.parse(tb)
2755}
2756
2757pub fn parse_ast_with<'s>(
2758 tb: &mut TB<'s>,
2759 pattern: &'s str,
2760 flags: &PatternFlags,
2761) -> std::result::Result<NodeId, ParseError> {
2762 let mut p: ResharpParser<'s> = ResharpParser::with_flags(pattern, flags);
2763 p.parse(tb)
2764}
2765
2766pub fn parse_to_ast(pattern: &str) -> std::result::Result<ast::Ast, ParseError> {
2768 let mut p: ResharpParser = ResharpParser::new(pattern);
2769 p.parse_inner()
2770}