1#![warn(dead_code)]
6mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind, TaggedEps};
10use regex_syntax::{
11 ast::{
12 ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13 ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14 HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15 },
16 hir::{
17 self,
18 translate::{Translator, TranslatorBuilder},
19 },
20 utf8::Utf8Sequences,
21};
22use resharp_algebra::NodeId;
23
24type TB<'s> = resharp_algebra::RegexBuilder;
25
26#[derive(Clone, Copy, PartialEq)]
27enum WordCharKind {
28 Word,
29 NonWord,
30 MaybeWord,
31 MaybeNonWord,
32 Unknown,
33 Edge,
34}
35
36fn is_word_byte(b: u8) -> bool {
37 b.is_ascii_alphanumeric() || b == b'_'
38}
39
40#[derive(Clone, Debug, Eq, PartialEq)]
41enum Primitive {
42 Literal(Literal),
43 Assertion(ast::Assertion),
44 Dot(Span),
45 Top(Span),
46 Perl(ClassPerl),
47 Unicode(ClassUnicode),
48 TaggedEps(ast::TaggedEps),
49}
50
51impl Primitive {
52 fn span(&self) -> &Span {
53 match *self {
54 Primitive::Literal(ref x) => &x.span,
55 Primitive::Assertion(ref x) => &x.span,
56 Primitive::Dot(ref span) => span,
57 Primitive::Top(ref span) => span,
58 Primitive::Perl(ref x) => &x.span,
59 Primitive::Unicode(ref x) => &x.span,
60 Primitive::TaggedEps(ref x) => &x.span,
61 }
62 }
63
64 fn into_ast(self) -> Ast {
65 match self {
66 Primitive::Literal(lit) => Ast::literal(lit),
67 Primitive::Assertion(assert) => Ast::assertion(assert),
68 Primitive::Dot(span) => Ast::dot(span),
69 Primitive::Top(span) => Ast::top(span),
70 Primitive::Perl(cls) => Ast::class_perl(cls),
71 Primitive::Unicode(cls) => Ast::class_unicode(cls),
72 Primitive::TaggedEps(te) => Ast::tagged_eps(te),
73 }
74 }
75
76 fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
77 use self::Primitive::*;
78 use regex_syntax::ast::ClassSetItem;
79
80 match self {
81 Literal(lit) => Ok(ClassSetItem::Literal(lit)),
82 Perl(cls) => Ok(ClassSetItem::Perl(cls)),
83 Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
84 x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
85 }
86 }
87
88 fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
89 use self::Primitive::*;
90
91 match self {
92 Literal(lit) => Ok(lit),
93 x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
94 }
95 }
96}
97
98#[derive(Clone, Debug, Eq, PartialEq)]
99pub enum Either<Left, Right> {
100 Left(Left),
101 Right(Right),
102}
103
104#[derive(Clone, Debug, Eq, PartialEq)]
105pub struct ResharpError {
106 pub kind: ErrorKind,
108 pattern: String,
111 pub span: Span,
113}
114
115impl std::fmt::Display for ResharpError {
116 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
117 write!(f, "{:?}: {:?}", self.kind, self.span)
118 }
119}
120impl std::error::Error for ResharpError {}
121
122type Result<T> = core::result::Result<T, ResharpError>;
123
124
125#[derive(Clone, Debug)]
126enum GroupState {
127 Group {
129 concat: Concat,
131 group: ast::Group,
133 ignore_whitespace: bool,
135 },
136 Alternation(ast::Alternation),
141 Intersection(ast::Intersection),
142}
143
144#[derive(Clone, Debug)]
145enum ClassState {
146 Open {
148 union: regex_syntax::ast::ClassSetUnion,
150 set: regex_syntax::ast::ClassBracketed,
154 },
155 Op {
158 kind: regex_syntax::ast::ClassSetBinaryOpKind,
160 lhs: regex_syntax::ast::ClassSet,
162 },
163}
164
165pub struct ResharpParser<'s> {
167 perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
168 pub translator: regex_syntax::hir::translate::Translator,
169 pub pattern: &'s str,
170 pos: Cell<Position>,
171 capture_index: Cell<u32>,
172 octal: bool,
173 empty_min_range: bool,
174 ignore_whitespace: Cell<bool>,
175 comments: RefCell<Vec<ast::Comment>>,
176 stack_group: RefCell<Vec<GroupState>>,
177 stack_class: RefCell<Vec<ClassState>>,
178 capture_names: RefCell<Vec<ast::CaptureName>>,
179 scratch: RefCell<String>,
180}
181
182fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
183 if let Err(e) = result {
184 if e.kind == from {
185 Err(ResharpError {
186 kind: to,
187 pattern: e.pattern,
188 span: e.span,
189 })
190 } else {
191 Err(e)
192 }
193 } else {
194 result
195 }
196}
197
198fn is_capture_char(c: char, first: bool) -> bool {
199 if first {
200 c == '_' || c.is_alphabetic()
201 } else {
202 c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric()
203 }
204}
205
206pub fn is_meta_character(c: char) -> bool {
207 match c {
208 '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$'
209 | '#' | '&' | '-' | '~' => true,
210 _ => false,
211 }
212}
213
214pub fn is_escapeable_character(c: char) -> bool {
215 if is_meta_character(c) {
217 return true;
218 }
219 if !c.is_ascii() {
222 return false;
223 }
224 match c {
229 '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
230 '<' | '>' => false,
240 _ => true,
241 }
242}
243
244fn is_hex(c: char) -> bool {
245 ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
246}
247
248impl<'s> ResharpParser<'s> {
249 fn default_translator_builder() -> TranslatorBuilder {
250 let mut trb = TranslatorBuilder::new();
251 trb.unicode(true);
252 trb.utf8(false);
253 trb
254 }
255
256 pub fn new(pattern: &'s str) -> Self {
257 let trb = Self::default_translator_builder();
258 Self {
259 translator: trb.build(),
260 pattern,
261 perl_classes: vec![],
262 pos: Cell::new(Position::new(0, 0, 0)),
263 capture_index: Cell::new(0),
264 octal: false,
265 empty_min_range: false,
266 ignore_whitespace: Cell::new(false),
267 comments: RefCell::new(vec![]),
268 stack_group: RefCell::new(vec![]),
269 stack_class: RefCell::new(vec![]),
270 capture_names: RefCell::new(vec![]),
271 scratch: RefCell::new(String::new()),
272 }
273 }
274
275 fn parser(&'_ self) -> &'_ ResharpParser<'_> {
277 self
278 }
279
280 fn pattern(&self) -> &str {
282 self.pattern
283 }
284
285 fn error(&self, span: Span, kind: ast::ErrorKind) -> ResharpError {
287 ResharpError {
288 kind,
289 pattern: self.pattern().to_string(),
290 span,
291 }
292 }
293
294 fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ResharpError {
295 let emptyspan = Span::splat(self.pos());
296 let inner = self.error(emptyspan, ast::ErrorKind::UnsupportedResharpRegex);
297 inner
298 }
299
300 fn offset(&self) -> usize {
305 self.parser().pos.get().offset
306 }
307
308 fn line(&self) -> usize {
312 self.parser().pos.get().line
313 }
314
315 fn column(&self) -> usize {
319 self.parser().pos.get().column
320 }
321
322 fn next_capture_index(&self, span: Span) -> Result<u32> {
330 let current = self.parser().capture_index.get();
331 let i = current
332 .checked_add(1)
333 .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
334 self.parser().capture_index.set(i);
335 Ok(i)
336 }
337
338 fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
339 let mut names = self.parser().capture_names.borrow_mut();
340 match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
341 Err(i) => {
342 names.insert(i, cap.clone());
343 Ok(())
344 }
345 Ok(i) => Err(self.error(
346 cap.span,
347 ast::ErrorKind::GroupNameDuplicate {
348 original: names[i].span,
349 },
350 )),
351 }
352 }
353
354 fn ignore_whitespace(&self) -> bool {
355 self.parser().ignore_whitespace.get()
356 }
357
358 fn char(&self) -> char {
359 self.char_at(self.offset())
360 }
361
362 fn char_at(&self, i: usize) -> char {
363 self.pattern()[i..]
364 .chars()
365 .next()
366 .unwrap_or_else(|| panic!("expected char at offset {}", i))
367 }
368
369 fn bump(&self) -> bool {
370 if self.is_eof() {
371 return false;
372 }
373 let Position {
374 mut offset,
375 mut line,
376 mut column,
377 } = self.pos();
378 if self.char() == '\n' {
379 line = line.checked_add(1).unwrap();
380 column = 1;
381 } else {
382 column = column.checked_add(1).unwrap();
383 }
384 offset += self.char().len_utf8();
385 self.parser().pos.set(Position {
386 offset,
387 line,
388 column,
389 });
390 self.pattern()[self.offset()..].chars().next().is_some()
391 }
392
393 fn bump_if(&self, prefix: &str) -> bool {
394 if self.pattern()[self.offset()..].starts_with(prefix) {
395 for _ in 0..prefix.chars().count() {
396 self.bump();
397 }
398 true
399 } else {
400 false
401 }
402 }
403
404 fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
405 if self.bump_if("?=") {
406 return Some((true, true));
407 }
408 if self.bump_if("?!") {
409 return Some((true, false));
410 }
411 if self.bump_if("?<=") {
412 return Some((false, true));
413 }
414 if self.bump_if("?<!") {
415 return Some((false, false));
416 }
417 return None;
418 }
420
421 fn bump_and_bump_space(&self) -> bool {
422 if !self.bump() {
423 return false;
424 }
425 self.bump_space();
426 !self.is_eof()
427 }
428
429 fn bump_space(&self) {
430 if !self.ignore_whitespace() {
431 return;
432 }
433 while !self.is_eof() {
434 if self.char().is_whitespace() {
435 self.bump();
436 } else if self.char() == '#' {
437 let start = self.pos();
438 let mut comment_text = String::new();
439 self.bump();
440 while !self.is_eof() {
441 let c = self.char();
442 self.bump();
443 if c == '\n' {
444 break;
445 }
446 comment_text.push(c);
447 }
448 let comment = ast::Comment {
449 span: Span::new(start, self.pos()),
450 comment: comment_text,
451 };
452 self.parser().comments.borrow_mut().push(comment);
453 } else {
454 break;
455 }
456 }
457 }
458
459 fn peek(&self) -> Option<char> {
463 if self.is_eof() {
464 return None;
465 }
466 self.pattern()[self.offset() + self.char().len_utf8()..]
467 .chars()
468 .next()
469 }
470
471 fn peek_space(&self) -> Option<char> {
474 if !self.ignore_whitespace() {
475 return self.peek();
476 }
477 if self.is_eof() {
478 return None;
479 }
480 let mut start = self.offset() + self.char().len_utf8();
481 let mut in_comment = false;
482 for (i, c) in self.pattern()[start..].char_indices() {
483 if c.is_whitespace() {
484 continue;
485 } else if !in_comment && c == '#' {
486 in_comment = true;
487 } else if in_comment && c == '\n' {
488 in_comment = false;
489 } else {
490 start += i;
491 break;
492 }
493 }
494 self.pattern()[start..].chars().next()
495 }
496
497 fn is_eof(&self) -> bool {
499 self.offset() == self.pattern().len()
500 }
501
502 fn pos(&self) -> Position {
505 self.parser().pos.get()
506 }
507
508 fn span(&self) -> Span {
511 Span::splat(self.pos())
512 }
513
514 fn span_char(&self) -> Span {
516 let mut next = Position {
517 offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
518 line: self.line(),
519 column: self.column().checked_add(1).unwrap(),
520 };
521 if self.char() == '\n' {
522 next.line += 1;
523 next.column = 1;
524 }
525 Span::new(self.pos(), next)
526 }
527
528 #[inline(never)]
538 fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
539 assert_eq!(self.char(), '|');
540 concat.span.end = self.pos();
541 self.push_or_add_alternation(concat);
542 self.bump();
543 Ok(ast::Concat {
544 span: self.span(),
545 asts: vec![],
546 })
547 }
548
549 fn push_or_add_alternation(&self, concat: Concat) {
552 use self::GroupState::*;
553
554 let mut stack = self.parser().stack_group.borrow_mut();
555 if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
556 alts.asts.push(concat.into_ast());
557 return;
558 }
559 stack.push(Alternation(ast::Alternation {
560 span: Span::new(concat.span.start, self.pos()),
561 asts: vec![concat.into_ast()],
562 }));
563 }
564
565 #[inline(never)]
566 fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
567 assert_eq!(self.char(), '&');
568 concat.span.end = self.pos();
569 self.push_or_add_intersect(concat);
570 self.bump();
571 Ok(Concat {
572 span: self.span(),
573 asts: vec![],
574 })
575 }
576
577 fn push_or_add_intersect(&self, concat: Concat) {
580 use self::GroupState::*;
581
582 let mut stack = self.parser().stack_group.borrow_mut();
583 if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
584 alts.asts.push(concat.into_ast());
585 return;
586 }
587 stack.push(Intersection(ast::Intersection {
588 span: Span::new(concat.span.start, self.pos()),
589 asts: vec![concat.into_ast()],
590 }));
591 }
592
593 #[inline(never)]
607 fn push_group(&self, mut concat: Concat) -> Result<Concat> {
608 assert_eq!(self.char(), '(');
609 match self.parse_group()? {
610 Either::Left(set) => {
611 let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
612 if let Some(v) = ignore {
613 self.parser().ignore_whitespace.set(v);
614 }
615
616 concat.asts.push(Ast::flags(set));
617 Ok(concat)
618 }
619 Either::Right(group) => {
620 let old_ignore_whitespace = self.ignore_whitespace();
621 let new_ignore_whitespace = group
622 .flags()
623 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
624 .unwrap_or(old_ignore_whitespace);
625 self.parser()
626 .stack_group
627 .borrow_mut()
628 .push(GroupState::Group {
629 concat,
630 group,
631 ignore_whitespace: old_ignore_whitespace,
632 });
633 self.parser().ignore_whitespace.set(new_ignore_whitespace);
634 Ok(Concat {
635 span: self.span(),
636 asts: vec![],
637 })
638 }
639 }
640 }
641
642 #[inline(never)]
643 fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
644 assert_eq!(self.char(), '~');
645 self.bump();
646 if self.is_eof() || self.char() != '(' {
647 return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
648 }
649 let open_span = self.span_char();
650 self.bump();
651 let group = ast::Group {
652 span: open_span,
653 kind: ast::GroupKind::Complement,
654 ast: Box::new(Ast::empty(self.span())),
655 };
656
657 let old_ignore_whitespace = self.ignore_whitespace();
658 let new_ignore_whitespace = group
659 .flags()
660 .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
661 .unwrap_or(old_ignore_whitespace);
662 self.parser()
663 .stack_group
664 .borrow_mut()
665 .push(GroupState::Group {
666 concat,
667 group,
668 ignore_whitespace: old_ignore_whitespace,
669 });
670 self.parser().ignore_whitespace.set(new_ignore_whitespace);
671 Ok(Concat {
672 span: self.span(),
673 asts: vec![],
674 })
675 }
676
677 #[inline(never)]
687 fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
688 use self::GroupState::*;
689 assert_eq!(self.char(), ')');
690 let mut stack = self.parser().stack_group.borrow_mut();
691 let topstack = stack.pop();
692
693 let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
694 Some(Group {
695 concat,
696 group,
697 ignore_whitespace,
698 }) => (concat, group, ignore_whitespace, None),
699 Some(Alternation(alt)) => match stack.pop() {
700 Some(Group {
701 concat,
702 group,
703 ignore_whitespace,
704 }) => (
705 concat,
706 group,
707 ignore_whitespace,
708 Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
709 ),
710 None | Some(Alternation(_)) | Some(Intersection(_)) => {
711 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
712 }
713 },
714 Some(Intersection(int)) => match stack.pop() {
715 Some(Group {
716 concat,
717 group,
718 ignore_whitespace,
719 }) => (
720 concat,
721 group,
722 ignore_whitespace,
723 Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
724 ),
725 None | Some(Alternation(_)) | Some(Intersection(_)) => {
726 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
727 }
728 },
729
730 None => {
731 return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
732 }
733 };
734 self.parser().ignore_whitespace.set(ignore_whitespace);
735 group_concat.span.end = self.pos();
736 self.bump();
737 group.span.end = self.pos();
738 match alt {
739 Some(Either::Left(mut alt)) => {
740 alt.span.end = group_concat.span.end;
741 alt.asts.push(group_concat.into_ast());
742 group.ast = Box::new(alt.into_ast());
743 }
744 Some(Either::Right(mut int)) => {
745 int.span.end = group_concat.span.end;
746 int.asts.push(group_concat.into_ast());
747 group.ast = Box::new(int.into_ast());
748 }
749 None => {
750 group.ast = Box::new(group_concat.into_ast());
751 }
752 }
753
754 if group.kind == GroupKind::Complement {
755 let complement = ast::Complement {
756 span: self.span(),
757 ast: group.ast,
758 };
759 prior_concat.asts.push(Ast::complement(complement));
760 }
761 else {
763 prior_concat.asts.push(Ast::group(group));
765 }
766 Ok(prior_concat)
767 }
768
769 #[inline(never)]
776 fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
777 concat.span.end = self.pos();
778 let mut stack = self.parser().stack_group.borrow_mut();
780 let ast = match stack.pop() {
781 None => Ok(concat.into_ast()),
782 Some(GroupState::Alternation(mut alt)) => {
783 alt.span.end = self.pos();
784 alt.asts.push(concat.into_ast());
785 Ok(Ast::alternation(alt))
786 }
787 Some(GroupState::Intersection(mut int)) => {
788 int.span.end = self.pos();
789 int.asts.push(concat.into_ast());
790
791 Ok(Ast::intersection(int))
793 }
794 Some(GroupState::Group { group, .. }) => {
795 return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
796 }
797 };
798 match stack.pop() {
800 None => ast,
801 Some(GroupState::Alternation(_)) => {
802 unreachable!()
809 }
810 Some(GroupState::Intersection(_)) => {
811 unreachable!()
812 }
813 Some(GroupState::Group { group, .. }) => {
814 Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
815 }
816 }
817 }
818
819 #[inline(never)]
828 fn push_class_open(
829 &self,
830 parent_union: regex_syntax::ast::ClassSetUnion,
831 ) -> Result<regex_syntax::ast::ClassSetUnion> {
832 assert_eq!(self.char(), '[');
833
834 let (nested_set, nested_union) = self.parse_set_class_open()?;
835 self.parser()
836 .stack_class
837 .borrow_mut()
838 .push(ClassState::Open {
839 union: parent_union,
840 set: nested_set,
841 });
842 Ok(nested_union)
843 }
844
845 #[inline(never)]
860 fn pop_class(
861 &self,
862 nested_union: regex_syntax::ast::ClassSetUnion,
863 ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
864 assert_eq!(self.char(), ']');
865
866 let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
867 let prevset = self.pop_class_op(item);
868 let mut stack = self.parser().stack_class.borrow_mut();
869 match stack.pop() {
870 None => {
871 panic!("unexpected empty character class stack")
880 }
881 Some(ClassState::Op { .. }) => {
882 panic!("unexpected ClassState::Op")
889 }
890 Some(ClassState::Open { mut union, mut set }) => {
891 self.bump();
892 set.span.end = self.pos();
893 set.kind = prevset;
894 if stack.is_empty() {
895 Ok(Either::Right(set))
896 } else {
897 union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
898 Ok(Either::Left(union))
899 }
900 }
901 }
902 }
903
904 #[inline(never)]
909 fn unclosed_class_error(&self) -> ResharpError {
910 for state in self.parser().stack_class.borrow().iter().rev() {
911 if let ClassState::Open { ref set, .. } = *state {
912 return self.error(set.span, ast::ErrorKind::ClassUnclosed);
913 }
914 }
915 panic!("no open character class found")
918 }
919
920 #[inline(never)]
926 fn push_class_op(
927 &self,
928 next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
929 next_union: regex_syntax::ast::ClassSetUnion,
930 ) -> regex_syntax::ast::ClassSetUnion {
931 let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
932 let new_lhs = self.pop_class_op(item);
933 self.parser().stack_class.borrow_mut().push(ClassState::Op {
934 kind: next_kind,
935 lhs: new_lhs,
936 });
937 regex_syntax::ast::ClassSetUnion {
938 span: self.span(),
939 items: vec![],
940 }
941 }
942
943 #[inline(never)]
949 fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
950 let mut stack = self.parser().stack_class.borrow_mut();
951 let (kind, lhs) = match stack.pop() {
952 Some(ClassState::Op { kind, lhs }) => (kind, lhs),
953 Some(state @ ClassState::Open { .. }) => {
954 stack.push(state);
955 return rhs;
956 }
957 None => unreachable!(),
958 };
959 let span = Span::new(lhs.span().start, rhs.span().end);
960 regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
961 span,
962 kind,
963 lhs: Box::new(lhs),
964 rhs: Box::new(rhs),
965 })
966 }
967
968 fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
969 match hir.kind() {
970 hir::HirKind::Empty => Ok(NodeId::EPS),
971 hir::HirKind::Literal(l) => {
972 if l.0.len() == 1 {
973 let node = tb.mk_u8(l.0[0]);
974 Ok(node)
975 } else {
976 let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
977 let conc = tb.mk_concats(ws.iter().copied());
978 Ok(conc)
979 }
980 }
981 hir::HirKind::Class(class) => {
982 match class {
984 hir::Class::Unicode(class_unicode) => {
985 let ranges = class_unicode.ranges();
986 let mut s1 = NodeId::BOT;
987 let mut s2 = NodeId::BOT;
988 let mut s3 = NodeId::BOT;
989 let mut s4 = NodeId::BOT;
990 for range in ranges {
991 for seq in Utf8Sequences::new(range.start(), range.end()) {
992 if seq.len() > 2 {
994 continue;
995 }
996
997 let v: Vec<_> = seq
998 .as_slice()
999 .iter()
1000 .map(|s|
1001 (s.start, s.end))
1003 .collect();
1004 if v.len() == 1 {
1005 let (start, end) = v[0];
1006 let node = tb.mk_range_u8(start, end);
1007 s1 = tb.mk_union(s1, node);
1008 } else if v.len() == 2 {
1009 let node1 = tb.mk_range_u8(v[0].0, v[0].1);
1010 let node2 = tb.mk_range_u8(v[1].0, v[1].1);
1011 let conc = tb.mk_concat(node1, node2);
1012 s2 = tb.mk_union(s2, conc);
1013 } else if v.len() == 3 {
1014 let node1 = tb.mk_range_u8(v[0].0, v[0].1);
1015 let node2 = tb.mk_range_u8(v[1].0, v[1].1);
1016 let node3 = tb.mk_range_u8(v[2].0, v[2].1);
1017 let conc2 = tb.mk_concat(node2, node3);
1018 let conc1 = tb.mk_concat(node1, conc2);
1019 s3 = tb.mk_union(s3, conc1);
1020 } else {
1021 let mut conc = NodeId::EPS;
1022 for i in (0..4).rev() {
1023 let node = tb.mk_range_u8(v[i].0, v[i].1);
1024 conc = tb.mk_concat(conc, node);
1025 }
1026 s4 = tb.mk_union(s4, conc);
1027 }
1028 }
1029 }
1030 let merged = tb.mk_union(s2, s1);
1031 let merged = tb.mk_union(s3, merged);
1032 let merged = tb.mk_union(s4, merged);
1033 Ok(merged)
1034 }
1035 hir::Class::Bytes(class_bytes) => {
1036 let ranges = class_bytes.ranges();
1037 let mut result = NodeId::BOT;
1038 for range in ranges {
1039 let start = range.start();
1040 let end = range.end();
1041 let node = tb.mk_range_u8(start, end);
1042 result = tb.mk_union(result, node);
1043 }
1044 Ok(result)
1045 }
1046 }
1047 }
1048 hir::HirKind::Look(_) => todo!(),
1049 hir::HirKind::Repetition(_) => todo!(),
1050 hir::HirKind::Capture(_) => todo!(),
1051 hir::HirKind::Concat(body) => {
1052 let mut result = NodeId::EPS;
1053 for child in body {
1054 let node = self.hir_to_node_id(child, tb)?;
1055 result = tb.mk_concat(result, node);
1056 }
1057 Ok(result)
1058 }
1059 hir::HirKind::Alternation(_) => todo!(),
1060 }
1061 }
1062
1063 fn translate_ast_to_hir(
1064 &mut self,
1065 orig_ast: ®ex_syntax::ast::Ast,
1066 tb: &mut TB<'s>,
1067 ) -> Result<NodeId> {
1068 match self.translator.translate("", orig_ast) {
1071 Err(_) => return Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1072 Ok(hir) => {
1073 let mapped = self.hir_to_node_id(&hir, tb);
1074 mapped
1075 }
1076 }
1077 }
1078
1079 fn translator_to_node_id(
1080 &mut self,
1081 orig_ast: ®ex_syntax::ast::Ast,
1082 translator: &mut Option<Translator>,
1083 tb: &mut TB<'s>,
1084 ) -> Result<NodeId> {
1085 match translator {
1086 Some(tr) => {
1087 let hir = tr
1088 .translate("", &orig_ast)
1089 .map_err(|e| self.unsupported_error(e))?;
1090 self.hir_to_node_id(&hir, tb)
1091 }
1092 None => self.translate_ast_to_hir(&orig_ast, tb),
1093 }
1094 }
1095
1096 fn get_class(
1097 &mut self,
1098 negated: bool,
1099 kind: regex_syntax::ast::ClassPerlKind,
1100 tb: &mut TB<'s>,
1101 ) -> Result<NodeId> {
1102 let w = self
1104 .perl_classes
1105 .iter()
1106 .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1107 match w {
1108 Some((_, _, value)) => return Ok(*value),
1109 None => {
1110 let tmp = regex_syntax::ast::ClassPerl {
1111 span: Span::splat(Position::new(0, 0, 0)),
1112 negated,
1113 kind: kind.clone(),
1114 };
1115 let word_ast = regex_syntax::ast::Ast::class_perl(tmp);
1116 let translated = self.translate_ast_to_hir(&word_ast, tb)?;
1117 self.perl_classes.push((negated, kind, translated));
1118 Ok(translated)
1119 }
1120 }
1121 }
1122
1123 fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1124 use WordCharKind::*;
1125 match ast {
1126 Ast::Literal(lit) => {
1127 if is_word_byte(lit.c as u8) {
1128 Word
1129 } else {
1130 NonWord
1131 }
1132 }
1133 Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1134 (®ex_syntax::ast::ClassPerlKind::Word, false) => Word,
1135 (®ex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1136 (®ex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1137 (®ex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1138 _ => Unknown,
1139 },
1140 Ast::Dot(_) | Ast::Top(_) => Unknown,
1141 Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1142 Ast::Concat(c) if !c.asts.is_empty() => {
1143 Self::word_char_kind(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1144 }
1145 Ast::Repetition(r) => {
1146 let inner = Self::word_char_kind(&r.ast, left);
1147 let nullable = matches!(
1148 &r.op.kind,
1149 ast::RepetitionKind::ZeroOrMore
1150 | ast::RepetitionKind::ZeroOrOne
1151 | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1152 );
1153 if nullable {
1154 match inner {
1155 Word => MaybeWord,
1156 NonWord => MaybeNonWord,
1157 _ => Unknown,
1158 }
1159 } else {
1160 inner
1161 }
1162 }
1163 Ast::Lookaround(la) => Self::word_char_kind(&la.ast, left),
1164 _ => Unknown,
1165 }
1166 }
1167
1168 fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1169 use WordCharKind::*;
1170 let next = idx as isize + dir;
1171 if next < 0 || next >= asts.len() as isize {
1172 return Edge;
1173 }
1174 let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1175 match kind {
1176 MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1177 Word => Word,
1178 _ => Unknown,
1179 },
1180 MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1181 NonWord => NonWord,
1182 _ => Unknown,
1183 },
1184 other => other,
1185 }
1186 }
1187
1188 fn rewrite_word_boundary_in_concat(
1189 &mut self,
1190 asts: &[Ast],
1191 idx: usize,
1192 translator: &mut Option<Translator>,
1193 tb: &mut TB<'s>,
1194 ) -> Result<(NodeId, usize)> {
1195 use WordCharKind::*;
1196 let left = Self::concat_neighbor_kind(asts, idx, -1);
1197 let right = Self::concat_neighbor_kind(asts, idx, 1);
1198 let word_id = self.get_class(false, regex_syntax::ast::ClassPerlKind::Word, tb)?;
1199 let not_word_id = self.get_class(true, regex_syntax::ast::ClassPerlKind::Word, tb)?;
1200
1201 match (left, right) {
1202 (NonWord, Word) | (Word, NonWord) => Ok((NodeId::EPS, idx + 1)),
1203 (Word, _) => {
1204 let set = tb.mk_union(NodeId::END, not_word_id);
1205 let tail = tb.mk_concat(set, NodeId::TS);
1206 self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1207 }
1208 (NonWord, _) => {
1209 let set = tb.mk_union(NodeId::END, word_id);
1210 let tail = tb.mk_concat(set, NodeId::TS);
1211 self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1212 }
1213 (_, Word) => {
1214 let body = tb.mk_union(NodeId::BEGIN, not_word_id);
1215 Ok((tb.mk_lookbehind(body, NodeId::MISSING), idx + 1))
1216 }
1217 (_, NonWord) => {
1218 let body = tb.mk_union(NodeId::BEGIN, word_id);
1219 Ok((tb.mk_lookbehind(body, NodeId::MISSING), idx + 1))
1220 }
1221 _ => Ok((self.make_full_word_boundary(tb)?, idx + 1)),
1222 }
1223 }
1224
1225 fn merge_boundary_with_following_lookaheads(
1226 &mut self,
1227 asts: &[Ast],
1228 wb_idx: usize,
1229 boundary_tail: NodeId,
1230 translator: &mut Option<Translator>,
1231 tb: &mut TB<'s>,
1232 ) -> Result<(NodeId, usize)> {
1233 let mut next = wb_idx + 1;
1234 let mut la_bodies = vec![boundary_tail];
1235 while next < asts.len() {
1236 match &asts[next] {
1237 Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1238 let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1239 la_bodies.push(tb.mk_concat(body, NodeId::TS));
1240 next += 1;
1241 }
1242 _ => break,
1243 }
1244 }
1245 let merged = tb.mk_inters(la_bodies.into_iter());
1246 Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1247 }
1248
1249 fn make_full_word_boundary(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1250 let w = self.get_class(false, regex_syntax::ast::ClassPerlKind::Word, tb)?;
1251 let nw = self.get_class(true, regex_syntax::ast::ClassPerlKind::Word, tb)?;
1252 let lb_w = tb.mk_lookbehind(w, NodeId::MISSING);
1253 let la_nw = tb.mk_lookahead(nw, NodeId::MISSING, 0);
1254 let c1 = tb.mk_concat(lb_w, la_nw);
1255 let lb_nw = tb.mk_lookbehind(nw, NodeId::MISSING);
1256 let la_w = tb.mk_lookahead(w, NodeId::MISSING, 0);
1257 let c2 = tb.mk_concat(lb_nw, la_w);
1258 Ok(tb.mk_union(c1, c2))
1259 }
1260
1261 fn ast_to_node_id(
1262 &mut self,
1263 ast: &Ast,
1264 translator: &mut Option<Translator>,
1265 tb: &mut TB<'s>,
1266 ) -> Result<NodeId> {
1267 match ast {
1268 Ast::Empty(_) => Ok(NodeId::EPS),
1269 Ast::Flags(f) => {
1270 let mut translator_builder = Self::default_translator_builder();
1271 if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1272 translator_builder.case_insensitive(state);
1273 }
1274 if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1275 translator_builder.unicode(state);
1276 }
1277 let concat_translator = Some(translator_builder.build());
1278 *translator = concat_translator;
1279 Ok(NodeId::EPS)
1280 }
1281 Ast::Literal(l) => {
1282 let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
1283 self.translator_to_node_id(&ast_lit, translator, tb)
1284 }
1285 Ast::Top(_) => Ok(NodeId::TOP),
1286 Ast::Dot(_) => {
1287 let hirv = hir::Hir::dot(hir::Dot::AnyByteExceptLF);
1288 self.hir_to_node_id(&hirv, tb)
1289 }
1290 Ast::Assertion(a) => match &a.kind {
1291 ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
1292 ast::AssertionKind::EndText => Ok(NodeId::END),
1293 ast::AssertionKind::WordBoundary => {
1294 let word_id =
1295 self.get_class(false, regex_syntax::ast::ClassPerlKind::Word, tb)?;
1296 let not_word_id =
1297 self.get_class(true, regex_syntax::ast::ClassPerlKind::Word, tb)?;
1298 let case1_1 = tb.mk_lookbehind(word_id, NodeId::MISSING);
1300 let case1_2 = tb.mk_lookahead(not_word_id, NodeId::MISSING, 0);
1301 let case1 = tb.mk_concat(case1_1, case1_2);
1302 let case2_1 = tb.mk_lookbehind(not_word_id, NodeId::MISSING);
1304 let case2_2 = tb.mk_lookahead(word_id, NodeId::MISSING, 0);
1305 let case2 = tb.mk_concat(case2_1, case2_2);
1306 Ok(tb.mk_union(case1, case2))
1307 }
1308 ast::AssertionKind::NotWordBoundary => {
1309 return Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1310 }
1311 ast::AssertionKind::StartLine => {
1312 let left = NodeId::BEGIN;
1313 let right = tb.mk_u8('\n' as u8);
1314 let union = tb.mk_union(left, right);
1315 Ok(tb.mk_lookbehind(union, NodeId::MISSING))
1316 }
1317 ast::AssertionKind::EndLine => {
1318 let left = NodeId::END;
1319 let right = tb.mk_u8('\n' as u8);
1320 let union = tb.mk_union(left, right);
1321 Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
1322 }
1323 ast::AssertionKind::WordBoundaryStart => todo!(),
1324 ast::AssertionKind::WordBoundaryEnd => todo!(),
1325 ast::AssertionKind::WordBoundaryStartAngle => todo!(),
1326 ast::AssertionKind::WordBoundaryEndAngle => Ok(tb.mk_string(">")),
1327 ast::AssertionKind::WordBoundaryStartHalf => todo!(),
1328 ast::AssertionKind::WordBoundaryEndHalf => todo!(),
1329 },
1330 Ast::ClassUnicode(c) => {
1331 let tmp = regex_syntax::ast::ClassUnicode {
1332 span: c.span,
1333 negated: c.negated,
1334 kind: c.kind.clone(),
1335 };
1336 if !c.negated {
1337 match &c.kind {
1338 regex_syntax::ast::ClassUnicodeKind::Named(s) => match s.as_str() {
1339 "ascii" => return Ok(tb.mk_range_u8(0, 127)),
1341 "utf8" => {
1343 let ascii = tb.mk_range_u8(0, 127);
1344 let beta = tb.mk_range_u8(128, 0xBF);
1345 let c0 = tb.mk_range_u8(0xC0, 0xDF);
1346 let c0s = tb.mk_concats([c0, beta].into_iter());
1347 let e0 = tb.mk_range_u8(0xE0, 0xEF);
1348 let e0s = tb.mk_concats([e0, beta, beta].into_iter());
1349 let f0 = tb.mk_range_u8(0xF0, 0xF7);
1350 let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
1351 let merged = tb.mk_unions([ascii, c0s, e0s, f0s].into_iter());
1352 return Ok(tb.mk_star(merged));
1353 }
1354 "hex" => {
1355 let nums = tb.mk_range_u8(b'0', b'9');
1356 let lets = tb.mk_range_u8(b'a', b'f');
1357 let lets2 = tb.mk_range_u8(b'A', b'F');
1358 let merged = tb.mk_unions([nums, lets, lets2].into_iter());
1359 return Ok(merged);
1360 }
1361 _ => {}
1362 },
1363 _ => {}
1364 };
1365 }
1366
1367 let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
1368 self.translator_to_node_id(&orig_ast, translator, tb)
1369 }
1370 Ast::ClassPerl(c) => {
1371 let tmp = regex_syntax::ast::ClassPerl {
1372 span: c.span,
1373 negated: c.negated,
1374 kind: c.kind.clone(),
1375 };
1376 let orig_ast = regex_syntax::ast::Ast::class_perl(tmp);
1377 self.translator_to_node_id(&orig_ast, translator, tb)
1378 }
1379 Ast::ClassBracketed(c) => match &c.kind {
1380 regex_syntax::ast::ClassSet::Item(_) => {
1381 let tmp = regex_syntax::ast::ClassBracketed {
1382 span: c.span,
1383 negated: c.negated,
1384 kind: c.kind.clone(),
1385 };
1386 let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
1387 self.translator_to_node_id(&orig_ast, translator, tb)
1388 }
1389 regex_syntax::ast::ClassSet::BinaryOp(_) => todo!(),
1390 },
1391 Ast::Repetition(r) => {
1392 let body = self.ast_to_node_id(&r.ast, translator, tb);
1393 match body {
1394 Ok(body) => match &r.op.kind {
1395 ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
1396 ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
1397 ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
1398 ast::RepetitionKind::Range(r) => match r {
1399 ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
1400 ast::RepetitionRange::AtLeast(n) => {
1401 let rep = tb.mk_repeat(body, *n, *n);
1402 let st = tb.mk_star(body);
1403 Ok(tb.mk_concat(rep, st))
1404 }
1405
1406 ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
1407 },
1408 },
1409 Err(_) => body,
1410 }
1411 }
1412 Ast::Lookaround(g) => {
1413 let body = self.ast_to_node_id(&g.ast, translator, tb)?;
1414 match g.kind {
1415 ast::LookaroundKind::PositiveLookahead => {
1416 Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
1417 }
1418 ast::LookaroundKind::PositiveLookbehind => {
1419 Ok(tb.mk_lookbehind(body, NodeId::MISSING))
1420 }
1421 ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
1422 ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
1423 }
1424 }
1425 Ast::Group(g) => {
1426 let child = self.ast_to_node_id(&g.ast, translator, tb);
1427 child
1428 }
1429 Ast::Alternation(a) => {
1430 let mut children = vec![];
1431 for ast in &a.asts {
1432 match self.ast_to_node_id(ast, translator, tb) {
1433 Ok(node_id) => children.push(node_id),
1434 Err(err) => return Err(err),
1435 }
1436 }
1437 Ok(tb.mk_unions(children.iter().copied()))
1438 }
1439 Ast::Concat(c) => {
1440 let mut concat_translator: Option<Translator> = None;
1441 let mut children = vec![];
1442 let mut i = 0;
1443 while i < c.asts.len() {
1444 let ast = &c.asts[i];
1445 match ast {
1446 Ast::Flags(f) => {
1447 let mut translator_builder = Self::default_translator_builder();
1448 translator_builder.utf8(false);
1449 if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1450 translator_builder.case_insensitive(state);
1451 }
1452 if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1453 translator_builder.unicode(state);
1454 }
1455 concat_translator = Some(translator_builder.build());
1456 i += 1;
1457 continue;
1458 }
1459 Ast::Assertion(a) if a.kind == ast::AssertionKind::WordBoundary => {
1460 let node =
1461 self.rewrite_word_boundary_in_concat(&c.asts, i, translator, tb)?;
1462 children.push(node.0);
1463 i = node.1; continue;
1465 }
1466 _ => {}
1467 }
1468 match concat_translator {
1469 Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
1470 Ok(node_id) => children.push(node_id),
1471 Err(err) => return Err(err),
1472 },
1473 None => match self.ast_to_node_id(ast, translator, tb) {
1474 Ok(node_id) => children.push(node_id),
1475 Err(err) => return Err(err),
1476 },
1477 }
1478 i += 1;
1479 }
1480 Ok(tb.mk_concats(children.iter().cloned()))
1481 }
1482 Ast::Intersection(intersection) => {
1483 let mut children = vec![];
1484 for ast in &intersection.asts {
1485 match self.ast_to_node_id(ast, translator, tb) {
1486 Ok(node_id) => children.push(node_id),
1487 Err(err) => return Err(err),
1488 }
1489 }
1490 Ok(tb.mk_inters(children.into_iter()))
1491 }
1492 Ast::Complement(complement) => {
1493 let body = self.ast_to_node_id(&complement.ast, translator, tb);
1494 body.map(|x| tb.mk_compl(x))
1495 }
1496 Ast::TaggedEps(_) => Err(self.error(self.span(), ErrorKind::UnsupportedBackreference)),
1497 }
1498 }
1499
1500 fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1503 let mut concat = Concat {
1504 span: self.span(),
1505 asts: vec![],
1506 };
1507 loop {
1508 self.bump_space();
1509 if self.is_eof() {
1510 break;
1511 }
1512 match self.char() {
1513 '(' => concat = self.push_group(concat)?,
1514 ')' => concat = self.pop_group(concat)?,
1515 '|' => concat = self.push_alternate(concat)?,
1516 '&' => concat = self.push_intersect(concat)?,
1517 '~' => concat = self.push_compl_group(concat)?,
1518 '[' => {
1519 let class = self.parse_set_class()?;
1520 concat.asts.push(Ast::class_bracketed(class));
1521 }
1522 '?' => {
1523 concat =
1524 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
1525 }
1526 '*' => {
1527 concat =
1528 self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
1529 }
1530 '+' => {
1531 concat =
1532 self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
1533 }
1534 '{' => {
1535 concat = self.parse_counted_repetition(concat)?;
1536 }
1537 _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1538 }
1539 }
1540 let ast = self.pop_group_end(concat)?;
1541 let node_id = self.ast_to_node_id(&ast, &mut None, tb);
1543 node_id
1544 }
1545
1546 #[inline(never)]
1547 fn parse_uncounted_repetition(
1548 &self,
1549 mut concat: ast::Concat,
1550 kind: ast::RepetitionKind,
1551 ) -> Result<ast::Concat> {
1552 let op_start = self.pos();
1554 let ast = match concat.asts.pop() {
1555 Some(ast) => ast,
1556 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1557 };
1558 match ast {
1559 Ast::Empty(_) | Ast::Flags(_) => {
1560 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1561 }
1562 _ => {}
1563 }
1564 let mut greedy = true;
1565 if self.bump() && self.char() == '?' {
1566 greedy = false;
1567 self.bump();
1568 }
1569 concat.asts.push(Ast::repetition(ast::Repetition {
1570 span: ast.span().with_end(self.pos()),
1571 op: ast::RepetitionOp {
1572 span: Span::new(op_start, self.pos()),
1573 kind,
1574 },
1575 greedy,
1576 ast: Box::new(ast),
1577 }));
1578 Ok(concat)
1579 }
1580
1581 #[inline(never)]
1582 fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
1583 assert!(self.char() == '{');
1584 let start = self.pos();
1585 let ast = match concat.asts.pop() {
1586 Some(ast) => ast,
1587 None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1588 };
1589 match ast {
1590 Ast::Empty(_) | Ast::Flags(_) => {
1591 return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1592 }
1593 _ => {}
1594 }
1595 if !self.bump_and_bump_space() {
1596 return Err(self.error(
1597 Span::new(start, self.pos()),
1598 ast::ErrorKind::RepetitionCountUnclosed,
1599 ));
1600 }
1601 let count_start = specialize_err(
1602 self.parse_decimal(),
1603 ast::ErrorKind::DecimalEmpty,
1604 ast::ErrorKind::RepetitionCountDecimalEmpty,
1605 );
1606 if self.is_eof() {
1607 return Err(self.error(
1608 Span::new(start, self.pos()),
1609 ast::ErrorKind::RepetitionCountUnclosed,
1610 ));
1611 }
1612 let range = if self.char() == ',' {
1613 if !self.bump_and_bump_space() {
1614 return Err(self.error(
1615 Span::new(start, self.pos()),
1616 ast::ErrorKind::RepetitionCountUnclosed,
1617 ));
1618 }
1619 if self.char() != '}' {
1620 let count_start = match count_start {
1621 Ok(c) => c,
1622 Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
1623 if self.parser().empty_min_range {
1624 0
1625 } else {
1626 return Err(err);
1627 }
1628 }
1629 err => err?,
1630 };
1631 let count_end = specialize_err(
1632 self.parse_decimal(),
1633 ast::ErrorKind::DecimalEmpty,
1634 ast::ErrorKind::RepetitionCountDecimalEmpty,
1635 )?;
1636 ast::RepetitionRange::Bounded(count_start, count_end)
1637 } else {
1638 ast::RepetitionRange::AtLeast(count_start?)
1639 }
1640 } else {
1641 ast::RepetitionRange::Exactly(count_start?)
1642 };
1643
1644 if self.is_eof() || self.char() != '}' {
1645 return Err(self.error(
1646 Span::new(start, self.pos()),
1647 ast::ErrorKind::RepetitionCountUnclosed,
1648 ));
1649 }
1650
1651 let mut greedy = true;
1652 if self.bump_and_bump_space() && self.char() == '?' {
1653 greedy = false;
1654 self.bump();
1655 }
1656
1657 let op_span = Span::new(start, self.pos());
1658 if !range.is_valid() {
1659 return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
1660 }
1661 concat.asts.push(Ast::repetition(ast::Repetition {
1662 span: ast.span().with_end(self.pos()),
1663 op: ast::RepetitionOp {
1664 span: op_span,
1665 kind: ast::RepetitionKind::Range(range),
1666 },
1667 greedy,
1668 ast: Box::new(ast),
1669 }));
1670 Ok(concat)
1671 }
1672
1673 #[inline(never)]
1674 fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1675 assert_eq!(self.char(), '(');
1676 let open_span = self.span_char();
1677 self.bump();
1678 self.bump_space();
1679 if let Some((ahead, pos)) = self.is_lookaround_prefix() {
1680 let kind = match (pos, ahead) {
1681 (true, true) => LookaroundKind::PositiveLookahead,
1682 (true, false) => LookaroundKind::PositiveLookbehind,
1683 (false, true) => LookaroundKind::NegativeLookahead,
1684 (false, false) => LookaroundKind::NegativeLookbehind,
1685 };
1686 return Ok(Either::Right(ast::Group {
1687 span: open_span,
1688 kind: ast::GroupKind::Lookaround(kind),
1689 ast: Box::new(Ast::empty(self.span())),
1690 }));
1691 }
1692 let inner_span = self.span();
1693 let mut starts_with_p = true;
1694 if self.bump_if("?P<") || {
1695 starts_with_p = false;
1696 self.bump_if("?<")
1697 } {
1698 let capture_index = self.next_capture_index(open_span)?;
1699 let name = self.parse_capture_name(capture_index)?;
1700 Ok(Either::Right(ast::Group {
1701 span: open_span,
1702 kind: ast::GroupKind::CaptureName {
1703 starts_with_p,
1704 name,
1705 },
1706 ast: Box::new(Ast::empty(self.span())),
1707 }))
1708 } else if self.bump_if("?") {
1709 if self.is_eof() {
1710 return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
1711 }
1712 let flags = self.parse_flags()?;
1713 let char_end = self.char();
1714 self.bump();
1715 if char_end == ')' {
1716 if flags.items.is_empty() {
1719 return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
1720 }
1721 Ok(Either::Left(ast::SetFlags {
1722 span: Span {
1723 end: self.pos(),
1724 ..open_span
1725 },
1726 flags,
1727 }))
1728 } else {
1729 assert_eq!(char_end, ':');
1730 Ok(Either::Right(ast::Group {
1731 span: open_span,
1732 kind: ast::GroupKind::NonCapturing(flags),
1733 ast: Box::new(Ast::empty(self.span())),
1734 }))
1735 }
1736 } else {
1737 let capture_index = self.next_capture_index(open_span)?;
1738 Ok(Either::Right(ast::Group {
1739 span: open_span,
1740 kind: ast::GroupKind::CaptureIndex(capture_index),
1741 ast: Box::new(Ast::empty(self.span())),
1742 }))
1743 }
1744 }
1745
1746 #[inline(never)]
1747 fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
1748 if self.is_eof() {
1749 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1750 }
1751 let start = self.pos();
1752 loop {
1753 if self.char() == '>' {
1754 break;
1755 }
1756 if !is_capture_char(self.char(), self.pos() == start) {
1757 return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
1758 }
1759 if !self.bump() {
1760 break;
1761 }
1762 }
1763 let end = self.pos();
1764 if self.is_eof() {
1765 return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1766 }
1767 assert_eq!(self.char(), '>');
1768 self.bump();
1769 let name = &self.pattern()[start.offset..end.offset];
1770 if name.is_empty() {
1771 return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
1772 }
1773 let capname = ast::CaptureName {
1774 span: Span::new(start, end),
1775 name: name.to_string(),
1776 index: capture_index,
1777 };
1778 self.add_capture_name(&capname)?;
1779 Ok(capname)
1780 }
1781
1782 #[inline(never)]
1783 fn parse_flags(&self) -> Result<ast::Flags> {
1784 let mut flags = ast::Flags {
1785 span: self.span(),
1786 items: vec![],
1787 };
1788 let mut last_was_negation = None;
1789 while self.char() != ':' && self.char() != ')' {
1790 if self.char() == '-' {
1791 last_was_negation = Some(self.span_char());
1792 let item = ast::FlagsItem {
1793 span: self.span_char(),
1794 kind: ast::FlagsItemKind::Negation,
1795 };
1796 if let Some(i) = flags.add_item(item) {
1797 return Err(self.error(
1798 self.span_char(),
1799 ast::ErrorKind::FlagRepeatedNegation {
1800 original: flags.items[i].span,
1801 },
1802 ));
1803 }
1804 } else {
1805 last_was_negation = None;
1806 let item = ast::FlagsItem {
1807 span: self.span_char(),
1808 kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1809 };
1810 if let Some(i) = flags.add_item(item) {
1811 return Err(self.error(
1812 self.span_char(),
1813 ast::ErrorKind::FlagDuplicate {
1814 original: flags.items[i].span,
1815 },
1816 ));
1817 }
1818 }
1819 if !self.bump() {
1820 return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
1821 }
1822 }
1823 if let Some(span) = last_was_negation {
1824 return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1825 }
1826 flags.span.end = self.pos();
1827 Ok(flags)
1828 }
1829
1830 #[inline(never)]
1831 fn parse_flag(&self) -> Result<ast::Flag> {
1832 match self.char() {
1833 'i' => Ok(ast::Flag::CaseInsensitive),
1834 'm' => Ok(ast::Flag::MultiLine),
1835 's' => Ok(ast::Flag::DotMatchesNewLine),
1836 'U' => Ok(ast::Flag::SwapGreed),
1837 'u' => Ok(ast::Flag::Unicode),
1838 'R' => Ok(ast::Flag::CRLF),
1839 'x' => Ok(ast::Flag::IgnoreWhitespace),
1840 _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
1841 }
1842 }
1843
1844 fn parse_primitive(&self) -> Result<Primitive> {
1845 match self.char() {
1846 '\\' => self.parse_escape(),
1847 '_' => {
1848 let ast = Primitive::Top(self.span_char());
1849 self.bump();
1850 Ok(ast)
1851 }
1852 '.' => {
1853 let ast = Primitive::Dot(self.span_char());
1854 self.bump();
1855 Ok(ast)
1856 }
1857 '^' => {
1858 let ast = Primitive::Assertion(ast::Assertion {
1859 span: self.span_char(),
1860 kind: ast::AssertionKind::StartLine,
1861 });
1862 self.bump();
1863 Ok(ast)
1864 }
1865 '$' => {
1866 let ast = Primitive::Assertion(ast::Assertion {
1867 span: self.span_char(),
1868 kind: ast::AssertionKind::EndLine,
1869 });
1870 self.bump();
1871 Ok(ast)
1872 }
1873 c => {
1874 let ast = Primitive::Literal(Literal {
1875 span: self.span_char(),
1876 kind: LiteralKind::Verbatim,
1877 c,
1878 });
1879 self.bump();
1880 Ok(ast)
1881 }
1882 }
1883 }
1884
1885 #[inline(never)]
1886 fn parse_escape(&self) -> Result<Primitive> {
1887 assert_eq!(self.char(), '\\');
1888 let start = self.pos();
1889 if !self.bump() {
1890 return Err(self.error(
1891 Span::new(start, self.pos()),
1892 ast::ErrorKind::EscapeUnexpectedEof,
1893 ));
1894 }
1895 let c = self.char();
1896 match c {
1898 '0'..='9' => {
1899 if !self.parser().octal {
1900 let contents = self.parse_decimal()?;
1901 let ast = TaggedEps {
1902 span: self.span(),
1903 num: contents,
1904 };
1905 return Ok(Primitive::TaggedEps(ast));
1906 }
1907 let mut lit = self.parse_octal();
1908 lit.span.start = start;
1909 return Ok(Primitive::Literal(lit));
1910 }
1911 'x' | 'u' | 'U' => {
1918 let mut lit = self.parse_hex()?;
1919 lit.span.start = start;
1920 return Ok(Primitive::Literal(lit));
1921 }
1922 'p' | 'P' => {
1923 let mut cls = self.parse_unicode_class()?;
1924 cls.span.start = start;
1925 return Ok(Primitive::Unicode(cls));
1926 }
1927 'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1928 let mut cls = self.parse_perl_class();
1929 cls.span.start = start;
1930 return Ok(Primitive::Perl(cls));
1931 }
1932 _ => {}
1933 }
1934
1935 self.bump();
1937 let span = Span::new(start, self.pos());
1938 if is_meta_character(c) {
1939 return Ok(Primitive::Literal(Literal {
1940 span,
1941 kind: LiteralKind::Meta,
1942 c,
1943 }));
1944 }
1945 if is_escapeable_character(c) {
1946 return Ok(Primitive::Literal(Literal {
1947 span,
1948 kind: LiteralKind::Superfluous,
1949 c,
1950 }));
1951 }
1952 let special = |kind, c| {
1953 Ok(Primitive::Literal(Literal {
1954 span,
1955 kind: LiteralKind::Special(kind),
1956 c,
1957 }))
1958 };
1959 match c {
1960 'a' => special(SpecialLiteralKind::Bell, '\x07'),
1961 'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
1962 't' => special(SpecialLiteralKind::Tab, '\t'),
1963 'n' => special(SpecialLiteralKind::LineFeed, '\n'),
1964 'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
1965 'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
1966 'A' => Ok(Primitive::Assertion(ast::Assertion {
1967 span,
1968 kind: ast::AssertionKind::StartText,
1969 })),
1970 'z' => Ok(Primitive::Assertion(ast::Assertion {
1971 span,
1972 kind: ast::AssertionKind::EndText,
1973 })),
1974 'b' => {
1975 let mut wb = ast::Assertion {
1976 span,
1977 kind: ast::AssertionKind::WordBoundary,
1978 };
1979 if !self.is_eof() && self.char() == '{' {
1982 if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
1983 wb.kind = kind;
1984 wb.span.end = self.pos();
1985 }
1986 }
1987 Ok(Primitive::Assertion(wb))
1988 }
1989 'B' => Ok(Primitive::Assertion(ast::Assertion {
1990 span,
1991 kind: ast::AssertionKind::NotWordBoundary,
1992 })),
1993 '<' => Ok(Primitive::Assertion(ast::Assertion {
1994 span,
1995 kind: ast::AssertionKind::WordBoundaryStartAngle,
1996 })),
1997 '>' => Ok(Primitive::Assertion(ast::Assertion {
1998 span,
1999 kind: ast::AssertionKind::WordBoundaryEndAngle,
2000 })),
2001 _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2002 }
2003 }
2004
2005 fn maybe_parse_special_word_boundary(
2006 &self,
2007 wb_start: Position,
2008 ) -> Result<Option<ast::AssertionKind>> {
2009 assert_eq!(self.char(), '{');
2010
2011 let is_valid_char = |c| match c {
2012 'A'..='Z' | 'a'..='z' | '-' => true,
2013 _ => false,
2014 };
2015 let start = self.pos();
2016 if !self.bump_and_bump_space() {
2017 return Err(self.error(
2018 Span::new(wb_start, self.pos()),
2019 ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2020 ));
2021 }
2022 let start_contents = self.pos();
2023 if !is_valid_char(self.char()) {
2028 self.parser().pos.set(start);
2029 return Ok(None);
2030 }
2031
2032 let mut scratch = self.parser().scratch.borrow_mut();
2034 scratch.clear();
2035 while !self.is_eof() && is_valid_char(self.char()) {
2036 scratch.push(self.char());
2037 self.bump_and_bump_space();
2038 }
2039 if self.is_eof() || self.char() != '}' {
2040 return Err(self.error(
2041 Span::new(start, self.pos()),
2042 ast::ErrorKind::SpecialWordBoundaryUnclosed,
2043 ));
2044 }
2045 let end = self.pos();
2046 self.bump();
2047 let kind = match scratch.as_str() {
2048 "start" => ast::AssertionKind::WordBoundaryStart,
2049 "end" => ast::AssertionKind::WordBoundaryEnd,
2050 "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2051 "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2052 _ => {
2053 return Err(self.error(
2054 Span::new(start_contents, end),
2055 ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2056 ))
2057 }
2058 };
2059 Ok(Some(kind))
2060 }
2061
2062 #[inline(never)]
2063 fn parse_octal(&self) -> Literal {
2064 assert!(self.parser().octal);
2065 assert!('0' <= self.char() && self.char() <= '7');
2066 let start = self.pos();
2067 while self.bump()
2069 && '0' <= self.char()
2070 && self.char() <= '7'
2071 && self.pos().offset - start.offset <= 2
2072 {}
2073 let end = self.pos();
2074 let octal = &self.pattern()[start.offset..end.offset];
2075 let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2078 let c = char::from_u32(codepoint).expect("Unicode scalar value");
2081 Literal {
2082 span: Span::new(start, end),
2083 kind: LiteralKind::Octal,
2084 c,
2085 }
2086 }
2087
2088 #[inline(never)]
2089 fn parse_hex(&self) -> Result<Literal> {
2090 assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2091
2092 let hex_kind = match self.char() {
2093 'x' => HexLiteralKind::X,
2094 'u' => HexLiteralKind::UnicodeShort,
2095 _ => HexLiteralKind::UnicodeLong,
2096 };
2097 if !self.bump_and_bump_space() {
2098 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2099 }
2100 if self.char() == '{' {
2101 self.parse_hex_brace(hex_kind)
2102 } else {
2103 self.parse_hex_digits(hex_kind)
2104 }
2105 }
2106
2107 #[inline(never)]
2108 fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2109 let mut scratch = self.parser().scratch.borrow_mut();
2110 scratch.clear();
2111
2112 let start = self.pos();
2113 for i in 0..kind.digits() {
2114 if i > 0 && !self.bump_and_bump_space() {
2115 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2116 }
2117 if !is_hex(self.char()) {
2118 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2119 }
2120 scratch.push(self.char());
2121 }
2122 self.bump_and_bump_space();
2125 let end = self.pos();
2126 let hex = scratch.as_str();
2127 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2128 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2129 Some(c) => Ok(Literal {
2130 span: Span::new(start, end),
2131 kind: LiteralKind::HexFixed(kind),
2132 c,
2133 }),
2134 }
2135 }
2136
2137 #[inline(never)]
2138 fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2139 let mut scratch = self.parser().scratch.borrow_mut();
2140 scratch.clear();
2141
2142 let brace_pos = self.pos();
2143 let start = self.span_char().end;
2144 while self.bump_and_bump_space() && self.char() != '}' {
2145 if !is_hex(self.char()) {
2146 return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2147 }
2148 scratch.push(self.char());
2149 }
2150 if self.is_eof() {
2151 return Err(self.error(
2152 Span::new(brace_pos, self.pos()),
2153 ast::ErrorKind::EscapeUnexpectedEof,
2154 ));
2155 }
2156 let end = self.pos();
2157 let hex = scratch.as_str();
2158 assert_eq!(self.char(), '}');
2159 self.bump_and_bump_space();
2160
2161 if hex.is_empty() {
2162 return Err(self.error(
2163 Span::new(brace_pos, self.pos()),
2164 ast::ErrorKind::EscapeHexEmpty,
2165 ));
2166 }
2167 match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2168 None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2169 Some(c) => Ok(Literal {
2170 span: Span::new(start, self.pos()),
2171 kind: LiteralKind::HexBrace(kind),
2172 c,
2173 }),
2174 }
2175 }
2176
2177 fn parse_decimal(&self) -> Result<u32> {
2178 let mut scratch = self.parser().scratch.borrow_mut();
2179 scratch.clear();
2180
2181 while !self.is_eof() && self.char().is_whitespace() {
2182 self.bump();
2183 }
2184 let start = self.pos();
2185 while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2186 scratch.push(self.char());
2187 self.bump_and_bump_space();
2188 }
2189 let span = Span::new(start, self.pos());
2190 while !self.is_eof() && self.char().is_whitespace() {
2191 self.bump_and_bump_space();
2192 }
2193 let digits = scratch.as_str();
2194 if digits.is_empty() {
2195 return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2196 }
2197 match u32::from_str_radix(digits, 10).ok() {
2198 Some(n) => Ok(n),
2199 None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2200 }
2201 }
2202
2203 #[inline(never)]
2204 fn parse_set_class(&self) -> Result<ClassBracketed> {
2205 assert_eq!(self.char(), '[');
2206
2207 let mut union = ClassSetUnion {
2208 span: self.span(),
2209 items: vec![],
2210 };
2211 loop {
2212 self.bump_space();
2213 if self.is_eof() {
2214 return Err(self.unclosed_class_error());
2215 }
2216 match self.char() {
2217 '[' => {
2218 if !self.parser().stack_class.borrow().is_empty() {
2223 if let Some(cls) = self.maybe_parse_ascii_class() {
2224 union.push(ClassSetItem::Ascii(cls));
2225 continue;
2226 }
2227 }
2228 union = self.push_class_open(union)?;
2229 }
2230 ']' => match self.pop_class(union)? {
2231 Either::Left(nested_union) => {
2232 union = nested_union;
2233 }
2234 Either::Right(class) => return Ok(class),
2235 },
2236 '&' if self.peek() == Some('&') => {
2237 assert!(self.bump_if("&&"));
2238 union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
2239 }
2240 '-' if self.peek() == Some('-') => {
2241 assert!(self.bump_if("--"));
2242 union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
2243 }
2244 '~' if self.peek() == Some('~') => {
2245 assert!(self.bump_if("~~"));
2246 union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
2247 }
2248 _ => {
2249 union.push(self.parse_set_class_range()?);
2250 }
2251 }
2252 }
2253 }
2254
2255 #[inline(never)]
2256 fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2257 let prim1 = self.parse_set_class_item()?;
2258 self.bump_space();
2259 if self.is_eof() {
2260 return Err(self.unclosed_class_error());
2261 }
2262 if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2263 return prim1.into_class_set_item(self);
2264 }
2265 if !self.bump_and_bump_space() {
2266 return Err(self.unclosed_class_error());
2267 }
2268 let prim2 = self.parse_set_class_item()?;
2269 let range = ClassSetRange {
2270 span: Span::new(prim1.span().start, prim2.span().end),
2271 start: prim1.into_class_literal(self)?,
2272 end: prim2.into_class_literal(self)?,
2273 };
2274 if !range.is_valid() {
2275 return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2276 }
2277 Ok(ClassSetItem::Range(range))
2278 }
2279
2280 #[inline(never)]
2281 fn parse_set_class_item(&self) -> Result<Primitive> {
2282 if self.char() == '\\' {
2283 self.parse_escape()
2284 } else {
2285 let x = Primitive::Literal(Literal {
2286 span: self.span_char(),
2287 kind: LiteralKind::Verbatim,
2288 c: self.char(),
2289 });
2290 self.bump();
2291 Ok(x)
2292 }
2293 }
2294
2295 #[inline(never)]
2296 fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2297 assert_eq!(self.char(), '[');
2298 let start = self.pos();
2299 if !self.bump_and_bump_space() {
2300 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2301 }
2302
2303 let negated = if self.char() != '^' {
2304 false
2305 } else {
2306 if !self.bump_and_bump_space() {
2307 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2308 }
2309 true
2310 };
2311 let mut union = ClassSetUnion {
2313 span: self.span(),
2314 items: vec![],
2315 };
2316 while self.char() == '-' {
2317 union.push(ClassSetItem::Literal(Literal {
2318 span: self.span_char(),
2319 kind: LiteralKind::Verbatim,
2320 c: '-',
2321 }));
2322 if !self.bump_and_bump_space() {
2323 return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
2324 }
2325 }
2326 if union.items.is_empty() && self.char() == ']' {
2329 union.push(ClassSetItem::Literal(Literal {
2330 span: self.span_char(),
2331 kind: LiteralKind::Verbatim,
2332 c: ']',
2333 }));
2334 if !self.bump_and_bump_space() {
2335 return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2336 }
2337 }
2338 let set = ClassBracketed {
2339 span: Span::new(start, self.pos()),
2340 negated,
2341 kind: ClassSet::union(ClassSetUnion {
2342 span: Span::new(union.span.start, union.span.start),
2343 items: vec![],
2344 }),
2345 };
2346 Ok((set, union))
2347 }
2348
2349 #[inline(never)]
2350 fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
2351 assert_eq!(self.char(), '[');
2352 let start = self.pos();
2354 let mut negated = false;
2355 if !self.bump() || self.char() != ':' {
2356 self.parser().pos.set(start);
2357 return None;
2358 }
2359 if !self.bump() {
2360 self.parser().pos.set(start);
2361 return None;
2362 }
2363 if self.char() == '^' {
2364 negated = true;
2365 if !self.bump() {
2366 self.parser().pos.set(start);
2367 return None;
2368 }
2369 }
2370 let name_start = self.offset();
2371 while self.char() != ':' && self.bump() {}
2372 if self.is_eof() {
2373 self.parser().pos.set(start);
2374 return None;
2375 }
2376 let name = &self.pattern()[name_start..self.offset()];
2377 if !self.bump_if(":]") {
2378 self.parser().pos.set(start);
2379 return None;
2380 }
2381 let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
2382 Some(kind) => kind,
2383 None => {
2384 self.parser().pos.set(start);
2385 return None;
2386 }
2387 };
2388 Some(ClassAscii {
2389 span: Span::new(start, self.pos()),
2390 kind,
2391 negated,
2392 })
2393 }
2394
2395 #[inline(never)]
2396 fn parse_unicode_class(&self) -> Result<ClassUnicode> {
2397 assert!(self.char() == 'p' || self.char() == 'P');
2398
2399 let mut scratch = self.parser().scratch.borrow_mut();
2400 scratch.clear();
2401
2402 let negated = self.char() == 'P';
2403 if !self.bump_and_bump_space() {
2404 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2405 }
2406 let (start, kind) = if self.char() == '{' {
2407 let start = self.span_char().end;
2408 while self.bump_and_bump_space() && self.char() != '}' {
2409 scratch.push(self.char());
2410 }
2411 if self.is_eof() {
2412 return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2413 }
2414 assert_eq!(self.char(), '}');
2415 self.bump();
2416
2417 let name = scratch.as_str();
2418 if let Some(i) = name.find("!=") {
2419 (
2420 start,
2421 ClassUnicodeKind::NamedValue {
2422 op: ClassUnicodeOpKind::NotEqual,
2423 name: name[..i].to_string(),
2424 value: name[i + 2..].to_string(),
2425 },
2426 )
2427 } else if let Some(i) = name.find(':') {
2428 (
2429 start,
2430 ClassUnicodeKind::NamedValue {
2431 op: ClassUnicodeOpKind::Colon,
2432 name: name[..i].to_string(),
2433 value: name[i + 1..].to_string(),
2434 },
2435 )
2436 } else if let Some(i) = name.find('=') {
2437 (
2438 start,
2439 ClassUnicodeKind::NamedValue {
2440 op: ClassUnicodeOpKind::Equal,
2441 name: name[..i].to_string(),
2442 value: name[i + 1..].to_string(),
2443 },
2444 )
2445 } else {
2446 (start, ClassUnicodeKind::Named(name.to_string()))
2447 }
2448 } else {
2449 let start = self.pos();
2450 let c = self.char();
2451 if c == '\\' {
2452 return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
2453 }
2454 self.bump_and_bump_space();
2455 let kind = ClassUnicodeKind::OneLetter(c);
2456 (start, kind)
2457 };
2458 Ok(ClassUnicode {
2459 span: Span::new(start, self.pos()),
2460 negated,
2461 kind,
2462 })
2463 }
2464
2465 #[inline(never)]
2466 fn parse_perl_class(&self) -> ClassPerl {
2467 let c = self.char();
2468 let span = self.span_char();
2469 self.bump();
2470 let (negated, kind) = match c {
2471 'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
2472 'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
2473 's' => (false, regex_syntax::ast::ClassPerlKind::Space),
2474 'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
2475 'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
2476 'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
2477 c => panic!("expected valid Perl class but got '{}'", c),
2478 };
2479 ClassPerl {
2480 span,
2481 kind,
2482 negated,
2483 }
2484 }
2485}
2486
2487pub fn parse_ast<'s>(
2488 tb: &mut TB<'s>,
2489 pattern: &'s str,
2490) -> std::result::Result<NodeId, ResharpError> {
2491 let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
2492 let result = p.parse(tb);
2493 result
2494}