resharp_parser/
lib.rs

1//! Parser for resharp regex patterns.
2//!
3//! Converts regex pattern strings into the node representation used by resharp-algebra.
4
5#![warn(dead_code)]
6pub mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind};
10use regex_syntax::{
11    ast::{
12        ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13        ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14        HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15    },
16    hir::{
17        self,
18        translate::{Translator, TranslatorBuilder},
19    },
20    utf8::Utf8Sequences,
21};
22use resharp_algebra::NodeId;
23
24type TB<'s> = resharp_algebra::RegexBuilder;
25
/// Pattern-wide switches that apply to the whole expression; filled in from
/// `EngineOptions`.
pub struct PatternFlags {
    /// When `true`, `\w`/`\d`/`\s` match full Unicode; when `false`, ASCII only.
    pub unicode: bool,
    /// When `true`, `\w` covers every Unicode word character, including those
    /// encoded as 3- and 4-byte sequences.
    pub full_unicode: bool,
    /// Match case-insensitively across the whole pattern.
    pub case_insensitive: bool,
    /// Let `.` match `\n` as well (so it behaves like `_`).
    pub dot_matches_new_line: bool,
    /// Permit insignificant whitespace and `#` comments inside the pattern.
    pub ignore_whitespace: bool,
}

impl Default for PatternFlags {
    /// Unicode-aware classes are enabled; every other flag starts disabled.
    fn default() -> Self {
        PatternFlags {
            // The only flag that is on by default.
            unicode: true,
            full_unicode: false,
            case_insensitive: false,
            dot_matches_new_line: false,
            ignore_whitespace: false,
        }
    }
}
51
/// Classification of a position/character with respect to "word" characters.
///
/// NOTE(review): the consumers of this enum are outside this section;
/// presumably it drives word-boundary handling -- confirm against the users.
#[derive(Debug, Clone, Copy, PartialEq)]
enum WordCharKind {
    Word,
    NonWord,
    MaybeWord,
    MaybeNonWord,
    Unknown,
    Edge,
}
61
/// Returns `true` when `b` is an ASCII letter, an ASCII digit, or `_`.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
}
65
66#[derive(Clone, Debug, Eq, PartialEq)]
67enum Primitive {
68    Literal(Literal),
69    Assertion(ast::Assertion),
70    Dot(Span),
71    Top(Span),
72    Perl(ClassPerl),
73    Unicode(ClassUnicode),
74}
75
76impl Primitive {
77    fn span(&self) -> &Span {
78        match *self {
79            Primitive::Literal(ref x) => &x.span,
80            Primitive::Assertion(ref x) => &x.span,
81            Primitive::Dot(ref span) => span,
82            Primitive::Top(ref span) => span,
83            Primitive::Perl(ref x) => &x.span,
84            Primitive::Unicode(ref x) => &x.span,
85        }
86    }
87
88    fn into_ast(self) -> Ast {
89        match self {
90            Primitive::Literal(lit) => Ast::literal(lit),
91            Primitive::Assertion(assert) => Ast::assertion(assert),
92            Primitive::Dot(span) => Ast::dot(span),
93            Primitive::Top(span) => Ast::top(span),
94            Primitive::Perl(cls) => Ast::class_perl(cls),
95            Primitive::Unicode(cls) => Ast::class_unicode(cls),
96        }
97    }
98
99    fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
100        use self::Primitive::*;
101        use regex_syntax::ast::ClassSetItem;
102
103        match self {
104            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
105            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
106            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
107            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
108        }
109    }
110
111    fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
112        use self::Primitive::*;
113
114        match self {
115            Literal(lit) => Ok(lit),
116            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
117        }
118    }
119}
120
/// A value that is one of two alternatives.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Either<L, R> {
    /// The first alternative.
    Left(L),
    /// The second alternative.
    Right(R),
}
126
127#[derive(Clone, Debug, Eq, PartialEq)]
128pub struct ResharpError {
129    /// The kind of error.
130    pub kind: ErrorKind,
131    /// The original pattern that the parser generated the error from. Every
132    /// span in an error is a valid range into this string.
133    pattern: String,
134    /// The span of this error.
135    pub span: Span,
136}
137
138impl std::fmt::Display for ResharpError {
139    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
140        write!(f, "{:?}: {:?}", self.kind, self.span)
141    }
142}
143impl std::error::Error for ResharpError {}
144
145type Result<T> = core::result::Result<T, ResharpError>;
146
147#[derive(Clone, Debug)]
148enum GroupState {
149    /// This state is pushed whenever an opening group is found.
150    Group {
151        /// The concatenation immediately preceding the opening group.
152        concat: Concat,
153        /// The group that has been opened. Its sub-AST is always empty.
154        group: ast::Group,
155        /// Whether this group has the `x` flag enabled or not.
156        ignore_whitespace: bool,
157    },
158    /// This state is pushed whenever a new alternation branch is found. If
159    /// an alternation branch is found and this state is at the top of the
160    /// stack, then this state should be modified to include the new
161    /// alternation.
162    Alternation(ast::Alternation),
163    Intersection(ast::Intersection),
164}
165
166#[derive(Clone, Debug)]
167enum ClassState {
168    /// This state is pushed whenever an opening bracket is found.
169    Open {
170        /// The union of class items immediately preceding this class.
171        union: regex_syntax::ast::ClassSetUnion,
172        /// The class that has been opened. Typically this just corresponds
173        /// to the `[`, but it can also include `[^` since `^` indicates
174        /// negation of the class.
175        set: regex_syntax::ast::ClassBracketed,
176    },
177    /// This state is pushed when a operator is seen. When popped, the stored
178    /// set becomes the left hand side of the operator.
179    Op {
180        /// The type of the operation, i.e., &&, -- or ~~.
181        kind: regex_syntax::ast::ClassSetBinaryOpKind,
182        /// The left-hand side of the operator.
183        lhs: regex_syntax::ast::ClassSet,
184    },
185}
186
187/// RE# syntax parser based on the regex-syntax crate.
188pub struct ResharpParser<'s> {
189    perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
190    unicode_classes: resharp_algebra::UnicodeClassCache,
191    pub translator: regex_syntax::hir::translate::Translator,
192    pub pattern: &'s str,
193    pos: Cell<Position>,
194    capture_index: Cell<u32>,
195    octal: bool,
196    empty_min_range: bool,
197    ignore_whitespace: Cell<bool>,
198    dot_all: Cell<bool>,
199    global_unicode: bool,
200    global_full_unicode: bool,
201    global_case_insensitive: bool,
202    comments: RefCell<Vec<ast::Comment>>,
203    stack_group: RefCell<Vec<GroupState>>,
204    stack_class: RefCell<Vec<ClassState>>,
205    capture_names: RefCell<Vec<ast::CaptureName>>,
206    scratch: RefCell<String>,
207}
208
209fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
210    result.map_err(|e| {
211        if e.kind == from {
212            ResharpError {
213                kind: to,
214                pattern: e.pattern,
215                span: e.span,
216            }
217        } else {
218            e
219        }
220    })
221}
222
/// Returns `true` when `c` may appear in a capture-group name.
///
/// The first character must be `_` or alphabetic. Later characters may also
/// be digits (any alphanumeric), `.`, `[` or `]`.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
230
/// Returns `true` when `c` has special meaning in resharp pattern syntax and
/// therefore needs a backslash to be matched literally.
pub fn is_meta_character(c: char) -> bool {
    const META: [char; 19] = [
        '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$', '#', '&', '-',
        '~', '_',
    ];
    META.contains(&c)
}
254
255/// escapes all resharp meta characters in `text`.
256pub fn escape(text: &str) -> String {
257    let mut buf = String::new();
258    escape_into(text, &mut buf);
259    buf
260}
261
262/// escapes all resharp meta characters in `text` and appends to `buf`.
263pub fn escape_into(text: &str, buf: &mut String) {
264    buf.reserve(text.len());
265    for c in text.chars() {
266        if is_meta_character(c) {
267            buf.push('\\');
268        }
269        buf.push(c);
270    }
271}
272
273pub fn is_escapeable_character(c: char) -> bool {
274    // Certainly escapeable if it's a meta character.
275    if is_meta_character(c) {
276        return true;
277    }
278    // Any character that isn't ASCII is definitely not escapeable. There's
279    // no real need to allow things like \☃ right?
280    if !c.is_ascii() {
281        return false;
282    }
283    // Otherwise, we basically say that everything is escapeable unless it's a
284    // letter or digit. Things like \3 are either octal (when enabled) or an
285    // error, and we should keep it that way. Otherwise, letters are reserved
286    // for adding new syntax in a backwards compatible way.
287    match c {
288        '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
289        // While not currently supported, we keep these as not escapeable to
290        // give us some flexibility with respect to supporting the \< and
291        // \> word boundary assertions in the future. By rejecting them as
292        // escapeable, \< and \> will result in a parse error. Thus, we can
293        // turn them into something else in the future without it being a
294        // backwards incompatible change.
295        //
296        // OK, now we support \< and \>, and we need to retain them as *not*
297        // escapeable here since the escape sequence is significant.
298        '<' | '>' => false,
299        _ => true,
300    }
301}
302
/// Returns `true` when `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
306
307impl<'s> ResharpParser<'s> {
308    fn default_translator_builder(&self) -> TranslatorBuilder {
309        let mut trb = TranslatorBuilder::new();
310        trb.unicode(self.global_unicode);
311        trb.utf8(false);
312        trb.case_insensitive(self.global_case_insensitive);
313        trb
314    }
315
316    pub fn new(pattern: &'s str) -> Self {
317        Self::with_flags(pattern, &PatternFlags::default())
318    }
319
320    pub fn with_flags(pattern: &'s str, flags: &PatternFlags) -> Self {
321        let mut trb = TranslatorBuilder::new();
322        trb.unicode(flags.unicode);
323        trb.utf8(false);
324        trb.case_insensitive(flags.case_insensitive);
325        Self {
326            translator: trb.build(),
327            pattern,
328            perl_classes: vec![],
329            unicode_classes: resharp_algebra::UnicodeClassCache::default(),
330            pos: Cell::new(Position::new(0, 0, 0)),
331            capture_index: Cell::new(0),
332            octal: false,
333            empty_min_range: false,
334            ignore_whitespace: Cell::new(flags.ignore_whitespace),
335            dot_all: Cell::new(flags.dot_matches_new_line),
336            global_unicode: flags.unicode || flags.full_unicode,
337            global_full_unicode: flags.full_unicode,
338            global_case_insensitive: flags.case_insensitive,
339            comments: RefCell::new(vec![]),
340            stack_group: RefCell::new(vec![]),
341            stack_class: RefCell::new(vec![]),
342            capture_names: RefCell::new(vec![]),
343            scratch: RefCell::new(String::new()),
344        }
345    }
346
347    /// Return a reference to the parser state.
348    fn parser(&'_ self) -> &'_ ResharpParser<'_> {
349        self
350    }
351
352    /// Return a reference to the pattern being parsed.
353    fn pattern(&self) -> &str {
354        self.pattern
355    }
356
357    /// Create a new error with the given span and error type.
358    fn error(&self, span: Span, kind: ast::ErrorKind) -> ResharpError {
359        ResharpError {
360            kind,
361            pattern: self.pattern().to_string(),
362            span,
363        }
364    }
365
366    fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ResharpError {
367        self.error(
368            Span::splat(self.pos()),
369            ast::ErrorKind::UnsupportedResharpRegex,
370        )
371    }
372
373    /// Return the current offset of the parser.
374    ///
375    /// The offset starts at `0` from the beginning of the regular expression
376    /// pattern string.
377    fn offset(&self) -> usize {
378        self.parser().pos.get().offset
379    }
380
381    /// Return the current line number of the parser.
382    ///
383    /// The line number starts at `1`.
384    fn line(&self) -> usize {
385        self.parser().pos.get().line
386    }
387
388    /// Return the current column of the parser.
389    ///
390    /// The column number starts at `1` and is reset whenever a `\n` is seen.
391    fn column(&self) -> usize {
392        self.parser().pos.get().column
393    }
394
395    /// Return the next capturing index. Each subsequent call increments the
396    /// internal index.
397    ///
398    /// The span given should correspond to the location of the opening
399    /// parenthesis.
400    ///
401    /// If the capture limit is exceeded, then an error is returned.
402    fn next_capture_index(&self, span: Span) -> Result<u32> {
403        let current = self.parser().capture_index.get();
404        let i = current
405            .checked_add(1)
406            .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
407        self.parser().capture_index.set(i);
408        Ok(i)
409    }
410
411    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
412        let mut names = self.parser().capture_names.borrow_mut();
413        match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
414            Err(i) => {
415                names.insert(i, cap.clone());
416                Ok(())
417            }
418            Ok(i) => Err(self.error(
419                cap.span,
420                ast::ErrorKind::GroupNameDuplicate {
421                    original: names[i].span,
422                },
423            )),
424        }
425    }
426
427    fn ignore_whitespace(&self) -> bool {
428        self.parser().ignore_whitespace.get()
429    }
430
431    fn char(&self) -> char {
432        self.char_at(self.offset())
433    }
434
435    fn char_at(&self, i: usize) -> char {
436        self.pattern()[i..]
437            .chars()
438            .next()
439            .unwrap_or_else(|| panic!("expected char at offset {}", i))
440    }
441
442    fn bump(&self) -> bool {
443        if self.is_eof() {
444            return false;
445        }
446        let Position {
447            mut offset,
448            mut line,
449            mut column,
450        } = self.pos();
451        if self.char() == '\n' {
452            line = line.checked_add(1).unwrap();
453            column = 1;
454        } else {
455            column = column.checked_add(1).unwrap();
456        }
457        offset += self.char().len_utf8();
458        self.parser().pos.set(Position {
459            offset,
460            line,
461            column,
462        });
463        self.pattern()[self.offset()..].chars().next().is_some()
464    }
465
466    fn bump_if(&self, prefix: &str) -> bool {
467        if self.pattern()[self.offset()..].starts_with(prefix) {
468            for _ in 0..prefix.chars().count() {
469                self.bump();
470            }
471            true
472        } else {
473            false
474        }
475    }
476
477    fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
478        if self.bump_if("?=") {
479            return Some((true, true));
480        }
481        if self.bump_if("?!") {
482            return Some((true, false));
483        }
484        if self.bump_if("?<=") {
485            return Some((false, true));
486        }
487        if self.bump_if("?<!") {
488            return Some((false, false));
489        }
490        None
491    }
492
493    fn bump_and_bump_space(&self) -> bool {
494        if !self.bump() {
495            return false;
496        }
497        self.bump_space();
498        !self.is_eof()
499    }
500
501    fn bump_space(&self) {
502        if !self.ignore_whitespace() {
503            return;
504        }
505        while !self.is_eof() {
506            if self.char().is_whitespace() {
507                self.bump();
508            } else if self.char() == '#' {
509                let start = self.pos();
510                let mut comment_text = String::new();
511                self.bump();
512                while !self.is_eof() {
513                    let c = self.char();
514                    self.bump();
515                    if c == '\n' {
516                        break;
517                    }
518                    comment_text.push(c);
519                }
520                let comment = ast::Comment {
521                    span: Span::new(start, self.pos()),
522                    comment: comment_text,
523                };
524                self.parser().comments.borrow_mut().push(comment);
525            } else {
526                break;
527            }
528        }
529    }
530
531    /// Peek at the next character in the input without advancing the parser.
532    ///
533    /// If the input has been exhausted, then this returns `None`.
534    fn peek(&self) -> Option<char> {
535        if self.is_eof() {
536            return None;
537        }
538        self.pattern()[self.offset() + self.char().len_utf8()..]
539            .chars()
540            .next()
541    }
542
543    /// Like peek, but will ignore spaces when the parser is in whitespace
544    /// insensitive mode.
545    fn peek_space(&self) -> Option<char> {
546        if !self.ignore_whitespace() {
547            return self.peek();
548        }
549        if self.is_eof() {
550            return None;
551        }
552        let mut start = self.offset() + self.char().len_utf8();
553        let mut in_comment = false;
554        for (i, c) in self.pattern()[start..].char_indices() {
555            if c.is_whitespace() {
556                continue;
557            } else if !in_comment && c == '#' {
558                in_comment = true;
559            } else if in_comment && c == '\n' {
560                in_comment = false;
561            } else {
562                start += i;
563                break;
564            }
565        }
566        self.pattern()[start..].chars().next()
567    }
568
569    /// Returns true if the next call to `bump` would return false.
570    fn is_eof(&self) -> bool {
571        self.offset() == self.pattern().len()
572    }
573
574    /// Return the current position of the parser, which includes the offset,
575    /// line and column.
576    fn pos(&self) -> Position {
577        self.parser().pos.get()
578    }
579
580    /// Create a span at the current position of the parser. Both the start
581    /// and end of the span are set.
582    fn span(&self) -> Span {
583        Span::splat(self.pos())
584    }
585
586    /// Create a span that covers the current character.
587    fn span_char(&self) -> Span {
588        let mut next = Position {
589            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
590            line: self.line(),
591            column: self.column().checked_add(1).unwrap(),
592        };
593        if self.char() == '\n' {
594            next.line += 1;
595            next.column = 1;
596        }
597        Span::new(self.pos(), next)
598    }
599
600    /// Parse and push a single alternation on to the parser's internal stack.
601    /// If the top of the stack already has an alternation, then add to that
602    /// instead of pushing a new one.
603    ///
604    /// The concatenation given corresponds to a single alternation branch.
605    /// The concatenation returned starts the next branch and is empty.
606    ///
607    /// This assumes the parser is currently positioned at `|` and will advance
608    /// the parser to the character following `|`.
609    #[inline(never)]
610    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
611        assert_eq!(self.char(), '|');
612        concat.span.end = self.pos();
613        self.push_or_add_alternation(concat);
614        self.bump();
615        Ok(ast::Concat {
616            span: self.span(),
617            asts: vec![],
618        })
619    }
620
621    /// Pushes or adds the given branch of an alternation to the parser's
622    /// internal stack of state.
623    fn push_or_add_alternation(&self, concat: Concat) {
624        use self::GroupState::*;
625
626        let mut stack = self.parser().stack_group.borrow_mut();
627        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
628            alts.asts.push(concat.into_ast());
629            return;
630        }
631        stack.push(Alternation(ast::Alternation {
632            span: Span::new(concat.span.start, self.pos()),
633            asts: vec![concat.into_ast()],
634        }));
635    }
636
637    #[inline(never)]
638    fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
639        assert_eq!(self.char(), '&');
640        concat.span.end = self.pos();
641        self.push_or_add_intersect(concat);
642        self.bump();
643        Ok(Concat {
644            span: self.span(),
645            asts: vec![],
646        })
647    }
648
649    /// Pushes or adds the given branch of an alternation to the parser's
650    /// internal stack of state.
651    fn push_or_add_intersect(&self, concat: Concat) {
652        use self::GroupState::*;
653
654        let mut stack = self.parser().stack_group.borrow_mut();
655        if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
656            alts.asts.push(concat.into_ast());
657            return;
658        }
659        stack.push(Intersection(ast::Intersection {
660            span: Span::new(concat.span.start, self.pos()),
661            asts: vec![concat.into_ast()],
662        }));
663    }
664
665    /// Parse and push a group AST (and its parent concatenation) on to the
666    /// parser's internal stack. Return a fresh concatenation corresponding
667    /// to the group's sub-AST.
668    ///
669    /// If a set of flags was found (with no group), then the concatenation
670    /// is returned with that set of flags added.
671    ///
672    /// This assumes that the parser is currently positioned on the opening
673    /// parenthesis. It advances the parser to the character at the start
674    /// of the sub-expression (or adjoining expression).
675    ///
676    /// If there was a problem parsing the start of the group, then an error
677    /// is returned.
678    #[inline(never)]
679    fn push_group(&self, mut concat: Concat) -> Result<Concat> {
680        assert_eq!(self.char(), '(');
681        match self.parse_group()? {
682            Either::Left(set) => {
683                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
684                if let Some(v) = ignore {
685                    self.parser().ignore_whitespace.set(v);
686                }
687
688                concat.asts.push(Ast::flags(set));
689                Ok(concat)
690            }
691            Either::Right(group) => {
692                let old_ignore_whitespace = self.ignore_whitespace();
693                let new_ignore_whitespace = group
694                    .flags()
695                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
696                    .unwrap_or(old_ignore_whitespace);
697                self.parser()
698                    .stack_group
699                    .borrow_mut()
700                    .push(GroupState::Group {
701                        concat,
702                        group,
703                        ignore_whitespace: old_ignore_whitespace,
704                    });
705                self.parser().ignore_whitespace.set(new_ignore_whitespace);
706                Ok(Concat {
707                    span: self.span(),
708                    asts: vec![],
709                })
710            }
711        }
712    }
713
714    #[inline(never)]
715    fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
716        assert_eq!(self.char(), '~');
717        self.bump();
718        if self.is_eof() || self.char() != '(' {
719            return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
720        }
721        let open_span = self.span_char();
722        self.bump();
723        let group = ast::Group {
724            span: open_span,
725            kind: ast::GroupKind::Complement,
726            ast: Box::new(Ast::empty(self.span())),
727        };
728
729        let old_ignore_whitespace = self.ignore_whitespace();
730        let new_ignore_whitespace = group
731            .flags()
732            .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
733            .unwrap_or(old_ignore_whitespace);
734        self.parser()
735            .stack_group
736            .borrow_mut()
737            .push(GroupState::Group {
738                concat,
739                group,
740                ignore_whitespace: old_ignore_whitespace,
741            });
742        self.parser().ignore_whitespace.set(new_ignore_whitespace);
743        Ok(Concat {
744            span: self.span(),
745            asts: vec![],
746        })
747    }
748
749    /// Pop a group AST from the parser's internal stack and set the group's
750    /// AST to the given concatenation. Return the concatenation containing
751    /// the group.
752    ///
753    /// This assumes that the parser is currently positioned on the closing
754    /// parenthesis and advances the parser to the character following the `)`.
755    ///
756    /// If no such group could be popped, then an unopened group error is
757    /// returned.
758    #[inline(never)]
759    fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
760        use self::GroupState::*;
761        assert_eq!(self.char(), ')');
762        let mut stack = self.parser().stack_group.borrow_mut();
763        let topstack = stack.pop();
764
765        let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
766            Some(Group {
767                concat,
768                group,
769                ignore_whitespace,
770            }) => (concat, group, ignore_whitespace, None),
771            Some(Alternation(alt)) => match stack.pop() {
772                Some(Group {
773                    concat,
774                    group,
775                    ignore_whitespace,
776                }) => (
777                    concat,
778                    group,
779                    ignore_whitespace,
780                    Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
781                ),
782                None | Some(Alternation(_)) | Some(Intersection(_)) => {
783                    return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
784                }
785            },
786            Some(Intersection(int)) => match stack.pop() {
787                Some(Group {
788                    concat,
789                    group,
790                    ignore_whitespace,
791                }) => (
792                    concat,
793                    group,
794                    ignore_whitespace,
795                    Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
796                ),
797                None | Some(Alternation(_)) | Some(Intersection(_)) => {
798                    return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
799                }
800            },
801
802            None => {
803                return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
804            }
805        };
806        self.parser().ignore_whitespace.set(ignore_whitespace);
807        group_concat.span.end = self.pos();
808        self.bump();
809        group.span.end = self.pos();
810        match alt {
811            Some(Either::Left(mut alt)) => {
812                alt.span.end = group_concat.span.end;
813                alt.asts.push(group_concat.into_ast());
814                group.ast = Box::new(alt.into_ast());
815            }
816            Some(Either::Right(mut int)) => {
817                int.span.end = group_concat.span.end;
818                int.asts.push(group_concat.into_ast());
819                group.ast = Box::new(int.into_ast());
820            }
821            None => {
822                group.ast = Box::new(group_concat.into_ast());
823            }
824        }
825
826        if group.kind == GroupKind::Complement {
827            let complement = ast::Complement {
828                span: self.span(),
829                ast: group.ast,
830            };
831            prior_concat.asts.push(Ast::complement(complement));
832        }
833        // ignore groups for now
834        else {
835            prior_concat.asts.push(Ast::group(group));
836        }
837        Ok(prior_concat)
838    }
839
840    /// Pop the last state from the parser's internal stack, if it exists, and
841    /// add the given concatenation to it. There either must be no state or a
842    /// single alternation item on the stack. Any other scenario produces an
843    /// error.
844    ///
845    /// This assumes that the parser has advanced to the end.
846    #[inline(never)]
847    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
848        concat.span.end = self.pos();
849        let mut stack = self.parser().stack_group.borrow_mut();
850        let ast = match stack.pop() {
851            None => Ok(concat.into_ast()),
852            Some(GroupState::Alternation(mut alt)) => {
853                alt.span.end = self.pos();
854                alt.asts.push(concat.into_ast());
855                Ok(Ast::alternation(alt))
856            }
857            Some(GroupState::Intersection(mut int)) => {
858                int.span.end = self.pos();
859                int.asts.push(concat.into_ast());
860
861                Ok(Ast::intersection(int))
862            }
863            Some(GroupState::Group { group, .. }) => {
864                return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
865            }
866        };
867        // If we try to pop again, there should be nothing.
868        match stack.pop() {
869            None => ast,
870            Some(GroupState::Alternation(_)) => {
871                // This unreachable is unfortunate. This case can't happen
872                // because the only way we can be here is if there were two
873                // `GroupState::Alternation`s adjacent in the parser's stack,
874                // which we guarantee to never happen because we never push a
875                // `GroupState::Alternation` if one is already at the top of
876                // the stack.
877                unreachable!()
878            }
879            Some(GroupState::Intersection(_)) => {
880                unreachable!()
881            }
882            Some(GroupState::Group { group, .. }) => {
883                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
884            }
885        }
886    }
887
888    /// Parse the opening of a character class and push the current class
889    /// parsing context onto the parser's stack. This assumes that the parser
890    /// is positioned at an opening `[`. The given union should correspond to
891    /// the union of set items built up before seeing the `[`.
892    ///
893    /// If there was a problem parsing the opening of the class, then an error
894    /// is returned. Otherwise, a new union of set items for the class is
895    /// returned (which may be populated with either a `]` or a `-`).
896    #[inline(never)]
897    fn push_class_open(
898        &self,
899        parent_union: regex_syntax::ast::ClassSetUnion,
900    ) -> Result<regex_syntax::ast::ClassSetUnion> {
901        assert_eq!(self.char(), '[');
902
903        let (nested_set, nested_union) = self.parse_set_class_open()?;
904        self.parser()
905            .stack_class
906            .borrow_mut()
907            .push(ClassState::Open {
908                union: parent_union,
909                set: nested_set,
910            });
911        Ok(nested_union)
912    }
913
914    /// Parse the end of a character class set and pop the character class
915    /// parser stack. The union given corresponds to the last union built
916    /// before seeing the closing `]`. The union returned corresponds to the
917    /// parent character class set with the nested class added to it.
918    ///
919    /// This assumes that the parser is positioned at a `]` and will advance
920    /// the parser to the byte immediately following the `]`.
921    ///
922    /// If the stack is empty after popping, then this returns the final
923    /// "top-level" character class AST (where a "top-level" character class
924    /// is one that is not nested inside any other character class).
925    ///
926    /// If there is no corresponding opening bracket on the parser's stack,
927    /// then an error is returned.
928    #[inline(never)]
929    fn pop_class(
930        &self,
931        nested_union: regex_syntax::ast::ClassSetUnion,
932    ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
933        assert_eq!(self.char(), ']');
934
935        let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
936        let prevset = self.pop_class_op(item);
937        let mut stack = self.parser().stack_class.borrow_mut();
938        match stack.pop() {
939            None => {
940                // We can never observe an empty stack:
941                //
942                // 1) We are guaranteed to start with a non-empty stack since
943                //    the character class parser is only initiated when it sees
944                //    a `[`.
945                // 2) If we ever observe an empty stack while popping after
946                //    seeing a `]`, then we signal the character class parser
947                //    to terminate.
948                panic!("unexpected empty character class stack")
949            }
950            Some(ClassState::Op { .. }) => {
951                // This panic is unfortunate, but this case is impossible
952                // since we already popped the Op state if one exists above.
953                // Namely, every push to the class parser stack is guarded by
954                // whether an existing Op is already on the top of the stack.
955                // If it is, the existing Op is modified. That is, the stack
956                // can never have consecutive Op states.
957                panic!("unexpected ClassState::Op")
958            }
959            Some(ClassState::Open { mut union, mut set }) => {
960                self.bump();
961                set.span.end = self.pos();
962                set.kind = prevset;
963                if stack.is_empty() {
964                    Ok(Either::Right(set))
965                } else {
966                    union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
967                    Ok(Either::Left(union))
968                }
969            }
970        }
971    }
972
973    /// Return an "unclosed class" error whose span points to the most
974    /// recently opened class.
975    ///
976    /// This should only be called while parsing a character class.
977    #[inline(never)]
978    fn unclosed_class_error(&self) -> ResharpError {
979        for state in self.parser().stack_class.borrow().iter().rev() {
980            if let ClassState::Open { ref set, .. } = *state {
981                return self.error(set.span, ast::ErrorKind::ClassUnclosed);
982            }
983        }
984        // We are guaranteed to have a non-empty stack with at least
985        // one open bracket, so we should never get here.
986        panic!("no open character class found")
987    }
988
989    /// Push the current set of class items on to the class parser's stack as
990    /// the left hand side of the given operator.
991    ///
992    /// A fresh set union is returned, which should be used to build the right
993    /// hand side of this operator.
994    #[inline(never)]
995    fn push_class_op(
996        &self,
997        next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
998        next_union: regex_syntax::ast::ClassSetUnion,
999    ) -> regex_syntax::ast::ClassSetUnion {
1000        let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
1001        let new_lhs = self.pop_class_op(item);
1002        self.parser().stack_class.borrow_mut().push(ClassState::Op {
1003            kind: next_kind,
1004            lhs: new_lhs,
1005        });
1006        regex_syntax::ast::ClassSetUnion {
1007            span: self.span(),
1008            items: vec![],
1009        }
1010    }
1011
1012    /// Pop a character class set from the character class parser stack. If the
1013    /// top of the stack is just an item (not an operation), then return the
1014    /// given set unchanged. If the top of the stack is an operation, then the
1015    /// given set will be used as the rhs of the operation on the top of the
1016    /// stack. In that case, the binary operation is returned as a set.
1017    #[inline(never)]
1018    fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
1019        let mut stack = self.parser().stack_class.borrow_mut();
1020        let (kind, lhs) = match stack.pop() {
1021            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
1022            Some(state @ ClassState::Open { .. }) => {
1023                stack.push(state);
1024                return rhs;
1025            }
1026            None => unreachable!(),
1027        };
1028        let span = Span::new(lhs.span().start, rhs.span().end);
1029        regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
1030            span,
1031            kind,
1032            lhs: Box::new(lhs),
1033            rhs: Box::new(rhs),
1034        })
1035    }
1036
1037    fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
1038        match hir.kind() {
1039            hir::HirKind::Empty => Ok(NodeId::EPS),
1040            hir::HirKind::Literal(l) => {
1041                if l.0.len() == 1 {
1042                    let node = tb.mk_u8(l.0[0]);
1043                    Ok(node)
1044                } else {
1045                    let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
1046                    let conc = tb.mk_concats(ws.iter().copied());
1047                    Ok(conc)
1048                }
1049            }
1050            hir::HirKind::Class(class) => match class {
1051                hir::Class::Unicode(class_unicode) => {
1052                    let ranges = class_unicode.ranges();
1053                    let mut nodes = Vec::new();
1054                    for range in ranges {
1055                        for seq in Utf8Sequences::new(range.start(), range.end()) {
1056                            let sl = seq.as_slice();
1057                            let bytes: Vec<_> = sl.iter().map(|s| (s.start, s.end)).collect();
1058                            let node = match bytes.len() {
1059                                1 => tb.mk_range_u8(bytes[0].0, bytes[0].1),
1060                                n => {
1061                                    let last = tb.mk_range_u8(bytes[n - 1].0, bytes[n - 1].1);
1062                                    let mut conc = last;
1063                                    for i in (0..n - 1).rev() {
1064                                        let b = tb.mk_range_u8(bytes[i].0, bytes[i].1);
1065                                        conc = tb.mk_concat(b, conc);
1066                                    }
1067                                    conc
1068                                }
1069                            };
1070                            nodes.push(node);
1071                        }
1072                    }
1073                    let merged = tb.mk_unions(nodes.into_iter());
1074                    Ok(merged)
1075                }
1076                hir::Class::Bytes(class_bytes) => {
1077                    let ranges = class_bytes.ranges();
1078                    let mut result = NodeId::BOT;
1079                    for range in ranges {
1080                        let start = range.start();
1081                        let end = range.end();
1082                        let node = tb.mk_range_u8(start, end);
1083                        result = tb.mk_union(result, node);
1084                    }
1085                    Ok(result)
1086                }
1087            },
1088            hir::HirKind::Look(_) => Err(self.error(
1089                Span::splat(self.pos()),
1090                ast::ErrorKind::UnsupportedResharpRegex,
1091            )),
1092            hir::HirKind::Repetition(_) => Err(self.error(
1093                Span::splat(self.pos()),
1094                ast::ErrorKind::UnsupportedResharpRegex,
1095            )),
1096            hir::HirKind::Capture(_) => Err(self.error(
1097                Span::splat(self.pos()),
1098                ast::ErrorKind::UnsupportedResharpRegex,
1099            )),
1100            hir::HirKind::Concat(body) => {
1101                let mut result = NodeId::EPS;
1102                for child in body {
1103                    let node = self.hir_to_node_id(child, tb)?;
1104                    result = tb.mk_concat(result, node);
1105                }
1106                Ok(result)
1107            }
1108            hir::HirKind::Alternation(_) => Err(self.error(
1109                Span::splat(self.pos()),
1110                ast::ErrorKind::UnsupportedResharpRegex,
1111            )),
1112        }
1113    }
1114
1115    fn translate_ast_to_hir(
1116        &mut self,
1117        orig_ast: &regex_syntax::ast::Ast,
1118        tb: &mut TB<'s>,
1119    ) -> Result<NodeId> {
1120        match self.translator.translate("", orig_ast) {
1121            Err(_) => Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1122            Ok(hir) => self.hir_to_node_id(&hir, tb),
1123        }
1124    }
1125
1126    fn translator_to_node_id(
1127        &mut self,
1128        orig_ast: &regex_syntax::ast::Ast,
1129        translator: &mut Option<Translator>,
1130        tb: &mut TB<'s>,
1131    ) -> Result<NodeId> {
1132        match translator {
1133            Some(tr) => {
1134                let hir = tr
1135                    .translate("", orig_ast)
1136                    .map_err(|e| self.unsupported_error(e))?;
1137                self.hir_to_node_id(&hir, tb)
1138            }
1139            None => self.translate_ast_to_hir(orig_ast, tb),
1140        }
1141    }
1142
1143    fn get_class(
1144        &mut self,
1145        negated: bool,
1146        kind: regex_syntax::ast::ClassPerlKind,
1147        tb: &mut TB<'s>,
1148    ) -> Result<NodeId> {
1149        let w = self
1150            .perl_classes
1151            .iter()
1152            .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1153        match w {
1154            Some((_, _, value)) => Ok(*value),
1155            None => {
1156                let translated = if self.global_unicode {
1157                    match kind {
1158                        regex_syntax::ast::ClassPerlKind::Word => {
1159                            if self.global_full_unicode {
1160                                self.unicode_classes.ensure_word_full(tb);
1161                            } else {
1162                                self.unicode_classes.ensure_word(tb);
1163                            }
1164                            if negated {
1165                                self.unicode_classes.non_word
1166                            } else {
1167                                self.unicode_classes.word
1168                            }
1169                        }
1170                        regex_syntax::ast::ClassPerlKind::Digit => {
1171                            if self.global_full_unicode {
1172                                self.unicode_classes.ensure_digit_full(tb);
1173                            } else {
1174                                self.unicode_classes.ensure_digit(tb);
1175                            }
1176                            if negated {
1177                                self.unicode_classes.non_digit
1178                            } else {
1179                                self.unicode_classes.digit
1180                            }
1181                        }
1182                        regex_syntax::ast::ClassPerlKind::Space => {
1183                            self.unicode_classes.ensure_space(tb);
1184                            if negated {
1185                                self.unicode_classes.non_space
1186                            } else {
1187                                self.unicode_classes.space
1188                            }
1189                        }
1190                    }
1191                } else {
1192                    let pos = match kind {
1193                        regex_syntax::ast::ClassPerlKind::Word => {
1194                            let az = tb.mk_range_u8(b'a', b'z');
1195                            let big = tb.mk_range_u8(b'A', b'Z');
1196                            let dig = tb.mk_range_u8(b'0', b'9');
1197                            let us = tb.mk_u8(b'_');
1198                            tb.mk_unions([az, big, dig, us].into_iter())
1199                        }
1200                        regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1201                        regex_syntax::ast::ClassPerlKind::Space => {
1202                            let sp = tb.mk_u8(b' ');
1203                            let tab = tb.mk_u8(b'\t');
1204                            let nl = tb.mk_u8(b'\n');
1205                            let cr = tb.mk_u8(b'\r');
1206                            let ff = tb.mk_u8(0x0C);
1207                            let vt = tb.mk_u8(0x0B);
1208                            tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1209                        }
1210                    };
1211                    if negated {
1212                        tb.mk_compl(pos)
1213                    } else {
1214                        pos
1215                    }
1216                };
1217                self.perl_classes.push((negated, kind, translated));
1218                Ok(translated)
1219            }
1220        }
1221    }
1222
1223    fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1224        use WordCharKind::*;
1225        match ast {
1226            Ast::Literal(lit) => {
1227                if is_word_byte(lit.c as u8) {
1228                    Word
1229                } else {
1230                    NonWord
1231                }
1232            }
1233            Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1234                (&regex_syntax::ast::ClassPerlKind::Word, false) => Word,
1235                (&regex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1236                (&regex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1237                (&regex_syntax::ast::ClassPerlKind::Space, true) => Unknown,
1238                (&regex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1239                (&regex_syntax::ast::ClassPerlKind::Digit, true) => Unknown,
1240            },
1241            Ast::Dot(_) | Ast::Top(_) => Unknown,
1242            Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1243            Ast::Concat(c) if !c.asts.is_empty() => {
1244                let edge = if left { c.asts.len() - 1 } else { 0 };
1245                let kind = Self::word_char_kind(&c.asts[edge], left);
1246                match kind {
1247                    MaybeWord => {
1248                        let dir: isize = if left { -1 } else { 1 };
1249                        match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1250                            Word => Word,
1251                            _ => MaybeWord,
1252                        }
1253                    }
1254                    MaybeNonWord => {
1255                        let dir: isize = if left { -1 } else { 1 };
1256                        match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1257                            NonWord => NonWord,
1258                            _ => MaybeNonWord,
1259                        }
1260                    }
1261                    other => other,
1262                }
1263            }
1264            Ast::Alternation(alt) if !alt.asts.is_empty() => {
1265                let first = Self::word_char_kind(&alt.asts[0], left);
1266                if alt.asts[1..]
1267                    .iter()
1268                    .all(|a| Self::word_char_kind(a, left) == first)
1269                {
1270                    first
1271                } else {
1272                    Unknown
1273                }
1274            }
1275            Ast::Repetition(r) => {
1276                let inner = Self::word_char_kind(&r.ast, left);
1277                let nullable = matches!(
1278                    &r.op.kind,
1279                    ast::RepetitionKind::ZeroOrMore
1280                        | ast::RepetitionKind::ZeroOrOne
1281                        | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1282                );
1283                if nullable {
1284                    match inner {
1285                        Word => MaybeWord,
1286                        NonWord => MaybeNonWord,
1287                        _ => Unknown,
1288                    }
1289                } else {
1290                    inner
1291                }
1292            }
1293            Ast::Lookaround(la) => Self::word_char_kind(&la.ast, left),
1294            _ => Unknown,
1295        }
1296    }
1297
1298    // ok to return None here, it's only an optimization
1299    fn edge_class_ast(ast: &Ast, left: bool) -> Option<&Ast> {
1300        match ast {
1301            Ast::Literal(_)
1302            | Ast::ClassPerl(_)
1303            | Ast::ClassBracketed(_)
1304            | Ast::ClassUnicode(_)
1305            | Ast::Dot(_)
1306            | Ast::Top(_) => Some(ast),
1307            Ast::Group(g) => Self::edge_class_ast(&g.ast, left),
1308            Ast::Concat(c) if !c.asts.is_empty() => {
1309                Self::edge_class_ast(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1310            }
1311            Ast::Repetition(r) => {
1312                let nullable = matches!(
1313                    &r.op.kind,
1314                    ast::RepetitionKind::ZeroOrMore
1315                        | ast::RepetitionKind::ZeroOrOne
1316                        | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1317                );
1318                if nullable {
1319                    None
1320                } else {
1321                    Self::edge_class_ast(&r.ast, left)
1322                }
1323            }
1324            _ => None,
1325        }
1326    }
1327
1328    fn resolve_word_kind(
1329        &mut self,
1330        asts: &[Ast],
1331        idx: usize,
1332        dir: isize,
1333        translator: &mut Option<Translator>,
1334        tb: &mut TB<'s>,
1335        word_id: NodeId,
1336        not_word_id: NodeId,
1337    ) -> Result<WordCharKind> {
1338        use WordCharKind::*;
1339        let fast = Self::concat_neighbor_kind(asts, idx, dir);
1340        if fast != Unknown {
1341            return Ok(fast);
1342        }
1343        let neighbor_idx = (idx as isize + dir) as usize;
1344        let node = if let Some(edge) = Self::edge_class_ast(&asts[neighbor_idx], dir < 0) {
1345            self.ast_to_node_id(edge, translator, tb)?
1346        } else {
1347            // check if \w_* (starts-with-word) or \W_* (starts-with-non-word) subsumes it.
1348            let neighbor_node = self.ast_to_node_id(&asts[neighbor_idx], translator, tb)?;
1349            let mut neighbor_node = tb
1350                .try_elim_lookarounds(neighbor_node)
1351                .ok_or_else(|| self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))?;
1352            if dir < 0 {
1353                neighbor_node = tb.reverse(neighbor_node).or_else(|_| {
1354                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1355                })?;
1356            }
1357            let word_prefix = tb.mk_concat(word_id, NodeId::TS);
1358            let non_word_prefix = tb.mk_concat(not_word_id, NodeId::TS);
1359            return if tb.subsumes(word_prefix, neighbor_node) == Some(true) {
1360                Ok(Word)
1361            } else if tb.subsumes(non_word_prefix, neighbor_node) == Some(true) {
1362                Ok(NonWord)
1363            } else {
1364                Ok(Unknown)
1365            };
1366        };
1367        if tb.subsumes(word_id, node) == Some(true) {
1368            Ok(Word)
1369        } else if tb.subsumes(not_word_id, node) == Some(true) {
1370            Ok(NonWord)
1371        } else {
1372            Ok(Unknown)
1373        }
1374    }
1375
1376    fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1377        use WordCharKind::*;
1378        let next = idx as isize + dir;
1379        if next < 0 || next >= asts.len() as isize {
1380            return Edge;
1381        }
1382        let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1383        match kind {
1384            MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1385                Word => Word,
1386                _ => Unknown,
1387            },
1388            MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1389                NonWord => NonWord,
1390                _ => Unknown,
1391            },
1392            other => other,
1393        }
1394    }
1395
1396    fn rewrite_word_boundary_in_concat(
1397        &mut self,
1398        asts: &[Ast],
1399        idx: usize,
1400        translator: &mut Option<Translator>,
1401        tb: &mut TB<'s>,
1402    ) -> Result<(NodeId, usize)> {
1403        use WordCharKind::*;
1404        let (word_id, not_word_id) = if self.global_full_unicode {
1405            self.unicode_classes.ensure_word_full(tb);
1406            (self.unicode_classes.word, self.unicode_classes.non_word)
1407        } else if self.global_unicode {
1408            self.unicode_classes.ensure_word(tb);
1409            (self.unicode_classes.word, self.unicode_classes.non_word)
1410        } else {
1411            let az = tb.mk_range_u8(b'a', b'z');
1412            let big = tb.mk_range_u8(b'A', b'Z');
1413            let dig = tb.mk_range_u8(b'0', b'9');
1414            let us = tb.mk_u8(b'_');
1415            let w = tb.mk_unions([az, big, dig, us].into_iter());
1416            (w, tb.mk_compl(w))
1417        };
1418        let left = self.resolve_word_kind(asts, idx, -1, translator, tb, word_id, not_word_id)?;
1419        let right = self.resolve_word_kind(asts, idx, 1, translator, tb, word_id, not_word_id)?;
1420        match (left, right) {
1421            (NonWord, Word) | (Word, NonWord) => Ok((NodeId::EPS, idx + 1)),
1422            (Word, _) => {
1423                let neg = tb.mk_neg_lookahead(word_id, 0);
1424                Ok((neg, idx + 1))
1425            }
1426            (NonWord, _) => {
1427                let tail = tb.mk_concat(word_id, NodeId::TS);
1428                self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1429            }
1430            (_, Word) => Ok((tb.mk_neg_lookbehind(word_id), idx + 1)),
1431            (_, NonWord) => Ok((tb.mk_lookbehind(word_id, NodeId::MISSING), idx + 1)),
1432            // TODO: (Unknown, Unknown) is possible via make_full_word_boundary but
1433            // the full expansion (lb(\w)·la(\W) | lb(\W)·la(\w)) is too expensive
1434            // reimplement once/if the builder is more optimized
1435            _ => Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)),
1436        }
1437    }
1438
1439    fn merge_boundary_with_following_lookaheads(
1440        &mut self,
1441        asts: &[Ast],
1442        wb_idx: usize,
1443        boundary_tail: NodeId,
1444        translator: &mut Option<Translator>,
1445        tb: &mut TB<'s>,
1446    ) -> Result<(NodeId, usize)> {
1447        let mut next = wb_idx + 1;
1448        let mut la_bodies = vec![boundary_tail];
1449        while next < asts.len() {
1450            match &asts[next] {
1451                Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1452                    let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1453                    la_bodies.push(tb.mk_concat(body, NodeId::TS));
1454                    next += 1;
1455                }
1456                _ => break,
1457            }
1458        }
1459        let merged = tb.mk_inters(la_bodies.into_iter());
1460        Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1461    }
1462
1463    fn ast_to_node_id(
1464        &mut self,
1465        ast: &Ast,
1466        translator: &mut Option<Translator>,
1467        tb: &mut TB<'s>,
1468    ) -> Result<NodeId> {
1469        match ast {
1470            Ast::Empty(_) => Ok(NodeId::EPS),
1471            Ast::Flags(f) => {
1472                if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1473                    return Err(self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex));
1474                }
1475                let mut translator_builder = self.default_translator_builder();
1476                if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1477                    translator_builder.case_insensitive(state);
1478                }
1479                if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1480                    translator_builder.unicode(state);
1481                }
1482                if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1483                    self.dot_all.set(state);
1484                }
1485                let concat_translator = Some(translator_builder.build());
1486                *translator = concat_translator;
1487                Ok(NodeId::EPS)
1488            }
1489            Ast::Literal(l) => {
1490                let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
1491                self.translator_to_node_id(&ast_lit, translator, tb)
1492            }
1493            Ast::Top(_) => Ok(NodeId::TOP),
1494            Ast::Dot(_) => {
1495                if self.dot_all.get() {
1496                    Ok(NodeId::TOP)
1497                } else {
1498                    let hirv = hir::Hir::dot(hir::Dot::AnyByteExceptLF);
1499                    self.hir_to_node_id(&hirv, tb)
1500                }
1501            }
1502            Ast::Assertion(a) => match &a.kind {
1503                ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
1504                ast::AssertionKind::EndText => Ok(NodeId::END),
1505                ast::AssertionKind::WordBoundary => {
1506                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1507                }
1508                ast::AssertionKind::NotWordBoundary => {
1509                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1510                }
1511                ast::AssertionKind::StartLine => {
1512                    let left = NodeId::BEGIN;
1513                    let right = tb.mk_u8(b'\n');
1514                    let union = tb.mk_union(left, right);
1515                    Ok(tb.mk_lookbehind(union, NodeId::MISSING))
1516                }
1517                ast::AssertionKind::EndLine => {
1518                    let left = NodeId::END;
1519                    let right = tb.mk_u8(b'\n');
1520                    let union = tb.mk_union(left, right);
1521                    Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
1522                }
1523                ast::AssertionKind::WordBoundaryStart => {
1524                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1525                }
1526                ast::AssertionKind::WordBoundaryEnd => {
1527                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1528                }
1529                ast::AssertionKind::WordBoundaryStartAngle => {
1530                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1531                }
1532                ast::AssertionKind::WordBoundaryEndAngle => {
1533                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1534                }
1535                ast::AssertionKind::WordBoundaryStartHalf => {
1536                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1537                }
1538                ast::AssertionKind::WordBoundaryEndHalf => {
1539                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1540                }
1541            },
1542            Ast::ClassUnicode(c) => {
1543                let tmp = regex_syntax::ast::ClassUnicode {
1544                    span: c.span,
1545                    negated: c.negated,
1546                    kind: c.kind.clone(),
1547                };
1548                if !c.negated {
1549                    if let regex_syntax::ast::ClassUnicodeKind::Named(s) = &c.kind {
1550                        match s.as_str() {
1551                            // \p{ascii} for ascii, \p{ascii}&\p{Letter} => [A-Za-z]
1552                            "ascii" => return Ok(tb.mk_range_u8(0, 127)),
1553                            // restricts matches to valid utf8, \p{utf8}*&~(a) => non a, but valid utf8
1554                            "utf8" => {
1555                                let ascii = tb.mk_range_u8(0, 127);
1556                                let beta = tb.mk_range_u8(128, 0xBF);
1557                                let c0 = tb.mk_range_u8(0xC0, 0xDF);
1558                                let c0s = tb.mk_concats([c0, beta].into_iter());
1559                                let e0 = tb.mk_range_u8(0xE0, 0xEF);
1560                                let e0s = tb.mk_concats([e0, beta, beta].into_iter());
1561                                let f0 = tb.mk_range_u8(0xF0, 0xF7);
1562                                let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
1563                                let merged = tb.mk_unions([ascii, c0s, e0s, f0s].into_iter());
1564                                return Ok(tb.mk_star(merged));
1565                            }
1566                            "hex" => {
1567                                let nums = tb.mk_range_u8(b'0', b'9');
1568                                let lets = tb.mk_range_u8(b'a', b'f');
1569                                let lets2 = tb.mk_range_u8(b'A', b'F');
1570                                let merged = tb.mk_unions([nums, lets, lets2].into_iter());
1571                                return Ok(merged);
1572                            }
1573                            _ => {}
1574                        }
1575                    };
1576                }
1577
1578                let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
1579                self.translator_to_node_id(&orig_ast, translator, tb)
1580            }
1581            Ast::ClassPerl(c) => self.get_class(c.negated, c.kind.clone(), tb),
1582            Ast::ClassBracketed(c) => match &c.kind {
1583                regex_syntax::ast::ClassSet::Item(_) => {
1584                    let tmp = regex_syntax::ast::ClassBracketed {
1585                        span: c.span,
1586                        negated: c.negated,
1587                        kind: c.kind.clone(),
1588                    };
1589                    let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
1590                    self.translator_to_node_id(&orig_ast, translator, tb)
1591                }
1592                regex_syntax::ast::ClassSet::BinaryOp(_) => {
1593                    Err(self.error(c.span, ast::ErrorKind::UnsupportedResharpRegex))
1594                }
1595            },
1596            Ast::Repetition(r) => {
1597                let body = self.ast_to_node_id(&r.ast, translator, tb);
1598                match body {
1599                    Ok(body) => match &r.op.kind {
1600                        ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
1601                        ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
1602                        ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
1603                        ast::RepetitionKind::Range(r) => match r {
1604                            ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
1605                            ast::RepetitionRange::AtLeast(n) => {
1606                                let rep = tb.mk_repeat(body, *n, *n);
1607                                let st = tb.mk_star(body);
1608                                Ok(tb.mk_concat(rep, st))
1609                            }
1610
1611                            ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
1612                        },
1613                    },
1614                    Err(_) => body,
1615                }
1616            }
1617            Ast::Lookaround(g) => {
1618                let body = self.ast_to_node_id(&g.ast, translator, tb)?;
1619                match g.kind {
1620                    ast::LookaroundKind::PositiveLookahead => {
1621                        Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
1622                    }
1623                    ast::LookaroundKind::PositiveLookbehind => {
1624                        Ok(tb.mk_lookbehind(body, NodeId::MISSING))
1625                    }
1626                    ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
1627                    ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
1628                }
1629            }
1630            Ast::Group(g) => {
1631                if let ast::GroupKind::NonCapturing(ref flags) = g.kind {
1632                    if !flags.items.is_empty() {
1633                        let mut translator_builder = self.default_translator_builder();
1634                        if let Some(state) = flags.flag_state(ast::Flag::CaseInsensitive) {
1635                            translator_builder.case_insensitive(state);
1636                        }
1637                        if let Some(state) = flags.flag_state(ast::Flag::Unicode) {
1638                            translator_builder.unicode(state);
1639                        }
1640                        let saved_dot_all = self.dot_all.get();
1641                        if let Some(state) = flags.flag_state(ast::Flag::DotMatchesNewLine) {
1642                            self.dot_all.set(state);
1643                        }
1644                        let mut scoped = Some(translator_builder.build());
1645                        let result = self.ast_to_node_id(&g.ast, &mut scoped, tb);
1646                        self.dot_all.set(saved_dot_all);
1647                        return result;
1648                    }
1649                }
1650                self.ast_to_node_id(&g.ast, translator, tb)
1651            }
1652            Ast::Alternation(a) => {
1653                let mut children = vec![];
1654                for ast in &a.asts {
1655                    match self.ast_to_node_id(ast, translator, tb) {
1656                        Ok(node_id) => children.push(node_id),
1657                        Err(err) => return Err(err),
1658                    }
1659                }
1660                Ok(tb.mk_unions(children.iter().copied()))
1661            }
1662            Ast::Concat(c) => {
1663                let mut concat_translator: Option<Translator> = None;
1664                let mut children = vec![];
1665                let mut i = 0;
1666                while i < c.asts.len() {
1667                    let ast = &c.asts[i];
1668                    match ast {
1669                        Ast::Flags(f) => {
1670                            if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1671                                return Err(
1672                                    self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex)
1673                                );
1674                            }
1675                            let mut translator_builder = self.default_translator_builder();
1676                            if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1677                                translator_builder.case_insensitive(state);
1678                            }
1679                            if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1680                                translator_builder.unicode(state);
1681                            }
1682                            if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1683                                self.dot_all.set(state);
1684                            }
1685                            concat_translator = Some(translator_builder.build());
1686                            *translator = concat_translator.clone();
1687                            i += 1;
1688                            continue;
1689                        }
1690                        Ast::Assertion(a) if a.kind == ast::AssertionKind::WordBoundary => {
1691                            let node =
1692                                self.rewrite_word_boundary_in_concat(&c.asts, i, translator, tb)?;
1693                            children.push(node.0);
1694                            i = node.1; // skip consumed lookaheads
1695                            continue;
1696                        }
1697                        _ => {}
1698                    }
1699                    match concat_translator {
1700                        Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
1701                            Ok(node_id) => children.push(node_id),
1702                            Err(err) => return Err(err),
1703                        },
1704                        None => match self.ast_to_node_id(ast, translator, tb) {
1705                            Ok(node_id) => children.push(node_id),
1706                            Err(err) => return Err(err),
1707                        },
1708                    }
1709                    i += 1;
1710                }
1711                Ok(tb.mk_concats(children.iter().cloned()))
1712            }
1713            Ast::Intersection(intersection) => {
1714                let mut children = vec![];
1715                for ast in &intersection.asts {
1716                    match self.ast_to_node_id(ast, translator, tb) {
1717                        Ok(node_id) => children.push(node_id),
1718                        Err(err) => return Err(err),
1719                    }
1720                }
1721                Ok(tb.mk_inters(children.into_iter()))
1722            }
1723            Ast::Complement(complement) => {
1724                let body = self.ast_to_node_id(&complement.ast, translator, tb);
1725                body.map(|x| tb.mk_compl(x))
1726            }
1727        }
1728    }
1729
1730    fn parse_inner(&mut self) -> Result<Ast> {
1731        let mut concat = Concat {
1732            span: self.span(),
1733            asts: vec![],
1734        };
1735        loop {
1736            self.bump_space();
1737            if self.is_eof() {
1738                break;
1739            }
1740            match self.char() {
1741                '(' => concat = self.push_group(concat)?,
1742                ')' => concat = self.pop_group(concat)?,
1743                '|' => concat = self.push_alternate(concat)?,
1744                '&' => concat = self.push_intersect(concat)?,
1745                '~' => concat = self.push_compl_group(concat)?,
1746                '[' => {
1747                    let class = self.parse_set_class()?;
1748                    concat.asts.push(Ast::class_bracketed(class));
1749                }
1750                '?' => {
1751                    concat =
1752                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
1753                }
1754                '*' => {
1755                    concat =
1756                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
1757                }
1758                '+' => {
1759                    concat =
1760                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
1761                }
1762                '{' => {
1763                    concat = self.parse_counted_repetition(concat)?;
1764                }
1765                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1766            }
1767        }
1768        self.pop_group_end(concat)
1769    }
1770
1771    /// Parse the regular expression and return an abstract syntax tree with
1772    /// all of the comments found in the pattern.
1773    fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1774        let ast = self.parse_inner()?;
1775        self.ast_to_node_id(&ast, &mut None, tb)
1776    }
1777
1778    #[inline(never)]
1779    fn parse_uncounted_repetition(
1780        &self,
1781        mut concat: ast::Concat,
1782        kind: ast::RepetitionKind,
1783    ) -> Result<ast::Concat> {
1784        // assert!(self.char() == '?' || self.char() == '*' || self.char() == '+');
1785        let op_start = self.pos();
1786        let ast = match concat.asts.pop() {
1787            Some(ast) => ast,
1788            None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1789        };
1790        match ast {
1791            Ast::Empty(_) | Ast::Flags(_) => {
1792                return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1793            }
1794            _ => {}
1795        }
1796        if self.bump() && self.char() == '?' {
1797            return Err(self.error(
1798                Span::new(op_start, self.pos()),
1799                ast::ErrorKind::UnsupportedLazyQuantifier,
1800            ));
1801        }
1802        concat.asts.push(Ast::repetition(ast::Repetition {
1803            span: ast.span().with_end(self.pos()),
1804            op: ast::RepetitionOp {
1805                span: Span::new(op_start, self.pos()),
1806                kind,
1807            },
1808            greedy: true,
1809            ast: Box::new(ast),
1810        }));
1811        Ok(concat)
1812    }
1813
1814    #[inline(never)]
1815    fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
1816        assert!(self.char() == '{');
1817        let start = self.pos();
1818        let ast = match concat.asts.pop() {
1819            Some(ast) => ast,
1820            None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1821        };
1822        match ast {
1823            Ast::Empty(_) | Ast::Flags(_) => {
1824                return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1825            }
1826            _ => {}
1827        }
1828        if !self.bump_and_bump_space() {
1829            return Err(self.error(
1830                Span::new(start, self.pos()),
1831                ast::ErrorKind::RepetitionCountUnclosed,
1832            ));
1833        }
1834        let count_start = specialize_err(
1835            self.parse_decimal(),
1836            ast::ErrorKind::DecimalEmpty,
1837            ast::ErrorKind::RepetitionCountDecimalEmpty,
1838        );
1839        if self.is_eof() {
1840            return Err(self.error(
1841                Span::new(start, self.pos()),
1842                ast::ErrorKind::RepetitionCountUnclosed,
1843            ));
1844        }
1845        let range = if self.char() == ',' {
1846            if !self.bump_and_bump_space() {
1847                return Err(self.error(
1848                    Span::new(start, self.pos()),
1849                    ast::ErrorKind::RepetitionCountUnclosed,
1850                ));
1851            }
1852            if self.char() != '}' {
1853                let count_start = match count_start {
1854                    Ok(c) => c,
1855                    Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
1856                        if self.parser().empty_min_range {
1857                            0
1858                        } else {
1859                            return Err(err);
1860                        }
1861                    }
1862                    err => err?,
1863                };
1864                let count_end = specialize_err(
1865                    self.parse_decimal(),
1866                    ast::ErrorKind::DecimalEmpty,
1867                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1868                )?;
1869                ast::RepetitionRange::Bounded(count_start, count_end)
1870            } else {
1871                ast::RepetitionRange::AtLeast(count_start?)
1872            }
1873        } else {
1874            ast::RepetitionRange::Exactly(count_start?)
1875        };
1876
1877        if self.is_eof() || self.char() != '}' {
1878            return Err(self.error(
1879                Span::new(start, self.pos()),
1880                ast::ErrorKind::RepetitionCountUnclosed,
1881            ));
1882        }
1883
1884        if self.bump_and_bump_space() && self.char() == '?' {
1885            return Err(self.error(
1886                Span::new(start, self.pos()),
1887                ast::ErrorKind::UnsupportedLazyQuantifier,
1888            ));
1889        }
1890
1891        let op_span = Span::new(start, self.pos());
1892        if !range.is_valid() {
1893            return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
1894        }
1895        concat.asts.push(Ast::repetition(ast::Repetition {
1896            span: ast.span().with_end(self.pos()),
1897            op: ast::RepetitionOp {
1898                span: op_span,
1899                kind: ast::RepetitionKind::Range(range),
1900            },
1901            greedy: true,
1902            ast: Box::new(ast),
1903        }));
1904        Ok(concat)
1905    }
1906
1907    #[inline(never)]
1908    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1909        assert_eq!(self.char(), '(');
1910        let open_span = self.span_char();
1911        self.bump();
1912        self.bump_space();
1913        if let Some((ahead, pos)) = self.is_lookaround_prefix() {
1914            let kind = match (pos, ahead) {
1915                (true, true) => LookaroundKind::PositiveLookahead,
1916                (true, false) => LookaroundKind::PositiveLookbehind,
1917                (false, true) => LookaroundKind::NegativeLookahead,
1918                (false, false) => LookaroundKind::NegativeLookbehind,
1919            };
1920            return Ok(Either::Right(ast::Group {
1921                span: open_span,
1922                kind: ast::GroupKind::Lookaround(kind),
1923                ast: Box::new(Ast::empty(self.span())),
1924            }));
1925        }
1926        let inner_span = self.span();
1927        let mut starts_with_p = true;
1928        if self.bump_if("?P<") || {
1929            starts_with_p = false;
1930            self.bump_if("?<")
1931        } {
1932            let capture_index = self.next_capture_index(open_span)?;
1933            let name = self.parse_capture_name(capture_index)?;
1934            Ok(Either::Right(ast::Group {
1935                span: open_span,
1936                kind: ast::GroupKind::CaptureName {
1937                    starts_with_p,
1938                    name,
1939                },
1940                ast: Box::new(Ast::empty(self.span())),
1941            }))
1942        } else if self.bump_if("?") {
1943            if self.is_eof() {
1944                return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
1945            }
1946            let flags = self.parse_flags()?;
1947            let char_end = self.char();
1948            self.bump();
1949            if char_end == ')' {
1950                // We don't allow empty flags, e.g., `(?)`. We instead
1951                // interpret it as a repetition operator missing its argument.
1952                if flags.items.is_empty() {
1953                    return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
1954                }
1955                Ok(Either::Left(ast::SetFlags {
1956                    span: Span {
1957                        end: self.pos(),
1958                        ..open_span
1959                    },
1960                    flags,
1961                }))
1962            } else {
1963                assert_eq!(char_end, ':');
1964                Ok(Either::Right(ast::Group {
1965                    span: open_span,
1966                    kind: ast::GroupKind::NonCapturing(flags),
1967                    ast: Box::new(Ast::empty(self.span())),
1968                }))
1969            }
1970        } else {
1971            let capture_index = self.next_capture_index(open_span)?;
1972            Ok(Either::Right(ast::Group {
1973                span: open_span,
1974                kind: ast::GroupKind::CaptureIndex(capture_index),
1975                ast: Box::new(Ast::empty(self.span())),
1976            }))
1977        }
1978    }
1979
1980    #[inline(never)]
1981    fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
1982        if self.is_eof() {
1983            return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1984        }
1985        let start = self.pos();
1986        loop {
1987            if self.char() == '>' {
1988                break;
1989            }
1990            if !is_capture_char(self.char(), self.pos() == start) {
1991                return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
1992            }
1993            if !self.bump() {
1994                break;
1995            }
1996        }
1997        let end = self.pos();
1998        if self.is_eof() {
1999            return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
2000        }
2001        assert_eq!(self.char(), '>');
2002        self.bump();
2003        let name = &self.pattern()[start.offset..end.offset];
2004        if name.is_empty() {
2005            return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
2006        }
2007        let capname = ast::CaptureName {
2008            span: Span::new(start, end),
2009            name: name.to_string(),
2010            index: capture_index,
2011        };
2012        self.add_capture_name(&capname)?;
2013        Ok(capname)
2014    }
2015
2016    #[inline(never)]
2017    fn parse_flags(&self) -> Result<ast::Flags> {
2018        let mut flags = ast::Flags {
2019            span: self.span(),
2020            items: vec![],
2021        };
2022        let mut last_was_negation = None;
2023        while self.char() != ':' && self.char() != ')' {
2024            if self.char() == '-' {
2025                last_was_negation = Some(self.span_char());
2026                let item = ast::FlagsItem {
2027                    span: self.span_char(),
2028                    kind: ast::FlagsItemKind::Negation,
2029                };
2030                if let Some(i) = flags.add_item(item) {
2031                    return Err(self.error(
2032                        self.span_char(),
2033                        ast::ErrorKind::FlagRepeatedNegation {
2034                            original: flags.items[i].span,
2035                        },
2036                    ));
2037                }
2038            } else {
2039                last_was_negation = None;
2040                let item = ast::FlagsItem {
2041                    span: self.span_char(),
2042                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
2043                };
2044                if let Some(i) = flags.add_item(item) {
2045                    return Err(self.error(
2046                        self.span_char(),
2047                        ast::ErrorKind::FlagDuplicate {
2048                            original: flags.items[i].span,
2049                        },
2050                    ));
2051                }
2052            }
2053            if !self.bump() {
2054                return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
2055            }
2056        }
2057        if let Some(span) = last_was_negation {
2058            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
2059        }
2060        flags.span.end = self.pos();
2061        Ok(flags)
2062    }
2063
2064    #[inline(never)]
2065    fn parse_flag(&self) -> Result<ast::Flag> {
2066        match self.char() {
2067            'i' => Ok(ast::Flag::CaseInsensitive),
2068            'm' => Ok(ast::Flag::MultiLine),
2069            's' => Ok(ast::Flag::DotMatchesNewLine),
2070            'U' => Ok(ast::Flag::SwapGreed),
2071            'u' => Ok(ast::Flag::Unicode),
2072            'R' => Ok(ast::Flag::CRLF),
2073            'x' => Ok(ast::Flag::IgnoreWhitespace),
2074            _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
2075        }
2076    }
2077
2078    fn parse_primitive(&self) -> Result<Primitive> {
2079        match self.char() {
2080            '\\' => self.parse_escape(),
2081            '_' => {
2082                let ast = Primitive::Top(self.span_char());
2083                self.bump();
2084                Ok(ast)
2085            }
2086            '.' => {
2087                let ast = Primitive::Dot(self.span_char());
2088                self.bump();
2089                Ok(ast)
2090            }
2091            '^' => {
2092                let ast = Primitive::Assertion(ast::Assertion {
2093                    span: self.span_char(),
2094                    kind: ast::AssertionKind::StartLine,
2095                });
2096                self.bump();
2097                Ok(ast)
2098            }
2099            '$' => {
2100                let ast = Primitive::Assertion(ast::Assertion {
2101                    span: self.span_char(),
2102                    kind: ast::AssertionKind::EndLine,
2103                });
2104                self.bump();
2105                Ok(ast)
2106            }
2107            c => {
2108                let ast = Primitive::Literal(Literal {
2109                    span: self.span_char(),
2110                    kind: LiteralKind::Verbatim,
2111                    c,
2112                });
2113                self.bump();
2114                Ok(ast)
2115            }
2116        }
2117    }
2118
2119    #[inline(never)]
2120    fn parse_escape(&self) -> Result<Primitive> {
2121        assert_eq!(self.char(), '\\');
2122        let start = self.pos();
2123        if !self.bump() {
2124            return Err(self.error(
2125                Span::new(start, self.pos()),
2126                ast::ErrorKind::EscapeUnexpectedEof,
2127            ));
2128        }
2129        let c = self.char();
2130        // Put some of the more complicated routines into helpers.
2131        match c {
2132            '0'..='9' => {
2133                if !self.parser().octal {
2134                    return Err(self.error(
2135                        Span::new(start, self.span_char().end),
2136                        ast::ErrorKind::UnsupportedBackreference,
2137                    ));
2138                }
2139                let mut lit = self.parse_octal();
2140                lit.span.start = start;
2141                return Ok(Primitive::Literal(lit));
2142            }
2143            // '8'..='9' if !self.parser().octal => {
2144            //     return Err(self.error(
2145            //         Span::new(start, self.span_char().end),
2146            //         ast::ErrorKind::UnsupportedBackreference,
2147            //     ));
2148            // }
2149            'x' | 'u' | 'U' => {
2150                let mut lit = self.parse_hex()?;
2151                lit.span.start = start;
2152                return Ok(Primitive::Literal(lit));
2153            }
2154            'p' | 'P' => {
2155                let mut cls = self.parse_unicode_class()?;
2156                cls.span.start = start;
2157                return Ok(Primitive::Unicode(cls));
2158            }
2159            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
2160                let mut cls = self.parse_perl_class();
2161                cls.span.start = start;
2162                return Ok(Primitive::Perl(cls));
2163            }
2164            _ => {}
2165        }
2166
2167        // Handle all of the one letter sequences inline.
2168        self.bump();
2169        let span = Span::new(start, self.pos());
2170        if is_meta_character(c) {
2171            return Ok(Primitive::Literal(Literal {
2172                span,
2173                kind: LiteralKind::Meta,
2174                c,
2175            }));
2176        }
2177        if is_escapeable_character(c) {
2178            return Ok(Primitive::Literal(Literal {
2179                span,
2180                kind: LiteralKind::Superfluous,
2181                c,
2182            }));
2183        }
2184        let special = |kind, c| {
2185            Ok(Primitive::Literal(Literal {
2186                span,
2187                kind: LiteralKind::Special(kind),
2188                c,
2189            }))
2190        };
2191        match c {
2192            'a' => special(SpecialLiteralKind::Bell, '\x07'),
2193            'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
2194            't' => special(SpecialLiteralKind::Tab, '\t'),
2195            'n' => special(SpecialLiteralKind::LineFeed, '\n'),
2196            'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
2197            'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
2198            'A' => Ok(Primitive::Assertion(ast::Assertion {
2199                span,
2200                kind: ast::AssertionKind::StartText,
2201            })),
2202            'z' => Ok(Primitive::Assertion(ast::Assertion {
2203                span,
2204                kind: ast::AssertionKind::EndText,
2205            })),
2206            'b' => {
2207                let mut wb = ast::Assertion {
2208                    span,
2209                    kind: ast::AssertionKind::WordBoundary,
2210                };
2211                // After a \b, we "try" to parse things like \b{start} for
2212                // special word boundary assertions.
2213                if !self.is_eof() && self.char() == '{' {
2214                    if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
2215                        wb.kind = kind;
2216                        wb.span.end = self.pos();
2217                    }
2218                }
2219                Ok(Primitive::Assertion(wb))
2220            }
2221            'B' => Ok(Primitive::Assertion(ast::Assertion {
2222                span,
2223                kind: ast::AssertionKind::NotWordBoundary,
2224            })),
2225            '<' => Ok(Primitive::Assertion(ast::Assertion {
2226                span,
2227                kind: ast::AssertionKind::WordBoundaryStartAngle,
2228            })),
2229            '>' => Ok(Primitive::Assertion(ast::Assertion {
2230                span,
2231                kind: ast::AssertionKind::WordBoundaryEndAngle,
2232            })),
2233            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2234        }
2235    }
2236
2237    fn maybe_parse_special_word_boundary(
2238        &self,
2239        wb_start: Position,
2240    ) -> Result<Option<ast::AssertionKind>> {
2241        assert_eq!(self.char(), '{');
2242
2243        let is_valid_char = |c| matches!(c, 'A'..='Z' | 'a'..='z' | '-');
2244        let start = self.pos();
2245        if !self.bump_and_bump_space() {
2246            return Err(self.error(
2247                Span::new(wb_start, self.pos()),
2248                ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2249            ));
2250        }
2251        let start_contents = self.pos();
2252        // This is one of the critical bits: if the first non-whitespace
2253        // character isn't in [-A-Za-z] (i.e., this can't be a special word
2254        // boundary), then we bail and let the counted repetition parser deal
2255        // with this.
2256        if !is_valid_char(self.char()) {
2257            self.parser().pos.set(start);
2258            return Ok(None);
2259        }
2260
2261        // Now collect up our chars until we see a '}'.
2262        let mut scratch = self.parser().scratch.borrow_mut();
2263        scratch.clear();
2264        while !self.is_eof() && is_valid_char(self.char()) {
2265            scratch.push(self.char());
2266            self.bump_and_bump_space();
2267        }
2268        if self.is_eof() || self.char() != '}' {
2269            return Err(self.error(
2270                Span::new(start, self.pos()),
2271                ast::ErrorKind::SpecialWordBoundaryUnclosed,
2272            ));
2273        }
2274        let end = self.pos();
2275        self.bump();
2276        let kind = match scratch.as_str() {
2277            "start" => ast::AssertionKind::WordBoundaryStart,
2278            "end" => ast::AssertionKind::WordBoundaryEnd,
2279            "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2280            "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2281            _ => {
2282                return Err(self.error(
2283                    Span::new(start_contents, end),
2284                    ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2285                ))
2286            }
2287        };
2288        Ok(Some(kind))
2289    }
2290
2291    #[inline(never)]
2292    fn parse_octal(&self) -> Literal {
2293        assert!(self.parser().octal);
2294        assert!('0' <= self.char() && self.char() <= '7');
2295        let start = self.pos();
2296        // Parse up to two more digits.
2297        while self.bump()
2298            && '0' <= self.char()
2299            && self.char() <= '7'
2300            && self.pos().offset - start.offset <= 2
2301        {}
2302        let end = self.pos();
2303        let octal = &self.pattern()[start.offset..end.offset];
2304        // Parsing the octal should never fail since the above guarantees a
2305        // valid number.
2306        let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2307        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
2308        // invalid Unicode scalar values.
2309        let c = char::from_u32(codepoint).expect("Unicode scalar value");
2310        Literal {
2311            span: Span::new(start, end),
2312            kind: LiteralKind::Octal,
2313            c,
2314        }
2315    }
2316
2317    #[inline(never)]
2318    fn parse_hex(&self) -> Result<Literal> {
2319        assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2320
2321        let hex_kind = match self.char() {
2322            'x' => HexLiteralKind::X,
2323            'u' => HexLiteralKind::UnicodeShort,
2324            _ => HexLiteralKind::UnicodeLong,
2325        };
2326        if !self.bump_and_bump_space() {
2327            return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2328        }
2329        if self.char() == '{' {
2330            self.parse_hex_brace(hex_kind)
2331        } else {
2332            self.parse_hex_digits(hex_kind)
2333        }
2334    }
2335
2336    #[inline(never)]
2337    fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2338        let mut scratch = self.parser().scratch.borrow_mut();
2339        scratch.clear();
2340
2341        let start = self.pos();
2342        for i in 0..kind.digits() {
2343            if i > 0 && !self.bump_and_bump_space() {
2344                return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2345            }
2346            if !is_hex(self.char()) {
2347                return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2348            }
2349            scratch.push(self.char());
2350        }
2351        // The final bump just moves the parser past the literal, which may
2352        // be EOF.
2353        self.bump_and_bump_space();
2354        let end = self.pos();
2355        let hex = scratch.as_str();
2356        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2357            None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2358            Some(c) => Ok(Literal {
2359                span: Span::new(start, end),
2360                kind: LiteralKind::HexFixed(kind),
2361                c,
2362            }),
2363        }
2364    }
2365
2366    #[inline(never)]
2367    fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2368        let mut scratch = self.parser().scratch.borrow_mut();
2369        scratch.clear();
2370
2371        let brace_pos = self.pos();
2372        let start = self.span_char().end;
2373        while self.bump_and_bump_space() && self.char() != '}' {
2374            if !is_hex(self.char()) {
2375                return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2376            }
2377            scratch.push(self.char());
2378        }
2379        if self.is_eof() {
2380            return Err(self.error(
2381                Span::new(brace_pos, self.pos()),
2382                ast::ErrorKind::EscapeUnexpectedEof,
2383            ));
2384        }
2385        let end = self.pos();
2386        let hex = scratch.as_str();
2387        assert_eq!(self.char(), '}');
2388        self.bump_and_bump_space();
2389
2390        if hex.is_empty() {
2391            return Err(self.error(
2392                Span::new(brace_pos, self.pos()),
2393                ast::ErrorKind::EscapeHexEmpty,
2394            ));
2395        }
2396        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2397            None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2398            Some(c) => Ok(Literal {
2399                span: Span::new(start, self.pos()),
2400                kind: LiteralKind::HexBrace(kind),
2401                c,
2402            }),
2403        }
2404    }
2405
2406    fn parse_decimal(&self) -> Result<u32> {
2407        let mut scratch = self.parser().scratch.borrow_mut();
2408        scratch.clear();
2409
2410        while !self.is_eof() && self.char().is_whitespace() {
2411            self.bump();
2412        }
2413        let start = self.pos();
2414        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2415            scratch.push(self.char());
2416            self.bump_and_bump_space();
2417        }
2418        let span = Span::new(start, self.pos());
2419        while !self.is_eof() && self.char().is_whitespace() {
2420            self.bump_and_bump_space();
2421        }
2422        let digits = scratch.as_str();
2423        if digits.is_empty() {
2424            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2425        }
2426        match digits.parse::<u32>().ok() {
2427            Some(n) => Ok(n),
2428            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2429        }
2430    }
2431
2432    #[inline(never)]
2433    fn parse_set_class(&self) -> Result<ClassBracketed> {
2434        assert_eq!(self.char(), '[');
2435
2436        let mut union = ClassSetUnion {
2437            span: self.span(),
2438            items: vec![],
2439        };
2440        loop {
2441            self.bump_space();
2442            if self.is_eof() {
2443                return Err(self.unclosed_class_error());
2444            }
2445            match self.char() {
2446                '[' => {
2447                    // If we've already parsed the opening bracket, then
2448                    // attempt to treat this as the beginning of an ASCII
2449                    // class. If ASCII class parsing fails, then the parser
2450                    // backs up to `[`.
2451                    if !self.parser().stack_class.borrow().is_empty() {
2452                        if let Some(cls) = self.maybe_parse_ascii_class() {
2453                            union.push(ClassSetItem::Ascii(cls));
2454                            continue;
2455                        }
2456                    }
2457                    union = self.push_class_open(union)?;
2458                }
2459                ']' => match self.pop_class(union)? {
2460                    Either::Left(nested_union) => {
2461                        union = nested_union;
2462                    }
2463                    Either::Right(class) => return Ok(class),
2464                },
2465                '&' if self.peek() == Some('&') => {
2466                    assert!(self.bump_if("&&"));
2467                    union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
2468                }
2469                '-' if self.peek() == Some('-') => {
2470                    assert!(self.bump_if("--"));
2471                    union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
2472                }
2473                '~' if self.peek() == Some('~') => {
2474                    assert!(self.bump_if("~~"));
2475                    union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
2476                }
2477                _ => {
2478                    union.push(self.parse_set_class_range()?);
2479                }
2480            }
2481        }
2482    }
2483
2484    #[inline(never)]
2485    fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2486        let prim1 = self.parse_set_class_item()?;
2487        self.bump_space();
2488        if self.is_eof() {
2489            return Err(self.unclosed_class_error());
2490        }
2491        if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2492            return prim1.into_class_set_item(self);
2493        }
2494        if !self.bump_and_bump_space() {
2495            return Err(self.unclosed_class_error());
2496        }
2497        let prim2 = self.parse_set_class_item()?;
2498        let range = ClassSetRange {
2499            span: Span::new(prim1.span().start, prim2.span().end),
2500            start: prim1.into_class_literal(self)?,
2501            end: prim2.into_class_literal(self)?,
2502        };
2503        if !range.is_valid() {
2504            return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2505        }
2506        Ok(ClassSetItem::Range(range))
2507    }
2508
2509    #[inline(never)]
2510    fn parse_set_class_item(&self) -> Result<Primitive> {
2511        if self.char() == '\\' {
2512            self.parse_escape()
2513        } else {
2514            let x = Primitive::Literal(Literal {
2515                span: self.span_char(),
2516                kind: LiteralKind::Verbatim,
2517                c: self.char(),
2518            });
2519            self.bump();
2520            Ok(x)
2521        }
2522    }
2523
2524    #[inline(never)]
2525    fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2526        assert_eq!(self.char(), '[');
2527        let start = self.pos();
2528        if !self.bump_and_bump_space() {
2529            return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2530        }
2531
2532        let negated = if self.char() != '^' {
2533            false
2534        } else {
2535            if !self.bump_and_bump_space() {
2536                return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2537            }
2538            true
2539        };
2540        // Accept any number of `-` as literal `-`.
2541        let mut union = ClassSetUnion {
2542            span: self.span(),
2543            items: vec![],
2544        };
2545        while self.char() == '-' {
2546            union.push(ClassSetItem::Literal(Literal {
2547                span: self.span_char(),
2548                kind: LiteralKind::Verbatim,
2549                c: '-',
2550            }));
2551            if !self.bump_and_bump_space() {
2552                return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
2553            }
2554        }
2555        // If `]` is the *first* char in a set, then interpret it as a literal
2556        // `]`. That is, an empty class is impossible to write.
2557        if union.items.is_empty() && self.char() == ']' {
2558            union.push(ClassSetItem::Literal(Literal {
2559                span: self.span_char(),
2560                kind: LiteralKind::Verbatim,
2561                c: ']',
2562            }));
2563            if !self.bump_and_bump_space() {
2564                return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2565            }
2566        }
2567        let set = ClassBracketed {
2568            span: Span::new(start, self.pos()),
2569            negated,
2570            kind: ClassSet::union(ClassSetUnion {
2571                span: Span::new(union.span.start, union.span.start),
2572                items: vec![],
2573            }),
2574        };
2575        Ok((set, union))
2576    }
2577
2578    #[inline(never)]
2579    fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
2580        assert_eq!(self.char(), '[');
2581        // If parsing fails, then we back up the parser to this starting point.
2582        let start = self.pos();
2583        let mut negated = false;
2584        if !self.bump() || self.char() != ':' {
2585            self.parser().pos.set(start);
2586            return None;
2587        }
2588        if !self.bump() {
2589            self.parser().pos.set(start);
2590            return None;
2591        }
2592        if self.char() == '^' {
2593            negated = true;
2594            if !self.bump() {
2595                self.parser().pos.set(start);
2596                return None;
2597            }
2598        }
2599        let name_start = self.offset();
2600        while self.char() != ':' && self.bump() {}
2601        if self.is_eof() {
2602            self.parser().pos.set(start);
2603            return None;
2604        }
2605        let name = &self.pattern()[name_start..self.offset()];
2606        if !self.bump_if(":]") {
2607            self.parser().pos.set(start);
2608            return None;
2609        }
2610        let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
2611            Some(kind) => kind,
2612            None => {
2613                self.parser().pos.set(start);
2614                return None;
2615            }
2616        };
2617        Some(ClassAscii {
2618            span: Span::new(start, self.pos()),
2619            kind,
2620            negated,
2621        })
2622    }
2623
2624    #[inline(never)]
2625    fn parse_unicode_class(&self) -> Result<ClassUnicode> {
2626        assert!(self.char() == 'p' || self.char() == 'P');
2627
2628        let mut scratch = self.parser().scratch.borrow_mut();
2629        scratch.clear();
2630
2631        let negated = self.char() == 'P';
2632        if !self.bump_and_bump_space() {
2633            return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2634        }
2635        let (start, kind) = if self.char() == '{' {
2636            let start = self.span_char().end;
2637            while self.bump_and_bump_space() && self.char() != '}' {
2638                scratch.push(self.char());
2639            }
2640            if self.is_eof() {
2641                return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2642            }
2643            assert_eq!(self.char(), '}');
2644            self.bump();
2645
2646            let name = scratch.as_str();
2647            if let Some(i) = name.find("!=") {
2648                (
2649                    start,
2650                    ClassUnicodeKind::NamedValue {
2651                        op: ClassUnicodeOpKind::NotEqual,
2652                        name: name[..i].to_string(),
2653                        value: name[i + 2..].to_string(),
2654                    },
2655                )
2656            } else if let Some(i) = name.find(':') {
2657                (
2658                    start,
2659                    ClassUnicodeKind::NamedValue {
2660                        op: ClassUnicodeOpKind::Colon,
2661                        name: name[..i].to_string(),
2662                        value: name[i + 1..].to_string(),
2663                    },
2664                )
2665            } else if let Some(i) = name.find('=') {
2666                (
2667                    start,
2668                    ClassUnicodeKind::NamedValue {
2669                        op: ClassUnicodeOpKind::Equal,
2670                        name: name[..i].to_string(),
2671                        value: name[i + 1..].to_string(),
2672                    },
2673                )
2674            } else {
2675                (start, ClassUnicodeKind::Named(name.to_string()))
2676            }
2677        } else {
2678            let start = self.pos();
2679            let c = self.char();
2680            if c == '\\' {
2681                return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
2682            }
2683            self.bump_and_bump_space();
2684            let kind = ClassUnicodeKind::OneLetter(c);
2685            (start, kind)
2686        };
2687        Ok(ClassUnicode {
2688            span: Span::new(start, self.pos()),
2689            negated,
2690            kind,
2691        })
2692    }
2693
2694    #[inline(never)]
2695    fn parse_perl_class(&self) -> ClassPerl {
2696        let c = self.char();
2697        let span = self.span_char();
2698        self.bump();
2699        let (negated, kind) = match c {
2700            'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
2701            'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
2702            's' => (false, regex_syntax::ast::ClassPerlKind::Space),
2703            'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
2704            'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
2705            'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
2706            c => panic!("expected valid Perl class but got '{}'", c),
2707        };
2708        ClassPerl {
2709            span,
2710            kind,
2711            negated,
2712        }
2713    }
2714}
2715
2716pub fn parse_ast<'s>(
2717    tb: &mut TB<'s>,
2718    pattern: &'s str,
2719) -> std::result::Result<NodeId, ResharpError> {
2720    let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
2721    p.parse(tb)
2722}
2723
2724pub fn parse_ast_with<'s>(
2725    tb: &mut TB<'s>,
2726    pattern: &'s str,
2727    flags: &PatternFlags,
2728) -> std::result::Result<NodeId, ResharpError> {
2729    let mut p: ResharpParser<'s> = ResharpParser::with_flags(pattern, flags);
2730    p.parse(tb)
2731}
2732
2733/// Parse a pattern into the raw AST without converting to algebra nodes.
2734pub fn parse_to_ast(pattern: &str) -> std::result::Result<ast::Ast, ResharpError> {
2735    let mut p: ResharpParser = ResharpParser::new(pattern);
2736    p.parse_inner()
2737}
resharp_parser/lib.rs

resharp_parser/
lib.rs