resharp_parser/
lib.rs

1//! Parser for resharp regex patterns.
2//!
3//! Converts regex pattern strings into the node representation used by resharp-algebra.
4
5#![warn(dead_code)]
6pub mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind};
10use regex_syntax::{
11    ast::{
12        ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13        ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14        HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15    },
16    hir::{
17        self,
18        translate::{Translator, TranslatorBuilder},
19    },
20    utf8::Utf8Sequences,
21};
22use resharp_algebra::NodeId;
23
24type TB<'s> = resharp_algebra::RegexBuilder;
25
/// Global pattern-level flags, set from `EngineOptions`.
///
/// Every field is a plain `bool`, so the type derives the usual value-type
/// traits (`Clone`, `Debug`, `PartialEq`, `Eq`) to make configurations easy
/// to log, copy and compare.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PatternFlags {
    /// `\w`/`\d`/`\s` match full Unicode (true) or ASCII only (false).
    pub unicode: bool,
    /// `\w` covers all Unicode word chars including 3- and 4-byte sequences.
    pub full_unicode: bool,
    /// Global case-insensitive matching.
    pub case_insensitive: bool,
    /// `.` matches `\n` (behaves like `_`).
    pub dot_matches_new_line: bool,
    /// Allow whitespace and `#` comments in the pattern.
    pub ignore_whitespace: bool,
}
39
40impl Default for PatternFlags {
41    fn default() -> Self {
42        Self {
43            unicode: true,
44            full_unicode: false,
45            case_insensitive: false,
46            dot_matches_new_line: false,
47            ignore_whitespace: false,
48        }
49    }
50}
51
/// Classification of a position or character with respect to word characters.
///
/// NOTE(review): no use of this enum is visible in this part of the file; the
/// per-variant notes below are inferred from the names only — confirm against
/// the code that consumes them.
#[derive(Clone, Copy, PartialEq)]
enum WordCharKind {
    /// Presumably: definitely a word character.
    Word,
    /// Presumably: definitely not a word character.
    NonWord,
    /// Presumably: may be a word character — TODO confirm.
    MaybeWord,
    /// Presumably: may be a non-word character — TODO confirm.
    MaybeNonWord,
    /// Presumably: nothing is known about the character.
    Unknown,
    /// Presumably: start or end of input — TODO confirm.
    Edge,
}
61
/// Reports whether `b` is an ASCII letter, an ASCII digit or an underscore.
fn is_word_byte(b: u8) -> bool {
    matches!(b, b'_' | b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z')
}
65
/// A single atomic pattern element, before it is turned into an `Ast` node
/// or into a character-class item.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal character.
    Literal(Literal),
    /// An assertion (converted with `Ast::assertion`).
    Assertion(ast::Assertion),
    /// The `.` wildcard (converted with `Ast::dot`); carries its span.
    Dot(Span),
    /// Converted with `Ast::top`; presumably the `_` wildcard — TODO confirm.
    Top(Span),
    /// A Perl class such as `\d`, `\w` or `\s`.
    Perl(ClassPerl),
    /// A Unicode class such as `\p{..}`.
    Unicode(ClassUnicode),
}
75
76impl Primitive {
77    fn span(&self) -> &Span {
78        match *self {
79            Primitive::Literal(ref x) => &x.span,
80            Primitive::Assertion(ref x) => &x.span,
81            Primitive::Dot(ref span) => span,
82            Primitive::Top(ref span) => span,
83            Primitive::Perl(ref x) => &x.span,
84            Primitive::Unicode(ref x) => &x.span,
85        }
86    }
87
88    fn into_ast(self) -> Ast {
89        match self {
90            Primitive::Literal(lit) => Ast::literal(lit),
91            Primitive::Assertion(assert) => Ast::assertion(assert),
92            Primitive::Dot(span) => Ast::dot(span),
93            Primitive::Top(span) => Ast::top(span),
94            Primitive::Perl(cls) => Ast::class_perl(cls),
95            Primitive::Unicode(cls) => Ast::class_unicode(cls),
96        }
97    }
98
99    fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
100        use self::Primitive::*;
101        use regex_syntax::ast::ClassSetItem;
102
103        match self {
104            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
105            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
106            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
107            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
108        }
109    }
110
111    fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
112        use self::Primitive::*;
113
114        match self {
115            Literal(lit) => Ok(lit),
116            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
117        }
118    }
119}
120
/// A value that is one of two alternatives, `Left` or `Right`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Either<Left, Right> {
    /// The first alternative.
    Left(Left),
    /// The second alternative.
    Right(Right),
}
126
/// An error produced while parsing a pattern: what went wrong (`kind`),
/// where (`span`), and a copy of the pattern the span refers to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ResharpError {
    /// The kind of error.
    pub kind: ErrorKind,
    /// The original pattern that the parser generated the error from. Every
    /// span in an error is a valid range into this string.
    pattern: String,
    /// The span of this error.
    pub span: Span,
}
137
138impl std::fmt::Display for ResharpError {
139    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
140        write!(f, "{:?}: {:?}", self.kind, self.span)
141    }
142}
143impl std::error::Error for ResharpError {}
144
/// Result type used throughout the parser; the error is always a `ResharpError`.
type Result<T> = core::result::Result<T, ResharpError>;
146
/// One entry of the parser's group stack (`stack_group`): an opened group, or
/// the branches collected so far for an alternation or an intersection.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: Concat,
        /// The group that has been opened. Its sub-AST is always empty.
        group: ast::Group,
        /// Whether this group has the `x` flag enabled or not.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation.
    Alternation(ast::Alternation),
    /// The intersection counterpart of `Alternation`: pushed when a `&`
    /// branch is found, and extended in place when it is already at the top
    /// of the stack.
    Intersection(ast::Intersection),
}
165
/// One entry of the parser's character-class stack (`stack_class`).
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: regex_syntax::ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: regex_syntax::ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the stored
    /// set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: regex_syntax::ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: regex_syntax::ast::ClassSet,
    },
}
186
/// RE# syntax parser based on the regex-syntax crate.
///
/// Mutable parsing state is kept in `Cell`/`RefCell` fields, so the parsing
/// methods take `&self`.
pub struct ResharpParser<'s> {
    /// Entries of `(flag, Perl class kind, node)`; starts empty. Presumably a
    /// cache of already-built Perl class nodes — TODO confirm against its users.
    perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
    /// Unicode class cache from `resharp_algebra`; starts at its default value.
    unicode_classes: resharp_algebra::UnicodeClassCache,
    /// regex-syntax AST-to-HIR translator, built from the pattern flags.
    pub translator: regex_syntax::hir::translate::Translator,
    /// The pattern being parsed.
    pub pattern: &'s str,
    /// Current position (byte offset, line, column) in `pattern`.
    pos: Cell<Position>,
    /// The most recently assigned capture index; `0` before any assignment.
    capture_index: Cell<u32>,
    /// Initialised to `false`; presumably enables octal escapes — TODO confirm.
    octal: bool,
    /// Initialised to `false`; presumably allows `{,n}` repetitions — TODO confirm.
    empty_min_range: bool,
    /// Whether whitespace and `#` comments in the pattern are currently skipped.
    ignore_whitespace: Cell<bool>,
    /// Initialised from `PatternFlags::dot_matches_new_line`.
    dot_all: Cell<bool>,
    /// `true` when either the `unicode` or the `full_unicode` flag was set.
    global_unicode: bool,
    /// Copy of `PatternFlags::full_unicode`.
    global_full_unicode: bool,
    /// Copy of `PatternFlags::case_insensitive`.
    global_case_insensitive: bool,
    /// Comments collected while skipping whitespace in ignore-whitespace mode.
    comments: RefCell<Vec<ast::Comment>>,
    /// Stack of open groups and pending alternation/intersection branches.
    stack_group: RefCell<Vec<GroupState>>,
    /// Stack of open character classes and pending class operators.
    stack_class: RefCell<Vec<ClassState>>,
    /// Capture names seen so far, kept sorted by name to detect duplicates.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// Reusable string buffer; its users are outside this part of the file.
    scratch: RefCell<String>,
}
208
209fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
210    result.map_err(|e| {
211        if e.kind == from {
212            ResharpError {
213                kind: to,
214                pattern: e.pattern,
215                span: e.span,
216            }
217        } else {
218            e
219        }
220    })
221}
222
/// Reports whether `c` may appear in a capture group name.
///
/// The first character must be `_` or alphabetic. Later characters may also
/// be digits (any alphanumeric), `.`, `[` or `]`.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' => true,
        '.' | '[' | ']' => !first,
        _ if first => c.is_alphabetic(),
        _ => c.is_alphanumeric(),
    }
}
230
/// Reports whether `c` is one of the resharp meta characters, i.e. a
/// character that `escape` prefixes with a backslash.
pub fn is_meta_character(c: char) -> bool {
    // The full set of meta characters, one after another.
    const META: &str = r"\.+*?()|[]{}^$#&-~_";
    META.contains(c)
}
254
255/// escapes all resharp meta characters in `text`.
256pub fn escape(text: &str) -> String {
257    let mut buf = String::new();
258    escape_into(text, &mut buf);
259    buf
260}
261
262/// escapes all resharp meta characters in `text` and appends to `buf`.
263pub fn escape_into(text: &str, buf: &mut String) {
264    buf.reserve(text.len());
265    for c in text.chars() {
266        if is_meta_character(c) {
267            buf.push('\\');
268        }
269        buf.push(c);
270    }
271}
272
273pub fn is_escapeable_character(c: char) -> bool {
274    // Certainly escapeable if it's a meta character.
275    if is_meta_character(c) {
276        return true;
277    }
278    // Any character that isn't ASCII is definitely not escapeable. There's
279    // no real need to allow things like \☃ right?
280    if !c.is_ascii() {
281        return false;
282    }
283    // Otherwise, we basically say that everything is escapeable unless it's a
284    // letter or digit. Things like \3 are either octal (when enabled) or an
285    // error, and we should keep it that way. Otherwise, letters are reserved
286    // for adding new syntax in a backwards compatible way.
287    match c {
288        '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
289        // While not currently supported, we keep these as not escapeable to
290        // give us some flexibility with respect to supporting the \< and
291        // \> word boundary assertions in the future. By rejecting them as
292        // escapeable, \< and \> will result in a parse error. Thus, we can
293        // turn them into something else in the future without it being a
294        // backwards incompatible change.
295        //
296        // OK, now we support \< and \>, and we need to retain them as *not*
297        // escapeable here since the escape sequence is significant.
298        '<' | '>' => false,
299        _ => true,
300    }
301}
302
/// Reports whether `c` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='f' | 'A'..='F')
}
306
307impl<'s> ResharpParser<'s> {
308    fn default_translator_builder(&self) -> TranslatorBuilder {
309        let mut trb = TranslatorBuilder::new();
310        trb.unicode(self.global_unicode);
311        trb.utf8(false);
312        trb.case_insensitive(self.global_case_insensitive);
313        trb
314    }
315
316    pub fn new(pattern: &'s str) -> Self {
317        Self::with_flags(pattern, &PatternFlags::default())
318    }
319
320    pub fn with_flags(pattern: &'s str, flags: &PatternFlags) -> Self {
321        let mut trb = TranslatorBuilder::new();
322        trb.unicode(flags.unicode);
323        trb.utf8(false);
324        trb.case_insensitive(flags.case_insensitive);
325        Self {
326            translator: trb.build(),
327            pattern,
328            perl_classes: vec![],
329            unicode_classes: resharp_algebra::UnicodeClassCache::default(),
330            pos: Cell::new(Position::new(0, 0, 0)),
331            capture_index: Cell::new(0),
332            octal: false,
333            empty_min_range: false,
334            ignore_whitespace: Cell::new(flags.ignore_whitespace),
335            dot_all: Cell::new(flags.dot_matches_new_line),
336            global_unicode: flags.unicode || flags.full_unicode,
337            global_full_unicode: flags.full_unicode,
338            global_case_insensitive: flags.case_insensitive,
339            comments: RefCell::new(vec![]),
340            stack_group: RefCell::new(vec![]),
341            stack_class: RefCell::new(vec![]),
342            capture_names: RefCell::new(vec![]),
343            scratch: RefCell::new(String::new()),
344        }
345    }
346
    /// Return a reference to the parser state.
    ///
    /// The parser holds its own state, so this simply returns `self`.
    fn parser(&'_ self) -> &'_ ResharpParser<'_> {
        self
    }
351
    /// Return a reference to the pattern being parsed.
    ///
    /// This is the full pattern string, not just the unparsed remainder.
    fn pattern(&self) -> &str {
        self.pattern
    }
356
357    /// Create a new error with the given span and error type.
358    fn error(&self, span: Span, kind: ast::ErrorKind) -> ResharpError {
359        ResharpError {
360            kind,
361            pattern: self.pattern().to_string(),
362            span,
363        }
364    }
365
366    fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ResharpError {
367        self.error(
368            Span::splat(self.pos()),
369            ast::ErrorKind::UnsupportedResharpRegex,
370        )
371    }
372
373    /// Return the current offset of the parser.
374    ///
375    /// The offset starts at `0` from the beginning of the regular expression
376    /// pattern string.
377    fn offset(&self) -> usize {
378        self.parser().pos.get().offset
379    }
380
381    /// Return the current line number of the parser.
382    ///
383    /// The line number starts at `1`.
384    fn line(&self) -> usize {
385        self.parser().pos.get().line
386    }
387
388    /// Return the current column of the parser.
389    ///
390    /// The column number starts at `1` and is reset whenever a `\n` is seen.
391    fn column(&self) -> usize {
392        self.parser().pos.get().column
393    }
394
395    /// Return the next capturing index. Each subsequent call increments the
396    /// internal index.
397    ///
398    /// The span given should correspond to the location of the opening
399    /// parenthesis.
400    ///
401    /// If the capture limit is exceeded, then an error is returned.
402    fn next_capture_index(&self, span: Span) -> Result<u32> {
403        let current = self.parser().capture_index.get();
404        let i = current
405            .checked_add(1)
406            .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
407        self.parser().capture_index.set(i);
408        Ok(i)
409    }
410
411    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
412        let mut names = self.parser().capture_names.borrow_mut();
413        match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
414            Err(i) => {
415                names.insert(i, cap.clone());
416                Ok(())
417            }
418            Ok(i) => Err(self.error(
419                cap.span,
420                ast::ErrorKind::GroupNameDuplicate {
421                    original: names[i].span,
422                },
423            )),
424        }
425    }
426
427    fn ignore_whitespace(&self) -> bool {
428        self.parser().ignore_whitespace.get()
429    }
430
431    fn char(&self) -> char {
432        self.char_at(self.offset())
433    }
434
435    fn char_at(&self, i: usize) -> char {
436        self.pattern()[i..]
437            .chars()
438            .next()
439            .unwrap_or_else(|| panic!("expected char at offset {}", i))
440    }
441
442    fn bump(&self) -> bool {
443        if self.is_eof() {
444            return false;
445        }
446        let Position {
447            mut offset,
448            mut line,
449            mut column,
450        } = self.pos();
451        if self.char() == '\n' {
452            line = line.checked_add(1).unwrap();
453            column = 1;
454        } else {
455            column = column.checked_add(1).unwrap();
456        }
457        offset += self.char().len_utf8();
458        self.parser().pos.set(Position {
459            offset,
460            line,
461            column,
462        });
463        self.pattern()[self.offset()..].chars().next().is_some()
464    }
465
466    fn bump_if(&self, prefix: &str) -> bool {
467        if self.pattern()[self.offset()..].starts_with(prefix) {
468            for _ in 0..prefix.chars().count() {
469                self.bump();
470            }
471            true
472        } else {
473            false
474        }
475    }
476
477    fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
478        if self.bump_if("?=") {
479            return Some((true, true));
480        }
481        if self.bump_if("?!") {
482            return Some((true, false));
483        }
484        if self.bump_if("?<=") {
485            return Some((false, true));
486        }
487        if self.bump_if("?<!") {
488            return Some((false, false));
489        }
490        None
491    }
492
493    fn bump_and_bump_space(&self) -> bool {
494        if !self.bump() {
495            return false;
496        }
497        self.bump_space();
498        !self.is_eof()
499    }
500
501    fn bump_space(&self) {
502        if !self.ignore_whitespace() {
503            return;
504        }
505        while !self.is_eof() {
506            if self.char().is_whitespace() {
507                self.bump();
508            } else if self.char() == '#' {
509                let start = self.pos();
510                let mut comment_text = String::new();
511                self.bump();
512                while !self.is_eof() {
513                    let c = self.char();
514                    self.bump();
515                    if c == '\n' {
516                        break;
517                    }
518                    comment_text.push(c);
519                }
520                let comment = ast::Comment {
521                    span: Span::new(start, self.pos()),
522                    comment: comment_text,
523                };
524                self.parser().comments.borrow_mut().push(comment);
525            } else {
526                break;
527            }
528        }
529    }
530
531    /// Peek at the next character in the input without advancing the parser.
532    ///
533    /// If the input has been exhausted, then this returns `None`.
534    fn peek(&self) -> Option<char> {
535        if self.is_eof() {
536            return None;
537        }
538        self.pattern()[self.offset() + self.char().len_utf8()..]
539            .chars()
540            .next()
541    }
542
543    /// Like peek, but will ignore spaces when the parser is in whitespace
544    /// insensitive mode.
545    fn peek_space(&self) -> Option<char> {
546        if !self.ignore_whitespace() {
547            return self.peek();
548        }
549        if self.is_eof() {
550            return None;
551        }
552        let mut start = self.offset() + self.char().len_utf8();
553        let mut in_comment = false;
554        for (i, c) in self.pattern()[start..].char_indices() {
555            if c.is_whitespace() {
556                continue;
557            } else if !in_comment && c == '#' {
558                in_comment = true;
559            } else if in_comment && c == '\n' {
560                in_comment = false;
561            } else {
562                start += i;
563                break;
564            }
565        }
566        self.pattern()[start..].chars().next()
567    }
568
569    /// Returns true if the next call to `bump` would return false.
570    fn is_eof(&self) -> bool {
571        self.offset() == self.pattern().len()
572    }
573
574    /// Return the current position of the parser, which includes the offset,
575    /// line and column.
576    fn pos(&self) -> Position {
577        self.parser().pos.get()
578    }
579
580    /// Create a span at the current position of the parser. Both the start
581    /// and end of the span are set.
582    fn span(&self) -> Span {
583        Span::splat(self.pos())
584    }
585
586    /// Create a span that covers the current character.
587    fn span_char(&self) -> Span {
588        let mut next = Position {
589            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
590            line: self.line(),
591            column: self.column().checked_add(1).unwrap(),
592        };
593        if self.char() == '\n' {
594            next.line += 1;
595            next.column = 1;
596        }
597        Span::new(self.pos(), next)
598    }
599
600    /// Parse and push a single alternation on to the parser's internal stack.
601    /// If the top of the stack already has an alternation, then add to that
602    /// instead of pushing a new one.
603    ///
604    /// The concatenation given corresponds to a single alternation branch.
605    /// The concatenation returned starts the next branch and is empty.
606    ///
607    /// This assumes the parser is currently positioned at `|` and will advance
608    /// the parser to the character following `|`.
609    #[inline(never)]
610    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
611        assert_eq!(self.char(), '|');
612        concat.span.end = self.pos();
613        self.push_or_add_alternation(concat);
614        self.bump();
615        Ok(ast::Concat {
616            span: self.span(),
617            asts: vec![],
618        })
619    }
620
621    /// Pushes or adds the given branch of an alternation to the parser's
622    /// internal stack of state.
623    fn push_or_add_alternation(&self, concat: Concat) {
624        use self::GroupState::*;
625
626        let mut stack = self.parser().stack_group.borrow_mut();
627        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
628            alts.asts.push(concat.into_ast());
629            return;
630        }
631        stack.push(Alternation(ast::Alternation {
632            span: Span::new(concat.span.start, self.pos()),
633            asts: vec![concat.into_ast()],
634        }));
635    }
636
637    #[inline(never)]
638    fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
639        assert_eq!(self.char(), '&');
640        concat.span.end = self.pos();
641        self.push_or_add_intersect(concat);
642        self.bump();
643        Ok(Concat {
644            span: self.span(),
645            asts: vec![],
646        })
647    }
648
649    /// Pushes or adds the given branch of an alternation to the parser's
650    /// internal stack of state.
651    fn push_or_add_intersect(&self, concat: Concat) {
652        use self::GroupState::*;
653
654        let mut stack = self.parser().stack_group.borrow_mut();
655        if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
656            alts.asts.push(concat.into_ast());
657            return;
658        }
659        stack.push(Intersection(ast::Intersection {
660            span: Span::new(concat.span.start, self.pos()),
661            asts: vec![concat.into_ast()],
662        }));
663    }
664
665    /// Parse and push a group AST (and its parent concatenation) on to the
666    /// parser's internal stack. Return a fresh concatenation corresponding
667    /// to the group's sub-AST.
668    ///
669    /// If a set of flags was found (with no group), then the concatenation
670    /// is returned with that set of flags added.
671    ///
672    /// This assumes that the parser is currently positioned on the opening
673    /// parenthesis. It advances the parser to the character at the start
674    /// of the sub-expression (or adjoining expression).
675    ///
676    /// If there was a problem parsing the start of the group, then an error
677    /// is returned.
678    #[inline(never)]
679    fn push_group(&self, mut concat: Concat) -> Result<Concat> {
680        assert_eq!(self.char(), '(');
681        match self.parse_group()? {
682            Either::Left(set) => {
683                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
684                if let Some(v) = ignore {
685                    self.parser().ignore_whitespace.set(v);
686                }
687
688                concat.asts.push(Ast::flags(set));
689                Ok(concat)
690            }
691            Either::Right(group) => {
692                let old_ignore_whitespace = self.ignore_whitespace();
693                let new_ignore_whitespace = group
694                    .flags()
695                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
696                    .unwrap_or(old_ignore_whitespace);
697                self.parser()
698                    .stack_group
699                    .borrow_mut()
700                    .push(GroupState::Group {
701                        concat,
702                        group,
703                        ignore_whitespace: old_ignore_whitespace,
704                    });
705                self.parser().ignore_whitespace.set(new_ignore_whitespace);
706                Ok(Concat {
707                    span: self.span(),
708                    asts: vec![],
709                })
710            }
711        }
712    }
713
714    #[inline(never)]
715    fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
716        assert_eq!(self.char(), '~');
717        self.bump();
718        if self.is_eof() || self.char() != '(' {
719            return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
720        }
721        let open_span = self.span_char();
722        self.bump();
723        let group = ast::Group {
724            span: open_span,
725            kind: ast::GroupKind::Complement,
726            ast: Box::new(Ast::empty(self.span())),
727        };
728
729        let old_ignore_whitespace = self.ignore_whitespace();
730        let new_ignore_whitespace = group
731            .flags()
732            .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
733            .unwrap_or(old_ignore_whitespace);
734        self.parser()
735            .stack_group
736            .borrow_mut()
737            .push(GroupState::Group {
738                concat,
739                group,
740                ignore_whitespace: old_ignore_whitespace,
741            });
742        self.parser().ignore_whitespace.set(new_ignore_whitespace);
743        Ok(Concat {
744            span: self.span(),
745            asts: vec![],
746        })
747    }
748
    /// Pop a group AST from the parser's internal stack and set the group's
    /// AST to the given concatenation. Return the concatenation containing
    /// the group.
    ///
    /// If an alternation or intersection sits on top of the group on the
    /// stack, the given concatenation becomes its last branch and the whole
    /// alternation/intersection becomes the group's AST. A complement group
    /// is appended to the parent concatenation as a complement node rather
    /// than as a group node.
    ///
    /// This assumes that the parser is currently positioned on the closing
    /// parenthesis and advances the parser to the character following the `)`.
    ///
    /// If no such group could be popped, then an unopened group error is
    /// returned.
    #[inline(never)]
    fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
        use self::GroupState::*;
        assert_eq!(self.char(), ')');
        let mut stack = self.parser().stack_group.borrow_mut();
        let topstack = stack.pop();

        // Find the opened group. It is either directly on top of the stack or
        // directly beneath a single alternation/intersection entry; any other
        // layout is reported as an unopened group.
        let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
            Some(Group {
                concat,
                group,
                ignore_whitespace,
            }) => (concat, group, ignore_whitespace, None),
            Some(Alternation(alt)) => match stack.pop() {
                Some(Group {
                    concat,
                    group,
                    ignore_whitespace,
                }) => (
                    concat,
                    group,
                    ignore_whitespace,
                    Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
                ),
                None | Some(Alternation(_)) | Some(Intersection(_)) => {
                    return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
                }
            },
            Some(Intersection(int)) => match stack.pop() {
                Some(Group {
                    concat,
                    group,
                    ignore_whitespace,
                }) => (
                    concat,
                    group,
                    ignore_whitespace,
                    Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
                ),
                None | Some(Alternation(_)) | Some(Intersection(_)) => {
                    return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
                }
            },

            None => {
                return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
            }
        };
        // Restore the ignore-whitespace mode that was saved with the group.
        self.parser().ignore_whitespace.set(ignore_whitespace);
        // The body ends just before `)`; the group itself ends just after it.
        group_concat.span.end = self.pos();
        self.bump();
        group.span.end = self.pos();
        match alt {
            Some(Either::Left(mut alt)) => {
                alt.span.end = group_concat.span.end;
                alt.asts.push(group_concat.into_ast());
                group.ast = Box::new(alt.into_ast());
            }
            Some(Either::Right(mut int)) => {
                int.span.end = group_concat.span.end;
                int.asts.push(group_concat.into_ast());
                group.ast = Box::new(int.into_ast());
            }
            None => {
                group.ast = Box::new(group_concat.into_ast());
            }
        }

        if group.kind == GroupKind::Complement {
            // NOTE(review): `self.span()` is an empty span at the position
            // after `)`, not the extent of the `~(...)` expression — confirm
            // that consumers of `Complement::span` expect this.
            let complement = ast::Complement {
                span: self.span(),
                ast: group.ast,
            };
            prior_concat.asts.push(Ast::complement(complement));
        }
        // Every other group kind is kept as a group node.
        else {
            prior_concat.asts.push(Ast::group(group));
        }
        Ok(prior_concat)
    }
839
    /// Pop the last state from the parser's internal stack, if it exists, and
    /// add the given concatenation to it. There either must be no state or a
    /// single alternation or intersection item on the stack. A group left on
    /// the stack produces an unclosed group error.
    ///
    /// This assumes that the parser has advanced to the end.
    ///
    /// NOTE(review): `push_or_add_intersect` pushes an intersection whenever
    /// the top of the stack is not an intersection (including when it is an
    /// alternation), and `push_or_add_alternation` does the converse. If the
    /// caller lets `|` and `&` mix at the same nesting level, the
    /// `unreachable!()` arms below would be reached — confirm that the main
    /// parse loop rules this out.
    #[inline(never)]
    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
        concat.span.end = self.pos();
        let mut stack = self.parser().stack_group.borrow_mut();
        // Fold the final branch into whatever is on top of the stack.
        let ast = match stack.pop() {
            None => Ok(concat.into_ast()),
            Some(GroupState::Alternation(mut alt)) => {
                alt.span.end = self.pos();
                alt.asts.push(concat.into_ast());
                Ok(Ast::alternation(alt))
            }
            Some(GroupState::Intersection(mut int)) => {
                int.span.end = self.pos();
                int.asts.push(concat.into_ast());

                Ok(Ast::intersection(int))
            }
            Some(GroupState::Group { group, .. }) => {
                return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
            }
        };
        // If we try to pop again, there should be nothing.
        match stack.pop() {
            None => ast,
            Some(GroupState::Alternation(_)) => {
                // This unreachable is unfortunate. The original reasoning is
                // that two `GroupState::Alternation`s are never adjacent in
                // the parser's stack, because a `GroupState::Alternation` is
                // never pushed if one is already at the top of the stack.
                // See the review note on this function for the mixed
                // alternation/intersection case.
                unreachable!()
            }
            Some(GroupState::Intersection(_)) => {
                // Same reasoning as for the alternation arm above.
                unreachable!()
            }
            Some(GroupState::Group { group, .. }) => {
                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
            }
        }
    }
887
888    /// Parse the opening of a character class and push the current class
889    /// parsing context onto the parser's stack. This assumes that the parser
890    /// is positioned at an opening `[`. The given union should correspond to
891    /// the union of set items built up before seeing the `[`.
892    ///
893    /// If there was a problem parsing the opening of the class, then an error
894    /// is returned. Otherwise, a new union of set items for the class is
895    /// returned (which may be populated with either a `]` or a `-`).
896    #[inline(never)]
897    fn push_class_open(
898        &self,
899        parent_union: regex_syntax::ast::ClassSetUnion,
900    ) -> Result<regex_syntax::ast::ClassSetUnion> {
901        assert_eq!(self.char(), '[');
902
903        let (nested_set, nested_union) = self.parse_set_class_open()?;
904        self.parser()
905            .stack_class
906            .borrow_mut()
907            .push(ClassState::Open {
908                union: parent_union,
909                set: nested_set,
910            });
911        Ok(nested_union)
912    }
913
914    /// Parse the end of a character class set and pop the character class
915    /// parser stack. The union given corresponds to the last union built
916    /// before seeing the closing `]`. The union returned corresponds to the
917    /// parent character class set with the nested class added to it.
918    ///
919    /// This assumes that the parser is positioned at a `]` and will advance
920    /// the parser to the byte immediately following the `]`.
921    ///
922    /// If the stack is empty after popping, then this returns the final
923    /// "top-level" character class AST (where a "top-level" character class
924    /// is one that is not nested inside any other character class).
925    ///
926    /// If there is no corresponding opening bracket on the parser's stack,
927    /// then an error is returned.
928    #[inline(never)]
929    fn pop_class(
930        &self,
931        nested_union: regex_syntax::ast::ClassSetUnion,
932    ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
933        assert_eq!(self.char(), ']');
934
935        let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
936        let prevset = self.pop_class_op(item);
937        let mut stack = self.parser().stack_class.borrow_mut();
938        match stack.pop() {
939            None => {
940                // We can never observe an empty stack:
941                //
942                // 1) We are guaranteed to start with a non-empty stack since
943                //    the character class parser is only initiated when it sees
944                //    a `[`.
945                // 2) If we ever observe an empty stack while popping after
946                //    seeing a `]`, then we signal the character class parser
947                //    to terminate.
948                panic!("unexpected empty character class stack")
949            }
950            Some(ClassState::Op { .. }) => {
951                // This panic is unfortunate, but this case is impossible
952                // since we already popped the Op state if one exists above.
953                // Namely, every push to the class parser stack is guarded by
954                // whether an existing Op is already on the top of the stack.
955                // If it is, the existing Op is modified. That is, the stack
956                // can never have consecutive Op states.
957                panic!("unexpected ClassState::Op")
958            }
959            Some(ClassState::Open { mut union, mut set }) => {
960                self.bump();
961                set.span.end = self.pos();
962                set.kind = prevset;
963                if stack.is_empty() {
964                    Ok(Either::Right(set))
965                } else {
966                    union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
967                    Ok(Either::Left(union))
968                }
969            }
970        }
971    }
972
973    /// Return an "unclosed class" error whose span points to the most
974    /// recently opened class.
975    ///
976    /// This should only be called while parsing a character class.
977    #[inline(never)]
978    fn unclosed_class_error(&self) -> ResharpError {
979        for state in self.parser().stack_class.borrow().iter().rev() {
980            if let ClassState::Open { ref set, .. } = *state {
981                return self.error(set.span, ast::ErrorKind::ClassUnclosed);
982            }
983        }
984        // We are guaranteed to have a non-empty stack with at least
985        // one open bracket, so we should never get here.
986        panic!("no open character class found")
987    }
988
989    /// Push the current set of class items on to the class parser's stack as
990    /// the left hand side of the given operator.
991    ///
992    /// A fresh set union is returned, which should be used to build the right
993    /// hand side of this operator.
994    #[inline(never)]
995    fn push_class_op(
996        &self,
997        next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
998        next_union: regex_syntax::ast::ClassSetUnion,
999    ) -> regex_syntax::ast::ClassSetUnion {
1000        let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
1001        let new_lhs = self.pop_class_op(item);
1002        self.parser().stack_class.borrow_mut().push(ClassState::Op {
1003            kind: next_kind,
1004            lhs: new_lhs,
1005        });
1006        regex_syntax::ast::ClassSetUnion {
1007            span: self.span(),
1008            items: vec![],
1009        }
1010    }
1011
1012    /// Pop a character class set from the character class parser stack. If the
1013    /// top of the stack is just an item (not an operation), then return the
1014    /// given set unchanged. If the top of the stack is an operation, then the
1015    /// given set will be used as the rhs of the operation on the top of the
1016    /// stack. In that case, the binary operation is returned as a set.
1017    #[inline(never)]
1018    fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
1019        let mut stack = self.parser().stack_class.borrow_mut();
1020        let (kind, lhs) = match stack.pop() {
1021            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
1022            Some(state @ ClassState::Open { .. }) => {
1023                stack.push(state);
1024                return rhs;
1025            }
1026            None => unreachable!(),
1027        };
1028        let span = Span::new(lhs.span().start, rhs.span().end);
1029        regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
1030            span,
1031            kind,
1032            lhs: Box::new(lhs),
1033            rhs: Box::new(rhs),
1034        })
1035    }
1036
1037    fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
1038        match hir.kind() {
1039            hir::HirKind::Empty => Ok(NodeId::EPS),
1040            hir::HirKind::Literal(l) => {
1041                if l.0.len() == 1 {
1042                    let node = tb.mk_u8(l.0[0]);
1043                    Ok(node)
1044                } else {
1045                    let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
1046                    let conc = tb.mk_concats(ws.iter().copied());
1047                    Ok(conc)
1048                }
1049            }
1050            hir::HirKind::Class(class) => match class {
1051                hir::Class::Unicode(class_unicode) => {
1052                    let ranges = class_unicode.ranges();
1053                    let mut nodes = Vec::new();
1054                    for range in ranges {
1055                        for seq in Utf8Sequences::new(range.start(), range.end()) {
1056                            let sl = seq.as_slice();
1057                            let bytes: Vec<_> = sl.iter().map(|s| (s.start, s.end)).collect();
1058                            let node = match bytes.len() {
1059                                1 => tb.mk_range_u8(bytes[0].0, bytes[0].1),
1060                                n => {
1061                                    let last = tb.mk_range_u8(bytes[n - 1].0, bytes[n - 1].1);
1062                                    let mut conc = last;
1063                                    for i in (0..n - 1).rev() {
1064                                        let b = tb.mk_range_u8(bytes[i].0, bytes[i].1);
1065                                        conc = tb.mk_concat(b, conc);
1066                                    }
1067                                    conc
1068                                }
1069                            };
1070                            nodes.push(node);
1071                        }
1072                    }
1073                    let merged = tb.mk_unions(nodes.into_iter());
1074                    Ok(merged)
1075                }
1076                hir::Class::Bytes(class_bytes) => {
1077                    let ranges = class_bytes.ranges();
1078                    let mut result = NodeId::BOT;
1079                    for range in ranges {
1080                        let start = range.start();
1081                        let end = range.end();
1082                        let node = tb.mk_range_u8(start, end);
1083                        result = tb.mk_union(result, node);
1084                    }
1085                    Ok(result)
1086                }
1087            },
1088            hir::HirKind::Look(_) => todo!(),
1089            hir::HirKind::Repetition(_) => todo!(),
1090            hir::HirKind::Capture(_) => todo!(),
1091            hir::HirKind::Concat(body) => {
1092                let mut result = NodeId::EPS;
1093                for child in body {
1094                    let node = self.hir_to_node_id(child, tb)?;
1095                    result = tb.mk_concat(result, node);
1096                }
1097                Ok(result)
1098            }
1099            hir::HirKind::Alternation(_) => todo!(),
1100        }
1101    }
1102
1103    fn translate_ast_to_hir(
1104        &mut self,
1105        orig_ast: &regex_syntax::ast::Ast,
1106        tb: &mut TB<'s>,
1107    ) -> Result<NodeId> {
1108        match self.translator.translate("", orig_ast) {
1109            Err(_) => Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1110            Ok(hir) => self.hir_to_node_id(&hir, tb),
1111        }
1112    }
1113
1114    fn translator_to_node_id(
1115        &mut self,
1116        orig_ast: &regex_syntax::ast::Ast,
1117        translator: &mut Option<Translator>,
1118        tb: &mut TB<'s>,
1119    ) -> Result<NodeId> {
1120        match translator {
1121            Some(tr) => {
1122                let hir = tr
1123                    .translate("", orig_ast)
1124                    .map_err(|e| self.unsupported_error(e))?;
1125                self.hir_to_node_id(&hir, tb)
1126            }
1127            None => self.translate_ast_to_hir(orig_ast, tb),
1128        }
1129    }
1130
1131    fn get_class(
1132        &mut self,
1133        negated: bool,
1134        kind: regex_syntax::ast::ClassPerlKind,
1135        tb: &mut TB<'s>,
1136    ) -> Result<NodeId> {
1137        let w = self
1138            .perl_classes
1139            .iter()
1140            .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1141        match w {
1142            Some((_, _, value)) => Ok(*value),
1143            None => {
1144                let translated = if self.global_unicode {
1145                    match kind {
1146                        regex_syntax::ast::ClassPerlKind::Word => {
1147                            if self.global_full_unicode {
1148                                self.unicode_classes.ensure_word_full(tb);
1149                            } else {
1150                                self.unicode_classes.ensure_word(tb);
1151                            }
1152                            if negated { self.unicode_classes.non_word } else { self.unicode_classes.word }
1153                        }
1154                        regex_syntax::ast::ClassPerlKind::Digit => {
1155                            if self.global_full_unicode {
1156                                self.unicode_classes.ensure_digit_full(tb);
1157                            } else {
1158                                self.unicode_classes.ensure_digit(tb);
1159                            }
1160                            if negated { self.unicode_classes.non_digit } else { self.unicode_classes.digit }
1161                        }
1162                        regex_syntax::ast::ClassPerlKind::Space => {
1163                            self.unicode_classes.ensure_space(tb);
1164                            if negated {
1165                                self.unicode_classes.non_space
1166                            } else {
1167                                self.unicode_classes.space
1168                            }
1169                        }
1170                    }
1171                } else {
1172                    let pos = match kind {
1173                        regex_syntax::ast::ClassPerlKind::Word => {
1174                            let az = tb.mk_range_u8(b'a', b'z');
1175                            let big = tb.mk_range_u8(b'A', b'Z');
1176                            let dig = tb.mk_range_u8(b'0', b'9');
1177                            let us = tb.mk_u8(b'_');
1178                            tb.mk_unions([az, big, dig, us].into_iter())
1179                        }
1180                        regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1181                        regex_syntax::ast::ClassPerlKind::Space => {
1182                            let sp = tb.mk_u8(b' ');
1183                            let tab = tb.mk_u8(b'\t');
1184                            let nl = tb.mk_u8(b'\n');
1185                            let cr = tb.mk_u8(b'\r');
1186                            let ff = tb.mk_u8(0x0C);
1187                            let vt = tb.mk_u8(0x0B);
1188                            tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1189                        }
1190                    };
1191                    if negated {
1192                        tb.mk_compl(pos)
1193                    } else {
1194                        pos
1195                    }
1196                };
1197                self.perl_classes.push((negated, kind, translated));
1198                Ok(translated)
1199            }
1200        }
1201    }
1202
1203    fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1204        use WordCharKind::*;
1205        match ast {
1206            Ast::Literal(lit) => {
1207                if is_word_byte(lit.c as u8) {
1208                    Word
1209                } else {
1210                    NonWord
1211                }
1212            }
1213            Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1214                (&regex_syntax::ast::ClassPerlKind::Word, false) => Word,
1215                (&regex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1216                (&regex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1217                (&regex_syntax::ast::ClassPerlKind::Space, true) => Unknown,
1218                (&regex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1219                (&regex_syntax::ast::ClassPerlKind::Digit, true) => Unknown,
1220            },
1221            Ast::Dot(_) | Ast::Top(_) => Unknown,
1222            Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1223            Ast::Concat(c) if !c.asts.is_empty() => {
1224                let edge = if left { c.asts.len() - 1 } else { 0 };
1225                let kind = Self::word_char_kind(&c.asts[edge], left);
1226                match kind {
1227                    MaybeWord => {
1228                        let dir: isize = if left { -1 } else { 1 };
1229                        match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1230                            Word => Word,
1231                            _ => MaybeWord,
1232                        }
1233                    }
1234                    MaybeNonWord => {
1235                        let dir: isize = if left { -1 } else { 1 };
1236                        match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1237                            NonWord => NonWord,
1238                            _ => MaybeNonWord,
1239                        }
1240                    }
1241                    other => other,
1242                }
1243            }
1244            Ast::Alternation(alt) if !alt.asts.is_empty() => {
1245                let first = Self::word_char_kind(&alt.asts[0], left);
1246                if alt.asts[1..].iter().all(|a| Self::word_char_kind(a, left) == first) {
1247                    first
1248                } else {
1249                    Unknown
1250                }
1251            }
1252            Ast::Repetition(r) => {
1253                let inner = Self::word_char_kind(&r.ast, left);
1254                let nullable = matches!(
1255                    &r.op.kind,
1256                    ast::RepetitionKind::ZeroOrMore
1257                        | ast::RepetitionKind::ZeroOrOne
1258                        | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1259                );
1260                if nullable {
1261                    match inner {
1262                        Word => MaybeWord,
1263                        NonWord => MaybeNonWord,
1264                        _ => Unknown,
1265                    }
1266                } else {
1267                    inner
1268                }
1269            }
1270            Ast::Lookaround(la) => Self::word_char_kind(&la.ast, left),
1271            _ => Unknown,
1272        }
1273    }
1274
1275    fn edge_class_ast(ast: &Ast, left: bool) -> Option<&Ast> {
1276        match ast {
1277            Ast::Literal(_)
1278            | Ast::ClassPerl(_)
1279            | Ast::ClassBracketed(_)
1280            | Ast::ClassUnicode(_)
1281            | Ast::Dot(_)
1282            | Ast::Top(_) => Some(ast),
1283            Ast::Group(g) => Self::edge_class_ast(&g.ast, left),
1284            Ast::Concat(c) if !c.asts.is_empty() => {
1285                Self::edge_class_ast(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1286            }
1287            Ast::Repetition(r) => Self::edge_class_ast(&r.ast, left),
1288            Ast::Lookaround(la) => Self::edge_class_ast(&la.ast, left),
1289            _ => None,
1290        }
1291    }
1292
1293    fn resolve_word_kind(
1294        &mut self,
1295        asts: &[Ast],
1296        idx: usize,
1297        dir: isize,
1298        translator: &mut Option<Translator>,
1299        tb: &mut TB<'s>,
1300        word_id: NodeId,
1301        not_word_id: NodeId,
1302    ) -> Result<WordCharKind> {
1303        use WordCharKind::*;
1304        let fast = Self::concat_neighbor_kind(asts, idx, dir);
1305        if fast != Unknown {
1306            return Ok(fast);
1307        }
1308        let neighbor_idx = (idx as isize + dir) as usize;
1309        let node = if let Some(edge) = Self::edge_class_ast(&asts[neighbor_idx], dir < 0) {
1310            self.ast_to_node_id(edge, translator, tb)?
1311        } else {
1312            // check if \w_* (starts-with-word) or \W_* (starts-with-non-word) subsumes it.
1313            let neighbor_node = self.ast_to_node_id(&asts[neighbor_idx], translator, tb)?;
1314            let word_prefix = if dir > 0 {
1315                tb.mk_concat(word_id, NodeId::TS)
1316            } else {
1317                tb.mk_concat(NodeId::TS, word_id)
1318            };
1319            let non_word_prefix = if dir > 0 {
1320                tb.mk_concat(not_word_id, NodeId::TS)
1321            } else {
1322                tb.mk_concat(NodeId::TS, not_word_id)
1323            };
1324            return if tb.subsumes(word_prefix, neighbor_node) == Some(true) {
1325                Ok(Word)
1326            } else if tb.subsumes(non_word_prefix, neighbor_node) == Some(true) {
1327                Ok(NonWord)
1328            } else {
1329                Ok(Unknown)
1330            };
1331        };
1332        if tb.subsumes(word_id, node) == Some(true) {
1333            Ok(Word)
1334        } else if tb.subsumes(not_word_id, node) == Some(true) {
1335            Ok(NonWord)
1336        } else {
1337            Ok(Unknown)
1338        }
1339    }
1340
1341    fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1342        use WordCharKind::*;
1343        let next = idx as isize + dir;
1344        if next < 0 || next >= asts.len() as isize {
1345            return Edge;
1346        }
1347        let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1348        match kind {
1349            MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1350                Word => Word,
1351                _ => Unknown,
1352            },
1353            MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1354                NonWord => NonWord,
1355                _ => Unknown,
1356            },
1357            other => other,
1358        }
1359    }
1360
1361    fn rewrite_word_boundary_in_concat(
1362        &mut self,
1363        asts: &[Ast],
1364        idx: usize,
1365        translator: &mut Option<Translator>,
1366        tb: &mut TB<'s>,
1367    ) -> Result<(NodeId, usize)> {
1368        use WordCharKind::*;
1369        let (word_id, not_word_id) = if self.global_full_unicode {
1370            self.unicode_classes.ensure_word_full(tb);
1371            (self.unicode_classes.word, self.unicode_classes.non_word)
1372        } else if self.global_unicode {
1373            self.unicode_classes.ensure_word(tb);
1374            (self.unicode_classes.word, self.unicode_classes.non_word)
1375        } else {
1376            let az = tb.mk_range_u8(b'a', b'z');
1377            let big = tb.mk_range_u8(b'A', b'Z');
1378            let dig = tb.mk_range_u8(b'0', b'9');
1379            let us = tb.mk_u8(b'_');
1380            let w = tb.mk_unions([az, big, dig, us].into_iter());
1381            (w, tb.mk_compl(w))
1382        };
1383        let left = self.resolve_word_kind(asts, idx, -1, translator, tb, word_id, not_word_id)?;
1384        let right = self.resolve_word_kind(asts, idx, 1, translator, tb, word_id, not_word_id)?;
1385
1386        match (left, right) {
1387            (NonWord, Word) | (Word, NonWord) => Ok((NodeId::EPS, idx + 1)),
1388            (Word, _) => {
1389                let neg = tb.mk_neg_lookahead(word_id, 0);
1390                Ok((neg, idx + 1))
1391            }
1392            (NonWord, _) => {
1393                let set = tb.mk_union(NodeId::END, word_id);
1394                let tail = tb.mk_concat(set, NodeId::TS);
1395                self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1396            }
1397            (_, Word) => Ok((tb.mk_neg_lookbehind(word_id), idx + 1)),
1398            (_, NonWord) => {
1399                let body = tb.mk_union(NodeId::BEGIN, word_id);
1400                Ok((tb.mk_lookbehind(body, NodeId::MISSING), idx + 1))
1401            }
1402            // TODO: (Unknown, Unknown) is possible via make_full_word_boundary but
1403            // the full expansion (lb(\w)·la(\W) | lb(\W)·la(\w)) is too expensive
1404            // reimplement once the builder is more optimized
1405            _ => Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)),
1406        }
1407    }
1408
1409    fn merge_boundary_with_following_lookaheads(
1410        &mut self,
1411        asts: &[Ast],
1412        wb_idx: usize,
1413        boundary_tail: NodeId,
1414        translator: &mut Option<Translator>,
1415        tb: &mut TB<'s>,
1416    ) -> Result<(NodeId, usize)> {
1417        let mut next = wb_idx + 1;
1418        let mut la_bodies = vec![boundary_tail];
1419        while next < asts.len() {
1420            match &asts[next] {
1421                Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1422                    let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1423                    la_bodies.push(tb.mk_concat(body, NodeId::TS));
1424                    next += 1;
1425                }
1426                _ => break,
1427            }
1428        }
1429        let merged = tb.mk_inters(la_bodies.into_iter());
1430        Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1431    }
1432
1433    fn ast_to_node_id(
1434        &mut self,
1435        ast: &Ast,
1436        translator: &mut Option<Translator>,
1437        tb: &mut TB<'s>,
1438    ) -> Result<NodeId> {
1439        match ast {
1440            Ast::Empty(_) => Ok(NodeId::EPS),
1441            Ast::Flags(f) => {
1442                let mut translator_builder = self.default_translator_builder();
1443                if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1444                    translator_builder.case_insensitive(state);
1445                }
1446                if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1447                    translator_builder.unicode(state);
1448                }
1449                if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1450                    self.dot_all.set(state);
1451                }
1452                let concat_translator = Some(translator_builder.build());
1453                *translator = concat_translator;
1454                Ok(NodeId::EPS)
1455            }
1456            Ast::Literal(l) => {
1457                let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
1458                self.translator_to_node_id(&ast_lit, translator, tb)
1459            }
1460            Ast::Top(_) => Ok(NodeId::TOP),
1461            Ast::Dot(_) => {
1462                if self.dot_all.get() {
1463                    Ok(NodeId::TOP)
1464                } else {
1465                    let hirv = hir::Hir::dot(hir::Dot::AnyByteExceptLF);
1466                    self.hir_to_node_id(&hirv, tb)
1467                }
1468            }
1469            Ast::Assertion(a) => match &a.kind {
1470                ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
1471                ast::AssertionKind::EndText => Ok(NodeId::END),
1472                ast::AssertionKind::WordBoundary => {
1473                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1474                }
1475                ast::AssertionKind::NotWordBoundary => {
1476                    Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1477                }
1478                ast::AssertionKind::StartLine => {
1479                    let left = NodeId::BEGIN;
1480                    let right = tb.mk_u8(b'\n');
1481                    let union = tb.mk_union(left, right);
1482                    Ok(tb.mk_lookbehind(union, NodeId::MISSING))
1483                }
1484                ast::AssertionKind::EndLine => {
1485                    let left = NodeId::END;
1486                    let right = tb.mk_u8(b'\n');
1487                    let union = tb.mk_union(left, right);
1488                    Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
1489                }
1490                ast::AssertionKind::WordBoundaryStart => todo!(),
1491                ast::AssertionKind::WordBoundaryEnd => todo!(),
1492                ast::AssertionKind::WordBoundaryStartAngle => todo!(),
1493                ast::AssertionKind::WordBoundaryEndAngle => Ok(tb.mk_string(">")),
1494                ast::AssertionKind::WordBoundaryStartHalf => todo!(),
1495                ast::AssertionKind::WordBoundaryEndHalf => todo!(),
1496            },
1497            Ast::ClassUnicode(c) => {
1498                let tmp = regex_syntax::ast::ClassUnicode {
1499                    span: c.span,
1500                    negated: c.negated,
1501                    kind: c.kind.clone(),
1502                };
1503                if !c.negated {
1504                    if let regex_syntax::ast::ClassUnicodeKind::Named(s) = &c.kind {
1505                        match s.as_str() {
1506                            // \p{ascii} for ascii, \p{ascii}&\p{Letter} => [A-Za-z]
1507                            "ascii" => return Ok(tb.mk_range_u8(0, 127)),
1508                            // restricts matches to valid utf8, \p{utf8}*&~(a) => non a, but valid utf8
1509                            "utf8" => {
1510                                let ascii = tb.mk_range_u8(0, 127);
1511                                let beta = tb.mk_range_u8(128, 0xBF);
1512                                let c0 = tb.mk_range_u8(0xC0, 0xDF);
1513                                let c0s = tb.mk_concats([c0, beta].into_iter());
1514                                let e0 = tb.mk_range_u8(0xE0, 0xEF);
1515                                let e0s = tb.mk_concats([e0, beta, beta].into_iter());
1516                                let f0 = tb.mk_range_u8(0xF0, 0xF7);
1517                                let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
1518                                let merged = tb.mk_unions([ascii, c0s, e0s, f0s].into_iter());
1519                                return Ok(tb.mk_star(merged));
1520                            }
1521                            "hex" => {
1522                                let nums = tb.mk_range_u8(b'0', b'9');
1523                                let lets = tb.mk_range_u8(b'a', b'f');
1524                                let lets2 = tb.mk_range_u8(b'A', b'F');
1525                                let merged = tb.mk_unions([nums, lets, lets2].into_iter());
1526                                return Ok(merged);
1527                            }
1528                            _ => {}
1529                        }
1530                    };
1531                }
1532
1533                let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
1534                self.translator_to_node_id(&orig_ast, translator, tb)
1535            }
1536            Ast::ClassPerl(c) => self.get_class(c.negated, c.kind.clone(), tb),
1537            Ast::ClassBracketed(c) => match &c.kind {
1538                regex_syntax::ast::ClassSet::Item(_) => {
1539                    let tmp = regex_syntax::ast::ClassBracketed {
1540                        span: c.span,
1541                        negated: c.negated,
1542                        kind: c.kind.clone(),
1543                    };
1544                    let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
1545                    self.translator_to_node_id(&orig_ast, translator, tb)
1546                }
1547                regex_syntax::ast::ClassSet::BinaryOp(_) => todo!(),
1548            },
1549            Ast::Repetition(r) => {
1550                let body = self.ast_to_node_id(&r.ast, translator, tb);
1551                match body {
1552                    Ok(body) => match &r.op.kind {
1553                        ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
1554                        ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
1555                        ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
1556                        ast::RepetitionKind::Range(r) => match r {
1557                            ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
1558                            ast::RepetitionRange::AtLeast(n) => {
1559                                let rep = tb.mk_repeat(body, *n, *n);
1560                                let st = tb.mk_star(body);
1561                                Ok(tb.mk_concat(rep, st))
1562                            }
1563
1564                            ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
1565                        },
1566                    },
1567                    Err(_) => body,
1568                }
1569            }
1570            Ast::Lookaround(g) => {
1571                let body = self.ast_to_node_id(&g.ast, translator, tb)?;
1572                match g.kind {
1573                    ast::LookaroundKind::PositiveLookahead => {
1574                        Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
1575                    }
1576                    ast::LookaroundKind::PositiveLookbehind => {
1577                        Ok(tb.mk_lookbehind(body, NodeId::MISSING))
1578                    }
1579                    ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
1580                    ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
1581                }
1582            }
1583            Ast::Group(g) => {
1584                if let ast::GroupKind::NonCapturing(ref flags) = g.kind {
1585                    if !flags.items.is_empty() {
1586                        let mut translator_builder = self.default_translator_builder();
1587                        if let Some(state) = flags.flag_state(ast::Flag::CaseInsensitive) {
1588                            translator_builder.case_insensitive(state);
1589                        }
1590                        if let Some(state) = flags.flag_state(ast::Flag::Unicode) {
1591                            translator_builder.unicode(state);
1592                        }
1593                        let saved_dot_all = self.dot_all.get();
1594                        if let Some(state) = flags.flag_state(ast::Flag::DotMatchesNewLine) {
1595                            self.dot_all.set(state);
1596                        }
1597                        let mut scoped = Some(translator_builder.build());
1598                        let result = self.ast_to_node_id(&g.ast, &mut scoped, tb);
1599                        self.dot_all.set(saved_dot_all);
1600                        return result;
1601                    }
1602                }
1603                self.ast_to_node_id(&g.ast, translator, tb)
1604            }
1605            Ast::Alternation(a) => {
1606                let mut children = vec![];
1607                for ast in &a.asts {
1608                    match self.ast_to_node_id(ast, translator, tb) {
1609                        Ok(node_id) => children.push(node_id),
1610                        Err(err) => return Err(err),
1611                    }
1612                }
1613                Ok(tb.mk_unions(children.iter().copied()))
1614            }
1615            Ast::Concat(c) => {
1616                let mut concat_translator: Option<Translator> = None;
1617                let mut children = vec![];
1618                let mut i = 0;
1619                while i < c.asts.len() {
1620                    let ast = &c.asts[i];
1621                    match ast {
1622                        Ast::Flags(f) => {
1623                            let mut translator_builder = self.default_translator_builder();
1624                            if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1625                                translator_builder.case_insensitive(state);
1626                            }
1627                            if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1628                                translator_builder.unicode(state);
1629                            }
1630                            if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1631                                self.dot_all.set(state);
1632                            }
1633                            concat_translator = Some(translator_builder.build());
1634                            i += 1;
1635                            continue;
1636                        }
1637                        Ast::Assertion(a) if a.kind == ast::AssertionKind::WordBoundary => {
1638                            let node =
1639                                self.rewrite_word_boundary_in_concat(&c.asts, i, translator, tb)?;
1640                            children.push(node.0);
1641                            i = node.1; // skip consumed lookaheads
1642                            continue;
1643                        }
1644                        _ => {}
1645                    }
1646                    match concat_translator {
1647                        Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
1648                            Ok(node_id) => children.push(node_id),
1649                            Err(err) => return Err(err),
1650                        },
1651                        None => match self.ast_to_node_id(ast, translator, tb) {
1652                            Ok(node_id) => children.push(node_id),
1653                            Err(err) => return Err(err),
1654                        },
1655                    }
1656                    i += 1;
1657                }
1658                Ok(tb.mk_concats(children.iter().cloned()))
1659            }
1660            Ast::Intersection(intersection) => {
1661                let mut children = vec![];
1662                for ast in &intersection.asts {
1663                    match self.ast_to_node_id(ast, translator, tb) {
1664                        Ok(node_id) => children.push(node_id),
1665                        Err(err) => return Err(err),
1666                    }
1667                }
1668                Ok(tb.mk_inters(children.into_iter()))
1669            }
1670            Ast::Complement(complement) => {
1671                let body = self.ast_to_node_id(&complement.ast, translator, tb);
1672                body.map(|x| tb.mk_compl(x))
1673            }
1674        }
1675    }
1676
1677    fn parse_inner(&mut self) -> Result<Ast> {
1678        let mut concat = Concat {
1679            span: self.span(),
1680            asts: vec![],
1681        };
1682        loop {
1683            self.bump_space();
1684            if self.is_eof() {
1685                break;
1686            }
1687            match self.char() {
1688                '(' => concat = self.push_group(concat)?,
1689                ')' => concat = self.pop_group(concat)?,
1690                '|' => concat = self.push_alternate(concat)?,
1691                '&' => concat = self.push_intersect(concat)?,
1692                '~' => concat = self.push_compl_group(concat)?,
1693                '[' => {
1694                    let class = self.parse_set_class()?;
1695                    concat.asts.push(Ast::class_bracketed(class));
1696                }
1697                '?' => {
1698                    concat =
1699                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
1700                }
1701                '*' => {
1702                    concat =
1703                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
1704                }
1705                '+' => {
1706                    concat =
1707                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
1708                }
1709                '{' => {
1710                    concat = self.parse_counted_repetition(concat)?;
1711                }
1712                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
1713            }
1714        }
1715        self.pop_group_end(concat)
1716    }
1717
1718    /// Parse the regular expression and return an abstract syntax tree with
1719    /// all of the comments found in the pattern.
1720    fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
1721        let ast = self.parse_inner()?;
1722        self.ast_to_node_id(&ast, &mut None, tb)
1723    }
1724
1725    #[inline(never)]
1726    fn parse_uncounted_repetition(
1727        &self,
1728        mut concat: ast::Concat,
1729        kind: ast::RepetitionKind,
1730    ) -> Result<ast::Concat> {
1731        // assert!(self.char() == '?' || self.char() == '*' || self.char() == '+');
1732        let op_start = self.pos();
1733        let ast = match concat.asts.pop() {
1734            Some(ast) => ast,
1735            None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1736        };
1737        match ast {
1738            Ast::Empty(_) | Ast::Flags(_) => {
1739                return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1740            }
1741            _ => {}
1742        }
1743        if self.bump() && self.char() == '?' {
1744            return Err(self.error(
1745                Span::new(op_start, self.pos()),
1746                ast::ErrorKind::UnsupportedLazyQuantifier,
1747            ));
1748        }
1749        concat.asts.push(Ast::repetition(ast::Repetition {
1750            span: ast.span().with_end(self.pos()),
1751            op: ast::RepetitionOp {
1752                span: Span::new(op_start, self.pos()),
1753                kind,
1754            },
1755            greedy: true,
1756            ast: Box::new(ast),
1757        }));
1758        Ok(concat)
1759    }
1760
1761    #[inline(never)]
1762    fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
1763        assert!(self.char() == '{');
1764        let start = self.pos();
1765        let ast = match concat.asts.pop() {
1766            Some(ast) => ast,
1767            None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
1768        };
1769        match ast {
1770            Ast::Empty(_) | Ast::Flags(_) => {
1771                return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
1772            }
1773            _ => {}
1774        }
1775        if !self.bump_and_bump_space() {
1776            return Err(self.error(
1777                Span::new(start, self.pos()),
1778                ast::ErrorKind::RepetitionCountUnclosed,
1779            ));
1780        }
1781        let count_start = specialize_err(
1782            self.parse_decimal(),
1783            ast::ErrorKind::DecimalEmpty,
1784            ast::ErrorKind::RepetitionCountDecimalEmpty,
1785        );
1786        if self.is_eof() {
1787            return Err(self.error(
1788                Span::new(start, self.pos()),
1789                ast::ErrorKind::RepetitionCountUnclosed,
1790            ));
1791        }
1792        let range = if self.char() == ',' {
1793            if !self.bump_and_bump_space() {
1794                return Err(self.error(
1795                    Span::new(start, self.pos()),
1796                    ast::ErrorKind::RepetitionCountUnclosed,
1797                ));
1798            }
1799            if self.char() != '}' {
1800                let count_start = match count_start {
1801                    Ok(c) => c,
1802                    Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
1803                        if self.parser().empty_min_range {
1804                            0
1805                        } else {
1806                            return Err(err);
1807                        }
1808                    }
1809                    err => err?,
1810                };
1811                let count_end = specialize_err(
1812                    self.parse_decimal(),
1813                    ast::ErrorKind::DecimalEmpty,
1814                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1815                )?;
1816                ast::RepetitionRange::Bounded(count_start, count_end)
1817            } else {
1818                ast::RepetitionRange::AtLeast(count_start?)
1819            }
1820        } else {
1821            ast::RepetitionRange::Exactly(count_start?)
1822        };
1823
1824        if self.is_eof() || self.char() != '}' {
1825            return Err(self.error(
1826                Span::new(start, self.pos()),
1827                ast::ErrorKind::RepetitionCountUnclosed,
1828            ));
1829        }
1830
1831        if self.bump_and_bump_space() && self.char() == '?' {
1832            return Err(self.error(
1833                Span::new(start, self.pos()),
1834                ast::ErrorKind::UnsupportedLazyQuantifier,
1835            ));
1836        }
1837
1838        let op_span = Span::new(start, self.pos());
1839        if !range.is_valid() {
1840            return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
1841        }
1842        concat.asts.push(Ast::repetition(ast::Repetition {
1843            span: ast.span().with_end(self.pos()),
1844            op: ast::RepetitionOp {
1845                span: op_span,
1846                kind: ast::RepetitionKind::Range(range),
1847            },
1848            greedy: true,
1849            ast: Box::new(ast),
1850        }));
1851        Ok(concat)
1852    }
1853
1854    #[inline(never)]
1855    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1856        assert_eq!(self.char(), '(');
1857        let open_span = self.span_char();
1858        self.bump();
1859        self.bump_space();
1860        if let Some((ahead, pos)) = self.is_lookaround_prefix() {
1861            let kind = match (pos, ahead) {
1862                (true, true) => LookaroundKind::PositiveLookahead,
1863                (true, false) => LookaroundKind::PositiveLookbehind,
1864                (false, true) => LookaroundKind::NegativeLookahead,
1865                (false, false) => LookaroundKind::NegativeLookbehind,
1866            };
1867            return Ok(Either::Right(ast::Group {
1868                span: open_span,
1869                kind: ast::GroupKind::Lookaround(kind),
1870                ast: Box::new(Ast::empty(self.span())),
1871            }));
1872        }
1873        let inner_span = self.span();
1874        let mut starts_with_p = true;
1875        if self.bump_if("?P<") || {
1876            starts_with_p = false;
1877            self.bump_if("?<")
1878        } {
1879            let capture_index = self.next_capture_index(open_span)?;
1880            let name = self.parse_capture_name(capture_index)?;
1881            Ok(Either::Right(ast::Group {
1882                span: open_span,
1883                kind: ast::GroupKind::CaptureName {
1884                    starts_with_p,
1885                    name,
1886                },
1887                ast: Box::new(Ast::empty(self.span())),
1888            }))
1889        } else if self.bump_if("?") {
1890            if self.is_eof() {
1891                return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
1892            }
1893            let flags = self.parse_flags()?;
1894            let char_end = self.char();
1895            self.bump();
1896            if char_end == ')' {
1897                // We don't allow empty flags, e.g., `(?)`. We instead
1898                // interpret it as a repetition operator missing its argument.
1899                if flags.items.is_empty() {
1900                    return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
1901                }
1902                Ok(Either::Left(ast::SetFlags {
1903                    span: Span {
1904                        end: self.pos(),
1905                        ..open_span
1906                    },
1907                    flags,
1908                }))
1909            } else {
1910                assert_eq!(char_end, ':');
1911                Ok(Either::Right(ast::Group {
1912                    span: open_span,
1913                    kind: ast::GroupKind::NonCapturing(flags),
1914                    ast: Box::new(Ast::empty(self.span())),
1915                }))
1916            }
1917        } else {
1918            let capture_index = self.next_capture_index(open_span)?;
1919            Ok(Either::Right(ast::Group {
1920                span: open_span,
1921                kind: ast::GroupKind::CaptureIndex(capture_index),
1922                ast: Box::new(Ast::empty(self.span())),
1923            }))
1924        }
1925    }
1926
1927    #[inline(never)]
1928    fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
1929        if self.is_eof() {
1930            return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1931        }
1932        let start = self.pos();
1933        loop {
1934            if self.char() == '>' {
1935                break;
1936            }
1937            if !is_capture_char(self.char(), self.pos() == start) {
1938                return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
1939            }
1940            if !self.bump() {
1941                break;
1942            }
1943        }
1944        let end = self.pos();
1945        if self.is_eof() {
1946            return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1947        }
1948        assert_eq!(self.char(), '>');
1949        self.bump();
1950        let name = &self.pattern()[start.offset..end.offset];
1951        if name.is_empty() {
1952            return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
1953        }
1954        let capname = ast::CaptureName {
1955            span: Span::new(start, end),
1956            name: name.to_string(),
1957            index: capture_index,
1958        };
1959        self.add_capture_name(&capname)?;
1960        Ok(capname)
1961    }
1962
1963    #[inline(never)]
1964    fn parse_flags(&self) -> Result<ast::Flags> {
1965        let mut flags = ast::Flags {
1966            span: self.span(),
1967            items: vec![],
1968        };
1969        let mut last_was_negation = None;
1970        while self.char() != ':' && self.char() != ')' {
1971            if self.char() == '-' {
1972                last_was_negation = Some(self.span_char());
1973                let item = ast::FlagsItem {
1974                    span: self.span_char(),
1975                    kind: ast::FlagsItemKind::Negation,
1976                };
1977                if let Some(i) = flags.add_item(item) {
1978                    return Err(self.error(
1979                        self.span_char(),
1980                        ast::ErrorKind::FlagRepeatedNegation {
1981                            original: flags.items[i].span,
1982                        },
1983                    ));
1984                }
1985            } else {
1986                last_was_negation = None;
1987                let item = ast::FlagsItem {
1988                    span: self.span_char(),
1989                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1990                };
1991                if let Some(i) = flags.add_item(item) {
1992                    return Err(self.error(
1993                        self.span_char(),
1994                        ast::ErrorKind::FlagDuplicate {
1995                            original: flags.items[i].span,
1996                        },
1997                    ));
1998                }
1999            }
2000            if !self.bump() {
2001                return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
2002            }
2003        }
2004        if let Some(span) = last_was_negation {
2005            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
2006        }
2007        flags.span.end = self.pos();
2008        Ok(flags)
2009    }
2010
2011    #[inline(never)]
2012    fn parse_flag(&self) -> Result<ast::Flag> {
2013        match self.char() {
2014            'i' => Ok(ast::Flag::CaseInsensitive),
2015            'm' => Ok(ast::Flag::MultiLine),
2016            's' => Ok(ast::Flag::DotMatchesNewLine),
2017            'U' => Ok(ast::Flag::SwapGreed),
2018            'u' => Ok(ast::Flag::Unicode),
2019            'R' => Ok(ast::Flag::CRLF),
2020            'x' => Ok(ast::Flag::IgnoreWhitespace),
2021            _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
2022        }
2023    }
2024
2025    fn parse_primitive(&self) -> Result<Primitive> {
2026        match self.char() {
2027            '\\' => self.parse_escape(),
2028            '_' => {
2029                let ast = Primitive::Top(self.span_char());
2030                self.bump();
2031                Ok(ast)
2032            }
2033            '.' => {
2034                let ast = Primitive::Dot(self.span_char());
2035                self.bump();
2036                Ok(ast)
2037            }
2038            '^' => {
2039                let ast = Primitive::Assertion(ast::Assertion {
2040                    span: self.span_char(),
2041                    kind: ast::AssertionKind::StartLine,
2042                });
2043                self.bump();
2044                Ok(ast)
2045            }
2046            '$' => {
2047                let ast = Primitive::Assertion(ast::Assertion {
2048                    span: self.span_char(),
2049                    kind: ast::AssertionKind::EndLine,
2050                });
2051                self.bump();
2052                Ok(ast)
2053            }
2054            c => {
2055                let ast = Primitive::Literal(Literal {
2056                    span: self.span_char(),
2057                    kind: LiteralKind::Verbatim,
2058                    c,
2059                });
2060                self.bump();
2061                Ok(ast)
2062            }
2063        }
2064    }
2065
2066    #[inline(never)]
2067    fn parse_escape(&self) -> Result<Primitive> {
2068        assert_eq!(self.char(), '\\');
2069        let start = self.pos();
2070        if !self.bump() {
2071            return Err(self.error(
2072                Span::new(start, self.pos()),
2073                ast::ErrorKind::EscapeUnexpectedEof,
2074            ));
2075        }
2076        let c = self.char();
2077        // Put some of the more complicated routines into helpers.
2078        match c {
2079            '0'..='9' => {
2080                if !self.parser().octal {
2081                    return Err(self.error(
2082                        Span::new(start, self.span_char().end),
2083                        ast::ErrorKind::UnsupportedBackreference,
2084                    ));
2085                }
2086                let mut lit = self.parse_octal();
2087                lit.span.start = start;
2088                return Ok(Primitive::Literal(lit));
2089            }
2090            // '8'..='9' if !self.parser().octal => {
2091            //     return Err(self.error(
2092            //         Span::new(start, self.span_char().end),
2093            //         ast::ErrorKind::UnsupportedBackreference,
2094            //     ));
2095            // }
2096            'x' | 'u' | 'U' => {
2097                let mut lit = self.parse_hex()?;
2098                lit.span.start = start;
2099                return Ok(Primitive::Literal(lit));
2100            }
2101            'p' | 'P' => {
2102                let mut cls = self.parse_unicode_class()?;
2103                cls.span.start = start;
2104                return Ok(Primitive::Unicode(cls));
2105            }
2106            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
2107                let mut cls = self.parse_perl_class();
2108                cls.span.start = start;
2109                return Ok(Primitive::Perl(cls));
2110            }
2111            _ => {}
2112        }
2113
2114        // Handle all of the one letter sequences inline.
2115        self.bump();
2116        let span = Span::new(start, self.pos());
2117        if is_meta_character(c) {
2118            return Ok(Primitive::Literal(Literal {
2119                span,
2120                kind: LiteralKind::Meta,
2121                c,
2122            }));
2123        }
2124        if is_escapeable_character(c) {
2125            return Ok(Primitive::Literal(Literal {
2126                span,
2127                kind: LiteralKind::Superfluous,
2128                c,
2129            }));
2130        }
2131        let special = |kind, c| {
2132            Ok(Primitive::Literal(Literal {
2133                span,
2134                kind: LiteralKind::Special(kind),
2135                c,
2136            }))
2137        };
2138        match c {
2139            'a' => special(SpecialLiteralKind::Bell, '\x07'),
2140            'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
2141            't' => special(SpecialLiteralKind::Tab, '\t'),
2142            'n' => special(SpecialLiteralKind::LineFeed, '\n'),
2143            'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
2144            'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
2145            'A' => Ok(Primitive::Assertion(ast::Assertion {
2146                span,
2147                kind: ast::AssertionKind::StartText,
2148            })),
2149            'z' => Ok(Primitive::Assertion(ast::Assertion {
2150                span,
2151                kind: ast::AssertionKind::EndText,
2152            })),
2153            'b' => {
2154                let mut wb = ast::Assertion {
2155                    span,
2156                    kind: ast::AssertionKind::WordBoundary,
2157                };
2158                // After a \b, we "try" to parse things like \b{start} for
2159                // special word boundary assertions.
2160                if !self.is_eof() && self.char() == '{' {
2161                    if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
2162                        wb.kind = kind;
2163                        wb.span.end = self.pos();
2164                    }
2165                }
2166                Ok(Primitive::Assertion(wb))
2167            }
2168            'B' => Ok(Primitive::Assertion(ast::Assertion {
2169                span,
2170                kind: ast::AssertionKind::NotWordBoundary,
2171            })),
2172            '<' => Ok(Primitive::Assertion(ast::Assertion {
2173                span,
2174                kind: ast::AssertionKind::WordBoundaryStartAngle,
2175            })),
2176            '>' => Ok(Primitive::Assertion(ast::Assertion {
2177                span,
2178                kind: ast::AssertionKind::WordBoundaryEndAngle,
2179            })),
2180            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2181        }
2182    }
2183
2184    fn maybe_parse_special_word_boundary(
2185        &self,
2186        wb_start: Position,
2187    ) -> Result<Option<ast::AssertionKind>> {
2188        assert_eq!(self.char(), '{');
2189
2190        let is_valid_char = |c| matches!(c, 'A'..='Z' | 'a'..='z' | '-');
2191        let start = self.pos();
2192        if !self.bump_and_bump_space() {
2193            return Err(self.error(
2194                Span::new(wb_start, self.pos()),
2195                ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2196            ));
2197        }
2198        let start_contents = self.pos();
2199        // This is one of the critical bits: if the first non-whitespace
2200        // character isn't in [-A-Za-z] (i.e., this can't be a special word
2201        // boundary), then we bail and let the counted repetition parser deal
2202        // with this.
2203        if !is_valid_char(self.char()) {
2204            self.parser().pos.set(start);
2205            return Ok(None);
2206        }
2207
2208        // Now collect up our chars until we see a '}'.
2209        let mut scratch = self.parser().scratch.borrow_mut();
2210        scratch.clear();
2211        while !self.is_eof() && is_valid_char(self.char()) {
2212            scratch.push(self.char());
2213            self.bump_and_bump_space();
2214        }
2215        if self.is_eof() || self.char() != '}' {
2216            return Err(self.error(
2217                Span::new(start, self.pos()),
2218                ast::ErrorKind::SpecialWordBoundaryUnclosed,
2219            ));
2220        }
2221        let end = self.pos();
2222        self.bump();
2223        let kind = match scratch.as_str() {
2224            "start" => ast::AssertionKind::WordBoundaryStart,
2225            "end" => ast::AssertionKind::WordBoundaryEnd,
2226            "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2227            "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2228            _ => {
2229                return Err(self.error(
2230                    Span::new(start_contents, end),
2231                    ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2232                ))
2233            }
2234        };
2235        Ok(Some(kind))
2236    }
2237
2238    #[inline(never)]
2239    fn parse_octal(&self) -> Literal {
2240        assert!(self.parser().octal);
2241        assert!('0' <= self.char() && self.char() <= '7');
2242        let start = self.pos();
2243        // Parse up to two more digits.
2244        while self.bump()
2245            && '0' <= self.char()
2246            && self.char() <= '7'
2247            && self.pos().offset - start.offset <= 2
2248        {}
2249        let end = self.pos();
2250        let octal = &self.pattern()[start.offset..end.offset];
2251        // Parsing the octal should never fail since the above guarantees a
2252        // valid number.
2253        let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2254        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
2255        // invalid Unicode scalar values.
2256        let c = char::from_u32(codepoint).expect("Unicode scalar value");
2257        Literal {
2258            span: Span::new(start, end),
2259            kind: LiteralKind::Octal,
2260            c,
2261        }
2262    }
2263
2264    #[inline(never)]
2265    fn parse_hex(&self) -> Result<Literal> {
2266        assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2267
2268        let hex_kind = match self.char() {
2269            'x' => HexLiteralKind::X,
2270            'u' => HexLiteralKind::UnicodeShort,
2271            _ => HexLiteralKind::UnicodeLong,
2272        };
2273        if !self.bump_and_bump_space() {
2274            return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2275        }
2276        if self.char() == '{' {
2277            self.parse_hex_brace(hex_kind)
2278        } else {
2279            self.parse_hex_digits(hex_kind)
2280        }
2281    }
2282
2283    #[inline(never)]
2284    fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2285        let mut scratch = self.parser().scratch.borrow_mut();
2286        scratch.clear();
2287
2288        let start = self.pos();
2289        for i in 0..kind.digits() {
2290            if i > 0 && !self.bump_and_bump_space() {
2291                return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2292            }
2293            if !is_hex(self.char()) {
2294                return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2295            }
2296            scratch.push(self.char());
2297        }
2298        // The final bump just moves the parser past the literal, which may
2299        // be EOF.
2300        self.bump_and_bump_space();
2301        let end = self.pos();
2302        let hex = scratch.as_str();
2303        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2304            None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2305            Some(c) => Ok(Literal {
2306                span: Span::new(start, end),
2307                kind: LiteralKind::HexFixed(kind),
2308                c,
2309            }),
2310        }
2311    }
2312
2313    #[inline(never)]
2314    fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2315        let mut scratch = self.parser().scratch.borrow_mut();
2316        scratch.clear();
2317
2318        let brace_pos = self.pos();
2319        let start = self.span_char().end;
2320        while self.bump_and_bump_space() && self.char() != '}' {
2321            if !is_hex(self.char()) {
2322                return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2323            }
2324            scratch.push(self.char());
2325        }
2326        if self.is_eof() {
2327            return Err(self.error(
2328                Span::new(brace_pos, self.pos()),
2329                ast::ErrorKind::EscapeUnexpectedEof,
2330            ));
2331        }
2332        let end = self.pos();
2333        let hex = scratch.as_str();
2334        assert_eq!(self.char(), '}');
2335        self.bump_and_bump_space();
2336
2337        if hex.is_empty() {
2338            return Err(self.error(
2339                Span::new(brace_pos, self.pos()),
2340                ast::ErrorKind::EscapeHexEmpty,
2341            ));
2342        }
2343        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2344            None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2345            Some(c) => Ok(Literal {
2346                span: Span::new(start, self.pos()),
2347                kind: LiteralKind::HexBrace(kind),
2348                c,
2349            }),
2350        }
2351    }
2352
2353    fn parse_decimal(&self) -> Result<u32> {
2354        let mut scratch = self.parser().scratch.borrow_mut();
2355        scratch.clear();
2356
2357        while !self.is_eof() && self.char().is_whitespace() {
2358            self.bump();
2359        }
2360        let start = self.pos();
2361        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2362            scratch.push(self.char());
2363            self.bump_and_bump_space();
2364        }
2365        let span = Span::new(start, self.pos());
2366        while !self.is_eof() && self.char().is_whitespace() {
2367            self.bump_and_bump_space();
2368        }
2369        let digits = scratch.as_str();
2370        if digits.is_empty() {
2371            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2372        }
2373        match digits.parse::<u32>().ok() {
2374            Some(n) => Ok(n),
2375            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2376        }
2377    }
2378
2379    #[inline(never)]
2380    fn parse_set_class(&self) -> Result<ClassBracketed> {
2381        assert_eq!(self.char(), '[');
2382
2383        let mut union = ClassSetUnion {
2384            span: self.span(),
2385            items: vec![],
2386        };
2387        loop {
2388            self.bump_space();
2389            if self.is_eof() {
2390                return Err(self.unclosed_class_error());
2391            }
2392            match self.char() {
2393                '[' => {
2394                    // If we've already parsed the opening bracket, then
2395                    // attempt to treat this as the beginning of an ASCII
2396                    // class. If ASCII class parsing fails, then the parser
2397                    // backs up to `[`.
2398                    if !self.parser().stack_class.borrow().is_empty() {
2399                        if let Some(cls) = self.maybe_parse_ascii_class() {
2400                            union.push(ClassSetItem::Ascii(cls));
2401                            continue;
2402                        }
2403                    }
2404                    union = self.push_class_open(union)?;
2405                }
2406                ']' => match self.pop_class(union)? {
2407                    Either::Left(nested_union) => {
2408                        union = nested_union;
2409                    }
2410                    Either::Right(class) => return Ok(class),
2411                },
2412                '&' if self.peek() == Some('&') => {
2413                    assert!(self.bump_if("&&"));
2414                    union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
2415                }
2416                '-' if self.peek() == Some('-') => {
2417                    assert!(self.bump_if("--"));
2418                    union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
2419                }
2420                '~' if self.peek() == Some('~') => {
2421                    assert!(self.bump_if("~~"));
2422                    union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
2423                }
2424                _ => {
2425                    union.push(self.parse_set_class_range()?);
2426                }
2427            }
2428        }
2429    }
2430
2431    #[inline(never)]
2432    fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2433        let prim1 = self.parse_set_class_item()?;
2434        self.bump_space();
2435        if self.is_eof() {
2436            return Err(self.unclosed_class_error());
2437        }
2438        if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2439            return prim1.into_class_set_item(self);
2440        }
2441        if !self.bump_and_bump_space() {
2442            return Err(self.unclosed_class_error());
2443        }
2444        let prim2 = self.parse_set_class_item()?;
2445        let range = ClassSetRange {
2446            span: Span::new(prim1.span().start, prim2.span().end),
2447            start: prim1.into_class_literal(self)?,
2448            end: prim2.into_class_literal(self)?,
2449        };
2450        if !range.is_valid() {
2451            return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2452        }
2453        Ok(ClassSetItem::Range(range))
2454    }
2455
2456    #[inline(never)]
2457    fn parse_set_class_item(&self) -> Result<Primitive> {
2458        if self.char() == '\\' {
2459            self.parse_escape()
2460        } else {
2461            let x = Primitive::Literal(Literal {
2462                span: self.span_char(),
2463                kind: LiteralKind::Verbatim,
2464                c: self.char(),
2465            });
2466            self.bump();
2467            Ok(x)
2468        }
2469    }
2470
2471    #[inline(never)]
2472    fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2473        assert_eq!(self.char(), '[');
2474        let start = self.pos();
2475        if !self.bump_and_bump_space() {
2476            return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2477        }
2478
2479        let negated = if self.char() != '^' {
2480            false
2481        } else {
2482            if !self.bump_and_bump_space() {
2483                return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2484            }
2485            true
2486        };
2487        // Accept any number of `-` as literal `-`.
2488        let mut union = ClassSetUnion {
2489            span: self.span(),
2490            items: vec![],
2491        };
2492        while self.char() == '-' {
2493            union.push(ClassSetItem::Literal(Literal {
2494                span: self.span_char(),
2495                kind: LiteralKind::Verbatim,
2496                c: '-',
2497            }));
2498            if !self.bump_and_bump_space() {
2499                return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
2500            }
2501        }
2502        // If `]` is the *first* char in a set, then interpret it as a literal
2503        // `]`. That is, an empty class is impossible to write.
2504        if union.items.is_empty() && self.char() == ']' {
2505            union.push(ClassSetItem::Literal(Literal {
2506                span: self.span_char(),
2507                kind: LiteralKind::Verbatim,
2508                c: ']',
2509            }));
2510            if !self.bump_and_bump_space() {
2511                return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2512            }
2513        }
2514        let set = ClassBracketed {
2515            span: Span::new(start, self.pos()),
2516            negated,
2517            kind: ClassSet::union(ClassSetUnion {
2518                span: Span::new(union.span.start, union.span.start),
2519                items: vec![],
2520            }),
2521        };
2522        Ok((set, union))
2523    }
2524
2525    #[inline(never)]
2526    fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
2527        assert_eq!(self.char(), '[');
2528        // If parsing fails, then we back up the parser to this starting point.
2529        let start = self.pos();
2530        let mut negated = false;
2531        if !self.bump() || self.char() != ':' {
2532            self.parser().pos.set(start);
2533            return None;
2534        }
2535        if !self.bump() {
2536            self.parser().pos.set(start);
2537            return None;
2538        }
2539        if self.char() == '^' {
2540            negated = true;
2541            if !self.bump() {
2542                self.parser().pos.set(start);
2543                return None;
2544            }
2545        }
2546        let name_start = self.offset();
2547        while self.char() != ':' && self.bump() {}
2548        if self.is_eof() {
2549            self.parser().pos.set(start);
2550            return None;
2551        }
2552        let name = &self.pattern()[name_start..self.offset()];
2553        if !self.bump_if(":]") {
2554            self.parser().pos.set(start);
2555            return None;
2556        }
2557        let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
2558            Some(kind) => kind,
2559            None => {
2560                self.parser().pos.set(start);
2561                return None;
2562            }
2563        };
2564        Some(ClassAscii {
2565            span: Span::new(start, self.pos()),
2566            kind,
2567            negated,
2568        })
2569    }
2570
2571    #[inline(never)]
2572    fn parse_unicode_class(&self) -> Result<ClassUnicode> {
2573        assert!(self.char() == 'p' || self.char() == 'P');
2574
2575        let mut scratch = self.parser().scratch.borrow_mut();
2576        scratch.clear();
2577
2578        let negated = self.char() == 'P';
2579        if !self.bump_and_bump_space() {
2580            return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2581        }
2582        let (start, kind) = if self.char() == '{' {
2583            let start = self.span_char().end;
2584            while self.bump_and_bump_space() && self.char() != '}' {
2585                scratch.push(self.char());
2586            }
2587            if self.is_eof() {
2588                return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2589            }
2590            assert_eq!(self.char(), '}');
2591            self.bump();
2592
2593            let name = scratch.as_str();
2594            if let Some(i) = name.find("!=") {
2595                (
2596                    start,
2597                    ClassUnicodeKind::NamedValue {
2598                        op: ClassUnicodeOpKind::NotEqual,
2599                        name: name[..i].to_string(),
2600                        value: name[i + 2..].to_string(),
2601                    },
2602                )
2603            } else if let Some(i) = name.find(':') {
2604                (
2605                    start,
2606                    ClassUnicodeKind::NamedValue {
2607                        op: ClassUnicodeOpKind::Colon,
2608                        name: name[..i].to_string(),
2609                        value: name[i + 1..].to_string(),
2610                    },
2611                )
2612            } else if let Some(i) = name.find('=') {
2613                (
2614                    start,
2615                    ClassUnicodeKind::NamedValue {
2616                        op: ClassUnicodeOpKind::Equal,
2617                        name: name[..i].to_string(),
2618                        value: name[i + 1..].to_string(),
2619                    },
2620                )
2621            } else {
2622                (start, ClassUnicodeKind::Named(name.to_string()))
2623            }
2624        } else {
2625            let start = self.pos();
2626            let c = self.char();
2627            if c == '\\' {
2628                return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
2629            }
2630            self.bump_and_bump_space();
2631            let kind = ClassUnicodeKind::OneLetter(c);
2632            (start, kind)
2633        };
2634        Ok(ClassUnicode {
2635            span: Span::new(start, self.pos()),
2636            negated,
2637            kind,
2638        })
2639    }
2640
2641    #[inline(never)]
2642    fn parse_perl_class(&self) -> ClassPerl {
2643        let c = self.char();
2644        let span = self.span_char();
2645        self.bump();
2646        let (negated, kind) = match c {
2647            'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
2648            'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
2649            's' => (false, regex_syntax::ast::ClassPerlKind::Space),
2650            'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
2651            'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
2652            'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
2653            c => panic!("expected valid Perl class but got '{}'", c),
2654        };
2655        ClassPerl {
2656            span,
2657            kind,
2658            negated,
2659        }
2660    }
2661}
2662
2663pub fn parse_ast<'s>(
2664    tb: &mut TB<'s>,
2665    pattern: &'s str,
2666) -> std::result::Result<NodeId, ResharpError> {
2667    let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
2668    p.parse(tb)
2669}
2670
2671pub fn parse_ast_with<'s>(
2672    tb: &mut TB<'s>,
2673    pattern: &'s str,
2674    flags: &PatternFlags,
2675) -> std::result::Result<NodeId, ResharpError> {
2676    let mut p: ResharpParser<'s> = ResharpParser::with_flags(pattern, flags);
2677    p.parse(tb)
2678}
2679
2680/// Parse a pattern into the raw AST without converting to algebra nodes.
2681pub fn parse_to_ast(pattern: &str) -> std::result::Result<ast::Ast, ResharpError> {
2682    let mut p: ResharpParser = ResharpParser::new(pattern);
2683    p.parse_inner()
2684}
resharp_parser/lib.rs

resharp_parser/
lib.rs