resharp_parser/
lib.rs

1//! Parser for resharp regex patterns.
2//!
3//! Converts regex pattern strings into the node representation used by resharp-algebra.
4
5#![warn(dead_code)]
6pub mod ast;
7use std::cell::{Cell, RefCell};
8
9use ast::{Ast, Concat, ErrorKind, GroupKind, LookaroundKind, RepetitionKind};
10use regex_syntax::{
11    ast::{
12        ClassAscii, ClassBracketed, ClassPerl, ClassSet, ClassSetBinaryOpKind, ClassSetItem,
13        ClassSetRange, ClassSetUnion, ClassUnicode, ClassUnicodeKind, ClassUnicodeOpKind,
14        HexLiteralKind, Literal, LiteralKind, Position, Span, SpecialLiteralKind,
15    },
16    hir::{
17        self,
18        translate::{Translator, TranslatorBuilder},
19    },
20    utf8::Utf8Sequences,
21};
22use resharp_algebra::{Kind, NodeId};
23
24type TB<'s> = resharp_algebra::RegexBuilder;
25
26/// global pattern-level flags, set from `RegexOptions`.
27pub struct PatternFlags {
28    /// `\w`/`\d`/`\s` match full Unicode (true) or ASCII only (false).
29    pub unicode: bool,
30    /// `\w` covers all Unicode word chars including 3- and 4-byte sequences.
31    pub full_unicode: bool,
32    /// global case-insensitive matching.
33    pub case_insensitive: bool,
34    /// `.` matches `\n` (behaves like `_`).
35    pub dot_matches_new_line: bool,
36    /// `^` and `$` match at line boundaries (`\n`) in addition to text boundaries.
37    pub multiline: bool,
38    /// allow whitespace and `#` comments in the pattern.
39    pub ignore_whitespace: bool,
40    /// ASCII `\w`/`\d`/`\s` tables, but
41    /// negated perl classes (`\W`/`\D`/`\S`) and `.` match a full codepoint
42    pub ascii_perl_classes: bool,
43    /// max upper bound on `expanded_ast_size` before parser rejects the
44    /// pattern as too complex. default 50_000.
45    pub expanded_ast_limit: u64,
46    /// max children allowed in any single `Concat`/`Alternation`/
47    /// `Intersection` node. default 4_000.
48    pub max_list_len: usize,
49    /// max upper bound on bounded repetition `{n,m}`. default 500.
50    pub max_repeat: u32,
51}
52
53// arbitrary safeguards, these will not prevent intentional DoS patterns
54// more to protect you from shooting yourself in the foot
55pub const DEFAULT_MAX_REPEAT: u32 = 500;
56pub const DEFAULT_EXPANDED_AST_LIMIT: u64 = 50_000;
57pub const DEFAULT_MAX_LIST_LEN: usize = 4_000;
58
59impl Default for PatternFlags {
60    fn default() -> Self {
61        Self {
62            unicode: true,
63            full_unicode: false,
64            case_insensitive: false,
65            dot_matches_new_line: false,
66            multiline: true,
67            ignore_whitespace: false,
68            ascii_perl_classes: false,
69            expanded_ast_limit: DEFAULT_EXPANDED_AST_LIMIT,
70            max_list_len: DEFAULT_MAX_LIST_LEN,
71            max_repeat: DEFAULT_MAX_REPEAT,
72        }
73    }
74}
75
76#[derive(Clone, Copy, PartialEq, Debug)]
77enum WordCharKind {
78    Word,
79    NonWord,
80    MaybeWord,
81    MaybeNonWord,
82    Unknown,
83    Edge,
84}
85
86fn is_word_byte(b: u8) -> bool {
87    b.is_ascii_alphanumeric() || b == b'_'
88}
89
90fn class_set_item_word_kind(item: &regex_syntax::ast::ClassSetItem) -> WordCharKind {
91    use regex_syntax::ast::{ClassPerlKind, ClassSetItem};
92    use WordCharKind::*;
93    match item {
94        ClassSetItem::Empty(_) => Unknown,
95        ClassSetItem::Literal(l) => {
96            if is_word_byte(l.c as u8) {
97                Word
98            } else {
99                NonWord
100            }
101        }
102        ClassSetItem::Range(r) => {
103            let all_word = (r.start.c as u8..=r.end.c as u8).all(is_word_byte);
104            let all_non = (r.start.c as u8..=r.end.c as u8).all(|b| !is_word_byte(b));
105            if all_word {
106                Word
107            } else if all_non {
108                NonWord
109            } else {
110                Unknown
111            }
112        }
113        ClassSetItem::Perl(p) => match (&p.kind, p.negated) {
114            (ClassPerlKind::Word, false) => Word,
115            (ClassPerlKind::Word, true) => NonWord,
116            (ClassPerlKind::Space, false) => NonWord,
117            (ClassPerlKind::Digit, false) => Word,
118            _ => Unknown,
119        },
120        ClassSetItem::Bracketed(b) => class_bracketed_word_kind(b),
121        ClassSetItem::Union(u) => {
122            let mut kind = Unknown;
123            for item in &u.items {
124                let k = class_set_item_word_kind(item);
125                kind = match (kind, k) {
126                    (_, Unknown) => return Unknown,
127                    (Unknown, _) => k,
128                    (Word, Word) => Word,
129                    (NonWord, NonWord) => NonWord,
130                    _ => return Unknown,
131                };
132            }
133            kind
134        }
135        _ => Unknown,
136    }
137}
138
139fn class_bracketed_word_kind(c: &regex_syntax::ast::ClassBracketed) -> WordCharKind {
140    use regex_syntax::ast::{ClassPerlKind, ClassSet, ClassSetItem};
141    use WordCharKind::*;
142    if c.negated {
143        return match &c.kind {
144            ClassSet::Item(ClassSetItem::Perl(p)) if p.kind == ClassPerlKind::Word => {
145                if p.negated {
146                    Word
147                } else {
148                    NonWord
149                }
150            }
151            _ => Unknown,
152        };
153    }
154    match &c.kind {
155        ClassSet::Item(item) => class_set_item_word_kind(item),
156        ClassSet::BinaryOp(_) => Unknown,
157    }
158}
159
160fn ascii_class_lit(span: Span, c: char) -> regex_syntax::ast::Literal {
161    regex_syntax::ast::Literal {
162        span,
163        kind: regex_syntax::ast::LiteralKind::Verbatim,
164        c,
165    }
166}
167
168fn ascii_class_range(span: Span, a: char, b: char) -> regex_syntax::ast::ClassSetItem {
169    regex_syntax::ast::ClassSetItem::Range(regex_syntax::ast::ClassSetRange {
170        span,
171        start: ascii_class_lit(span, a),
172        end: ascii_class_lit(span, b),
173    })
174}
175
176fn ascii_perl_positive(
177    span: Span,
178    kind: &regex_syntax::ast::ClassPerlKind,
179) -> regex_syntax::ast::ClassSetItem {
180    use regex_syntax::ast::{ClassPerlKind, ClassSetItem, ClassSetUnion};
181    match kind {
182        ClassPerlKind::Digit => ascii_class_range(span, '0', '9'),
183        ClassPerlKind::Word => ClassSetItem::Union(ClassSetUnion {
184            span,
185            items: vec![
186                ascii_class_range(span, 'a', 'z'),
187                ascii_class_range(span, 'A', 'Z'),
188                ascii_class_range(span, '0', '9'),
189                ClassSetItem::Literal(ascii_class_lit(span, '_')),
190            ],
191        }),
192        ClassPerlKind::Space => ClassSetItem::Union(ClassSetUnion {
193            span,
194            items: ['\t', '\n', '\x0B', '\x0C', '\r', ' ']
195                .into_iter()
196                .map(|c| ClassSetItem::Literal(ascii_class_lit(span, c)))
197                .collect(),
198        }),
199    }
200}
201
202fn ascii_perl_set_item(
203    span: Span,
204    kind: &regex_syntax::ast::ClassPerlKind,
205    negated: bool,
206) -> regex_syntax::ast::ClassSetItem {
207    use regex_syntax::ast::{ClassBracketed, ClassSet, ClassSetItem};
208    let positive = ascii_perl_positive(span, kind);
209    if negated {
210        ClassSetItem::Bracketed(Box::new(ClassBracketed {
211            span,
212            negated: true,
213            kind: ClassSet::Item(positive),
214        }))
215    } else {
216        positive
217    }
218}
219
220fn rewrite_ascii_perl_set(set: &regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
221    use regex_syntax::ast::{ClassSet, ClassSetBinaryOp};
222    match set {
223        ClassSet::Item(item) => ClassSet::Item(rewrite_ascii_perl_item(item)),
224        ClassSet::BinaryOp(op) => ClassSet::BinaryOp(ClassSetBinaryOp {
225            span: op.span,
226            kind: op.kind.clone(),
227            lhs: Box::new(rewrite_ascii_perl_set(&op.lhs)),
228            rhs: Box::new(rewrite_ascii_perl_set(&op.rhs)),
229        }),
230    }
231}
232
233fn rewrite_ascii_perl_item(
234    item: &regex_syntax::ast::ClassSetItem,
235) -> regex_syntax::ast::ClassSetItem {
236    use regex_syntax::ast::{ClassBracketed, ClassSetItem, ClassSetUnion};
237    match item {
238        ClassSetItem::Perl(p) => ascii_perl_set_item(p.span, &p.kind, p.negated),
239        ClassSetItem::Union(u) => ClassSetItem::Union(ClassSetUnion {
240            span: u.span,
241            items: u.items.iter().map(rewrite_ascii_perl_item).collect(),
242        }),
243        ClassSetItem::Bracketed(b) => ClassSetItem::Bracketed(Box::new(ClassBracketed {
244            span: b.span,
245            negated: b.negated,
246            kind: rewrite_ascii_perl_set(&b.kind),
247        })),
248        other => other.clone(),
249    }
250}
251
252#[derive(Clone, Debug, Eq, PartialEq)]
253enum Primitive {
254    Literal(Literal),
255    Assertion(ast::Assertion),
256    Dot(Span),
257    Top(Span),
258    Perl(ClassPerl),
259    Unicode(ClassUnicode),
260}
261
262impl Primitive {
263    fn span(&self) -> &Span {
264        match *self {
265            Primitive::Literal(ref x) => &x.span,
266            Primitive::Assertion(ref x) => &x.span,
267            Primitive::Dot(ref span) => span,
268            Primitive::Top(ref span) => span,
269            Primitive::Perl(ref x) => &x.span,
270            Primitive::Unicode(ref x) => &x.span,
271        }
272    }
273
274    fn into_ast(self) -> Ast {
275        match self {
276            Primitive::Literal(lit) => Ast::literal(lit),
277            Primitive::Assertion(assert) => Ast::assertion(assert),
278            Primitive::Dot(span) => Ast::dot(span),
279            Primitive::Top(span) => Ast::top(span),
280            Primitive::Perl(cls) => Ast::class_perl(cls),
281            Primitive::Unicode(cls) => Ast::class_unicode(cls),
282        }
283    }
284
285    fn into_class_set_item(self, p: &ResharpParser) -> Result<regex_syntax::ast::ClassSetItem> {
286        use self::Primitive::*;
287        use regex_syntax::ast::ClassSetItem;
288
289        match self {
290            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
291            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
292            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
293            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
294        }
295    }
296
297    fn into_class_literal(self, p: &ResharpParser) -> Result<Literal> {
298        use self::Primitive::*;
299
300        match self {
301            Literal(lit) => Ok(lit),
302            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
303        }
304    }
305}
306
307#[derive(Clone, Debug, Eq, PartialEq)]
308pub enum Either<Left, Right> {
309    Left(Left),
310    Right(Right),
311}
312
313#[derive(Clone, Debug, Eq, PartialEq)]
314pub struct ParseError {
315    /// The kind of error.
316    pub kind: ErrorKind,
317    /// The original pattern that the parser generated the error from. Every
318    /// span in an error is a valid range into this string.
319    pattern: String,
320    /// The span of this error.
321    pub span: Span,
322}
323
324impl std::fmt::Display for ParseError {
325    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
326        write!(f, "{:?}: {:?}", self.kind, self.span)
327    }
328}
329impl std::error::Error for ParseError {}
330
331type Result<T> = core::result::Result<T, ParseError>;
332
333#[derive(Clone, Debug)]
334enum GroupState {
335    /// This state is pushed whenever an opening group is found.
336    Group {
337        /// The concatenation immediately preceding the opening group.
338        concat: Concat,
339        /// The group that has been opened. Its sub-AST is always empty.
340        group: ast::Group,
341        /// Whether this group has the `x` flag enabled or not.
342        ignore_whitespace: bool,
343    },
344    Alternation(ast::Alternation),
345    Intersection(ast::Intersection),
346}
347
348#[derive(Clone, Debug)]
349enum ClassState {
350    /// This state is pushed whenever an opening bracket is found.
351    Open {
352        /// The union of class items immediately preceding this class.
353        union: regex_syntax::ast::ClassSetUnion,
354        set: regex_syntax::ast::ClassBracketed,
355    },
356    /// This state is pushed when a operator is seen. When popped, the stored
357    /// set becomes the left hand side of the operator.
358    Op {
359        /// The type of the operation, i.e., &&, -- or ~~.
360        kind: regex_syntax::ast::ClassSetBinaryOpKind,
361        /// The left-hand side of the operator.
362        lhs: regex_syntax::ast::ClassSet,
363    },
364}
365
366/// RE# syntax parser based on the regex-syntax crate.
367pub struct ResharpParser<'s> {
368    perl_classes: Vec<(bool, regex_syntax::ast::ClassPerlKind, NodeId)>,
369    unicode_classes: resharp_algebra::UnicodeClassCache,
370    pub translator: regex_syntax::hir::translate::Translator,
371    pub pattern: &'s str,
372    pos: Cell<Position>,
373    capture_index: Cell<u32>,
374    octal: bool,
375    empty_min_range: bool,
376    ignore_whitespace: Cell<bool>,
377    dot_all: Cell<bool>,
378    multiline: Cell<bool>,
379    global_unicode: bool,
380    global_full_unicode: bool,
381    global_ascii_perl: bool,
382    global_case_insensitive: bool,
383    expanded_ast_limit: u64,
384    max_list_len: usize,
385    max_repeat: u32,
386    comments: RefCell<Vec<ast::Comment>>,
387    stack_group: RefCell<Vec<GroupState>>,
388    stack_class: RefCell<Vec<ClassState>>,
389    capture_names: RefCell<Vec<ast::CaptureName>>,
390    scratch: RefCell<String>,
391}
392
393fn specialize_err<T>(result: Result<T>, from: ast::ErrorKind, to: ast::ErrorKind) -> Result<T> {
394    result.map_err(|e| {
395        if e.kind == from {
396            ParseError {
397                kind: to,
398                pattern: e.pattern,
399                span: e.span,
400            }
401        } else {
402            e
403        }
404    })
405}
406
407fn is_capture_char(c: char, first: bool) -> bool {
408    if first {
409        c == '_' || c.is_alphabetic()
410    } else {
411        c == '_' || c == '.' || c == '[' || c == ']' || c.is_alphanumeric()
412    }
413}
414
415pub fn is_meta_character(c: char) -> bool {
416    matches!(
417        c,
418        '\\' | '.'
419            | '+'
420            | '*'
421            | '?'
422            | '('
423            | ')'
424            | '|'
425            | '['
426            | ']'
427            | '{'
428            | '}'
429            | '^'
430            | '$'
431            | '#'
432            | '&'
433            | '-'
434            | '~'
435            | '_'
436    )
437}
438
439/// escapes all resharp meta characters in `text`.
440pub fn escape(text: &str) -> String {
441    let mut buf = String::new();
442    escape_into(text, &mut buf);
443    buf
444}
445
446/// escapes all resharp meta characters in `text` and appends to `buf`.
447pub fn escape_into(text: &str, buf: &mut String) {
448    buf.reserve(text.len());
449    for c in text.chars() {
450        if is_meta_character(c) {
451            buf.push('\\');
452        }
453        buf.push(c);
454    }
455}
456
457pub fn is_escapeable_character(c: char) -> bool {
458    if is_meta_character(c) {
459        return true;
460    }
461    if !c.is_ascii() {
462        return false;
463    }
464    match c {
465        '0'..='9' | 'A'..='Z' | 'a'..='z' => false,
466        '<' | '>' => false,
467        _ => true,
468    }
469}
470
471fn is_hex(c: char) -> bool {
472    c.is_ascii_digit() || ('a'..='f').contains(&c) || ('A'..='F').contains(&c)
473}
474
475fn ensure_lookbehind_at_start(ast: &Ast, at_start: bool) -> core::result::Result<(), Span> {
476    match ast {
477        Ast::Concat(c) => {
478            let mut child_at_start = at_start;
479            for child in &c.asts {
480                ensure_lookbehind_at_start(child, child_at_start)?;
481                if ast_may_consume(child) {
482                    child_at_start = false;
483                }
484            }
485            Ok(())
486        }
487        Ast::Alternation(a) => {
488            for child in &a.asts {
489                ensure_lookbehind_at_start(child, at_start)?;
490            }
491            Ok(())
492        }
493        Ast::Intersection(i) => {
494            for child in &i.asts {
495                ensure_lookbehind_at_start(child, at_start)?;
496            }
497            Ok(())
498        }
499        Ast::Complement(c) => ensure_lookbehind_at_start(&c.ast, at_start),
500        Ast::Group(g) => ensure_lookbehind_at_start(&g.ast, at_start),
501        Ast::Repetition(r) => ensure_lookbehind_at_start(&r.ast, at_start),
502        Ast::Lookaround(g) => {
503            match g.kind {
504                LookaroundKind::PositiveLookbehind | LookaroundKind::NegativeLookbehind => {
505                    if !at_start {
506                        return Err(g.span);
507                    }
508                }
509                LookaroundKind::PositiveLookahead | LookaroundKind::NegativeLookahead => {}
510            }
511            ensure_lookbehind_at_start(&g.ast, true)
512        }
513        Ast::Empty(_)
514        | Ast::Flags(_)
515        | Ast::Literal(_)
516        | Ast::Dot(_)
517        | Ast::Top(_)
518        | Ast::Assertion(_)
519        | Ast::ClassUnicode(_)
520        | Ast::ClassPerl(_)
521        | Ast::ClassBracketed(_) => Ok(()),
522    }
523}
524
525fn ast_may_consume(ast: &Ast) -> bool {
526    match ast {
527        Ast::Empty(_) | Ast::Flags(_) | Ast::Assertion(_) | Ast::Lookaround(_) => false,
528        Ast::Literal(_)
529        | Ast::Dot(_)
530        | Ast::Top(_)
531        | Ast::ClassUnicode(_)
532        | Ast::ClassPerl(_)
533        | Ast::ClassBracketed(_) => true,
534        Ast::Group(g) => ast_may_consume(&g.ast),
535        Ast::Repetition(r) => {
536            if !ast_may_consume(&r.ast) {
537                return false;
538            }
539            match r.op.kind {
540                RepetitionKind::ZeroOrOne
541                | RepetitionKind::ZeroOrMore
542                | RepetitionKind::OneOrMore => true,
543                RepetitionKind::Range(ast::RepetitionRange::Exactly(0)) => false,
544                RepetitionKind::Range(ast::RepetitionRange::Bounded(_, 0)) => false,
545                RepetitionKind::Range(_) => true,
546            }
547        }
548        Ast::Alternation(a) => a.asts.iter().any(ast_may_consume),
549        Ast::Intersection(i) => i.asts.iter().any(ast_may_consume),
550        Ast::Complement(_) => true,
551        Ast::Concat(c) => c.asts.iter().any(ast_may_consume),
552    }
553}
554
555impl<'s> ResharpParser<'s> {
556    fn default_translator_builder(&self) -> TranslatorBuilder {
557        let mut trb = TranslatorBuilder::new();
558        trb.unicode(self.global_unicode);
559        trb.utf8(false);
560        trb.case_insensitive(self.global_case_insensitive);
561        trb
562    }
563
564    pub fn new(pattern: &'s str) -> Self {
565        Self::with_flags(pattern, &PatternFlags::default())
566    }
567
568    pub fn with_flags(pattern: &'s str, flags: &PatternFlags) -> Self {
569        let mut trb = TranslatorBuilder::new();
570        trb.unicode(flags.unicode);
571        trb.utf8(false);
572        trb.case_insensitive(flags.case_insensitive);
573        Self {
574            translator: trb.build(),
575            pattern,
576            perl_classes: vec![],
577            unicode_classes: resharp_algebra::UnicodeClassCache::default(),
578            pos: Cell::new(Position::new(0, 0, 0)),
579            capture_index: Cell::new(0),
580            octal: false,
581            empty_min_range: false,
582            ignore_whitespace: Cell::new(flags.ignore_whitespace),
583            dot_all: Cell::new(flags.dot_matches_new_line),
584            multiline: Cell::new(flags.multiline),
585            global_unicode: flags.unicode || flags.full_unicode || flags.ascii_perl_classes,
586            global_full_unicode: flags.full_unicode,
587            global_ascii_perl: flags.ascii_perl_classes,
588            global_case_insensitive: flags.case_insensitive,
589            expanded_ast_limit: flags.expanded_ast_limit,
590            max_list_len: flags.max_list_len,
591            max_repeat: flags.max_repeat,
592            comments: RefCell::new(vec![]),
593            stack_group: RefCell::new(vec![]),
594            stack_class: RefCell::new(vec![]),
595            capture_names: RefCell::new(vec![]),
596            scratch: RefCell::new(String::new()),
597        }
598    }
599
600    fn parser(&'_ self) -> &'_ ResharpParser<'_> {
601        self
602    }
603
604    fn pattern(&self) -> &str {
605        self.pattern
606    }
607
608    fn error(&self, span: Span, kind: ast::ErrorKind) -> ParseError {
609        ParseError {
610            kind,
611            pattern: self.pattern().to_string(),
612            span,
613        }
614    }
615
616    fn unsupported_error(&self, _: regex_syntax::hir::Error) -> ParseError {
617        self.error(
618            Span::splat(self.pos()),
619            ast::ErrorKind::UnsupportedResharpRegex,
620        )
621    }
622
623    fn offset(&self) -> usize {
624        self.parser().pos.get().offset
625    }
626
627    fn line(&self) -> usize {
628        self.parser().pos.get().line
629    }
630
631    fn column(&self) -> usize {
632        self.parser().pos.get().column
633    }
634
635    fn next_capture_index(&self, span: Span) -> Result<u32> {
636        let current = self.parser().capture_index.get();
637        let i = current
638            .checked_add(1)
639            .ok_or_else(|| self.error(span, ast::ErrorKind::CaptureLimitExceeded))?;
640        self.parser().capture_index.set(i);
641        Ok(i)
642    }
643
644    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
645        let mut names = self.parser().capture_names.borrow_mut();
646        match names.binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str()) {
647            Err(i) => {
648                names.insert(i, cap.clone());
649                Ok(())
650            }
651            Ok(i) => Err(self.error(
652                cap.span,
653                ast::ErrorKind::GroupNameDuplicate {
654                    original: names[i].span,
655                },
656            )),
657        }
658    }
659
660    fn ignore_whitespace(&self) -> bool {
661        self.parser().ignore_whitespace.get()
662    }
663
664    fn char(&self) -> char {
665        self.char_at(self.offset())
666    }
667
668    fn char_at(&self, i: usize) -> char {
669        self.pattern()[i..]
670            .chars()
671            .next()
672            .unwrap_or_else(|| panic!("expected char at offset {}", i))
673    }
674
675    fn bump(&self) -> bool {
676        if self.is_eof() {
677            return false;
678        }
679        let Position {
680            mut offset,
681            mut line,
682            mut column,
683        } = self.pos();
684        if self.char() == '\n' {
685            line = line.checked_add(1).unwrap();
686            column = 1;
687        } else {
688            column = column.checked_add(1).unwrap();
689        }
690        offset += self.char().len_utf8();
691        self.parser().pos.set(Position {
692            offset,
693            line,
694            column,
695        });
696        self.pattern()[self.offset()..].chars().next().is_some()
697    }
698
699    fn bump_if(&self, prefix: &str) -> bool {
700        if self.pattern()[self.offset()..].starts_with(prefix) {
701            for _ in 0..prefix.chars().count() {
702                self.bump();
703            }
704            true
705        } else {
706            false
707        }
708    }
709
710    fn is_lookaround_prefix(&self) -> Option<(bool, bool)> {
711        if self.bump_if("?=") {
712            return Some((true, true));
713        }
714        if self.bump_if("?!") {
715            return Some((true, false));
716        }
717        if self.bump_if("?<=") {
718            return Some((false, true));
719        }
720        if self.bump_if("?<!") {
721            return Some((false, false));
722        }
723        None
724    }
725
726    fn bump_and_bump_space(&self) -> bool {
727        if !self.bump() {
728            return false;
729        }
730        self.bump_space();
731        !self.is_eof()
732    }
733
734    fn bump_space(&self) {
735        if !self.ignore_whitespace() {
736            return;
737        }
738        while !self.is_eof() {
739            if self.char().is_whitespace() {
740                self.bump();
741            } else if self.char() == '#' {
742                let start = self.pos();
743                let mut comment_text = String::new();
744                self.bump();
745                while !self.is_eof() {
746                    let c = self.char();
747                    self.bump();
748                    if c == '\n' {
749                        break;
750                    }
751                    comment_text.push(c);
752                }
753                let comment = ast::Comment {
754                    span: Span::new(start, self.pos()),
755                    comment: comment_text,
756                };
757                self.parser().comments.borrow_mut().push(comment);
758            } else {
759                break;
760            }
761        }
762    }
763
764    fn peek(&self) -> Option<char> {
765        if self.is_eof() {
766            return None;
767        }
768        self.pattern()[self.offset() + self.char().len_utf8()..]
769            .chars()
770            .next()
771    }
772
773    /// Like peek, but will ignore spaces when the parser is in whitespace
774    /// insensitive mode.
775    fn peek_space(&self) -> Option<char> {
776        if !self.ignore_whitespace() {
777            return self.peek();
778        }
779        if self.is_eof() {
780            return None;
781        }
782        let mut start = self.offset() + self.char().len_utf8();
783        let mut in_comment = false;
784        for (i, c) in self.pattern()[start..].char_indices() {
785            if c.is_whitespace() {
786                continue;
787            } else if !in_comment && c == '#' {
788                in_comment = true;
789            } else if in_comment && c == '\n' {
790                in_comment = false;
791            } else {
792                start += i;
793                break;
794            }
795        }
796        self.pattern()[start..].chars().next()
797    }
798
799    fn is_eof(&self) -> bool {
800        self.offset() == self.pattern().len()
801    }
802
803    fn pos(&self) -> Position {
804        self.parser().pos.get()
805    }
806
807    fn span(&self) -> Span {
808        Span::splat(self.pos())
809    }
810
811    fn span_char(&self) -> Span {
812        let mut next = Position {
813            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
814            line: self.line(),
815            column: self.column().checked_add(1).unwrap(),
816        };
817        if self.char() == '\n' {
818            next.line += 1;
819            next.column = 1;
820        }
821        Span::new(self.pos(), next)
822    }
823
824    #[inline(never)]
825    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
826        assert_eq!(self.char(), '|');
827        concat.span.end = self.pos();
828        self.push_or_add_alternation(concat);
829        self.bump();
830        Ok(ast::Concat {
831            span: self.span(),
832            asts: vec![],
833        })
834    }
835
836    fn push_or_add_alternation(&self, concat: Concat) {
837        use self::GroupState::*;
838
839        let mut stack = self.parser().stack_group.borrow_mut();
840        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
841            alts.asts.push(concat.into_ast());
842            return;
843        }
844        stack.push(Alternation(ast::Alternation {
845            span: Span::new(concat.span.start, self.pos()),
846            asts: vec![concat.into_ast()],
847        }));
848    }
849
850    #[inline(never)]
851    fn push_intersect(&self, mut concat: Concat) -> Result<Concat> {
852        assert_eq!(self.char(), '&');
853        concat.span.end = self.pos();
854        self.push_or_add_intersect(concat);
855        self.bump();
856        Ok(Concat {
857            span: self.span(),
858            asts: vec![],
859        })
860    }
861
862    fn push_or_add_intersect(&self, concat: Concat) {
863        use self::GroupState::*;
864
865        let mut stack = self.parser().stack_group.borrow_mut();
866        if let Some(&mut Intersection(ref mut alts)) = stack.last_mut() {
867            alts.asts.push(concat.into_ast());
868            return;
869        }
870        stack.push(Intersection(ast::Intersection {
871            span: Span::new(concat.span.start, self.pos()),
872            asts: vec![concat.into_ast()],
873        }));
874    }
875
876    #[inline(never)]
877    fn push_group(&self, mut concat: Concat) -> Result<Concat> {
878        assert_eq!(self.char(), '(');
879        match self.parse_group()? {
880            Either::Left(set) => {
881                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
882                if let Some(v) = ignore {
883                    self.parser().ignore_whitespace.set(v);
884                }
885
886                concat.asts.push(Ast::flags(set));
887                Ok(concat)
888            }
889            Either::Right(group) => {
890                let old_ignore_whitespace = self.ignore_whitespace();
891                let new_ignore_whitespace = group
892                    .flags()
893                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
894                    .unwrap_or(old_ignore_whitespace);
895                self.parser()
896                    .stack_group
897                    .borrow_mut()
898                    .push(GroupState::Group {
899                        concat,
900                        group,
901                        ignore_whitespace: old_ignore_whitespace,
902                    });
903                self.parser().ignore_whitespace.set(new_ignore_whitespace);
904                Ok(Concat {
905                    span: self.span(),
906                    asts: vec![],
907                })
908            }
909        }
910    }
911
912    #[inline(never)]
913    fn push_compl_group(&self, concat: Concat) -> Result<Concat> {
914        assert_eq!(self.char(), '~');
915        self.bump();
916        if self.is_eof() || self.char() != '(' {
917            return Err(self.error(self.span(), ast::ErrorKind::ComplementGroupExpected));
918        }
919        let open_span = self.span_char();
920        self.bump();
921        let group = ast::Group {
922            span: open_span,
923            kind: ast::GroupKind::Complement,
924            ast: Box::new(Ast::empty(self.span())),
925        };
926
927        let old_ignore_whitespace = self.ignore_whitespace();
928        let new_ignore_whitespace = group
929            .flags()
930            .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
931            .unwrap_or(old_ignore_whitespace);
932        self.parser()
933            .stack_group
934            .borrow_mut()
935            .push(GroupState::Group {
936                concat,
937                group,
938                ignore_whitespace: old_ignore_whitespace,
939            });
940        self.parser().ignore_whitespace.set(new_ignore_whitespace);
941        Ok(Concat {
942            span: self.span(),
943            asts: vec![],
944        })
945    }
946
947    #[inline(never)]
948    fn pop_group(&self, mut group_concat: Concat) -> Result<Concat> {
949        use self::GroupState::*;
950        assert_eq!(self.char(), ')');
951        let mut stack = self.parser().stack_group.borrow_mut();
952        let topstack = stack.pop();
953
954        let (mut prior_concat, mut group, ignore_whitespace, alt) = match topstack {
955            Some(Group {
956                concat,
957                group,
958                ignore_whitespace,
959            }) => (concat, group, ignore_whitespace, None),
960            Some(Alternation(alt)) => match stack.pop() {
961                Some(Group {
962                    concat,
963                    group,
964                    ignore_whitespace,
965                }) => (
966                    concat,
967                    group,
968                    ignore_whitespace,
969                    Some(Either::Left::<ast::Alternation, ast::Intersection>(alt)),
970                ),
971                None | Some(Alternation(_)) | Some(Intersection(_)) => {
972                    return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
973                }
974            },
975            Some(Intersection(int)) => match stack.pop() {
976                Some(Group {
977                    concat,
978                    group,
979                    ignore_whitespace,
980                }) => (
981                    concat,
982                    group,
983                    ignore_whitespace,
984                    Some(Either::Right::<ast::Alternation, ast::Intersection>(int)),
985                ),
986                None | Some(Alternation(_)) | Some(Intersection(_)) => {
987                    return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
988                }
989            },
990
991            None => {
992                return Err(self.error(self.span_char(), ast::ErrorKind::GroupUnopened));
993            }
994        };
995        self.parser().ignore_whitespace.set(ignore_whitespace);
996        group_concat.span.end = self.pos();
997        self.bump();
998        group.span.end = self.pos();
999        match alt {
1000            Some(Either::Left(mut alt)) => {
1001                alt.span.end = group_concat.span.end;
1002                alt.asts.push(group_concat.into_ast());
1003                group.ast = Box::new(alt.into_ast());
1004            }
1005            Some(Either::Right(mut int)) => {
1006                int.span.end = group_concat.span.end;
1007                int.asts.push(group_concat.into_ast());
1008                group.ast = Box::new(int.into_ast());
1009            }
1010            None => {
1011                group.ast = Box::new(group_concat.into_ast());
1012            }
1013        }
1014
1015        if group.kind == GroupKind::Complement {
1016            let complement = ast::Complement {
1017                span: self.span(),
1018                ast: group.ast,
1019            };
1020            prior_concat.asts.push(Ast::complement(complement));
1021        }
1022        // ignore groups for now
1023        else {
1024            prior_concat.asts.push(Ast::group(group));
1025        }
1026        Ok(prior_concat)
1027    }
1028
1029    #[inline(never)]
1030    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
1031        concat.span.end = self.pos();
1032        let mut stack = self.parser().stack_group.borrow_mut();
1033        let ast = match stack.pop() {
1034            None => Ok(concat.into_ast()),
1035            Some(GroupState::Alternation(mut alt)) => {
1036                alt.span.end = self.pos();
1037                alt.asts.push(concat.into_ast());
1038                Ok(Ast::alternation(alt))
1039            }
1040            Some(GroupState::Intersection(mut int)) => {
1041                int.span.end = self.pos();
1042                int.asts.push(concat.into_ast());
1043
1044                Ok(Ast::intersection(int))
1045            }
1046            Some(GroupState::Group { group, .. }) => {
1047                return Err(self.error(group.span, ast::ErrorKind::GroupUnclosed));
1048            }
1049        };
1050        // If we try to pop again, there should be nothing.
1051        match stack.pop() {
1052            None => ast,
1053            Some(GroupState::Alternation(alt)) => {
1054                Err(self.error(alt.span, ast::ErrorKind::UnsupportedResharpRegex))
1055            }
1056            Some(GroupState::Intersection(int)) => {
1057                Err(self.error(int.span, ast::ErrorKind::UnsupportedResharpRegex))
1058            }
1059            Some(GroupState::Group { group, .. }) => {
1060                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
1061            }
1062        }
1063    }
1064
1065    #[inline(never)]
1066    fn push_class_open(
1067        &self,
1068        parent_union: regex_syntax::ast::ClassSetUnion,
1069    ) -> Result<regex_syntax::ast::ClassSetUnion> {
1070        assert_eq!(self.char(), '[');
1071
1072        let (nested_set, nested_union) = self.parse_set_class_open()?;
1073        self.parser()
1074            .stack_class
1075            .borrow_mut()
1076            .push(ClassState::Open {
1077                union: parent_union,
1078                set: nested_set,
1079            });
1080        Ok(nested_union)
1081    }
1082
1083    #[inline(never)]
1084    fn pop_class(
1085        &self,
1086        nested_union: regex_syntax::ast::ClassSetUnion,
1087    ) -> Result<Either<regex_syntax::ast::ClassSetUnion, regex_syntax::ast::ClassBracketed>> {
1088        assert_eq!(self.char(), ']');
1089
1090        let item = regex_syntax::ast::ClassSet::Item(nested_union.into_item());
1091        let prevset = self.pop_class_op(item);
1092        let mut stack = self.parser().stack_class.borrow_mut();
1093        match stack.pop() {
1094            None => panic!("unexpected empty character class stack"),
1095            Some(ClassState::Op { .. }) => panic!("unexpected ClassState::Op"),
1096            Some(ClassState::Open { mut union, mut set }) => {
1097                self.bump();
1098                set.span.end = self.pos();
1099                set.kind = prevset;
1100                if stack.is_empty() {
1101                    Ok(Either::Right(set))
1102                } else {
1103                    union.push(regex_syntax::ast::ClassSetItem::Bracketed(Box::new(set)));
1104                    Ok(Either::Left(union))
1105                }
1106            }
1107        }
1108    }
1109
1110    #[inline(never)]
1111    fn unclosed_class_error(&self) -> ParseError {
1112        for state in self.parser().stack_class.borrow().iter().rev() {
1113            if let ClassState::Open { ref set, .. } = *state {
1114                return self.error(set.span, ast::ErrorKind::ClassUnclosed);
1115            }
1116        }
1117        panic!("no open character class found")
1118    }
1119
1120    #[inline(never)]
1121    fn push_class_op(
1122        &self,
1123        next_kind: regex_syntax::ast::ClassSetBinaryOpKind,
1124        next_union: regex_syntax::ast::ClassSetUnion,
1125    ) -> regex_syntax::ast::ClassSetUnion {
1126        let item = regex_syntax::ast::ClassSet::Item(next_union.into_item());
1127        let new_lhs = self.pop_class_op(item);
1128        self.parser().stack_class.borrow_mut().push(ClassState::Op {
1129            kind: next_kind,
1130            lhs: new_lhs,
1131        });
1132        regex_syntax::ast::ClassSetUnion {
1133            span: self.span(),
1134            items: vec![],
1135        }
1136    }
1137
1138    #[inline(never)]
1139    fn pop_class_op(&self, rhs: regex_syntax::ast::ClassSet) -> regex_syntax::ast::ClassSet {
1140        let mut stack = self.parser().stack_class.borrow_mut();
1141        let (kind, lhs) = match stack.pop() {
1142            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
1143            Some(state @ ClassState::Open { .. }) => {
1144                stack.push(state);
1145                return rhs;
1146            }
1147            None => unreachable!(),
1148        };
1149        let span = Span::new(lhs.span().start, rhs.span().end);
1150        regex_syntax::ast::ClassSet::BinaryOp(regex_syntax::ast::ClassSetBinaryOp {
1151            span,
1152            kind,
1153            lhs: Box::new(lhs),
1154            rhs: Box::new(rhs),
1155        })
1156    }
1157
1158    fn hir_to_node_id(&self, hir: &hir::Hir, tb: &mut TB<'s>) -> Result<NodeId> {
1159        match hir.kind() {
1160            hir::HirKind::Empty => Ok(NodeId::EPS),
1161            hir::HirKind::Literal(l) => {
1162                if l.0.len() == 1 {
1163                    let node = tb.mk_u8(l.0[0]);
1164                    Ok(node)
1165                } else {
1166                    let ws: Vec<_> = l.0.iter().map(|l| tb.mk_u8(*l)).collect();
1167                    let conc = tb.mk_concats(ws.iter().copied());
1168                    Ok(conc)
1169                }
1170            }
1171            hir::HirKind::Class(class) => match class {
1172                hir::Class::Unicode(class_unicode) => {
1173                    let ranges = class_unicode.ranges();
1174                    if ranges.len() == 1
1175                        && ranges[0].start() == '\u{0}'
1176                        && ranges[0].end() == '\u{10FFFF}'
1177                    {
1178                        return Ok(tb.mk_range_u8(0, 255));
1179                    }
1180                    let mut nodes = Vec::new();
1181                    for range in ranges {
1182                        for seq in Utf8Sequences::new(range.start(), range.end()) {
1183                            let sl = seq.as_slice();
1184                            let bytes: Vec<_> = sl.iter().map(|s| (s.start, s.end)).collect();
1185                            let node = match bytes.len() {
1186                                1 => tb.mk_range_u8(bytes[0].0, bytes[0].1),
1187                                n => {
1188                                    let last = tb.mk_range_u8(bytes[n - 1].0, bytes[n - 1].1);
1189                                    let mut conc = last;
1190                                    for i in (0..n - 1).rev() {
1191                                        let b = tb.mk_range_u8(bytes[i].0, bytes[i].1);
1192                                        conc = tb.mk_concat(b, conc);
1193                                    }
1194                                    conc
1195                                }
1196                            };
1197                            nodes.push(node);
1198                        }
1199                    }
1200                    let merged = tb.mk_unions(nodes.into_iter());
1201                    Ok(merged)
1202                }
1203                hir::Class::Bytes(class_bytes) => {
1204                    let ranges = class_bytes.ranges();
1205                    let mut result = NodeId::BOT;
1206                    for range in ranges {
1207                        let start = range.start();
1208                        let end = range.end();
1209                        let node = tb.mk_range_u8(start, end);
1210                        result = tb.mk_union(result, node);
1211                    }
1212                    Ok(result)
1213                }
1214            },
1215            hir::HirKind::Look(_) => Err(self.error(
1216                Span::splat(self.pos()),
1217                ast::ErrorKind::UnsupportedResharpRegex,
1218            )),
1219            hir::HirKind::Repetition(_) => Err(self.error(
1220                Span::splat(self.pos()),
1221                ast::ErrorKind::UnsupportedResharpRegex,
1222            )),
1223            hir::HirKind::Capture(_) => Err(self.error(
1224                Span::splat(self.pos()),
1225                ast::ErrorKind::UnsupportedResharpRegex,
1226            )),
1227            hir::HirKind::Concat(body) => {
1228                let mut result = NodeId::EPS;
1229                for child in body {
1230                    let node = self.hir_to_node_id(child, tb)?;
1231                    result = tb.mk_concat(result, node);
1232                }
1233                Ok(result)
1234            }
1235            hir::HirKind::Alternation(_) => Err(self.error(
1236                Span::splat(self.pos()),
1237                ast::ErrorKind::UnsupportedResharpRegex,
1238            )),
1239        }
1240    }
1241
1242    fn translate_ast_to_hir(
1243        &mut self,
1244        orig_ast: &regex_syntax::ast::Ast,
1245        tb: &mut TB<'s>,
1246    ) -> Result<NodeId> {
1247        match self.translator.translate("", orig_ast) {
1248            Err(_) => Err(self.error(self.span(), ast::ErrorKind::UnicodeClassInvalid)),
1249            Ok(hir) => self.hir_to_node_id(&hir, tb),
1250        }
1251    }
1252
1253    fn translator_to_node_id(
1254        &mut self,
1255        orig_ast: &regex_syntax::ast::Ast,
1256        translator: &mut Option<Translator>,
1257        tb: &mut TB<'s>,
1258    ) -> Result<NodeId> {
1259        match translator {
1260            Some(tr) => {
1261                let hir = tr
1262                    .translate("", orig_ast)
1263                    .map_err(|e| self.unsupported_error(e))?;
1264                self.hir_to_node_id(&hir, tb)
1265            }
1266            None => self.translate_ast_to_hir(orig_ast, tb),
1267        }
1268    }
1269
1270    fn get_class(
1271        &mut self,
1272        negated: bool,
1273        kind: regex_syntax::ast::ClassPerlKind,
1274        tb: &mut TB<'s>,
1275    ) -> Result<NodeId> {
1276        let w = self
1277            .perl_classes
1278            .iter()
1279            .find(|(c_neg, c_kind, _)| *c_kind == kind && *c_neg == negated);
1280        match w {
1281            Some((_, _, value)) => Ok(*value),
1282            None => {
1283                let translated = if self.global_ascii_perl {
1284                    let pos = match kind {
1285                        regex_syntax::ast::ClassPerlKind::Word => {
1286                            let az = tb.mk_range_u8(b'a', b'z');
1287                            let big = tb.mk_range_u8(b'A', b'Z');
1288                            let dig = tb.mk_range_u8(b'0', b'9');
1289                            let us = tb.mk_u8(b'_');
1290                            tb.mk_unions([az, big, dig, us].into_iter())
1291                        }
1292                        regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1293                        regex_syntax::ast::ClassPerlKind::Space => {
1294                            let sp = tb.mk_u8(b' ');
1295                            let tab = tb.mk_u8(b'\t');
1296                            let nl = tb.mk_u8(b'\n');
1297                            let cr = tb.mk_u8(b'\r');
1298                            let ff = tb.mk_u8(0x0C);
1299                            let vt = tb.mk_u8(0x0B);
1300                            tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1301                        }
1302                    };
1303                    if negated {
1304                        resharp_algebra::neg_class(tb, pos)
1305                    } else {
1306                        pos
1307                    }
1308                } else if self.global_unicode {
1309                    match kind {
1310                        regex_syntax::ast::ClassPerlKind::Word => {
1311                            if self.global_full_unicode {
1312                                self.unicode_classes.ensure_word_full(tb);
1313                            } else {
1314                                self.unicode_classes.ensure_word(tb);
1315                            }
1316                            if negated {
1317                                self.unicode_classes.non_word
1318                            } else {
1319                                self.unicode_classes.word
1320                            }
1321                        }
1322                        regex_syntax::ast::ClassPerlKind::Digit => {
1323                            if self.global_full_unicode {
1324                                self.unicode_classes.ensure_digit_full(tb);
1325                            } else {
1326                                self.unicode_classes.ensure_digit(tb);
1327                            }
1328                            if negated {
1329                                self.unicode_classes.non_digit
1330                            } else {
1331                                self.unicode_classes.digit
1332                            }
1333                        }
1334                        regex_syntax::ast::ClassPerlKind::Space => {
1335                            if self.global_full_unicode {
1336                                self.unicode_classes.ensure_space_full(tb);
1337                            } else {
1338                                self.unicode_classes.ensure_space(tb);
1339                            }
1340                            if negated {
1341                                self.unicode_classes.non_space
1342                            } else {
1343                                self.unicode_classes.space
1344                            }
1345                        }
1346                    }
1347                } else {
1348                    let pos = match kind {
1349                        regex_syntax::ast::ClassPerlKind::Word => {
1350                            let az = tb.mk_range_u8(b'a', b'z');
1351                            let big = tb.mk_range_u8(b'A', b'Z');
1352                            let dig = tb.mk_range_u8(b'0', b'9');
1353                            let us = tb.mk_u8(b'_');
1354                            tb.mk_unions([az, big, dig, us].into_iter())
1355                        }
1356                        regex_syntax::ast::ClassPerlKind::Digit => tb.mk_range_u8(b'0', b'9'),
1357                        regex_syntax::ast::ClassPerlKind::Space => {
1358                            let sp = tb.mk_u8(b' ');
1359                            let tab = tb.mk_u8(b'\t');
1360                            let nl = tb.mk_u8(b'\n');
1361                            let cr = tb.mk_u8(b'\r');
1362                            let ff = tb.mk_u8(0x0C);
1363                            let vt = tb.mk_u8(0x0B);
1364                            tb.mk_unions([sp, tab, nl, cr, ff, vt].into_iter())
1365                        }
1366                    };
1367                    if negated {
1368                        tb.mk_compl(pos)
1369                    } else {
1370                        pos
1371                    }
1372                };
1373                self.perl_classes.push((negated, kind, translated));
1374                Ok(translated)
1375            }
1376        }
1377    }
1378
1379    fn word_char_kind(ast: &Ast, left: bool) -> WordCharKind {
1380        use WordCharKind::*;
1381        match ast {
1382            Ast::Literal(lit) => {
1383                if is_word_byte(lit.c as u8) {
1384                    Word
1385                } else {
1386                    NonWord
1387                }
1388            }
1389            Ast::ClassPerl(c) => match (&c.kind, c.negated) {
1390                (&regex_syntax::ast::ClassPerlKind::Word, false) => Word,
1391                (&regex_syntax::ast::ClassPerlKind::Word, true) => NonWord,
1392                (&regex_syntax::ast::ClassPerlKind::Space, false) => NonWord,
1393                (&regex_syntax::ast::ClassPerlKind::Space, true) => Unknown,
1394                (&regex_syntax::ast::ClassPerlKind::Digit, false) => Word,
1395                (&regex_syntax::ast::ClassPerlKind::Digit, true) => Unknown,
1396            },
1397            Ast::ClassBracketed(c) => class_bracketed_word_kind(c),
1398            Ast::Dot(_) | Ast::Top(_) => Unknown,
1399            Ast::Group(g) => Self::word_char_kind(&g.ast, left),
1400            Ast::Concat(c) if !c.asts.is_empty() => {
1401                let edge = if left { c.asts.len() - 1 } else { 0 };
1402                let kind = Self::word_char_kind(&c.asts[edge], left);
1403                match kind {
1404                    MaybeWord => {
1405                        let dir: isize = if left { -1 } else { 1 };
1406                        match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1407                            Word => Word,
1408                            _ => MaybeWord,
1409                        }
1410                    }
1411                    MaybeNonWord => {
1412                        let dir: isize = if left { -1 } else { 1 };
1413                        match Self::concat_neighbor_kind(&c.asts, edge, dir) {
1414                            NonWord => NonWord,
1415                            _ => MaybeNonWord,
1416                        }
1417                    }
1418                    other => other,
1419                }
1420            }
1421            Ast::Alternation(alt) if !alt.asts.is_empty() => {
1422                let first = Self::word_char_kind(&alt.asts[0], left);
1423                if alt.asts[1..]
1424                    .iter()
1425                    .all(|a| Self::word_char_kind(a, left) == first)
1426                {
1427                    first
1428                } else {
1429                    Unknown
1430                }
1431            }
1432            Ast::Repetition(r) => {
1433                let inner = Self::word_char_kind(&r.ast, left);
1434                let nullable = matches!(
1435                    &r.op.kind,
1436                    ast::RepetitionKind::ZeroOrMore
1437                        | ast::RepetitionKind::ZeroOrOne
1438                        | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1439                );
1440                if nullable {
1441                    match inner {
1442                        Word => MaybeWord,
1443                        NonWord => MaybeNonWord,
1444                        _ => Unknown,
1445                    }
1446                } else {
1447                    inner
1448                }
1449            }
1450            Ast::Lookaround(la) => match la.kind {
1451                ast::LookaroundKind::PositiveLookahead
1452                | ast::LookaroundKind::PositiveLookbehind => Self::word_char_kind(&la.ast, left),
1453                _ => Unknown,
1454            },
1455            _ => Unknown,
1456        }
1457    }
1458
1459    // ok to return None here, it's only an optimization
1460    fn edge_class_ast(ast: &Ast, left: bool) -> Option<&Ast> {
1461        match ast {
1462            Ast::Literal(_)
1463            | Ast::ClassPerl(_)
1464            | Ast::ClassBracketed(_)
1465            | Ast::ClassUnicode(_)
1466            | Ast::Dot(_)
1467            | Ast::Top(_) => Some(ast),
1468            Ast::Group(g) => Self::edge_class_ast(&g.ast, left),
1469            Ast::Concat(c) if !c.asts.is_empty() => {
1470                Self::edge_class_ast(&c.asts[if left { c.asts.len() - 1 } else { 0 }], left)
1471            }
1472            Ast::Repetition(r) => {
1473                let nullable = matches!(
1474                    &r.op.kind,
1475                    ast::RepetitionKind::ZeroOrMore
1476                        | ast::RepetitionKind::ZeroOrOne
1477                        | ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(0, _))
1478                );
1479                if nullable {
1480                    None
1481                } else {
1482                    Self::edge_class_ast(&r.ast, left)
1483                }
1484            }
1485            _ => None,
1486        }
1487    }
1488
1489    fn resolve_word_kind(
1490        &mut self,
1491        asts: &[Ast],
1492        idx: usize,
1493        dir: isize,
1494        translator: &mut Option<Translator>,
1495        tb: &mut TB<'s>,
1496        word_id: NodeId,
1497        not_word_id: NodeId,
1498    ) -> Result<WordCharKind> {
1499        use WordCharKind::*;
1500        let fast = Self::concat_neighbor_kind(asts, idx, dir);
1501        if fast != Unknown {
1502            return Ok(fast);
1503        }
1504        let neighbor_idx = (idx as isize + dir) as usize;
1505        let node = if let Some(edge) = Self::edge_class_ast(&asts[neighbor_idx], dir < 0) {
1506            self.ast_to_node_id(edge, translator, tb)?
1507        } else if dir > 0 {
1508            let mut bodies: Vec<NodeId> = vec![];
1509            let mut j = neighbor_idx;
1510            while j < asts.len() {
1511                match &asts[j] {
1512                    Ast::Lookaround(la) => {
1513                        let kind = la.kind.clone();
1514                        let lookbehind = matches!(
1515                            kind,
1516                            ast::LookaroundKind::PositiveLookbehind
1517                                | ast::LookaroundKind::NegativeLookbehind
1518                        );
1519                        if lookbehind {
1520                            j += 1;
1521                            continue;
1522                        }
1523                        let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1524                        let body = tb.try_elim_lookarounds(body).ok_or_else(|| {
1525                            self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)
1526                        })?;
1527                        let body_ts = tb.mk_concat(body, NodeId::TS);
1528                        let constraint = match kind {
1529                            ast::LookaroundKind::PositiveLookahead => body_ts,
1530                            ast::LookaroundKind::NegativeLookahead => tb.mk_compl(body_ts),
1531                            _ => unreachable!(),
1532                        };
1533                        bodies.push(constraint);
1534                        j += 1;
1535                    }
1536                    other => {
1537                        let n = self.ast_to_node_id(other, translator, tb)?;
1538                        let n = tb.try_elim_lookarounds(n).ok_or_else(|| {
1539                            self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex)
1540                        })?;
1541                        bodies.push(tb.mk_concat(n, NodeId::TS));
1542                        break;
1543                    }
1544                }
1545            }
1546            if bodies.is_empty() {
1547                return Ok(Unknown);
1548            }
1549            let combined = tb.mk_inters(bodies.into_iter());
1550            let word_prefix = tb.mk_concat(word_id, NodeId::TS);
1551            let non_word_prefix = tb.mk_concat(not_word_id, NodeId::TS);
1552            return if tb.subsumes(word_prefix, combined) == Some(true) {
1553                Ok(Word)
1554            } else if tb.subsumes(non_word_prefix, combined) == Some(true) {
1555                Ok(NonWord)
1556            } else {
1557                Ok(Unknown)
1558            };
1559        } else {
1560            let neighbor_node = self.ast_to_node_id(&asts[neighbor_idx], translator, tb)?;
1561            let mut neighbor_node = tb
1562                .try_elim_lookarounds(neighbor_node)
1563                .ok_or_else(|| self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))?;
1564            neighbor_node = tb.reverse(neighbor_node).or_else(|_| {
1565                Err(self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))
1566            })?;
1567            let word_prefix = tb.mk_concat(word_id, NodeId::TS);
1568            let non_word_prefix = tb.mk_concat(not_word_id, NodeId::TS);
1569            return if tb.subsumes(word_prefix, neighbor_node) == Some(true) {
1570                Ok(Word)
1571            } else if tb.subsumes(non_word_prefix, neighbor_node) == Some(true) {
1572                Ok(NonWord)
1573            } else {
1574                Ok(Unknown)
1575            };
1576        };
1577        if tb.subsumes(word_id, node) == Some(true) {
1578            Ok(Word)
1579        } else if tb.subsumes(not_word_id, node) == Some(true) {
1580            Ok(NonWord)
1581        } else {
1582            Ok(Unknown)
1583        }
1584    }
1585
1586    fn is_transparent_for_dir(ast: &Ast, dir: isize) -> bool {
1587        match ast {
1588            Ast::Lookaround(la) => match la.kind {
1589                ast::LookaroundKind::PositiveLookahead | ast::LookaroundKind::NegativeLookahead => {
1590                    dir < 0
1591                }
1592                ast::LookaroundKind::PositiveLookbehind
1593                | ast::LookaroundKind::NegativeLookbehind => dir > 0,
1594            },
1595            _ => false,
1596        }
1597    }
1598
1599    fn concat_neighbor_kind(asts: &[Ast], idx: usize, dir: isize) -> WordCharKind {
1600        use WordCharKind::*;
1601        let next = idx as isize + dir;
1602        if next < 0 || next >= asts.len() as isize {
1603            return Edge;
1604        }
1605        if Self::is_transparent_for_dir(&asts[next as usize], dir) {
1606            return Self::concat_neighbor_kind(asts, next as usize, dir);
1607        }
1608        let kind = Self::word_char_kind(&asts[next as usize], dir < 0);
1609        match kind {
1610            MaybeWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1611                Word => Word,
1612                _ => Unknown,
1613            },
1614            MaybeNonWord => match Self::concat_neighbor_kind(asts, next as usize, dir) {
1615                NonWord => NonWord,
1616                _ => Unknown,
1617            },
1618            other => other,
1619        }
1620    }
1621
1622    fn specialize_word_boundaries(
1623        &mut self,
1624        children: &mut [NodeId],
1625        tb: &mut TB<'s>,
1626    ) -> Result<()> {
1627        let wb = self.unicode_classes.wb;
1628        let non_wb = self.unicode_classes.non_wb;
1629        if wb == NodeId::MISSING {
1630            return Ok(());
1631        }
1632        let word = self.unicode_classes.word;
1633        let non_word = self.unicode_classes.non_word;
1634        if word == NodeId::MISSING {
1635            return Ok(());
1636        }
1637        // narrow check, could be done over the whole regex instead of one node
1638        // but i need to make sure the cost isnt too large
1639        let word_pref = tb.mk_concat(word, NodeId::TS);
1640        let non_word_pref = tb.mk_concat(non_word, NodeId::TS);
1641        let word_suf = tb.mk_concat(NodeId::TS, word);
1642        let non_word_suf = tb.mk_concat(NodeId::TS, non_word);
1643        let len = children.len();
1644        for k in 0..len {
1645            let l = if k == 0 {
1646                WordCharKind::Edge
1647            } else {
1648                Self::classify(tb, children[k - 1], word_suf, non_word_suf)
1649            };
1650            let r = if k + 1 >= len {
1651                WordCharKind::Edge
1652            } else {
1653                Self::classify(tb, children[k + 1], word_pref, non_word_pref)
1654            };
1655            children[k] = Self::rewrite_wb_in_node(tb, children[k], wb, non_wb, word, l, r)
1656                .ok_or_else(|| self.error(self.span(), ast::ErrorKind::UnsupportedResharpRegex))?;
1657        }
1658        Ok(())
1659    }
1660
1661    fn rewrite_wb_in_node(
1662        b: &mut TB<'s>,
1663        node: NodeId,
1664        wb: NodeId,
1665        non_wb: NodeId,
1666        word: NodeId,
1667        left: WordCharKind,
1668        right: WordCharKind,
1669    ) -> Option<NodeId> {
1670        let boundary_match = if node == wb {
1671            true
1672        } else if node == non_wb {
1673            false
1674        } else if b.get_kind(node) == Kind::Union {
1675            let l = Self::rewrite_wb_in_node(b, node.left(b), wb, non_wb, word, left, right)?;
1676            let r = Self::rewrite_wb_in_node(b, node.right(b), wb, non_wb, word, left, right)?;
1677            return Some(b.mk_union(l, r));
1678        } else {
1679            return Some(node);
1680        };
1681        use WordCharKind::*;
1682        let result = match (left, right) {
1683            (NonWord, Word) | (Word, NonWord) => {
1684                if boundary_match {
1685                    NodeId::EPS
1686                } else {
1687                    NodeId::BOT
1688                }
1689            }
1690            (Word, Word) | (NonWord, NonWord) => {
1691                if boundary_match {
1692                    NodeId::BOT
1693                } else {
1694                    NodeId::EPS
1695                }
1696            }
1697            (Word, _) => {
1698                if boundary_match {
1699                    b.mk_neg_lookahead(word, 0)
1700                } else {
1701                    let tail = b.mk_concat(word, NodeId::TS);
1702                    b.mk_lookahead(tail, NodeId::MISSING, 0)
1703                }
1704            }
1705            (NonWord, _) => {
1706                if boundary_match {
1707                    let tail = b.mk_concat(word, NodeId::TS);
1708                    b.mk_lookahead(tail, NodeId::MISSING, 0)
1709                } else {
1710                    b.mk_neg_lookahead(word, 0)
1711                }
1712            }
1713            (_, Word) => {
1714                if boundary_match {
1715                    b.mk_neg_lookbehind(word)
1716                } else {
1717                    b.mk_lookbehind(word, NodeId::MISSING)
1718                }
1719            }
1720            (_, NonWord) => {
1721                if boundary_match {
1722                    b.mk_lookbehind(word, NodeId::MISSING)
1723                } else {
1724                    b.mk_neg_lookbehind(word)
1725                }
1726            }
1727            _ => return None,
1728        };
1729        Some(result)
1730    }
1731
1732    fn classify(
1733        b: &mut TB<'s>,
1734        node: NodeId,
1735        word_dir: NodeId,
1736        non_word_dir: NodeId,
1737    ) -> WordCharKind {
1738        if b.contains_look(node) || b.contains_anchors(node) {
1739            return WordCharKind::Unknown;
1740        }
1741        if b.subsumes(word_dir, node) == Some(true) {
1742            WordCharKind::Word
1743        } else if b.subsumes(non_word_dir, node) == Some(true) {
1744            WordCharKind::NonWord
1745        } else {
1746            WordCharKind::Unknown
1747        }
1748    }
1749
1750    fn rewrite_word_boundary_in_concat(
1751        &mut self,
1752        asts: &[Ast],
1753        idx: usize,
1754        translator: &mut Option<Translator>,
1755        tb: &mut TB<'s>,
1756        negated: bool,
1757    ) -> Result<(NodeId, usize)> {
1758        use WordCharKind::*;
1759        if self.global_full_unicode {
1760            self.unicode_classes.ensure_word_full(tb);
1761        } else if self.global_unicode && !self.global_ascii_perl {
1762            self.unicode_classes.ensure_word(tb);
1763        } else {
1764            self.unicode_classes.ensure_word_ascii(tb);
1765        }
1766        let word_id = self.unicode_classes.word;
1767        let not_word_id = self.unicode_classes.non_word;
1768        let left = self.resolve_word_kind(asts, idx, -1, translator, tb, word_id, not_word_id)?;
1769        let right = self.resolve_word_kind(asts, idx, 1, translator, tb, word_id, not_word_id)?;
1770        let boundary_match = !negated;
1771        match (left, right) {
1772            (NonWord, Word) | (Word, NonWord) => Ok((
1773                if boundary_match {
1774                    NodeId::EPS
1775                } else {
1776                    NodeId::BOT
1777                },
1778                idx + 1,
1779            )),
1780            (Word, Word) | (NonWord, NonWord) => Ok((
1781                if boundary_match {
1782                    NodeId::BOT
1783                } else {
1784                    NodeId::EPS
1785                },
1786                idx + 1,
1787            )),
1788            (Word, _) => {
1789                if boundary_match {
1790                    Ok((tb.mk_neg_lookahead(word_id, 0), idx + 1))
1791                } else {
1792                    let tail = tb.mk_concat(word_id, NodeId::TS);
1793                    self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1794                }
1795            }
1796            (NonWord, _) => {
1797                if boundary_match {
1798                    let tail = tb.mk_concat(word_id, NodeId::TS);
1799                    self.merge_boundary_with_following_lookaheads(asts, idx, tail, translator, tb)
1800                } else {
1801                    Ok((tb.mk_neg_lookahead(word_id, 0), idx + 1))
1802                }
1803            }
1804            (_, Word) => {
1805                if boundary_match {
1806                    Ok((tb.mk_neg_lookbehind(word_id), idx + 1))
1807                } else {
1808                    Ok((tb.mk_lookbehind(word_id, NodeId::MISSING), idx + 1))
1809                }
1810            }
1811            (_, NonWord) => {
1812                if boundary_match {
1813                    Ok((tb.mk_lookbehind(word_id, NodeId::MISSING), idx + 1))
1814                } else {
1815                    Ok((tb.mk_neg_lookbehind(word_id), idx + 1))
1816                }
1817            }
1818            //   \b = (?<=\w)(?!\w) | (?<!\w)(?=\w)
1819            //   \B = (?<=\w)(?=\w)  | (?<!\w)(?!\w)
1820            //   TODO: expensive and the cost could be reduced further
1821            _ => {
1822                self.unicode_classes.ensure_wb(tb);
1823                let node = if boundary_match {
1824                    self.unicode_classes.wb
1825                } else {
1826                    self.unicode_classes.non_wb
1827                };
1828                Ok((node, idx + 1))
1829            }
1830        }
1831    }
1832
1833    fn merge_boundary_with_following_lookaheads(
1834        &mut self,
1835        asts: &[Ast],
1836        wb_idx: usize,
1837        boundary_tail: NodeId,
1838        translator: &mut Option<Translator>,
1839        tb: &mut TB<'s>,
1840    ) -> Result<(NodeId, usize)> {
1841        let mut next = wb_idx + 1;
1842        let mut la_bodies = vec![boundary_tail];
1843        while next < asts.len() {
1844            match &asts[next] {
1845                Ast::Lookaround(la) if la.kind == ast::LookaroundKind::PositiveLookahead => {
1846                    let body = self.ast_to_node_id(&la.ast, translator, tb)?;
1847                    la_bodies.push(tb.mk_concat(body, NodeId::TS));
1848                    next += 1;
1849                }
1850                _ => break,
1851            }
1852        }
1853        let merged = tb.mk_inters(la_bodies.into_iter());
1854        Ok((tb.mk_lookahead(merged, NodeId::MISSING, 0), next))
1855    }
1856
1857    fn ast_to_node_id(
1858        &mut self,
1859        ast: &Ast,
1860        translator: &mut Option<Translator>,
1861        tb: &mut TB<'s>,
1862    ) -> Result<NodeId> {
1863        match ast {
1864            Ast::Empty(_) => Ok(NodeId::EPS),
1865            Ast::Flags(f) => {
1866                if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
1867                    return Err(self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex));
1868                }
1869                let mut translator_builder = self.default_translator_builder();
1870                if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
1871                    translator_builder.case_insensitive(state);
1872                }
1873                if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
1874                    translator_builder.unicode(state);
1875                }
1876                if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
1877                    self.dot_all.set(state);
1878                }
1879                if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
1880                    self.multiline.set(state);
1881                }
1882                let concat_translator = Some(translator_builder.build());
1883                *translator = concat_translator;
1884                Ok(NodeId::EPS)
1885            }
1886            Ast::Literal(l) => {
1887                let ast_lit = regex_syntax::ast::Ast::literal(*l.to_owned());
1888                self.translator_to_node_id(&ast_lit, translator, tb)
1889            }
1890            Ast::Top(_) => Ok(NodeId::TOP),
1891            Ast::Dot(_) => {
1892                let codepoint_dot = self.global_ascii_perl || self.global_full_unicode;
1893                let hirv = match (codepoint_dot, self.dot_all.get()) {
1894                    (true, true) => hir::Hir::dot(hir::Dot::AnyChar),
1895                    (true, false) => hir::Hir::dot(hir::Dot::AnyCharExceptLF),
1896                    (false, true) => return Ok(NodeId::TOP),
1897                    (false, false) => hir::Hir::dot(hir::Dot::AnyByteExceptLF),
1898                };
1899                self.hir_to_node_id(&hirv, tb)
1900            }
1901            Ast::Assertion(a) => match &a.kind {
1902                ast::AssertionKind::StartText => Ok(NodeId::BEGIN),
1903                ast::AssertionKind::EndText => Ok(NodeId::END),
1904                ast::AssertionKind::WordBoundary => {
1905                    let only = Ast::Assertion(a.clone());
1906                    let asts = std::slice::from_ref(&only);
1907                    let (node, _) =
1908                        self.rewrite_word_boundary_in_concat(asts, 0, translator, tb, false)?;
1909                    Ok(node)
1910                }
1911                ast::AssertionKind::NotWordBoundary => {
1912                    // bare \B with no surrounding concat: treat as singleton.
1913                    let only = Ast::Assertion(a.clone());
1914                    let asts = std::slice::from_ref(&only);
1915                    let (node, _) =
1916                        self.rewrite_word_boundary_in_concat(asts, 0, translator, tb, true)?;
1917                    Ok(node)
1918                }
1919                ast::AssertionKind::StartLine => {
1920                    if !self.multiline.get() {
1921                        return Ok(NodeId::BEGIN);
1922                    }
1923                    let left = NodeId::BEGIN;
1924                    let right = tb.mk_u8(b'\n');
1925                    let union = tb.mk_union(left, right);
1926                    Ok(tb.mk_lookbehind(union, NodeId::MISSING))
1927                }
1928                ast::AssertionKind::EndLine => {
1929                    if !self.multiline.get() {
1930                        return Ok(NodeId::END);
1931                    }
1932                    let left = NodeId::END;
1933                    let right = tb.mk_u8(b'\n');
1934                    let union = tb.mk_union(left, right);
1935                    Ok(tb.mk_lookahead(union, NodeId::MISSING, 0))
1936                }
1937                ast::AssertionKind::WordBoundaryStart => {
1938                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1939                }
1940                ast::AssertionKind::WordBoundaryEnd => {
1941                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1942                }
1943                ast::AssertionKind::WordBoundaryStartAngle => {
1944                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1945                }
1946                ast::AssertionKind::WordBoundaryEndAngle => {
1947                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1948                }
1949                ast::AssertionKind::WordBoundaryStartHalf => {
1950                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1951                }
1952                ast::AssertionKind::WordBoundaryEndHalf => {
1953                    Err(self.error(a.span, ast::ErrorKind::UnsupportedResharpRegex))
1954                }
1955            },
1956            Ast::ClassUnicode(c) => {
1957                let tmp = regex_syntax::ast::ClassUnicode {
1958                    span: c.span,
1959                    negated: c.negated,
1960                    kind: c.kind.clone(),
1961                };
1962                if !c.negated {
1963                    if let regex_syntax::ast::ClassUnicodeKind::Named(s) = &c.kind {
1964                        match s.as_str() {
1965                            // \p{ascii} for ascii, \p{ascii}&\p{Letter} => [A-Za-z]
1966                            "ascii" => return Ok(tb.mk_range_u8(0, 127)),
1967                            // a single valid utf8 codepoint; \p{utf8}*&~(a) => non a, but valid utf8
1968                            "utf8" => {
1969                                let ascii = tb.mk_range_u8(0, 127);
1970                                let beta = tb.mk_range_u8(128, 0xBF);
1971                                let c0 = tb.mk_range_u8(0xC0, 0xDF);
1972                                let c0s = tb.mk_concats([c0, beta].into_iter());
1973                                let e0 = tb.mk_range_u8(0xE0, 0xEF);
1974                                let e0s = tb.mk_concats([e0, beta, beta].into_iter());
1975                                let f0 = tb.mk_range_u8(0xF0, 0xF7);
1976                                let f0s = tb.mk_concats([f0, beta, beta, beta].into_iter());
1977                                return Ok(tb.mk_unions([ascii, c0s, e0s, f0s].into_iter()));
1978                            }
1979                            "hex" => {
1980                                let nums = tb.mk_range_u8(b'0', b'9');
1981                                let lets = tb.mk_range_u8(b'a', b'f');
1982                                let lets2 = tb.mk_range_u8(b'A', b'F');
1983                                let merged = tb.mk_unions([nums, lets, lets2].into_iter());
1984                                return Ok(merged);
1985                            }
1986                            _ => {}
1987                        }
1988                    };
1989                }
1990
1991                let orig_ast = regex_syntax::ast::Ast::class_unicode(tmp);
1992                self.translator_to_node_id(&orig_ast, translator, tb)
1993            }
1994            Ast::ClassPerl(c) => self.get_class(c.negated, c.kind.clone(), tb),
1995            Ast::ClassBracketed(c) => match &c.kind {
1996                regex_syntax::ast::ClassSet::Item(item) => {
1997                    if !c.negated && is_universal_perl_pair(item) {
1998                        return Ok(NodeId::TOP);
1999                    }
2000                    let kind = if self.global_ascii_perl {
2001                        rewrite_ascii_perl_set(&c.kind)
2002                    } else {
2003                        c.kind.clone()
2004                    };
2005                    let tmp = regex_syntax::ast::ClassBracketed {
2006                        span: c.span,
2007                        negated: c.negated,
2008                        kind,
2009                    };
2010                    let orig_ast = regex_syntax::ast::Ast::class_bracketed(tmp);
2011                    self.translator_to_node_id(&orig_ast, translator, tb)
2012                }
2013                regex_syntax::ast::ClassSet::BinaryOp(_) => {
2014                    Err(self.error(c.span, ast::ErrorKind::UnsupportedResharpRegex))
2015                }
2016            },
2017            Ast::Repetition(r) => {
2018                let body = self.ast_to_node_id(&r.ast, translator, tb);
2019                match body {
2020                    Ok(body) => match &r.op.kind {
2021                        ast::RepetitionKind::ZeroOrOne => Ok(tb.mk_opt(body)),
2022                        ast::RepetitionKind::ZeroOrMore => Ok(tb.mk_star(body)),
2023                        ast::RepetitionKind::OneOrMore => Ok(tb.mk_plus(body)),
2024                        ast::RepetitionKind::Range(r) => match r {
2025                            ast::RepetitionRange::Exactly(n) => Ok(tb.mk_repeat(body, *n, *n)),
2026                            ast::RepetitionRange::AtLeast(n) => {
2027                                let rep = tb.mk_repeat(body, *n, *n);
2028                                let st = tb.mk_star(body);
2029                                Ok(tb.mk_concat(rep, st))
2030                            }
2031
2032                            ast::RepetitionRange::Bounded(n, m) => Ok(tb.mk_repeat(body, *n, *m)),
2033                        },
2034                    },
2035                    Err(_) => body,
2036                }
2037            }
2038            Ast::Lookaround(g) => {
2039                let body = self.ast_to_node_id(&g.ast, translator, tb)?;
2040                match g.kind {
2041                    ast::LookaroundKind::PositiveLookahead if body.contains_lookbehind(tb) => {
2042                        let mut prefix = NodeId::EPS;
2043                        let mut rest = body;
2044                        while tb.get_kind(rest) == Kind::Concat
2045                            && tb.get_kind(rest.left(tb)) == Kind::Lookbehind
2046                        {
2047                            prefix = tb.mk_concat(prefix, rest.left(tb));
2048                            rest = rest.right(tb);
2049                        }
2050                        if prefix == NodeId::EPS || rest.contains_lookbehind(tb) {
2051                            return Err(self.error(g.span, ast::ErrorKind::UnsupportedResharpRegex));
2052                        }
2053                        let la = tb.mk_lookahead(rest, NodeId::MISSING, 0);
2054                        Ok(tb.mk_concat(prefix, la))
2055                    }
2056                    ast::LookaroundKind::NegativeLookahead if body.contains_lookbehind(tb) => {
2057                        Err(self.error(g.span, ast::ErrorKind::UnsupportedResharpRegex))
2058                    }
2059                    ast::LookaroundKind::PositiveLookahead => {
2060                        Ok(tb.mk_lookahead(body, NodeId::MISSING, 0))
2061                    }
2062                    ast::LookaroundKind::PositiveLookbehind => {
2063                        Ok(tb.mk_lookbehind(body, NodeId::MISSING))
2064                    }
2065                    ast::LookaroundKind::NegativeLookahead => Ok(tb.mk_neg_lookahead(body, 0)),
2066                    ast::LookaroundKind::NegativeLookbehind => Ok(tb.mk_neg_lookbehind(body)),
2067                }
2068            }
2069            Ast::Group(g) => {
2070                if let ast::GroupKind::NonCapturing(ref flags) = g.kind {
2071                    if !flags.items.is_empty() {
2072                        let mut translator_builder = self.default_translator_builder();
2073                        if let Some(state) = flags.flag_state(ast::Flag::CaseInsensitive) {
2074                            translator_builder.case_insensitive(state);
2075                        }
2076                        if let Some(state) = flags.flag_state(ast::Flag::Unicode) {
2077                            translator_builder.unicode(state);
2078                        }
2079                        let saved_dot_all = self.dot_all.get();
2080                        if let Some(state) = flags.flag_state(ast::Flag::DotMatchesNewLine) {
2081                            self.dot_all.set(state);
2082                        }
2083                        let saved_multiline = self.multiline.get();
2084                        if let Some(state) = flags.flag_state(ast::Flag::MultiLine) {
2085                            self.multiline.set(state);
2086                        }
2087                        let mut scoped = Some(translator_builder.build());
2088                        let result = self.ast_to_node_id(&g.ast, &mut scoped, tb);
2089                        self.dot_all.set(saved_dot_all);
2090                        self.multiline.set(saved_multiline);
2091                        return result;
2092                    }
2093                }
2094                self.ast_to_node_id(&g.ast, translator, tb)
2095            }
2096            Ast::Alternation(a) => {
2097                let mut children = vec![];
2098                for ast in &a.asts {
2099                    match self.ast_to_node_id(ast, translator, tb) {
2100                        Ok(node_id) => children.push(node_id),
2101                        Err(err) => return Err(err),
2102                    }
2103                }
2104                Ok(tb.mk_unions(children.iter().copied()))
2105            }
2106            Ast::Concat(c) => {
2107                let mut concat_translator: Option<Translator> = None;
2108                let mut children = vec![];
2109                let mut prev_boundary_child: Option<usize> = None;
2110                let mut i = 0;
2111                while i < c.asts.len() {
2112                    let ast = &c.asts[i];
2113                    match ast {
2114                        Ast::Flags(f) => {
2115                            if f.flags.flag_state(ast::Flag::SwapGreed).is_some() {
2116                                return Err(
2117                                    self.error(f.span, ast::ErrorKind::UnsupportedResharpRegex)
2118                                );
2119                            }
2120                            let mut translator_builder = self.default_translator_builder();
2121                            if let Some(state) = f.flags.flag_state(ast::Flag::CaseInsensitive) {
2122                                translator_builder.case_insensitive(state);
2123                            }
2124                            if let Some(state) = f.flags.flag_state(ast::Flag::Unicode) {
2125                                translator_builder.unicode(state);
2126                            }
2127                            if let Some(state) = f.flags.flag_state(ast::Flag::DotMatchesNewLine) {
2128                                self.dot_all.set(state);
2129                            }
2130                            if let Some(state) = f.flags.flag_state(ast::Flag::MultiLine) {
2131                                self.multiline.set(state);
2132                            }
2133                            concat_translator = Some(translator_builder.build());
2134                            *translator = concat_translator.clone();
2135                            i += 1;
2136                            continue;
2137                        }
2138                        Ast::Assertion(a)
2139                            if a.kind == ast::AssertionKind::WordBoundary
2140                                || a.kind == ast::AssertionKind::NotWordBoundary =>
2141                        {
2142                            let negated = a.kind == ast::AssertionKind::NotWordBoundary;
2143                            let node = self.rewrite_word_boundary_in_concat(
2144                                &c.asts, i, translator, tb, negated,
2145                            )?;
2146                            match prev_boundary_child {
2147                                Some(idx) => children[idx] = tb.mk_inter(children[idx], node.0),
2148                                None => {
2149                                    children.push(node.0);
2150                                    prev_boundary_child = Some(children.len() - 1);
2151                                }
2152                            }
2153                            i = node.1; // skip consumed lookaheads
2154                            continue;
2155                        }
2156                        _ => {}
2157                    }
2158                    prev_boundary_child = None;
2159                    match concat_translator {
2160                        Some(_) => match self.ast_to_node_id(ast, &mut concat_translator, tb) {
2161                            Ok(node_id) => children.push(node_id),
2162                            Err(err) => return Err(err),
2163                        },
2164                        None => match self.ast_to_node_id(ast, translator, tb) {
2165                            Ok(node_id) => children.push(node_id),
2166                            Err(err) => return Err(err),
2167                        },
2168                    }
2169                    i += 1;
2170                }
2171                self.specialize_word_boundaries(&mut children, tb)?;
2172                Ok(tb.mk_concats(children.iter().cloned()))
2173            }
2174            Ast::Intersection(intersection) => {
2175                let mut children = vec![];
2176                for ast in &intersection.asts {
2177                    match self.ast_to_node_id(ast, translator, tb) {
2178                        Ok(node_id) => children.push(node_id),
2179                        Err(err) => return Err(err),
2180                    }
2181                }
2182                Ok(tb.mk_inters(children.into_iter()))
2183            }
2184            Ast::Complement(complement) => {
2185                let body = self.ast_to_node_id(&complement.ast, translator, tb);
2186                body.map(|x| tb.mk_compl(x))
2187            }
2188        }
2189    }
2190
2191    fn parse_inner(&mut self) -> Result<Ast> {
2192        let mut concat = Concat {
2193            span: self.span(),
2194            asts: vec![],
2195        };
2196        loop {
2197            self.bump_space();
2198            if self.is_eof() {
2199                break;
2200            }
2201            match self.char() {
2202                '(' => concat = self.push_group(concat)?,
2203                ')' => concat = self.pop_group(concat)?,
2204                '|' => concat = self.push_alternate(concat)?,
2205                '&' => concat = self.push_intersect(concat)?,
2206                '~' => concat = self.push_compl_group(concat)?,
2207                '[' => {
2208                    let class = self.parse_set_class()?;
2209                    concat.asts.push(Ast::class_bracketed(class));
2210                }
2211                '?' => {
2212                    concat =
2213                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrOne)?;
2214                }
2215                '*' => {
2216                    concat =
2217                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::ZeroOrMore)?;
2218                }
2219                '+' => {
2220                    concat =
2221                        self.parse_uncounted_repetition(concat, ast::RepetitionKind::OneOrMore)?;
2222                }
2223                '{' => {
2224                    concat = self.parse_counted_repetition(concat)?;
2225                }
2226                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
2227            }
2228        }
2229        let ast = self.pop_group_end(concat)?;
2230        if expanded_ast_size(&ast, self.expanded_ast_limit) >= self.expanded_ast_limit
2231            || max_concat_length(&ast) >= self.max_list_len
2232        {
2233            return Err(self.error(*ast.span(), ast::ErrorKind::UnsupportedResharpRegex));
2234        }
2235        Ok(ast)
2236    }
2237
2238    fn parse(&mut self, tb: &mut TB<'s>) -> Result<NodeId> {
2239        let ast = self.parse_inner()?;
2240        if let Err(span) = ensure_lookbehind_at_start(&ast, true) {
2241            return Err(self.error(span, ast::ErrorKind::UnsupportedResharpRegex));
2242        }
2243        self.ast_to_node_id(&ast, &mut None, tb)
2244    }
2245
2246    #[inline(never)]
2247    fn parse_uncounted_repetition(
2248        &self,
2249        mut concat: ast::Concat,
2250        kind: ast::RepetitionKind,
2251    ) -> Result<ast::Concat> {
2252        // assert!(self.char() == '?' || self.char() == '*' || self.char() == '+');
2253        let op_start = self.pos();
2254        let ast = match concat.asts.pop() {
2255            Some(ast) => ast,
2256            None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
2257        };
2258        match ast {
2259            Ast::Empty(_) | Ast::Flags(_) => {
2260                return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
2261            }
2262            _ => {}
2263        }
2264        if self.bump() && self.char() == '?' {
2265            return Err(self.error(
2266                Span::new(op_start, self.pos()),
2267                ast::ErrorKind::UnsupportedLazyQuantifier,
2268            ));
2269        }
2270        concat.asts.push(Ast::repetition(ast::Repetition {
2271            span: ast.span().with_end(self.pos()),
2272            op: ast::RepetitionOp {
2273                span: Span::new(op_start, self.pos()),
2274                kind,
2275            },
2276            greedy: true,
2277            ast: Box::new(ast),
2278        }));
2279        Ok(concat)
2280    }
2281
2282    #[inline(never)]
2283    fn parse_counted_repetition(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
2284        assert!(self.char() == '{');
2285        let start = self.pos();
2286        let ast = match concat.asts.pop() {
2287            Some(ast) => ast,
2288            None => return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing)),
2289        };
2290        match ast {
2291            Ast::Empty(_) | Ast::Flags(_) => {
2292                return Err(self.error(self.span(), ast::ErrorKind::RepetitionMissing))
2293            }
2294            _ => {}
2295        }
2296        if !self.bump_and_bump_space() {
2297            return Err(self.error(
2298                Span::new(start, self.pos()),
2299                ast::ErrorKind::RepetitionCountUnclosed,
2300            ));
2301        }
2302        let count_start = specialize_err(
2303            self.parse_decimal(),
2304            ast::ErrorKind::DecimalEmpty,
2305            ast::ErrorKind::RepetitionCountDecimalEmpty,
2306        );
2307        if self.is_eof() {
2308            return Err(self.error(
2309                Span::new(start, self.pos()),
2310                ast::ErrorKind::RepetitionCountUnclosed,
2311            ));
2312        }
2313        let range = if self.char() == ',' {
2314            if !self.bump_and_bump_space() {
2315                return Err(self.error(
2316                    Span::new(start, self.pos()),
2317                    ast::ErrorKind::RepetitionCountUnclosed,
2318                ));
2319            }
2320            if self.char() != '}' {
2321                let count_start = match count_start {
2322                    Ok(c) => c,
2323                    Err(err) if err.kind == ast::ErrorKind::RepetitionCountDecimalEmpty => {
2324                        if self.parser().empty_min_range {
2325                            0
2326                        } else {
2327                            return Err(err);
2328                        }
2329                    }
2330                    err => err?,
2331                };
2332                let count_end = specialize_err(
2333                    self.parse_decimal(),
2334                    ast::ErrorKind::DecimalEmpty,
2335                    ast::ErrorKind::RepetitionCountDecimalEmpty,
2336                )?;
2337                ast::RepetitionRange::Bounded(count_start, count_end)
2338            } else {
2339                ast::RepetitionRange::AtLeast(count_start?)
2340            }
2341        } else {
2342            ast::RepetitionRange::Exactly(count_start?)
2343        };
2344
2345        if self.is_eof() || self.char() != '}' {
2346            return Err(self.error(
2347                Span::new(start, self.pos()),
2348                ast::ErrorKind::RepetitionCountUnclosed,
2349            ));
2350        }
2351
2352        if self.bump_and_bump_space() && self.char() == '?' {
2353            return Err(self.error(
2354                Span::new(start, self.pos()),
2355                ast::ErrorKind::UnsupportedLazyQuantifier,
2356            ));
2357        }
2358
2359        let op_span = Span::new(start, self.pos());
2360        if !range.is_valid() {
2361            return Err(self.error(op_span, ast::ErrorKind::RepetitionCountInvalid));
2362        }
2363
2364        let over_limit = match &range {
2365            ast::RepetitionRange::Exactly(n) => *n > self.max_repeat,
2366            ast::RepetitionRange::AtLeast(n) => *n > self.max_repeat,
2367            ast::RepetitionRange::Bounded(n, m) => *n > self.max_repeat || *m > self.max_repeat,
2368        };
2369        if over_limit {
2370            return Err(self.error(op_span, ast::ErrorKind::UnsupportedResharpRegex));
2371        }
2372        concat.asts.push(Ast::repetition(ast::Repetition {
2373            span: ast.span().with_end(self.pos()),
2374            op: ast::RepetitionOp {
2375                span: op_span,
2376                kind: ast::RepetitionKind::Range(range),
2377            },
2378            greedy: true,
2379            ast: Box::new(ast),
2380        }));
2381        Ok(concat)
2382    }
2383
2384    #[inline(never)]
2385    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
2386        assert_eq!(self.char(), '(');
2387        let open_span = self.span_char();
2388        self.bump();
2389        self.bump_space();
2390        if let Some((ahead, pos)) = self.is_lookaround_prefix() {
2391            let kind = match (pos, ahead) {
2392                (true, true) => LookaroundKind::PositiveLookahead,
2393                (true, false) => LookaroundKind::PositiveLookbehind,
2394                (false, true) => LookaroundKind::NegativeLookahead,
2395                (false, false) => LookaroundKind::NegativeLookbehind,
2396            };
2397            return Ok(Either::Right(ast::Group {
2398                span: open_span,
2399                kind: ast::GroupKind::Lookaround(kind),
2400                ast: Box::new(Ast::empty(self.span())),
2401            }));
2402        }
2403        let inner_span = self.span();
2404        let mut starts_with_p = true;
2405        if self.bump_if("?P<") || {
2406            starts_with_p = false;
2407            self.bump_if("?<")
2408        } {
2409            let capture_index = self.next_capture_index(open_span)?;
2410            let name = self.parse_capture_name(capture_index)?;
2411            Ok(Either::Right(ast::Group {
2412                span: open_span,
2413                kind: ast::GroupKind::CaptureName {
2414                    starts_with_p,
2415                    name,
2416                },
2417                ast: Box::new(Ast::empty(self.span())),
2418            }))
2419        } else if self.bump_if("?") {
2420            if self.is_eof() {
2421                return Err(self.error(open_span, ast::ErrorKind::GroupUnclosed));
2422            }
2423            let flags = self.parse_flags()?;
2424            let char_end = self.char();
2425            self.bump();
2426            if char_end == ')' {
2427                // We don't allow empty flags, e.g., `(?)`. We instead
2428                // interpret it as a repetition operator missing its argument.
2429                if flags.items.is_empty() {
2430                    return Err(self.error(inner_span, ast::ErrorKind::RepetitionMissing));
2431                }
2432                Ok(Either::Left(ast::SetFlags {
2433                    span: Span {
2434                        end: self.pos(),
2435                        ..open_span
2436                    },
2437                    flags,
2438                }))
2439            } else {
2440                assert_eq!(char_end, ':');
2441                Ok(Either::Right(ast::Group {
2442                    span: open_span,
2443                    kind: ast::GroupKind::NonCapturing(flags),
2444                    ast: Box::new(Ast::empty(self.span())),
2445                }))
2446            }
2447        } else {
2448            let capture_index = self.next_capture_index(open_span)?;
2449            Ok(Either::Right(ast::Group {
2450                span: open_span,
2451                kind: ast::GroupKind::CaptureIndex(capture_index),
2452                ast: Box::new(Ast::empty(self.span())),
2453            }))
2454        }
2455    }
2456
2457    #[inline(never)]
2458    fn parse_capture_name(&self, capture_index: u32) -> Result<ast::CaptureName> {
2459        if self.is_eof() {
2460            return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
2461        }
2462        let start = self.pos();
2463        loop {
2464            if self.char() == '>' {
2465                break;
2466            }
2467            if !is_capture_char(self.char(), self.pos() == start) {
2468                return Err(self.error(self.span_char(), ast::ErrorKind::GroupNameInvalid));
2469            }
2470            if !self.bump() {
2471                break;
2472            }
2473        }
2474        let end = self.pos();
2475        if self.is_eof() {
2476            return Err(self.error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
2477        }
2478        assert_eq!(self.char(), '>');
2479        self.bump();
2480        let name = &self.pattern()[start.offset..end.offset];
2481        if name.is_empty() {
2482            return Err(self.error(Span::new(start, start), ast::ErrorKind::GroupNameEmpty));
2483        }
2484        let capname = ast::CaptureName {
2485            span: Span::new(start, end),
2486            name: name.to_string(),
2487            index: capture_index,
2488        };
2489        self.add_capture_name(&capname)?;
2490        Ok(capname)
2491    }
2492
2493    #[inline(never)]
2494    fn parse_flags(&self) -> Result<ast::Flags> {
2495        let mut flags = ast::Flags {
2496            span: self.span(),
2497            items: vec![],
2498        };
2499        let mut last_was_negation = None;
2500        while self.char() != ':' && self.char() != ')' {
2501            if self.char() == '-' {
2502                last_was_negation = Some(self.span_char());
2503                let item = ast::FlagsItem {
2504                    span: self.span_char(),
2505                    kind: ast::FlagsItemKind::Negation,
2506                };
2507                if let Some(i) = flags.add_item(item) {
2508                    return Err(self.error(
2509                        self.span_char(),
2510                        ast::ErrorKind::FlagRepeatedNegation {
2511                            original: flags.items[i].span,
2512                        },
2513                    ));
2514                }
2515            } else {
2516                last_was_negation = None;
2517                let item = ast::FlagsItem {
2518                    span: self.span_char(),
2519                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
2520                };
2521                if let Some(i) = flags.add_item(item) {
2522                    return Err(self.error(
2523                        self.span_char(),
2524                        ast::ErrorKind::FlagDuplicate {
2525                            original: flags.items[i].span,
2526                        },
2527                    ));
2528                }
2529            }
2530            if !self.bump() {
2531                return Err(self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof));
2532            }
2533        }
2534        if let Some(span) = last_was_negation {
2535            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
2536        }
2537        flags.span.end = self.pos();
2538        Ok(flags)
2539    }
2540
2541    #[inline(never)]
2542    fn parse_flag(&self) -> Result<ast::Flag> {
2543        match self.char() {
2544            'i' => Ok(ast::Flag::CaseInsensitive),
2545            'm' => Ok(ast::Flag::MultiLine),
2546            's' => Ok(ast::Flag::DotMatchesNewLine),
2547            'U' => Ok(ast::Flag::SwapGreed),
2548            'u' => Ok(ast::Flag::Unicode),
2549            'R' => Ok(ast::Flag::CRLF),
2550            'x' => Ok(ast::Flag::IgnoreWhitespace),
2551            _ => Err(self.error(self.span_char(), ast::ErrorKind::FlagUnrecognized)),
2552        }
2553    }
2554
2555    fn parse_primitive(&self) -> Result<Primitive> {
2556        match self.char() {
2557            '\\' => self.parse_escape(),
2558            '_' => {
2559                let ast = Primitive::Top(self.span_char());
2560                self.bump();
2561                Ok(ast)
2562            }
2563            '.' => {
2564                let ast = Primitive::Dot(self.span_char());
2565                self.bump();
2566                Ok(ast)
2567            }
2568            '^' => {
2569                let ast = Primitive::Assertion(ast::Assertion {
2570                    span: self.span_char(),
2571                    kind: ast::AssertionKind::StartLine,
2572                });
2573                self.bump();
2574                Ok(ast)
2575            }
2576            '$' => {
2577                let ast = Primitive::Assertion(ast::Assertion {
2578                    span: self.span_char(),
2579                    kind: ast::AssertionKind::EndLine,
2580                });
2581                self.bump();
2582                Ok(ast)
2583            }
2584            c => {
2585                let ast = Primitive::Literal(Literal {
2586                    span: self.span_char(),
2587                    kind: LiteralKind::Verbatim,
2588                    c,
2589                });
2590                self.bump();
2591                Ok(ast)
2592            }
2593        }
2594    }
2595
2596    #[inline(never)]
2597    fn parse_escape(&self) -> Result<Primitive> {
2598        assert_eq!(self.char(), '\\');
2599        let start = self.pos();
2600        if !self.bump() {
2601            return Err(self.error(
2602                Span::new(start, self.pos()),
2603                ast::ErrorKind::EscapeUnexpectedEof,
2604            ));
2605        }
2606        let c = self.char();
2607        // Put some of the more complicated routines into helpers.
2608        match c {
2609            '0'..='9' => {
2610                if !self.parser().octal {
2611                    return Err(self.error(
2612                        Span::new(start, self.span_char().end),
2613                        ast::ErrorKind::UnsupportedBackreference,
2614                    ));
2615                }
2616                let mut lit = self.parse_octal();
2617                lit.span.start = start;
2618                return Ok(Primitive::Literal(lit));
2619            }
2620            'x' | 'u' | 'U' => {
2621                let mut lit = self.parse_hex()?;
2622                lit.span.start = start;
2623                return Ok(Primitive::Literal(lit));
2624            }
2625            'p' | 'P' => {
2626                let mut cls = self.parse_unicode_class()?;
2627                cls.span.start = start;
2628                return Ok(Primitive::Unicode(cls));
2629            }
2630            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
2631                let mut cls = self.parse_perl_class();
2632                cls.span.start = start;
2633                return Ok(Primitive::Perl(cls));
2634            }
2635            _ => {}
2636        }
2637
2638        // Handle all of the one letter sequences inline.
2639        self.bump();
2640        let span = Span::new(start, self.pos());
2641        if is_meta_character(c) {
2642            return Ok(Primitive::Literal(Literal {
2643                span,
2644                kind: LiteralKind::Meta,
2645                c,
2646            }));
2647        }
2648        if is_escapeable_character(c) {
2649            return Ok(Primitive::Literal(Literal {
2650                span,
2651                kind: LiteralKind::Superfluous,
2652                c,
2653            }));
2654        }
2655        let special = |kind, c| {
2656            Ok(Primitive::Literal(Literal {
2657                span,
2658                kind: LiteralKind::Special(kind),
2659                c,
2660            }))
2661        };
2662        match c {
2663            'a' => special(SpecialLiteralKind::Bell, '\x07'),
2664            'f' => special(SpecialLiteralKind::FormFeed, '\x0C'),
2665            't' => special(SpecialLiteralKind::Tab, '\t'),
2666            'n' => special(SpecialLiteralKind::LineFeed, '\n'),
2667            'r' => special(SpecialLiteralKind::CarriageReturn, '\r'),
2668            'v' => special(SpecialLiteralKind::VerticalTab, '\x0B'),
2669            'A' => Ok(Primitive::Assertion(ast::Assertion {
2670                span,
2671                kind: ast::AssertionKind::StartText,
2672            })),
2673            'z' => Ok(Primitive::Assertion(ast::Assertion {
2674                span,
2675                kind: ast::AssertionKind::EndText,
2676            })),
2677            'b' => {
2678                let mut wb = ast::Assertion {
2679                    span,
2680                    kind: ast::AssertionKind::WordBoundary,
2681                };
2682                // After a \b, we "try" to parse things like \b{start} for
2683                // special word boundary assertions.
2684                if !self.is_eof() && self.char() == '{' {
2685                    if let Some(kind) = self.maybe_parse_special_word_boundary(start)? {
2686                        wb.kind = kind;
2687                        wb.span.end = self.pos();
2688                    }
2689                }
2690                Ok(Primitive::Assertion(wb))
2691            }
2692            'B' => Ok(Primitive::Assertion(ast::Assertion {
2693                span,
2694                kind: ast::AssertionKind::NotWordBoundary,
2695            })),
2696            '<' => Ok(Primitive::Assertion(ast::Assertion {
2697                span,
2698                kind: ast::AssertionKind::WordBoundaryStartAngle,
2699            })),
2700            '>' => Ok(Primitive::Assertion(ast::Assertion {
2701                span,
2702                kind: ast::AssertionKind::WordBoundaryEndAngle,
2703            })),
2704            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
2705        }
2706    }
2707
2708    fn maybe_parse_special_word_boundary(
2709        &self,
2710        wb_start: Position,
2711    ) -> Result<Option<ast::AssertionKind>> {
2712        assert_eq!(self.char(), '{');
2713
2714        let is_valid_char = |c| matches!(c, 'A'..='Z' | 'a'..='z' | '-');
2715        let start = self.pos();
2716        if !self.bump_and_bump_space() {
2717            return Err(self.error(
2718                Span::new(wb_start, self.pos()),
2719                ast::ErrorKind::SpecialWordOrRepetitionUnexpectedEof,
2720            ));
2721        }
2722        let start_contents = self.pos();
2723        if !is_valid_char(self.char()) {
2724            self.parser().pos.set(start);
2725            return Ok(None);
2726        }
2727
2728        // Now collect up our chars until we see a '}'.
2729        let mut scratch = self.parser().scratch.borrow_mut();
2730        scratch.clear();
2731        while !self.is_eof() && is_valid_char(self.char()) {
2732            scratch.push(self.char());
2733            self.bump_and_bump_space();
2734        }
2735        if self.is_eof() || self.char() != '}' {
2736            return Err(self.error(
2737                Span::new(start, self.pos()),
2738                ast::ErrorKind::SpecialWordBoundaryUnclosed,
2739            ));
2740        }
2741        let end = self.pos();
2742        self.bump();
2743        let kind = match scratch.as_str() {
2744            "start" => ast::AssertionKind::WordBoundaryStart,
2745            "end" => ast::AssertionKind::WordBoundaryEnd,
2746            "start-half" => ast::AssertionKind::WordBoundaryStartHalf,
2747            "end-half" => ast::AssertionKind::WordBoundaryEndHalf,
2748            _ => {
2749                return Err(self.error(
2750                    Span::new(start_contents, end),
2751                    ast::ErrorKind::SpecialWordBoundaryUnrecognized,
2752                ))
2753            }
2754        };
2755        Ok(Some(kind))
2756    }
2757
2758    #[inline(never)]
2759    fn parse_octal(&self) -> Literal {
2760        assert!(self.parser().octal);
2761        assert!('0' <= self.char() && self.char() <= '7');
2762        let start = self.pos();
2763        // Parse up to two more digits.
2764        while self.bump()
2765            && '0' <= self.char()
2766            && self.char() <= '7'
2767            && self.pos().offset - start.offset <= 2
2768        {}
2769        let end = self.pos();
2770        let octal = &self.pattern()[start.offset..end.offset];
2771        // Parsing the octal should never fail since the above guarantees a
2772        // valid number.
2773        let codepoint = u32::from_str_radix(octal, 8).expect("valid octal number");
2774        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
2775        // invalid Unicode scalar values.
2776        let c = char::from_u32(codepoint).expect("Unicode scalar value");
2777        Literal {
2778            span: Span::new(start, end),
2779            kind: LiteralKind::Octal,
2780            c,
2781        }
2782    }
2783
2784    #[inline(never)]
2785    fn parse_hex(&self) -> Result<Literal> {
2786        assert!(self.char() == 'x' || self.char() == 'u' || self.char() == 'U');
2787
2788        let hex_kind = match self.char() {
2789            'x' => HexLiteralKind::X,
2790            'u' => HexLiteralKind::UnicodeShort,
2791            _ => HexLiteralKind::UnicodeLong,
2792        };
2793        if !self.bump_and_bump_space() {
2794            return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2795        }
2796        if self.char() == '{' {
2797            self.parse_hex_brace(hex_kind)
2798        } else {
2799            self.parse_hex_digits(hex_kind)
2800        }
2801    }
2802
2803    #[inline(never)]
2804    fn parse_hex_digits(&self, kind: HexLiteralKind) -> Result<Literal> {
2805        let mut scratch = self.parser().scratch.borrow_mut();
2806        scratch.clear();
2807
2808        let start = self.pos();
2809        for i in 0..kind.digits() {
2810            if i > 0 && !self.bump_and_bump_space() {
2811                return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2812            }
2813            if !is_hex(self.char()) {
2814                return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2815            }
2816            scratch.push(self.char());
2817        }
2818        self.bump_and_bump_space();
2819        let end = self.pos();
2820        let hex = scratch.as_str();
2821        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2822            None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2823            Some(c) => Ok(Literal {
2824                span: Span::new(start, end),
2825                kind: LiteralKind::HexFixed(kind),
2826                c,
2827            }),
2828        }
2829    }
2830
2831    #[inline(never)]
2832    fn parse_hex_brace(&self, kind: HexLiteralKind) -> Result<Literal> {
2833        let mut scratch = self.parser().scratch.borrow_mut();
2834        scratch.clear();
2835
2836        let brace_pos = self.pos();
2837        let start = self.span_char().end;
2838        while self.bump_and_bump_space() && self.char() != '}' {
2839            if !is_hex(self.char()) {
2840                return Err(self.error(self.span_char(), ast::ErrorKind::EscapeHexInvalidDigit));
2841            }
2842            scratch.push(self.char());
2843        }
2844        if self.is_eof() {
2845            return Err(self.error(
2846                Span::new(brace_pos, self.pos()),
2847                ast::ErrorKind::EscapeUnexpectedEof,
2848            ));
2849        }
2850        let end = self.pos();
2851        let hex = scratch.as_str();
2852        assert_eq!(self.char(), '}');
2853        self.bump_and_bump_space();
2854
2855        if hex.is_empty() {
2856            return Err(self.error(
2857                Span::new(brace_pos, self.pos()),
2858                ast::ErrorKind::EscapeHexEmpty,
2859            ));
2860        }
2861        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
2862            None => Err(self.error(Span::new(start, end), ast::ErrorKind::EscapeHexInvalid)),
2863            Some(c) => Ok(Literal {
2864                span: Span::new(start, self.pos()),
2865                kind: LiteralKind::HexBrace(kind),
2866                c,
2867            }),
2868        }
2869    }
2870
2871    fn parse_decimal(&self) -> Result<u32> {
2872        let mut scratch = self.parser().scratch.borrow_mut();
2873        scratch.clear();
2874
2875        while !self.is_eof() && self.char().is_whitespace() {
2876            self.bump();
2877        }
2878        let start = self.pos();
2879        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
2880            scratch.push(self.char());
2881            self.bump_and_bump_space();
2882        }
2883        let span = Span::new(start, self.pos());
2884        while !self.is_eof() && self.char().is_whitespace() {
2885            self.bump_and_bump_space();
2886        }
2887        let digits = scratch.as_str();
2888        if digits.is_empty() {
2889            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
2890        }
2891        match digits.parse::<u32>().ok() {
2892            Some(n) => Ok(n),
2893            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
2894        }
2895    }
2896
2897    #[inline(never)]
2898    fn parse_set_class(&self) -> Result<ClassBracketed> {
2899        assert_eq!(self.char(), '[');
2900
2901        let mut union = ClassSetUnion {
2902            span: self.span(),
2903            items: vec![],
2904        };
2905        loop {
2906            self.bump_space();
2907            if self.is_eof() {
2908                return Err(self.unclosed_class_error());
2909            }
2910            match self.char() {
2911                '[' => {
2912                    if !self.parser().stack_class.borrow().is_empty() {
2913                        if let Some(cls) = self.maybe_parse_ascii_class() {
2914                            union.push(ClassSetItem::Ascii(cls));
2915                            continue;
2916                        }
2917                    }
2918                    union = self.push_class_open(union)?;
2919                }
2920                ']' => match self.pop_class(union)? {
2921                    Either::Left(nested_union) => {
2922                        union = nested_union;
2923                    }
2924                    Either::Right(class) => return Ok(class),
2925                },
2926                '&' if self.peek() == Some('&') => {
2927                    assert!(self.bump_if("&&"));
2928                    union = self.push_class_op(ClassSetBinaryOpKind::Intersection, union);
2929                }
2930                '-' if self.peek() == Some('-') => {
2931                    assert!(self.bump_if("--"));
2932                    union = self.push_class_op(ClassSetBinaryOpKind::Difference, union);
2933                }
2934                '~' if self.peek() == Some('~') => {
2935                    assert!(self.bump_if("~~"));
2936                    union = self.push_class_op(ClassSetBinaryOpKind::SymmetricDifference, union);
2937                }
2938                _ => {
2939                    union.push(self.parse_set_class_range()?);
2940                }
2941            }
2942        }
2943    }
2944
2945    #[inline(never)]
2946    fn parse_set_class_range(&self) -> Result<ClassSetItem> {
2947        let prim1 = self.parse_set_class_item()?;
2948        self.bump_space();
2949        if self.is_eof() {
2950            return Err(self.unclosed_class_error());
2951        }
2952        if self.char() != '-' || self.peek_space() == Some(']') || self.peek_space() == Some('-') {
2953            return prim1.into_class_set_item(self);
2954        }
2955        if !self.bump_and_bump_space() {
2956            return Err(self.unclosed_class_error());
2957        }
2958        let prim2 = self.parse_set_class_item()?;
2959        let range = ClassSetRange {
2960            span: Span::new(prim1.span().start, prim2.span().end),
2961            start: prim1.into_class_literal(self)?,
2962            end: prim2.into_class_literal(self)?,
2963        };
2964        if !range.is_valid() {
2965            return Err(self.error(range.span, ast::ErrorKind::ClassRangeInvalid));
2966        }
2967        Ok(ClassSetItem::Range(range))
2968    }
2969
2970    #[inline(never)]
2971    fn parse_set_class_item(&self) -> Result<Primitive> {
2972        if self.char() == '\\' {
2973            self.parse_escape()
2974        } else {
2975            let x = Primitive::Literal(Literal {
2976                span: self.span_char(),
2977                kind: LiteralKind::Verbatim,
2978                c: self.char(),
2979            });
2980            self.bump();
2981            Ok(x)
2982        }
2983    }
2984
2985    #[inline(never)]
2986    fn parse_set_class_open(&self) -> Result<(ClassBracketed, ClassSetUnion)> {
2987        assert_eq!(self.char(), '[');
2988        let start = self.pos();
2989        if !self.bump_and_bump_space() {
2990            return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2991        }
2992
2993        let negated = if self.char() != '^' {
2994            false
2995        } else {
2996            if !self.bump_and_bump_space() {
2997                return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
2998            }
2999            true
3000        };
3001        // Accept any number of `-` as literal `-`.
3002        let mut union = ClassSetUnion {
3003            span: self.span(),
3004            items: vec![],
3005        };
3006        while self.char() == '-' {
3007            union.push(ClassSetItem::Literal(Literal {
3008                span: self.span_char(),
3009                kind: LiteralKind::Verbatim,
3010                c: '-',
3011            }));
3012            if !self.bump_and_bump_space() {
3013                return Err(self.error(Span::new(start, start), ast::ErrorKind::ClassUnclosed));
3014            }
3015        }
3016        // If `]` is the *first* char in a set, then interpret it as a literal
3017        // `]`. That is, an empty class is impossible to write.
3018        if union.items.is_empty() && self.char() == ']' {
3019            union.push(ClassSetItem::Literal(Literal {
3020                span: self.span_char(),
3021                kind: LiteralKind::Verbatim,
3022                c: ']',
3023            }));
3024            if !self.bump_and_bump_space() {
3025                return Err(self.error(Span::new(start, self.pos()), ast::ErrorKind::ClassUnclosed));
3026            }
3027        }
3028        let set = ClassBracketed {
3029            span: Span::new(start, self.pos()),
3030            negated,
3031            kind: ClassSet::union(ClassSetUnion {
3032                span: Span::new(union.span.start, union.span.start),
3033                items: vec![],
3034            }),
3035        };
3036        Ok((set, union))
3037    }
3038
3039    #[inline(never)]
3040    fn maybe_parse_ascii_class(&self) -> Option<ClassAscii> {
3041        assert_eq!(self.char(), '[');
3042        // If parsing fails, then we back up the parser to this starting point.
3043        let start = self.pos();
3044        let mut negated = false;
3045        if !self.bump() || self.char() != ':' {
3046            self.parser().pos.set(start);
3047            return None;
3048        }
3049        if !self.bump() {
3050            self.parser().pos.set(start);
3051            return None;
3052        }
3053        if self.char() == '^' {
3054            negated = true;
3055            if !self.bump() {
3056                self.parser().pos.set(start);
3057                return None;
3058            }
3059        }
3060        let name_start = self.offset();
3061        while self.char() != ':' && self.bump() {}
3062        if self.is_eof() {
3063            self.parser().pos.set(start);
3064            return None;
3065        }
3066        let name = &self.pattern()[name_start..self.offset()];
3067        if !self.bump_if(":]") {
3068            self.parser().pos.set(start);
3069            return None;
3070        }
3071        let kind = match regex_syntax::ast::ClassAsciiKind::from_name(name) {
3072            Some(kind) => kind,
3073            None => {
3074                self.parser().pos.set(start);
3075                return None;
3076            }
3077        };
3078        Some(ClassAscii {
3079            span: Span::new(start, self.pos()),
3080            kind,
3081            negated,
3082        })
3083    }
3084
3085    #[inline(never)]
3086    fn parse_unicode_class(&self) -> Result<ClassUnicode> {
3087        assert!(self.char() == 'p' || self.char() == 'P');
3088
3089        let mut scratch = self.parser().scratch.borrow_mut();
3090        scratch.clear();
3091
3092        let negated = self.char() == 'P';
3093        if !self.bump_and_bump_space() {
3094            return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
3095        }
3096        let (start, kind) = if self.char() == '{' {
3097            let start = self.span_char().end;
3098            while self.bump_and_bump_space() && self.char() != '}' {
3099                scratch.push(self.char());
3100            }
3101            if self.is_eof() {
3102                return Err(self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
3103            }
3104            assert_eq!(self.char(), '}');
3105            self.bump();
3106
3107            let name = scratch.as_str();
3108            if let Some(i) = name.find("!=") {
3109                (
3110                    start,
3111                    ClassUnicodeKind::NamedValue {
3112                        op: ClassUnicodeOpKind::NotEqual,
3113                        name: name[..i].to_string(),
3114                        value: name[i + 2..].to_string(),
3115                    },
3116                )
3117            } else if let Some(i) = name.find(':') {
3118                (
3119                    start,
3120                    ClassUnicodeKind::NamedValue {
3121                        op: ClassUnicodeOpKind::Colon,
3122                        name: name[..i].to_string(),
3123                        value: name[i + 1..].to_string(),
3124                    },
3125                )
3126            } else if let Some(i) = name.find('=') {
3127                (
3128                    start,
3129                    ClassUnicodeKind::NamedValue {
3130                        op: ClassUnicodeOpKind::Equal,
3131                        name: name[..i].to_string(),
3132                        value: name[i + 1..].to_string(),
3133                    },
3134                )
3135            } else {
3136                (start, ClassUnicodeKind::Named(name.to_string()))
3137            }
3138        } else {
3139            let start = self.pos();
3140            let c = self.char();
3141            if c == '\\' {
3142                return Err(self.error(self.span_char(), ast::ErrorKind::UnicodeClassInvalid));
3143            }
3144            self.bump_and_bump_space();
3145            let kind = ClassUnicodeKind::OneLetter(c);
3146            (start, kind)
3147        };
3148        Ok(ClassUnicode {
3149            span: Span::new(start, self.pos()),
3150            negated,
3151            kind,
3152        })
3153    }
3154
3155    #[inline(never)]
3156    fn parse_perl_class(&self) -> ClassPerl {
3157        let c = self.char();
3158        let span = self.span_char();
3159        self.bump();
3160        let (negated, kind) = match c {
3161            'd' => (false, regex_syntax::ast::ClassPerlKind::Digit),
3162            'D' => (true, regex_syntax::ast::ClassPerlKind::Digit),
3163            's' => (false, regex_syntax::ast::ClassPerlKind::Space),
3164            'S' => (true, regex_syntax::ast::ClassPerlKind::Space),
3165            'w' => (false, regex_syntax::ast::ClassPerlKind::Word),
3166            'W' => (true, regex_syntax::ast::ClassPerlKind::Word),
3167            c => panic!("expected valid Perl class but got '{}'", c),
3168        };
3169        ClassPerl {
3170            span,
3171            kind,
3172            negated,
3173        }
3174    }
3175}
3176
3177/// `[\s\S]`, `[\w\W]`, `[\d\D]` etc into `_` canonicalization.
3178/// should really be done as a u32 BDD but good enough for now
3179fn is_universal_perl_pair(item: &regex_syntax::ast::ClassSetItem) -> bool {
3180    use regex_syntax::ast::ClassSetItem;
3181    let items = match item {
3182        ClassSetItem::Union(u) => &u.items,
3183        _ => return false,
3184    };
3185    if items.len() != 2 {
3186        return false;
3187    }
3188    match (&items[0], &items[1]) {
3189        (ClassSetItem::Perl(a), ClassSetItem::Perl(b)) => {
3190            let is_all = a.kind == b.kind && a.negated != b.negated;
3191            is_all
3192        }
3193        _ => false,
3194    }
3195}
3196
3197pub fn max_concat_length(ast: &ast::Ast) -> usize {
3198    match ast {
3199        ast::Ast::Empty(_)
3200        | ast::Ast::Flags(_)
3201        | ast::Ast::Literal(_)
3202        | ast::Ast::Dot(_)
3203        | ast::Ast::Top(_)
3204        | ast::Ast::Assertion(_)
3205        | ast::Ast::ClassUnicode(_)
3206        | ast::Ast::ClassPerl(_)
3207        | ast::Ast::ClassBracketed(_) => 0,
3208        ast::Ast::Group(g) => max_concat_length(&g.ast),
3209        ast::Ast::Complement(c) => max_concat_length(&c.ast),
3210        ast::Ast::Lookaround(l) => max_concat_length(&l.ast),
3211        ast::Ast::Repetition(r) => max_concat_length(&r.ast),
3212        ast::Ast::Concat(c) => c
3213            .asts
3214            .len()
3215            .max(c.asts.iter().map(max_concat_length).max().unwrap_or(0)),
3216        ast::Ast::Alternation(a) => a.asts.iter().map(max_concat_length).max().unwrap_or(0),
3217        ast::Ast::Intersection(i) => i.asts.iter().map(max_concat_length).max().unwrap_or(0),
3218    }
3219}
3220
3221pub fn expanded_ast_size(ast: &ast::Ast, limit: u64) -> u64 {
3222    fn go(ast: &ast::Ast, limit: u64) -> u64 {
3223        match ast {
3224            ast::Ast::Empty(_) | ast::Ast::Flags(_) => 1,
3225            ast::Ast::Literal(_) | ast::Ast::Dot(_) | ast::Ast::Top(_) => 1,
3226            ast::Ast::Assertion(_) => 1,
3227            ast::Ast::ClassUnicode(_) | ast::Ast::ClassPerl(_) | ast::Ast::ClassBracketed(_) => 1,
3228            ast::Ast::Group(g) => go(&g.ast, limit).saturating_add(1).min(limit),
3229            ast::Ast::Complement(c) => go(&c.ast, limit).saturating_add(1).min(limit),
3230            ast::Ast::Lookaround(l) => go(&l.ast, limit).saturating_add(1).min(limit),
3231            ast::Ast::Concat(c) => sum_children(&c.asts, limit),
3232            ast::Ast::Alternation(a) => sum_children(&a.asts, limit),
3233            ast::Ast::Intersection(i) => sum_children(&i.asts, limit),
3234            ast::Ast::Repetition(r) => {
3235                let body = go(&r.ast, limit);
3236                let factor: u64 = match &r.op.kind {
3237                    ast::RepetitionKind::ZeroOrOne => 2,
3238                    ast::RepetitionKind::ZeroOrMore | ast::RepetitionKind::OneOrMore => 2,
3239                    ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(n)) => {
3240                        (*n as u64).max(1)
3241                    }
3242                    ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(n)) => {
3243                        (*n as u64).max(1).saturating_add(1)
3244                    }
3245                    ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(_, m)) => {
3246                        (*m as u64).max(1)
3247                    }
3248                };
3249                body.saturating_mul(factor).min(limit)
3250            }
3251        }
3252    }
3253    fn sum_children(children: &[ast::Ast], limit: u64) -> u64 {
3254        let mut total: u64 = 0;
3255        for c in children {
3256            total = total.saturating_add(go(c, limit));
3257            if total >= limit {
3258                return limit;
3259            }
3260        }
3261        total
3262    }
3263    go(ast, limit)
3264}
3265
3266pub fn parse_ast<'s>(tb: &mut TB<'s>, pattern: &'s str) -> std::result::Result<NodeId, ParseError> {
3267    let mut p: ResharpParser<'s> = ResharpParser::new(pattern);
3268    p.parse(tb)
3269}
3270
3271pub fn parse_ast_with<'s>(
3272    tb: &mut TB<'s>,
3273    pattern: &'s str,
3274    flags: &PatternFlags,
3275) -> std::result::Result<NodeId, ParseError> {
3276    let mut p: ResharpParser<'s> = ResharpParser::with_flags(pattern, flags);
3277    p.parse(tb)
3278}
3279
3280/// Parse a pattern into the raw AST without converting to algebra nodes.
3281pub fn parse_to_ast(pattern: &str) -> std::result::Result<ast::Ast, ParseError> {
3282    let mut p: ResharpParser = ResharpParser::new(pattern);
3283    p.parse_inner()
3284}
resharp_parser/lib.rs

resharp_parser/
lib.rs