Skip to main content

resharp_parser/
ast.rs

1#![allow(dead_code)]
2/*!
3RE# AST based on the regex_syntax crate.
4*/
5
6use regex_syntax::ast::{ClassBracketed, ClassPerl, ClassUnicode, Literal, Span};
7
8#[derive(Clone, Debug, Eq, PartialEq)]
9pub struct Error {
10    kind: ErrorKind,
11    pattern: String,
12    span: Span,
13}
14
15impl Error {
16    /// Return the type of this error.
17    pub fn kind(&self) -> &ErrorKind {
18        &self.kind
19    }
20
21    /// The original pattern string in which this error occurred.
22    ///
23    /// Every span reported by this error is reported in terms of this string.
24    pub fn pattern(&self) -> &str {
25        &self.pattern
26    }
27
28    /// Return the span at which this error occurred.
29    pub fn span(&self) -> &Span {
30        &self.span
31    }
32
33    /// Return an auxiliary span. This span exists only for some errors that
34    /// benefit from being able to point to two locations in the original
35    /// regular expression. For example, "duplicate" errors will have the
36    /// main error position set to the duplicate occurrence while its
37    /// auxiliary span will be set to the initial occurrence.
38    pub fn auxiliary_span(&self) -> Option<&Span> {
39        use self::ErrorKind::*;
40        match self.kind {
41            FlagDuplicate { ref original } => Some(original),
42            FlagRepeatedNegation { ref original, .. } => Some(original),
43            GroupNameDuplicate { ref original, .. } => Some(original),
44            _ => None,
45        }
46    }
47}
48
49/// The type of an error that occurred while building an AST.
50///
51/// This error type is marked as `non_exhaustive`. This means that adding a
52/// new variant is not considered a breaking change.
53#[non_exhaustive]
54#[derive(Clone, Debug, Eq, PartialEq)]
55pub enum ErrorKind {
56    /// The capturing group limit was exceeded.
57    ///
58    /// Note that this represents a limit on the total number of capturing
59    /// groups in a regex and not necessarily the number of nested capturing
60    /// groups. That is, the nest limit can be low and it is still possible for
61    /// this error to occur.
62    CaptureLimitExceeded,
63    /// An invalid escape sequence was found in a character class set.
64    ClassEscapeInvalid,
65    /// An invalid character class range was found. An invalid range is any
66    /// range where the start is greater than the end.
67    ClassRangeInvalid,
68    /// An invalid range boundary was found in a character class. Range
69    /// boundaries must be a single literal codepoint, but this error indicates
70    /// that something else was found, such as a nested class.
71    ClassRangeLiteral,
72    /// An opening `[` was found with no corresponding closing `]`.
73    ClassUnclosed,
74    /// Note that this error variant is no longer used. Namely, a decimal
75    /// number can only appear as a repetition quantifier. When the number
76    /// in a repetition quantifier is empty, then it gets its own specialized
77    /// error, `RepetitionCountDecimalEmpty`.
78    DecimalEmpty,
79    /// An invalid decimal number was given where one was expected.
80    DecimalInvalid,
81    /// A bracketed hex literal was empty.
82    EscapeHexEmpty,
83    /// A bracketed hex literal did not correspond to a Unicode scalar value.
84    EscapeHexInvalid,
85    /// An invalid hexadecimal digit was found.
86    EscapeHexInvalidDigit,
87    /// EOF was found before an escape sequence was completed.
88    EscapeUnexpectedEof,
89    /// An unrecognized escape sequence.
90    EscapeUnrecognized,
91    /// A dangling negation was used when setting flags, e.g., `i-`.
92    FlagDanglingNegation,
93    /// A flag was used twice, e.g., `i-i`.
94    FlagDuplicate {
95        /// The position of the original flag. The error position
96        /// points to the duplicate flag.
97        original: Span,
98    },
99    /// The negation operator was used twice, e.g., `-i-s`.
100    FlagRepeatedNegation {
101        /// The position of the original negation operator. The error position
102        /// points to the duplicate negation operator.
103        original: Span,
104    },
105    /// Expected a flag but got EOF, e.g., `(?`.
106    FlagUnexpectedEof,
107    /// Unrecognized flag, e.g., `a`.
108    FlagUnrecognized,
109    /// A duplicate capture name was found.
110    GroupNameDuplicate {
111        /// The position of the initial occurrence of the capture name. The
112        /// error position itself points to the duplicate occurrence.
113        original: Span,
114    },
115    /// A capture group name is empty, e.g., `(?P<>abc)`.
116    GroupNameEmpty,
117    /// An invalid character was seen for a capture group name. This includes
118    /// errors where the first character is a digit (even though subsequent
119    /// characters are allowed to be digits).
120    GroupNameInvalid,
121    /// A closing `>` could not be found for a capture group name.
122    GroupNameUnexpectedEof,
123    /// An unclosed group, e.g., `(ab`.
124    ///
125    /// The span of this error corresponds to the unclosed parenthesis.
126    GroupUnclosed,
127    /// An unopened group, e.g., `ab)`.
128    GroupUnopened,
129    /// The nest limit was exceeded. The limit stored here is the limit
130    /// configured in the parser.
131    NestLimitExceeded(u32),
132    /// The range provided in a counted repetition operator is invalid. The
133    /// range is invalid if the start is greater than the end.
134    RepetitionCountInvalid,
135    /// An opening `{` was not followed by a valid decimal value.
136    /// For example, `x{}` or `x{]}` would fail.
137    RepetitionCountDecimalEmpty,
138    /// An opening `{` was found with no corresponding closing `}`.
139    RepetitionCountUnclosed,
140    /// A repetition operator was applied to a missing sub-expression. This
141    /// occurs, for example, in the regex consisting of just a `*` or even
142    /// `(?i)*`. It is, however, possible to create a repetition operating on
143    /// an empty sub-expression. For example, `()*` is still considered valid.
144    RepetitionMissing,
145    /// The special word boundary syntax, `\b{something}`, was used, but
146    /// either EOF without `}` was seen, or an invalid character in the
147    /// braces was seen.
148    SpecialWordBoundaryUnclosed,
149    /// The special word boundary syntax, `\b{something}`, was used, but
150    /// `something` was not recognized as a valid word boundary kind.
151    SpecialWordBoundaryUnrecognized,
152    /// The syntax `\b{` was observed, but afterwards the end of the pattern
153    /// was observed without being able to tell whether it was meant to be a
154    /// bounded repetition on the `\b` or the beginning of a special word
155    /// boundary assertion.
156    SpecialWordOrRepetitionUnexpectedEof,
157    /// The Unicode class is not valid. This typically occurs when a `\p` is
158    /// followed by something other than a `{`.
159    UnicodeClassInvalid,
160    /// When octal support is disabled, this error is produced when an octal
161    /// escape is used. The octal escape is assumed to be an invocation of
162    /// a backreference, which is the common case.
163    UnsupportedBackreference,
164    /// When syntax similar to PCRE's look-around is used, this error is
165    /// returned. Some example syntaxes that are rejected include, but are
166    /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
167    /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
168    /// error is used to improve the user experience.
169    UnsupportedLookAround,
170    /// Unsupported RE# regex construct.
171    UnsupportedResharpRegex,
172    /// Lazy quantifiers (e.g., `*?`, `+?`, `??`, `{n,m}?`) are not supported.
173    UnsupportedLazyQuantifier,
174    ComplementGroupExpected,
175}
176
177impl core::fmt::Display for ErrorKind {
178    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
179        use self::ErrorKind::*;
180        match *self {
181            CaptureLimitExceeded => write!(
182                f,
183                "exceeded the maximum number of \
184                 capturing groups ({})",
185                u32::MAX
186            ),
187            ClassEscapeInvalid => {
188                write!(f, "invalid escape sequence found in character class")
189            }
190            ClassRangeInvalid => write!(
191                f,
192                "invalid character class range, \
193                 the start must be <= the end"
194            ),
195            ClassRangeLiteral => {
196                write!(f, "invalid range boundary, must be a literal")
197            }
198            ClassUnclosed => write!(f, "unclosed character class"),
199            DecimalEmpty => write!(f, "decimal literal empty"),
200            DecimalInvalid => write!(f, "decimal literal invalid"),
201            EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
202            EscapeHexInvalid => {
203                write!(f, "hexadecimal literal is not a Unicode scalar value")
204            }
205            EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
206            EscapeUnexpectedEof => write!(
207                f,
208                "incomplete escape sequence, \
209                 reached end of pattern prematurely"
210            ),
211            EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
212            FlagDanglingNegation => {
213                write!(f, "dangling flag negation operator")
214            }
215            FlagDuplicate { .. } => write!(f, "duplicate flag"),
216            FlagRepeatedNegation { .. } => {
217                write!(f, "flag negation operator repeated")
218            }
219            FlagUnexpectedEof => {
220                write!(f, "expected flag but got end of regex")
221            }
222            FlagUnrecognized => write!(f, "unrecognized flag"),
223            GroupNameDuplicate { .. } => {
224                write!(f, "duplicate capture group name")
225            }
226            GroupNameEmpty => write!(f, "empty capture group name"),
227            GroupNameInvalid => write!(f, "invalid capture group character"),
228            GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
229            GroupUnclosed => write!(f, "unclosed group"),
230            GroupUnopened => write!(f, "unopened group"),
231            NestLimitExceeded(limit) => write!(
232                f,
233                "exceed the maximum number of \
234                 nested parentheses/brackets ({})",
235                limit
236            ),
237            RepetitionCountInvalid => write!(
238                f,
239                "invalid repetition count range, \
240                 the start must be <= the end"
241            ),
242            RepetitionCountDecimalEmpty => {
243                write!(f, "repetition quantifier expects a valid decimal")
244            }
245            RepetitionCountUnclosed => {
246                write!(f, "unclosed counted repetition")
247            }
248            RepetitionMissing => {
249                write!(f, "repetition operator missing expression")
250            }
251            SpecialWordBoundaryUnclosed => {
252                write!(
253                    f,
254                    "special word boundary assertion is either \
255                     unclosed or contains an invalid character",
256                )
257            }
258            SpecialWordBoundaryUnrecognized => {
259                write!(
260                    f,
261                    "unrecognized special word boundary assertion, \
262                     valid choices are: start, end, start-half \
263                     or end-half",
264                )
265            }
266            SpecialWordOrRepetitionUnexpectedEof => {
267                write!(
268                    f,
269                    "found either the beginning of a special word \
270                     boundary or a bounded repetition on a \\b with \
271                     an opening brace, but no closing brace",
272                )
273            }
274            UnicodeClassInvalid => {
275                write!(f, "invalid Unicode character class")
276            }
277            UnsupportedBackreference => {
278                write!(f, "backreferences are not supported")
279            }
280            UnsupportedLookAround => write!(
281                f,
282                "look-around, including look-ahead and look-behind, \
283                 is not supported"
284            ),
285            UnsupportedResharpRegex => write!(f, "this pattern is not supported"),
286            UnsupportedLazyQuantifier => {
287                write!(f, "lazy quantifiers are not supported")
288            }
289            ComplementGroupExpected => write!(f, "expected ( after ~ for complement group"),
290        }
291    }
292}
293
/// An abstract syntax tree for a singular expression along with comments
/// found.
///
/// Comments are not stored in the tree itself to avoid complexity. Each
/// comment contains a span of precisely where it occurred in the original
/// regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithComments {
    /// The actual ast. Comments are kept out of it and live in `comments`.
    pub ast: Ast,
    /// All comments found in the original regular expression, each carrying
    /// its own span.
    pub comments: Vec<Comment>,
}
307
/// A comment from a regular expression with an associated span.
///
/// A regular expression can only contain comments when the `x` flag
/// (`Flag::IgnoreWhitespace`) is enabled.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Comment {
    /// The span of this comment, including the beginning `#` and ending `\n`.
    pub span: Span,
    /// The comment text, starting with the first character following the `#`
    /// and ending with the last character preceding the `\n`. Neither
    /// delimiter is part of this string.
    pub comment: String,
}
320
/// An abstract syntax tree for a single regular expression.
///
/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
/// space proportional to the size of the `Ast`.
///
/// This type defines its own destructor that uses constant stack space and
/// heap space proportional to the size of the `Ast`.
///
/// NOTE(review): the two paragraphs above appear to be carried over from the
/// `regex_syntax` documentation. No `Display` or `Drop` implementation for
/// `Ast` is visible in this file — confirm they exist elsewhere before
/// relying on these guarantees.
///
/// Every variant stores its payload behind a `Box`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Ast {
    /// An empty regex that matches everything.
    Empty(Box<Span>),
    /// A set of flags, e.g., `(?is)`.
    Flags(Box<SetFlags>),
    /// A single character literal, which includes escape sequences.
    Literal(Box<Literal>),
    /// The "any character" class.
    Dot(Box<Span>),
    /// The "any character" class.
    ///
    /// NOTE(review): this description is identical to the one on `Dot` and
    /// does not say how `Top` differs from it. Presumably `Top` is the RE#
    /// wildcard that also matches line terminators — confirm against the
    /// parser.
    Top(Box<Span>),
    /// A single zero-width assertion.
    Assertion(Box<Assertion>),
    /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
    ClassUnicode(Box<ClassUnicode>),
    /// A single perl character class, e.g., `\d` or `\W`.
    ClassPerl(Box<ClassPerl>),
    /// A single bracketed character class set, which may contain zero or more
    /// character ranges and/or zero or more nested classes. e.g.,
    /// `[a-zA-Z\pL]`.
    ClassBracketed(Box<ClassBracketed>),
    /// A repetition operator applied to an arbitrary regular expression.
    Repetition(Box<Repetition>),
    /// A grouped regular expression.
    Group(Box<Group>),
    /// An alternation of regular expressions.
    Alternation(Box<Alternation>),
    /// A concatenation of regular expressions.
    Concat(Box<Concat>),
    /// An intersection of regular expressions.
    Intersection(Box<Intersection>),
    /// The complement of a regular expression. `Ast::group` produces this
    /// variant for groups whose kind is `GroupKind::Complement`.
    Complement(Box<Complement>),
    /// A look-around around a regular expression. `Ast::group` produces this
    /// variant for groups whose kind is `GroupKind::Lookaround`.
    Lookaround(Box<Lookaround>),
}
362
363impl Ast {
364    /// Create an "empty" AST item.
365    pub fn empty(span: Span) -> Ast {
366        Ast::Empty(Box::new(span))
367    }
368
369    /// Create a "flags" AST item.
370    pub fn flags(e: SetFlags) -> Ast {
371        Ast::Flags(Box::new(e))
372    }
373
374    /// Create a "literal" AST item.
375    pub fn literal(e: Literal) -> Ast {
376        Ast::Literal(Box::new(e))
377    }
378
379    /// Create a "dot" AST item.
380    pub fn dot(span: Span) -> Ast {
381        Ast::Dot(Box::new(span))
382    }
383
384    pub fn top(span: Span) -> Ast {
385        Ast::Top(Box::new(span))
386    }
387
388    /// Create a "assertion" AST item.
389    pub fn assertion(e: Assertion) -> Ast {
390        Ast::Assertion(Box::new(e))
391    }
392
393    /// Create a "Unicode class" AST item.
394    pub fn class_unicode(e: ClassUnicode) -> Ast {
395        Ast::ClassUnicode(Box::new(e))
396    }
397
398    /// Create a "Perl class" AST item.
399    pub fn class_perl(e: ClassPerl) -> Ast {
400        Ast::ClassPerl(Box::new(e))
401    }
402
403    /// Create a "bracketed class" AST item.
404    pub fn class_bracketed(e: ClassBracketed) -> Ast {
405        Ast::ClassBracketed(Box::new(e))
406    }
407
408    /// Create a "repetition" AST item.
409    pub fn repetition(e: Repetition) -> Ast {
410        Ast::Repetition(Box::new(e))
411    }
412
413    /// Create a "group" AST item.
414    pub fn group(e: Group) -> Ast {
415        match &e.kind {
416            GroupKind::CaptureIndex(_) => Ast::Group(Box::new(e)),
417            GroupKind::CaptureName {
418                starts_with_p: _,
419                name: _,
420            } => Ast::Group(Box::new(e)),
421            GroupKind::NonCapturing(_flags) => Ast::Group(Box::new(e)),
422            GroupKind::Lookaround(kind) => {
423                let look = Lookaround {
424                    kind: kind.clone(),
425                    span: e.span,
426                    ast: e.ast,
427                };
428                Ast::lookaround(look)
429            }
430            GroupKind::Complement => {
431                let g = Complement {
432                    span: e.span,
433                    ast: e.ast,
434                };
435                Ast::complement(g)
436            }
437        }
438    }
439
440    /// Create a "alternation" AST item.
441    pub fn alternation(e: Alternation) -> Ast {
442        Ast::Alternation(Box::new(e))
443    }
444
445    /// Create a "concat" AST item.
446    pub fn concat(e: Concat) -> Ast {
447        Ast::Concat(Box::new(e))
448    }
449
450    /// Return the span of this abstract syntax tree.
451    pub fn span(&self) -> &Span {
452        match *self {
453            Ast::Empty(ref span) => span,
454            Ast::Flags(ref x) => &x.span,
455            Ast::Literal(ref x) => &x.span,
456            Ast::Dot(ref span) => span,
457            Ast::Top(ref span) => span,
458            Ast::Assertion(ref x) => &x.span,
459            Ast::ClassUnicode(ref x) => &x.span,
460            Ast::ClassPerl(ref x) => &x.span,
461            Ast::ClassBracketed(ref x) => &x.span,
462            Ast::Repetition(ref x) => &x.span,
463            Ast::Group(ref x) => &x.span,
464            Ast::Alternation(ref x) => &x.span,
465            Ast::Concat(ref x) => &x.span,
466            Ast::Intersection(ref x) => &x.span,
467            Ast::Complement(ref x) => &x.span,
468            Ast::Lookaround(ref x) => &x.span,
469        }
470    }
471
472    /// Return true if and only if this Ast is empty.
473    pub fn is_empty(&self) -> bool {
474        matches!(*self, Ast::Empty(_))
475    }
476
477    /// Returns true if and only if this AST has any (including possibly empty)
478    /// subexpressions.
479    fn has_subexprs(&self) -> bool {
480        match *self {
481            Ast::Empty(_)
482            | Ast::Flags(_)
483            | Ast::Literal(_)
484            | Ast::Dot(_)
485            | Ast::Top(_)
486            | Ast::Assertion(_)
487            | Ast::ClassUnicode(_)
488            | Ast::ClassPerl(_) => false,
489            Ast::ClassBracketed(_)
490            | Ast::Repetition(_)
491            | Ast::Group(_)
492            | Ast::Alternation(_)
493            | Ast::Intersection(_)
494            | Ast::Lookaround(_)
495            | Ast::Complement(_)
496            | Ast::Concat(_) => true,
497        }
498    }
499}
500
501/// An alternation of regular expressions.
502#[derive(Clone, Debug, Eq, PartialEq)]
503pub struct Alternation {
504    /// The span of this alternation.
505    pub span: Span,
506    /// The alternate regular expressions.
507    pub asts: Vec<Ast>,
508}
509
510impl Alternation {
511    /// Return this alternation as an AST.
512    ///
513    /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
514    /// If this alternation contains exactly 1 AST, then the corresponding AST
515    /// is returned. Otherwise, `Ast::alternation` is returned.
516    pub fn into_ast(mut self) -> Ast {
517        match self.asts.len() {
518            0 => Ast::empty(self.span),
519            1 => self.asts.pop().unwrap(),
520            _ => Ast::alternation(self),
521        }
522    }
523}
524
525/// A concatenation of regular expressions.
526#[derive(Clone, Debug, Eq, PartialEq)]
527pub struct Concat {
528    /// The span of this concatenation.
529    pub span: Span,
530    /// The concatenation regular expressions.
531    pub asts: Vec<Ast>,
532}
533
534impl Concat {
535    /// Return this concatenation as an AST.
536    ///
537    /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
538    /// If this alternation contains exactly 1 AST, then the corresponding AST
539    /// is returned. Otherwise, `Ast::concat` is returned.
540    pub fn into_ast(mut self) -> Ast {
541        match self.asts.len() {
542            0 => Ast::empty(self.span),
543            1 => self.asts.pop().unwrap(),
544            _ => Ast::concat(self),
545        }
546    }
547}
548
/// A single zero-width assertion.
///
/// Stored in the tree as `Ast::Assertion`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Assertion {
    /// The span of this assertion.
    pub span: Span,
    /// The assertion kind, e.g., `\b` or `^`.
    pub kind: AssertionKind,
}
557
/// An assertion kind.
///
/// Each variant is documented with the pattern syntax it stands for.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AssertionKind {
    /// `^`
    StartLine,
    /// `$`
    EndLine,
    /// `\A`
    StartText,
    /// `\z`
    EndText,
    /// `\b`
    WordBoundary,
    /// `\B`
    NotWordBoundary,
    /// `\b{start}`
    WordBoundaryStart,
    /// `\b{end}`
    WordBoundaryEnd,
    /// `\<` (alias for `\b{start}`)
    WordBoundaryStartAngle,
    /// `\>` (alias for `\b{end}`)
    WordBoundaryEndAngle,
    /// `\b{start-half}`
    WordBoundaryStartHalf,
    /// `\b{end-half}`
    WordBoundaryEndHalf,
}
586
/// A repetition operation applied to a regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repetition {
    /// The span of this operation.
    pub span: Span,
    /// The actual operation.
    pub op: RepetitionOp,
    /// Whether this operation was applied greedily or not.
    ///
    /// NOTE(review): `ErrorKind::UnsupportedLazyQuantifier` states that lazy
    /// quantifiers are not supported, so this is presumably always `true`
    /// for trees produced by the parser — confirm.
    pub greedy: bool,
    /// The regular expression under repetition.
    pub ast: Box<Ast>,
}
599
/// The repetition operator itself.
///
/// This is the operator only; the expression it applies to is held by the
/// enclosing `Repetition`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RepetitionOp {
    /// The span of this operator. This includes things like `+`, `*?` and
    /// `{m,n}`.
    pub span: Span,
    /// The type of operation.
    pub kind: RepetitionKind,
}
609
/// The kind of a repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionKind {
    /// `?`
    ZeroOrOne,
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// A counted repetition: `{m}`, `{m,}` or `{m,n}`, as described by the
    /// contained `RepetitionRange`.
    Range(RepetitionRange),
}
622
/// A range repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionRange {
    /// `{m}`
    Exactly(u32),
    /// `{m,}`
    AtLeast(u32),
    /// `{m,n}`
    Bounded(u32, u32),
}

impl RepetitionRange {
    /// Whether this range is well formed.
    ///
    /// `Exactly` and `AtLeast` are always valid. `Bounded(start, end)` is
    /// valid only when `start <= end`.
    pub fn is_valid(&self) -> bool {
        match *self {
            RepetitionRange::Bounded(start, end) => start <= end,
            RepetitionRange::Exactly(_) | RepetitionRange::AtLeast(_) => true,
        }
    }
}
643
644/// A grouped regular expression.
645///
646/// This includes both capturing and non-capturing groups. This does **not**
647/// include flag-only groups like `(?is)`, but does contain any group that
648/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
649/// `(?is:a)`.
650#[derive(Clone, Debug, Eq, PartialEq)]
651pub struct Group {
652    /// The span of this group.
653    pub span: Span,
654    /// The kind of this group.
655    pub kind: GroupKind,
656    /// The regular expression in this group.
657    pub ast: Box<Ast>,
658}
659
660impl Group {
661    /// If this group is non-capturing, then this returns the (possibly empty)
662    /// set of flags. Otherwise, `None` is returned.
663    pub fn flags(&self) -> Option<&Flags> {
664        match self.kind {
665            GroupKind::NonCapturing(ref flags) => Some(flags),
666            _ => None,
667        }
668    }
669
670    /// Returns true if and only if this group is capturing.
671    pub fn is_capturing(&self) -> bool {
672        match self.kind {
673            GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true,
674            GroupKind::NonCapturing(_) => false,
675            GroupKind::Lookaround(_) => false,
676            GroupKind::Complement => false,
677        }
678    }
679
680    /// Returns the capture index of this group, if this is a capturing group.
681    ///
682    /// This returns a capture index precisely when `is_capturing` is `true`.
683    pub fn capture_index(&self) -> Option<u32> {
684        match self.kind {
685            GroupKind::CaptureIndex(i) => Some(i),
686            GroupKind::CaptureName { ref name, .. } => Some(name.index),
687            GroupKind::NonCapturing(_) => None,
688            GroupKind::Lookaround(_) => None,
689            GroupKind::Complement => None,
690        }
691    }
692}
693
/// The kind of a group.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GroupKind {
    /// `(a)`
    CaptureIndex(u32),
    /// `(?<name>a)` or `(?P<name>a)`
    CaptureName {
        /// True if the `?P<` syntax is used and false if the `?<` syntax is used.
        starts_with_p: bool,
        /// The capture name.
        name: CaptureName,
    },
    /// `(?:a)` and `(?i:a)`
    NonCapturing(Flags),
    /// A look-around group of the given kind. `Ast::group` turns a group of
    /// this kind into `Ast::Lookaround` rather than `Ast::Group`.
    Lookaround(LookaroundKind),
    /// A complement group. `Ast::group` turns a group of this kind into
    /// `Ast::Complement` rather than `Ast::Group`.
    ///
    /// NOTE(review): the `ErrorKind::ComplementGroupExpected` message
    /// ("expected ( after ~ for complement group") suggests the surface
    /// syntax is `~(a)` — confirm against the parser.
    Complement,
}
711
/// A capture name.
///
/// This corresponds to the name itself between the angle brackets in, e.g.,
/// `(?P<foo>expr)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CaptureName {
    /// The span of this capture name.
    pub span: Span,
    /// The capture name.
    pub name: String,
    /// The capture index. `Group::capture_index` returns this value for
    /// named groups.
    pub index: u32,
}
725
/// A group of flags that is not applied to a particular regular expression.
///
/// Stored in the tree as `Ast::Flags`, e.g., for `(?is)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SetFlags {
    /// The span of these flags, including the grouping parentheses.
    pub span: Span,
    /// The actual sequence of flags.
    pub flags: Flags,
}
734
735/// A group of flags.
736///
737/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
738#[derive(Clone, Debug, Eq, PartialEq)]
739pub struct Flags {
740    /// The span of this group of flags.
741    pub span: Span,
742    /// A sequence of flag items. Each item is either a flag or a negation
743    /// operator.
744    pub items: Vec<FlagsItem>,
745}
746
747impl Flags {
748    /// Add the given item to this sequence of flags.
749    ///
750    /// If the item was added successfully, then `None` is returned. If the
751    /// given item is a duplicate, then `Some(i)` is returned, where
752    /// `items[i].kind == item.kind`.
753    pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
754        for (i, x) in self.items.iter().enumerate() {
755            if x.kind == item.kind {
756                return Some(i);
757            }
758        }
759        self.items.push(item);
760        None
761    }
762
763    /// Returns the state of the given flag in this set.
764    ///
765    /// If the given flag is in the set but is negated, then `Some(false)` is
766    /// returned.
767    ///
768    /// If the given flag is in the set and is not negated, then `Some(true)`
769    /// is returned.
770    ///
771    /// Otherwise, `None` is returned.
772    pub fn flag_state(&self, flag: Flag) -> Option<bool> {
773        let mut negated = false;
774        for x in &self.items {
775            match x.kind {
776                FlagsItemKind::Negation => {
777                    negated = true;
778                }
779                FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
780                    return Some(!negated);
781                }
782                _ => {}
783            }
784        }
785        None
786    }
787}
788
/// A single item in a group of flags.
///
/// `Flags::add_item` treats two items as duplicates when their `kind`s are
/// equal; the span is not part of that comparison.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FlagsItem {
    /// The span of this item.
    pub span: Span,
    /// The kind of this item.
    pub kind: FlagsItemKind,
}
797
798/// The kind of an item in a group of flags.
799#[derive(Clone, Debug, Eq, PartialEq)]
800pub enum FlagsItemKind {
801    /// A negation operator applied to all subsequent flags in the enclosing
802    /// group.
803    Negation,
804    /// A single flag in a group.
805    Flag(Flag),
806}
807
808impl FlagsItemKind {
809    /// Returns true if and only if this item is a negation operator.
810    pub fn is_negation(&self) -> bool {
811        matches!(*self, FlagsItemKind::Negation)
812    }
813}
814
/// A single flag.
///
/// Each variant is documented with the letter that spells it inside a flag
/// sequence such as `is-u`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flag {
    /// `i`
    CaseInsensitive,
    /// `m`
    MultiLine,
    /// `s`
    DotMatchesNewLine,
    /// `U`
    SwapGreed,
    /// `u`
    Unicode,
    /// `R`
    CRLF,
    /// `x`
    IgnoreWhitespace,
}
833
834// START RE#
835
836impl Ast {
837    pub fn intersection(e: Intersection) -> Ast {
838        Ast::Intersection(Box::new(e))
839    }
840    pub fn complement(e: Complement) -> Ast {
841        Ast::Complement(Box::new(e))
842    }
843    pub fn lookaround(e: Lookaround) -> Ast {
844        Ast::Lookaround(Box::new(e))
845    }
846}
847
848/// An alternation of regular expressions.
849#[derive(Clone, Debug, Eq, PartialEq)]
850pub struct Intersection {
851    /// The span of this alternation.
852    pub span: Span,
853    /// The alternate regular expressions.
854    pub asts: Vec<Ast>,
855}
856
857impl Intersection {
858    pub fn into_ast(mut self) -> Ast {
859        match self.asts.len() {
860            0 => Ast::empty(self.span),
861            1 => self.asts.pop().unwrap(),
862            _ => Ast::intersection(self),
863        }
864    }
865}
866
867#[derive(Clone, Debug, Eq, PartialEq)]
868pub struct Complement {
869    pub span: Span,
870    pub ast: Box<Ast>,
871}
872
873impl Complement {
874    pub fn into_ast(self) -> Ast {
875        Ast::complement(self)
876    }
877}
878
/// The kind of a look-around.
///
/// NOTE(review): the pattern syntax for each variant is not shown in this
/// file. Presumably these correspond, in order, to the `(?=re)`, `(?!re)`,
/// `(?<=re)` and `(?<!re)` forms listed on `ErrorKind::UnsupportedLookAround`
/// — confirm against the parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LookaroundKind {
    /// A positive look-ahead.
    PositiveLookahead,
    /// A negative look-ahead.
    NegativeLookahead,
    /// A positive look-behind.
    PositiveLookbehind,
    /// A negative look-behind.
    NegativeLookbehind,
}
886
887#[derive(Clone, Debug, Eq, PartialEq)]
888pub struct Lookaround {
889    pub kind: LookaroundKind,
890    pub span: Span,
891    pub ast: Box<Ast>,
892}
893
894impl Lookaround {
895    pub fn into_ast(self) -> Ast {
896        Ast::lookaround(self)
897    }
898}