Skip to main content

resharp_parser/
ast.rs

1#![allow(dead_code)]
2/*!
3RE# AST based on the regex_syntax crate.
4*/
5
6use regex_syntax::ast::{ClassBracketed, ClassPerl, ClassUnicode, Literal, Span};
7
8#[derive(Clone, Debug, Eq, PartialEq)]
9pub struct Error {
10    /// The kind of error.
11    kind: ErrorKind,
12    /// The original pattern that the parser generated the error from. Every
13    /// span in an error is a valid range into this string.
14    pattern: String,
15    /// The span of this error.
16    span: Span,
17}
18
19impl Error {
20    /// Return the type of this error.
21    pub fn kind(&self) -> &ErrorKind {
22        &self.kind
23    }
24
25    /// The original pattern string in which this error occurred.
26    ///
27    /// Every span reported by this error is reported in terms of this string.
28    pub fn pattern(&self) -> &str {
29        &self.pattern
30    }
31
32    /// Return the span at which this error occurred.
33    pub fn span(&self) -> &Span {
34        &self.span
35    }
36
37    /// Return an auxiliary span. This span exists only for some errors that
38    /// benefit from being able to point to two locations in the original
39    /// regular expression. For example, "duplicate" errors will have the
40    /// main error position set to the duplicate occurrence while its
41    /// auxiliary span will be set to the initial occurrence.
42    pub fn auxiliary_span(&self) -> Option<&Span> {
43        use self::ErrorKind::*;
44        match self.kind {
45            FlagDuplicate { ref original } => Some(original),
46            FlagRepeatedNegation { ref original, .. } => Some(original),
47            GroupNameDuplicate { ref original, .. } => Some(original),
48            _ => None,
49        }
50    }
51}
52
/// The type of an error that occurred while building an AST.
///
/// This error type is marked as `non_exhaustive`. This means that adding a
/// new variant is not considered a breaking change.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// The capturing group limit was exceeded.
    ///
    /// Note that this represents a limit on the total number of capturing
    /// groups in a regex and not necessarily the number of nested capturing
    /// groups. That is, the nest limit can be low and it is still possible for
    /// this error to occur.
    CaptureLimitExceeded,
    /// An invalid escape sequence was found in a character class set.
    ClassEscapeInvalid,
    /// An invalid character class range was found. An invalid range is any
    /// range where the start is greater than the end.
    ClassRangeInvalid,
    /// An invalid range boundary was found in a character class. Range
    /// boundaries must be a single literal codepoint, but this error indicates
    /// that something else was found, such as a nested class.
    ClassRangeLiteral,
    /// An opening `[` was found with no corresponding closing `]`.
    ClassUnclosed,
    /// Note that this error variant is no longer used. Namely, a decimal
    /// number can only appear as a repetition quantifier. When the number
    /// in a repetition quantifier is empty, then it gets its own specialized
    /// error, `RepetitionCountDecimalEmpty`.
    DecimalEmpty,
    /// An invalid decimal number was given where one was expected.
    DecimalInvalid,
    /// A bracketed hex literal was empty.
    EscapeHexEmpty,
    /// A bracketed hex literal did not correspond to a Unicode scalar value.
    EscapeHexInvalid,
    /// An invalid hexadecimal digit was found.
    EscapeHexInvalidDigit,
    /// EOF was found before an escape sequence was completed.
    EscapeUnexpectedEof,
    /// An unrecognized escape sequence.
    EscapeUnrecognized,
    /// A dangling negation was used when setting flags, e.g., `i-`.
    FlagDanglingNegation,
    /// A flag was used twice, e.g., `i-i`.
    FlagDuplicate {
        /// The position of the original flag. The error position
        /// points to the duplicate flag.
        original: Span,
    },
    /// The negation operator was used twice, e.g., `-i-s`.
    FlagRepeatedNegation {
        /// The position of the original negation operator. The error position
        /// points to the duplicate negation operator.
        original: Span,
    },
    /// Expected a flag but got EOF, e.g., `(?`.
    FlagUnexpectedEof,
    /// Unrecognized flag, e.g., `a`.
    FlagUnrecognized,
    /// A duplicate capture name was found.
    GroupNameDuplicate {
        /// The position of the initial occurrence of the capture name. The
        /// error position itself points to the duplicate occurrence.
        original: Span,
    },
    /// A capture group name is empty, e.g., `(?P<>abc)`.
    GroupNameEmpty,
    /// An invalid character was seen for a capture group name. This includes
    /// errors where the first character is a digit (even though subsequent
    /// characters are allowed to be digits).
    GroupNameInvalid,
    /// A closing `>` could not be found for a capture group name.
    GroupNameUnexpectedEof,
    /// An unclosed group, e.g., `(ab`.
    ///
    /// The span of this error corresponds to the unclosed parenthesis.
    GroupUnclosed,
    /// An unopened group, e.g., `ab)`.
    GroupUnopened,
    /// The nest limit was exceeded. The limit stored here is the limit
    /// configured in the parser.
    NestLimitExceeded(u32),
    /// The range provided in a counted repetition operator is invalid. The
    /// range is invalid if the start is greater than the end.
    RepetitionCountInvalid,
    /// An opening `{` was not followed by a valid decimal value.
    /// For example, `x{}` or `x{]}` would fail.
    RepetitionCountDecimalEmpty,
    /// An opening `{` was found with no corresponding closing `}`.
    RepetitionCountUnclosed,
    /// A repetition operator was applied to a missing sub-expression. This
    /// occurs, for example, in the regex consisting of just a `*` or even
    /// `(?i)*`. It is, however, possible to create a repetition operating on
    /// an empty sub-expression. For example, `()*` is still considered valid.
    RepetitionMissing,
    /// The special word boundary syntax, `\b{something}`, was used, but
    /// either EOF without `}` was seen, or an invalid character in the
    /// braces was seen.
    SpecialWordBoundaryUnclosed,
    /// The special word boundary syntax, `\b{something}`, was used, but
    /// `something` was not recognized as a valid word boundary kind.
    SpecialWordBoundaryUnrecognized,
    /// The syntax `\b{` was observed, but afterwards the end of the pattern
    /// was observed without being able to tell whether it was meant to be a
    /// bounded repetition on the `\b` or the beginning of a special word
    /// boundary assertion.
    SpecialWordOrRepetitionUnexpectedEof,
    /// The Unicode class is not valid. This typically occurs when a `\p` is
    /// followed by something other than a `{`.
    UnicodeClassInvalid,
    /// When octal support is disabled, this error is produced when an octal
    /// escape is used. The octal escape is assumed to be an invocation of
    /// a backreference, which is the common case.
    UnsupportedBackreference,
    /// When syntax similar to PCRE's look-around is used, this error is
    /// returned. Some example syntaxes that are rejected include, but are
    /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
    /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
    /// error is used to improve the user experience.
    ///
    /// NOTE(review): this module also defines `Ast::Lookaround` and
    /// `GroupKind::Lookaround`, so the description above may be stale for
    /// RE#. Confirm whether the parser still emits this variant.
    UnsupportedLookAround,
    /// Unsupported RE# regex construct.
    UnsupportedResharpRegex,
    /// Lazy quantifiers (e.g., `*?`, `+?`, `??`, `{n,m}?`) are not supported.
    UnsupportedLazyQuantifier,
    /// A `~` was found that was not followed by `(`; a complement group is
    /// expected to open with a parenthesis after the `~`.
    ComplementGroupExpected,
}
180
181impl core::fmt::Display for ErrorKind {
182    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
183        use self::ErrorKind::*;
184        match *self {
185            CaptureLimitExceeded => write!(
186                f,
187                "exceeded the maximum number of \
188                 capturing groups ({})",
189                u32::MAX
190            ),
191            ClassEscapeInvalid => {
192                write!(f, "invalid escape sequence found in character class")
193            }
194            ClassRangeInvalid => write!(
195                f,
196                "invalid character class range, \
197                 the start must be <= the end"
198            ),
199            ClassRangeLiteral => {
200                write!(f, "invalid range boundary, must be a literal")
201            }
202            ClassUnclosed => write!(f, "unclosed character class"),
203            DecimalEmpty => write!(f, "decimal literal empty"),
204            DecimalInvalid => write!(f, "decimal literal invalid"),
205            EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
206            EscapeHexInvalid => {
207                write!(f, "hexadecimal literal is not a Unicode scalar value")
208            }
209            EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
210            EscapeUnexpectedEof => write!(
211                f,
212                "incomplete escape sequence, \
213                 reached end of pattern prematurely"
214            ),
215            EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
216            FlagDanglingNegation => {
217                write!(f, "dangling flag negation operator")
218            }
219            FlagDuplicate { .. } => write!(f, "duplicate flag"),
220            FlagRepeatedNegation { .. } => {
221                write!(f, "flag negation operator repeated")
222            }
223            FlagUnexpectedEof => {
224                write!(f, "expected flag but got end of regex")
225            }
226            FlagUnrecognized => write!(f, "unrecognized flag"),
227            GroupNameDuplicate { .. } => {
228                write!(f, "duplicate capture group name")
229            }
230            GroupNameEmpty => write!(f, "empty capture group name"),
231            GroupNameInvalid => write!(f, "invalid capture group character"),
232            GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
233            GroupUnclosed => write!(f, "unclosed group"),
234            GroupUnopened => write!(f, "unopened group"),
235            NestLimitExceeded(limit) => write!(
236                f,
237                "exceed the maximum number of \
238                 nested parentheses/brackets ({})",
239                limit
240            ),
241            RepetitionCountInvalid => write!(
242                f,
243                "invalid repetition count range, \
244                 the start must be <= the end"
245            ),
246            RepetitionCountDecimalEmpty => {
247                write!(f, "repetition quantifier expects a valid decimal")
248            }
249            RepetitionCountUnclosed => {
250                write!(f, "unclosed counted repetition")
251            }
252            RepetitionMissing => {
253                write!(f, "repetition operator missing expression")
254            }
255            SpecialWordBoundaryUnclosed => {
256                write!(
257                    f,
258                    "special word boundary assertion is either \
259                     unclosed or contains an invalid character",
260                )
261            }
262            SpecialWordBoundaryUnrecognized => {
263                write!(
264                    f,
265                    "unrecognized special word boundary assertion, \
266                     valid choices are: start, end, start-half \
267                     or end-half",
268                )
269            }
270            SpecialWordOrRepetitionUnexpectedEof => {
271                write!(
272                    f,
273                    "found either the beginning of a special word \
274                     boundary or a bounded repetition on a \\b with \
275                     an opening brace, but no closing brace",
276                )
277            }
278            UnicodeClassInvalid => {
279                write!(f, "invalid Unicode character class")
280            }
281            UnsupportedBackreference => {
282                write!(f, "backreferences are not supported")
283            }
284            UnsupportedLookAround => write!(
285                f,
286                "look-around, including look-ahead and look-behind, \
287                 is not supported"
288            ),
289            UnsupportedResharpRegex => write!(f, "this pattern is not supported"),
290            UnsupportedLazyQuantifier => {
291                write!(f, "lazy quantifiers are not supported")
292            }
293            ComplementGroupExpected => write!(f, "expected ( after ~ for complement group"),
294        }
295    }
296}
297
/// An abstract syntax tree for a single expression, bundled with the
/// comments found in its pattern.
///
/// Comments are not stored in the tree itself to avoid complexity. Each
/// comment contains a span of precisely where it occurred in the original
/// regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithComments {
    /// The actual ast.
    pub ast: Ast,
    /// All comments found in the original regular expression.
    pub comments: Vec<Comment>,
}
311
/// A comment from a regular expression with an associated span.
///
/// A regular expression can only contain comments when the `x` flag
/// (`Flag::IgnoreWhitespace`) is enabled.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Comment {
    /// The span of this comment, including the beginning `#` and ending `\n`.
    pub span: Span,
    /// The comment text, starting with the first character following the `#`
    /// and ending with the last character preceding the `\n`.
    pub comment: String,
}
324
/// An abstract syntax tree for a single regular expression.
///
/// Every variant boxes its payload. In addition to the node kinds taken from
/// `regex_syntax`, this enum carries the RE# nodes `Top`, `Intersection`,
/// `Complement` and `Lookaround`.
///
/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
/// space proportional to the size of the `Ast`.
///
/// This type defines its own destructor that uses constant stack space and
/// heap space proportional to the size of the `Ast`.
///
/// NOTE(review): neither a `Display` nor a `Drop` implementation for `Ast`
/// is visible in this file; the two paragraphs above appear to be inherited
/// from `regex_syntax`. Confirm they still hold for this type.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Ast {
    /// An empty regex that matches everything.
    Empty(Box<Span>),
    /// A set of flags, e.g., `(?is)`.
    Flags(Box<SetFlags>),
    /// A single character literal, which includes escape sequences.
    Literal(Box<Literal>),
    /// The "any character" class.
    Dot(Box<Span>),
    /// The RE# "top" node, carrying only its span.
    ///
    /// NOTE(review): presumably the any-character predicate that, unlike
    /// `Dot`, does not depend on flags. Confirm against the parser.
    Top(Box<Span>),
    /// A single zero-width assertion.
    Assertion(Box<Assertion>),
    /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
    ClassUnicode(Box<ClassUnicode>),
    /// A single perl character class, e.g., `\d` or `\W`.
    ClassPerl(Box<ClassPerl>),
    /// A single bracketed character class set, which may contain zero or more
    /// character ranges and/or zero or more nested classes. e.g.,
    /// `[a-zA-Z\pL]`.
    ClassBracketed(Box<ClassBracketed>),
    /// A repetition operator applied to an arbitrary regular expression.
    Repetition(Box<Repetition>),
    /// A grouped regular expression.
    Group(Box<Group>),
    /// An alternation of regular expressions.
    Alternation(Box<Alternation>),
    /// A concatenation of regular expressions.
    Concat(Box<Concat>),
    /// An intersection of regular expressions (RE# extension).
    Intersection(Box<Intersection>),
    /// A complement group wrapping a single sub-expression (RE# extension).
    Complement(Box<Complement>),
    /// A look-ahead or look-behind wrapping a single sub-expression; the
    /// direction and polarity are given by `LookaroundKind`.
    Lookaround(Box<Lookaround>),
}
366
367impl Ast {
368    /// Create an "empty" AST item.
369    pub fn empty(span: Span) -> Ast {
370        Ast::Empty(Box::new(span))
371    }
372
373    /// Create a "flags" AST item.
374    pub fn flags(e: SetFlags) -> Ast {
375        Ast::Flags(Box::new(e))
376    }
377
378    /// Create a "literal" AST item.
379    pub fn literal(e: Literal) -> Ast {
380        Ast::Literal(Box::new(e))
381    }
382
383    /// Create a "dot" AST item.
384    pub fn dot(span: Span) -> Ast {
385        Ast::Dot(Box::new(span))
386    }
387
388    pub fn top(span: Span) -> Ast {
389        Ast::Top(Box::new(span))
390    }
391
392    /// Create a "assertion" AST item.
393    pub fn assertion(e: Assertion) -> Ast {
394        Ast::Assertion(Box::new(e))
395    }
396
397    /// Create a "Unicode class" AST item.
398    pub fn class_unicode(e: ClassUnicode) -> Ast {
399        Ast::ClassUnicode(Box::new(e))
400    }
401
402    /// Create a "Perl class" AST item.
403    pub fn class_perl(e: ClassPerl) -> Ast {
404        Ast::ClassPerl(Box::new(e))
405    }
406
407    /// Create a "bracketed class" AST item.
408    pub fn class_bracketed(e: ClassBracketed) -> Ast {
409        Ast::ClassBracketed(Box::new(e))
410    }
411
412    /// Create a "repetition" AST item.
413    pub fn repetition(e: Repetition) -> Ast {
414        Ast::Repetition(Box::new(e))
415    }
416
417    /// Create a "group" AST item.
418    pub fn group(e: Group) -> Ast {
419        match &e.kind {
420            GroupKind::CaptureIndex(_) => Ast::Group(Box::new(e)),
421            GroupKind::CaptureName {
422                starts_with_p: _,
423                name: _,
424            } => Ast::Group(Box::new(e)),
425            GroupKind::NonCapturing(_flags) => Ast::Group(Box::new(e)),
426            GroupKind::Lookaround(kind) => {
427                let look = Lookaround {
428                    kind: kind.clone(),
429                    span: e.span,
430                    ast: e.ast,
431                };
432                Ast::lookaround(look)
433            }
434            GroupKind::Complement => {
435                let g = Complement {
436                    span: e.span,
437                    ast: e.ast,
438                };
439                Ast::complement(g)
440            }
441        }
442    }
443
444    /// Create a "alternation" AST item.
445    pub fn alternation(e: Alternation) -> Ast {
446        Ast::Alternation(Box::new(e))
447    }
448
449    /// Create a "concat" AST item.
450    pub fn concat(e: Concat) -> Ast {
451        Ast::Concat(Box::new(e))
452    }
453
454    /// Return the span of this abstract syntax tree.
455    pub fn span(&self) -> &Span {
456        match *self {
457            Ast::Empty(ref span) => span,
458            Ast::Flags(ref x) => &x.span,
459            Ast::Literal(ref x) => &x.span,
460            Ast::Dot(ref span) => span,
461            Ast::Top(ref span) => span,
462            Ast::Assertion(ref x) => &x.span,
463            Ast::ClassUnicode(ref x) => &x.span,
464            Ast::ClassPerl(ref x) => &x.span,
465            Ast::ClassBracketed(ref x) => &x.span,
466            Ast::Repetition(ref x) => &x.span,
467            Ast::Group(ref x) => &x.span,
468            Ast::Alternation(ref x) => &x.span,
469            Ast::Concat(ref x) => &x.span,
470            Ast::Intersection(ref x) => &x.span,
471            Ast::Complement(ref x) => &x.span,
472            Ast::Lookaround(ref x) => &x.span,
473        }
474    }
475
476    /// Return true if and only if this Ast is empty.
477    pub fn is_empty(&self) -> bool {
478        matches!(*self, Ast::Empty(_))
479    }
480
481    /// Returns true if and only if this AST has any (including possibly empty)
482    /// subexpressions.
483    fn has_subexprs(&self) -> bool {
484        match *self {
485            Ast::Empty(_)
486            | Ast::Flags(_)
487            | Ast::Literal(_)
488            | Ast::Dot(_)
489            | Ast::Top(_)
490            | Ast::Assertion(_)
491            | Ast::ClassUnicode(_)
492            | Ast::ClassPerl(_) => false,
493            Ast::ClassBracketed(_)
494            | Ast::Repetition(_)
495            | Ast::Group(_)
496            | Ast::Alternation(_)
497            | Ast::Intersection(_)
498            | Ast::Lookaround(_)
499            | Ast::Complement(_)
500            | Ast::Concat(_) => true,
501        }
502    }
503}
504
505/// An alternation of regular expressions.
506#[derive(Clone, Debug, Eq, PartialEq)]
507pub struct Alternation {
508    /// The span of this alternation.
509    pub span: Span,
510    /// The alternate regular expressions.
511    pub asts: Vec<Ast>,
512}
513
514impl Alternation {
515    /// Return this alternation as an AST.
516    ///
517    /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
518    /// If this alternation contains exactly 1 AST, then the corresponding AST
519    /// is returned. Otherwise, `Ast::alternation` is returned.
520    pub fn into_ast(mut self) -> Ast {
521        match self.asts.len() {
522            0 => Ast::empty(self.span),
523            1 => self.asts.pop().unwrap(),
524            _ => Ast::alternation(self),
525        }
526    }
527}
528
529/// A concatenation of regular expressions.
530#[derive(Clone, Debug, Eq, PartialEq)]
531pub struct Concat {
532    /// The span of this concatenation.
533    pub span: Span,
534    /// The concatenation regular expressions.
535    pub asts: Vec<Ast>,
536}
537
538impl Concat {
539    /// Return this concatenation as an AST.
540    ///
541    /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
542    /// If this alternation contains exactly 1 AST, then the corresponding AST
543    /// is returned. Otherwise, `Ast::concat` is returned.
544    pub fn into_ast(mut self) -> Ast {
545        match self.asts.len() {
546            0 => Ast::empty(self.span),
547            1 => self.asts.pop().unwrap(),
548            _ => Ast::concat(self),
549        }
550    }
551}
552
/// A single zero-width assertion.
///
/// This is the payload of `Ast::Assertion`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Assertion {
    /// The span of this assertion.
    pub span: Span,
    /// The assertion kind, e.g., `\b` or `^`.
    pub kind: AssertionKind,
}
561
/// An assertion kind.
///
/// Each variant's documentation shows the pattern syntax it corresponds to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AssertionKind {
    /// `^`
    StartLine,
    /// `$`
    EndLine,
    /// `\A`
    StartText,
    /// `\z`
    EndText,
    /// `\b`
    WordBoundary,
    /// `\B`
    NotWordBoundary,
    /// `\b{start}`
    WordBoundaryStart,
    /// `\b{end}`
    WordBoundaryEnd,
    /// `\<` (alias for `\b{start}`)
    WordBoundaryStartAngle,
    /// `\>` (alias for `\b{end}`)
    WordBoundaryEndAngle,
    /// `\b{start-half}`
    WordBoundaryStartHalf,
    /// `\b{end-half}`
    WordBoundaryEndHalf,
}
590
/// A repetition operation applied to a regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repetition {
    /// The span of this operation.
    pub span: Span,
    /// The actual operation.
    pub op: RepetitionOp,
    /// Whether this operation was applied greedily or not.
    ///
    /// NOTE(review): `ErrorKind::UnsupportedLazyQuantifier` exists, so this
    /// is presumably always `true` for patterns that parse successfully.
    /// Confirm against the parser.
    pub greedy: bool,
    /// The regular expression under repetition.
    pub ast: Box<Ast>,
}
603
/// The repetition operator itself, i.e. the operator without the expression
/// it applies to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RepetitionOp {
    /// The span of this operator. This includes things like `+`, `*?` and
    /// `{m,n}`.
    pub span: Span,
    /// The type of operation.
    pub kind: RepetitionKind,
}
613
/// The kind of a repetition operator.
///
/// Counted forms are carried by `Range`; see `RepetitionRange` for the
/// individual shapes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionKind {
    /// `?`
    ZeroOrOne,
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// `{m,n}`
    Range(RepetitionRange),
}
626
/// A range repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionRange {
    /// `{m}`
    Exactly(u32),
    /// `{m,}`
    AtLeast(u32),
    /// `{m,n}`
    Bounded(u32, u32),
}

impl RepetitionRange {
    /// Returns true if and only if this repetition range is valid.
    ///
    /// `Exactly` and `AtLeast` are always valid. A `Bounded` range is valid
    /// only when its start does not exceed its end.
    pub fn is_valid(&self) -> bool {
        match *self {
            RepetitionRange::Bounded(start, end) => start <= end,
            RepetitionRange::Exactly(_) | RepetitionRange::AtLeast(_) => true,
        }
    }
}
647
648/// A grouped regular expression.
649///
650/// This includes both capturing and non-capturing groups. This does **not**
651/// include flag-only groups like `(?is)`, but does contain any group that
652/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
653/// `(?is:a)`.
654#[derive(Clone, Debug, Eq, PartialEq)]
655pub struct Group {
656    /// The span of this group.
657    pub span: Span,
658    /// The kind of this group.
659    pub kind: GroupKind,
660    /// The regular expression in this group.
661    pub ast: Box<Ast>,
662}
663
664impl Group {
665    /// If this group is non-capturing, then this returns the (possibly empty)
666    /// set of flags. Otherwise, `None` is returned.
667    pub fn flags(&self) -> Option<&Flags> {
668        match self.kind {
669            GroupKind::NonCapturing(ref flags) => Some(flags),
670            _ => None,
671        }
672    }
673
674    /// Returns true if and only if this group is capturing.
675    pub fn is_capturing(&self) -> bool {
676        match self.kind {
677            GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true,
678            GroupKind::NonCapturing(_) => false,
679            GroupKind::Lookaround(_) => false,
680            GroupKind::Complement => false,
681        }
682    }
683
684    /// Returns the capture index of this group, if this is a capturing group.
685    ///
686    /// This returns a capture index precisely when `is_capturing` is `true`.
687    pub fn capture_index(&self) -> Option<u32> {
688        match self.kind {
689            GroupKind::CaptureIndex(i) => Some(i),
690            GroupKind::CaptureName { ref name, .. } => Some(name.index),
691            GroupKind::NonCapturing(_) => None,
692            GroupKind::Lookaround(_) => None,
693            GroupKind::Complement => None,
694        }
695    }
696}
697
/// The kind of a group.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GroupKind {
    /// `(a)`
    CaptureIndex(u32),
    /// `(?<name>a)` or `(?P<name>a)`
    CaptureName {
        /// True if the `?P<` syntax is used and false if the `?<` syntax is used.
        starts_with_p: bool,
        /// The capture name.
        name: CaptureName,
    },
    /// `(?:a)` and `(?i:a)`
    NonCapturing(Flags),
    /// A look-ahead or look-behind group of the given kind. `Ast::group`
    /// turns groups of this kind into `Ast::Lookaround` nodes.
    Lookaround(LookaroundKind),
    /// A complement group. `Ast::group` turns groups of this kind into
    /// `Ast::Complement` nodes.
    Complement,
}
715
/// A capture name.
///
/// This corresponds to the name itself between the angle brackets in, e.g.,
/// `(?P<foo>expr)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CaptureName {
    /// The span of this capture name.
    pub span: Span,
    /// The capture name.
    pub name: String,
    /// The capture index. This is the value `Group::capture_index` reports
    /// for a named group.
    pub index: u32,
}
729
/// A group of flags that is not applied to a particular regular expression,
/// e.g., `(?is)`.
///
/// This is the payload of `Ast::Flags`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SetFlags {
    /// The span of these flags, including the grouping parentheses.
    pub span: Span,
    /// The actual sequence of flags.
    pub flags: Flags,
}
738
739/// A group of flags.
740///
741/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
742#[derive(Clone, Debug, Eq, PartialEq)]
743pub struct Flags {
744    /// The span of this group of flags.
745    pub span: Span,
746    /// A sequence of flag items. Each item is either a flag or a negation
747    /// operator.
748    pub items: Vec<FlagsItem>,
749}
750
751impl Flags {
752    /// Add the given item to this sequence of flags.
753    ///
754    /// If the item was added successfully, then `None` is returned. If the
755    /// given item is a duplicate, then `Some(i)` is returned, where
756    /// `items[i].kind == item.kind`.
757    pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
758        for (i, x) in self.items.iter().enumerate() {
759            if x.kind == item.kind {
760                return Some(i);
761            }
762        }
763        self.items.push(item);
764        None
765    }
766
767    /// Returns the state of the given flag in this set.
768    ///
769    /// If the given flag is in the set but is negated, then `Some(false)` is
770    /// returned.
771    ///
772    /// If the given flag is in the set and is not negated, then `Some(true)`
773    /// is returned.
774    ///
775    /// Otherwise, `None` is returned.
776    pub fn flag_state(&self, flag: Flag) -> Option<bool> {
777        let mut negated = false;
778        for x in &self.items {
779            match x.kind {
780                FlagsItemKind::Negation => {
781                    negated = true;
782                }
783                FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
784                    return Some(!negated);
785                }
786                _ => {}
787            }
788        }
789        None
790    }
791}
792
/// A single item in a group of flags: either a flag or a negation operator,
/// together with where it appeared.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FlagsItem {
    /// The span of this item.
    pub span: Span,
    /// The kind of this item.
    pub kind: FlagsItemKind,
}
801
802/// The kind of an item in a group of flags.
803#[derive(Clone, Debug, Eq, PartialEq)]
804pub enum FlagsItemKind {
805    /// A negation operator applied to all subsequent flags in the enclosing
806    /// group.
807    Negation,
808    /// A single flag in a group.
809    Flag(Flag),
810}
811
812impl FlagsItemKind {
813    /// Returns true if and only if this item is a negation operator.
814    pub fn is_negation(&self) -> bool {
815        matches!(*self, FlagsItemKind::Negation)
816    }
817}
818
/// A single flag.
///
/// Each variant's documentation shows the character that denotes the flag
/// in a pattern.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flag {
    /// `i`
    CaseInsensitive,
    /// `m`
    MultiLine,
    /// `s`
    DotMatchesNewLine,
    /// `U`
    SwapGreed,
    /// `u`
    Unicode,
    /// `R`
    CRLF,
    /// `x`
    IgnoreWhitespace,
}
837
838// START RE#
839
840impl Ast {
841    pub fn intersection(e: Intersection) -> Ast {
842        Ast::Intersection(Box::new(e))
843    }
844    pub fn complement(e: Complement) -> Ast {
845        Ast::Complement(Box::new(e))
846    }
847    pub fn lookaround(e: Lookaround) -> Ast {
848        Ast::Lookaround(Box::new(e))
849    }
850}
851
852/// An alternation of regular expressions.
853#[derive(Clone, Debug, Eq, PartialEq)]
854pub struct Intersection {
855    /// The span of this alternation.
856    pub span: Span,
857    /// The alternate regular expressions.
858    pub asts: Vec<Ast>,
859}
860
861impl Intersection {
862    pub fn into_ast(mut self) -> Ast {
863        match self.asts.len() {
864            0 => Ast::empty(self.span),
865            1 => self.asts.pop().unwrap(),
866            _ => Ast::intersection(self),
867        }
868    }
869}
870
871#[derive(Clone, Debug, Eq, PartialEq)]
872pub struct Complement {
873    pub span: Span,
874    pub ast: Box<Ast>,
875}
876
877impl Complement {
878    pub fn into_ast(self) -> Ast {
879        Ast::complement(self)
880    }
881}
882
/// The direction and polarity of a look-around.
///
/// NOTE(review): the syntaxes below are the conventional PCRE-style
/// spellings; confirm that the parser maps them this way.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LookaroundKind {
    /// Positive look-ahead, conventionally `(?=re)`.
    PositiveLookahead,
    /// Negative look-ahead, conventionally `(?!re)`.
    NegativeLookahead,
    /// Positive look-behind, conventionally `(?<=re)`.
    PositiveLookbehind,
    /// Negative look-behind, conventionally `(?<!re)`.
    NegativeLookbehind,
}
890
891/// An alternation of regular expressions.
892#[derive(Clone, Debug, Eq, PartialEq)]
893pub struct Lookaround {
894    pub kind: LookaroundKind,
895    /// The span of this alternation.
896    pub span: Span,
897    /// The regular expression in this group.
898    pub ast: Box<Ast>,
899}
900
901impl Lookaround {
902    pub fn into_ast(self) -> Ast {
903        Ast::lookaround(self)
904    }
905}