resharp_parser/ast.rs
1#![allow(dead_code)]
2/*!
3RE# AST based on the regex_syntax crate.
4*/
5
6use regex_syntax::ast::{ClassBracketed, ClassPerl, ClassUnicode, Literal, Span};
7
8#[derive(Clone, Debug, Eq, PartialEq)]
9pub struct Error {
10 /// The kind of error.
11 kind: ErrorKind,
12 /// The original pattern that the parser generated the error from. Every
13 /// span in an error is a valid range into this string.
14 pattern: String,
15 /// The span of this error.
16 span: Span,
17}
18
19impl Error {
20 /// Return the type of this error.
21 pub fn kind(&self) -> &ErrorKind {
22 &self.kind
23 }
24
25 /// The original pattern string in which this error occurred.
26 ///
27 /// Every span reported by this error is reported in terms of this string.
28 pub fn pattern(&self) -> &str {
29 &self.pattern
30 }
31
32 /// Return the span at which this error occurred.
33 pub fn span(&self) -> &Span {
34 &self.span
35 }
36
37 /// Return an auxiliary span. This span exists only for some errors that
38 /// benefit from being able to point to two locations in the original
39 /// regular expression. For example, "duplicate" errors will have the
40 /// main error position set to the duplicate occurrence while its
41 /// auxiliary span will be set to the initial occurrence.
42 pub fn auxiliary_span(&self) -> Option<&Span> {
43 use self::ErrorKind::*;
44 match self.kind {
45 FlagDuplicate { ref original } => Some(original),
46 FlagRepeatedNegation { ref original, .. } => Some(original),
47 GroupNameDuplicate { ref original, .. } => Some(original),
48 _ => None,
49 }
50 }
51}
52
53/// The type of an error that occurred while building an AST.
54///
55/// This error type is marked as `non_exhaustive`. This means that adding a
56/// new variant is not considered a breaking change.
57#[non_exhaustive]
58#[derive(Clone, Debug, Eq, PartialEq)]
59pub enum ErrorKind {
60 /// The capturing group limit was exceeded.
61 ///
62 /// Note that this represents a limit on the total number of capturing
63 /// groups in a regex and not necessarily the number of nested capturing
64 /// groups. That is, the nest limit can be low and it is still possible for
65 /// this error to occur.
66 CaptureLimitExceeded,
67 /// An invalid escape sequence was found in a character class set.
68 ClassEscapeInvalid,
69 /// An invalid character class range was found. An invalid range is any
70 /// range where the start is greater than the end.
71 ClassRangeInvalid,
72 /// An invalid range boundary was found in a character class. Range
73 /// boundaries must be a single literal codepoint, but this error indicates
74 /// that something else was found, such as a nested class.
75 ClassRangeLiteral,
76 /// An opening `[` was found with no corresponding closing `]`.
77 ClassUnclosed,
78 /// Note that this error variant is no longer used. Namely, a decimal
79 /// number can only appear as a repetition quantifier. When the number
80 /// in a repetition quantifier is empty, then it gets its own specialized
81 /// error, `RepetitionCountDecimalEmpty`.
82 DecimalEmpty,
83 /// An invalid decimal number was given where one was expected.
84 DecimalInvalid,
85 /// A bracketed hex literal was empty.
86 EscapeHexEmpty,
87 /// A bracketed hex literal did not correspond to a Unicode scalar value.
88 EscapeHexInvalid,
89 /// An invalid hexadecimal digit was found.
90 EscapeHexInvalidDigit,
91 /// EOF was found before an escape sequence was completed.
92 EscapeUnexpectedEof,
93 /// An unrecognized escape sequence.
94 EscapeUnrecognized,
95 /// A dangling negation was used when setting flags, e.g., `i-`.
96 FlagDanglingNegation,
97 /// A flag was used twice, e.g., `i-i`.
98 FlagDuplicate {
99 /// The position of the original flag. The error position
100 /// points to the duplicate flag.
101 original: Span,
102 },
103 /// The negation operator was used twice, e.g., `-i-s`.
104 FlagRepeatedNegation {
105 /// The position of the original negation operator. The error position
106 /// points to the duplicate negation operator.
107 original: Span,
108 },
109 /// Expected a flag but got EOF, e.g., `(?`.
110 FlagUnexpectedEof,
111 /// Unrecognized flag, e.g., `a`.
112 FlagUnrecognized,
113 /// A duplicate capture name was found.
114 GroupNameDuplicate {
115 /// The position of the initial occurrence of the capture name. The
116 /// error position itself points to the duplicate occurrence.
117 original: Span,
118 },
119 /// A capture group name is empty, e.g., `(?P<>abc)`.
120 GroupNameEmpty,
121 /// An invalid character was seen for a capture group name. This includes
122 /// errors where the first character is a digit (even though subsequent
123 /// characters are allowed to be digits).
124 GroupNameInvalid,
125 /// A closing `>` could not be found for a capture group name.
126 GroupNameUnexpectedEof,
127 /// An unclosed group, e.g., `(ab`.
128 ///
129 /// The span of this error corresponds to the unclosed parenthesis.
130 GroupUnclosed,
131 /// An unopened group, e.g., `ab)`.
132 GroupUnopened,
133 /// The nest limit was exceeded. The limit stored here is the limit
134 /// configured in the parser.
135 NestLimitExceeded(u32),
136 /// The range provided in a counted repetition operator is invalid. The
137 /// range is invalid if the start is greater than the end.
138 RepetitionCountInvalid,
139 /// An opening `{` was not followed by a valid decimal value.
140 /// For example, `x{}` or `x{]}` would fail.
141 RepetitionCountDecimalEmpty,
142 /// An opening `{` was found with no corresponding closing `}`.
143 RepetitionCountUnclosed,
144 /// A repetition operator was applied to a missing sub-expression. This
145 /// occurs, for example, in the regex consisting of just a `*` or even
146 /// `(?i)*`. It is, however, possible to create a repetition operating on
147 /// an empty sub-expression. For example, `()*` is still considered valid.
148 RepetitionMissing,
149 /// The special word boundary syntax, `\b{something}`, was used, but
150 /// either EOF without `}` was seen, or an invalid character in the
151 /// braces was seen.
152 SpecialWordBoundaryUnclosed,
153 /// The special word boundary syntax, `\b{something}`, was used, but
154 /// `something` was not recognized as a valid word boundary kind.
155 SpecialWordBoundaryUnrecognized,
156 /// The syntax `\b{` was observed, but afterwards the end of the pattern
157 /// was observed without being able to tell whether it was meant to be a
158 /// bounded repetition on the `\b` or the beginning of a special word
159 /// boundary assertion.
160 SpecialWordOrRepetitionUnexpectedEof,
161 /// The Unicode class is not valid. This typically occurs when a `\p` is
162 /// followed by something other than a `{`.
163 UnicodeClassInvalid,
164 /// When octal support is disabled, this error is produced when an octal
165 /// escape is used. The octal escape is assumed to be an invocation of
166 /// a backreference, which is the common case.
167 UnsupportedBackreference,
168 /// When syntax similar to PCRE's look-around is used, this error is
169 /// returned. Some example syntaxes that are rejected include, but are
170 /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
171 /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
172 /// error is used to improve the user experience.
173 UnsupportedLookAround,
174 /// Unsupported RE# regex construct.
175 UnsupportedResharpRegex,
176 /// Lazy quantifiers (e.g., `*?`, `+?`, `??`, `{n,m}?`) are not supported.
177 UnsupportedLazyQuantifier,
178 ComplementGroupExpected,
179}
180
181impl core::fmt::Display for ErrorKind {
182 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
183 use self::ErrorKind::*;
184 match *self {
185 CaptureLimitExceeded => write!(
186 f,
187 "exceeded the maximum number of \
188 capturing groups ({})",
189 u32::MAX
190 ),
191 ClassEscapeInvalid => {
192 write!(f, "invalid escape sequence found in character class")
193 }
194 ClassRangeInvalid => write!(
195 f,
196 "invalid character class range, \
197 the start must be <= the end"
198 ),
199 ClassRangeLiteral => {
200 write!(f, "invalid range boundary, must be a literal")
201 }
202 ClassUnclosed => write!(f, "unclosed character class"),
203 DecimalEmpty => write!(f, "decimal literal empty"),
204 DecimalInvalid => write!(f, "decimal literal invalid"),
205 EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
206 EscapeHexInvalid => {
207 write!(f, "hexadecimal literal is not a Unicode scalar value")
208 }
209 EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
210 EscapeUnexpectedEof => write!(
211 f,
212 "incomplete escape sequence, \
213 reached end of pattern prematurely"
214 ),
215 EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
216 FlagDanglingNegation => {
217 write!(f, "dangling flag negation operator")
218 }
219 FlagDuplicate { .. } => write!(f, "duplicate flag"),
220 FlagRepeatedNegation { .. } => {
221 write!(f, "flag negation operator repeated")
222 }
223 FlagUnexpectedEof => {
224 write!(f, "expected flag but got end of regex")
225 }
226 FlagUnrecognized => write!(f, "unrecognized flag"),
227 GroupNameDuplicate { .. } => {
228 write!(f, "duplicate capture group name")
229 }
230 GroupNameEmpty => write!(f, "empty capture group name"),
231 GroupNameInvalid => write!(f, "invalid capture group character"),
232 GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
233 GroupUnclosed => write!(f, "unclosed group"),
234 GroupUnopened => write!(f, "unopened group"),
235 NestLimitExceeded(limit) => write!(
236 f,
237 "exceed the maximum number of \
238 nested parentheses/brackets ({})",
239 limit
240 ),
241 RepetitionCountInvalid => write!(
242 f,
243 "invalid repetition count range, \
244 the start must be <= the end"
245 ),
246 RepetitionCountDecimalEmpty => {
247 write!(f, "repetition quantifier expects a valid decimal")
248 }
249 RepetitionCountUnclosed => {
250 write!(f, "unclosed counted repetition")
251 }
252 RepetitionMissing => {
253 write!(f, "repetition operator missing expression")
254 }
255 SpecialWordBoundaryUnclosed => {
256 write!(
257 f,
258 "special word boundary assertion is either \
259 unclosed or contains an invalid character",
260 )
261 }
262 SpecialWordBoundaryUnrecognized => {
263 write!(
264 f,
265 "unrecognized special word boundary assertion, \
266 valid choices are: start, end, start-half \
267 or end-half",
268 )
269 }
270 SpecialWordOrRepetitionUnexpectedEof => {
271 write!(
272 f,
273 "found either the beginning of a special word \
274 boundary or a bounded repetition on a \\b with \
275 an opening brace, but no closing brace",
276 )
277 }
278 UnicodeClassInvalid => {
279 write!(f, "invalid Unicode character class")
280 }
281 UnsupportedBackreference => {
282 write!(f, "backreferences are not supported")
283 }
284 UnsupportedLookAround => write!(
285 f,
286 "look-around, including look-ahead and look-behind, \
287 is not supported"
288 ),
289 UnsupportedResharpRegex => write!(f, "this pattern is not supported"),
290 UnsupportedLazyQuantifier => {
291 write!(f, "lazy quantifiers are not supported")
292 }
293 ComplementGroupExpected => write!(f, "expected ( after ~ for complement group"),
294 }
295 }
296}
297
298/// An abstract syntax tree for a singular expression along with comments
299/// found.
300///
301/// Comments are not stored in the tree itself to avoid complexity. Each
302/// comment contains a span of precisely where it occurred in the original
303/// regular expression.
304#[derive(Clone, Debug, Eq, PartialEq)]
305pub struct WithComments {
306 /// The actual ast.
307 pub ast: Ast,
308 /// All comments found in the original regular expression.
309 pub comments: Vec<Comment>,
310}
311
312/// A comment from a regular expression with an associated span.
313///
314/// A regular expression can only contain comments when the `x` flag is
315/// enabled.
316#[derive(Clone, Debug, Eq, PartialEq)]
317pub struct Comment {
318 /// The span of this comment, including the beginning `#` and ending `\n`.
319 pub span: Span,
320 /// The comment text, starting with the first character following the `#`
321 /// and ending with the last character preceding the `\n`.
322 pub comment: String,
323}
324
325/// An abstract syntax tree for a single regular expression.
326///
327/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
328/// space proportional to the size of the `Ast`.
329///
330/// This type defines its own destructor that uses constant stack space and
331/// heap space proportional to the size of the `Ast`.
332#[derive(Clone, Debug, Eq, PartialEq)]
333pub enum Ast {
334 /// An empty regex that matches everything.
335 Empty(Box<Span>),
336 /// A set of flags, e.g., `(?is)`.
337 Flags(Box<SetFlags>),
338 /// A single character literal, which includes escape sequences.
339 Literal(Box<Literal>),
340 /// The "any character" class.
341 Dot(Box<Span>),
342 /// The "any character" class.
343 Top(Box<Span>),
344 /// A single zero-width assertion.
345 Assertion(Box<Assertion>),
346 /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
347 ClassUnicode(Box<ClassUnicode>),
348 /// A single perl character class, e.g., `\d` or `\W`.
349 ClassPerl(Box<ClassPerl>),
350 /// A single bracketed character class set, which may contain zero or more
351 /// character ranges and/or zero or more nested classes. e.g.,
352 /// `[a-zA-Z\pL]`.
353 ClassBracketed(Box<ClassBracketed>),
354 /// A repetition operator applied to an arbitrary regular expression.
355 Repetition(Box<Repetition>),
356 /// A grouped regular expression.
357 Group(Box<Group>),
358 /// An alternation of regular expressions.
359 Alternation(Box<Alternation>),
360 /// A concatenation of regular expressions.
361 Concat(Box<Concat>),
362 Intersection(Box<Intersection>),
363 Complement(Box<Complement>),
364 Lookaround(Box<Lookaround>),
365}
366
367impl Ast {
368 /// Create an "empty" AST item.
369 pub fn empty(span: Span) -> Ast {
370 Ast::Empty(Box::new(span))
371 }
372
373 /// Create a "flags" AST item.
374 pub fn flags(e: SetFlags) -> Ast {
375 Ast::Flags(Box::new(e))
376 }
377
378 /// Create a "literal" AST item.
379 pub fn literal(e: Literal) -> Ast {
380 Ast::Literal(Box::new(e))
381 }
382
383 /// Create a "dot" AST item.
384 pub fn dot(span: Span) -> Ast {
385 Ast::Dot(Box::new(span))
386 }
387
388 pub fn top(span: Span) -> Ast {
389 Ast::Top(Box::new(span))
390 }
391
392 /// Create a "assertion" AST item.
393 pub fn assertion(e: Assertion) -> Ast {
394 Ast::Assertion(Box::new(e))
395 }
396
397 /// Create a "Unicode class" AST item.
398 pub fn class_unicode(e: ClassUnicode) -> Ast {
399 Ast::ClassUnicode(Box::new(e))
400 }
401
402 /// Create a "Perl class" AST item.
403 pub fn class_perl(e: ClassPerl) -> Ast {
404 Ast::ClassPerl(Box::new(e))
405 }
406
407 /// Create a "bracketed class" AST item.
408 pub fn class_bracketed(e: ClassBracketed) -> Ast {
409 Ast::ClassBracketed(Box::new(e))
410 }
411
412 /// Create a "repetition" AST item.
413 pub fn repetition(e: Repetition) -> Ast {
414 Ast::Repetition(Box::new(e))
415 }
416
417 /// Create a "group" AST item.
418 pub fn group(e: Group) -> Ast {
419 match &e.kind {
420 GroupKind::CaptureIndex(_) => Ast::Group(Box::new(e)),
421 GroupKind::CaptureName {
422 starts_with_p: _,
423 name: _,
424 } => Ast::Group(Box::new(e)),
425 GroupKind::NonCapturing(_flags) => Ast::Group(Box::new(e)),
426 GroupKind::Lookaround(kind) => {
427 let look = Lookaround {
428 kind: kind.clone(),
429 span: e.span,
430 ast: e.ast,
431 };
432 Ast::lookaround(look)
433 }
434 GroupKind::Complement => {
435 let g = Complement {
436 span: e.span,
437 ast: e.ast,
438 };
439 Ast::complement(g)
440 }
441 }
442 }
443
444 /// Create a "alternation" AST item.
445 pub fn alternation(e: Alternation) -> Ast {
446 Ast::Alternation(Box::new(e))
447 }
448
449 /// Create a "concat" AST item.
450 pub fn concat(e: Concat) -> Ast {
451 Ast::Concat(Box::new(e))
452 }
453
454 /// Return the span of this abstract syntax tree.
455 pub fn span(&self) -> &Span {
456 match *self {
457 Ast::Empty(ref span) => span,
458 Ast::Flags(ref x) => &x.span,
459 Ast::Literal(ref x) => &x.span,
460 Ast::Dot(ref span) => span,
461 Ast::Top(ref span) => span,
462 Ast::Assertion(ref x) => &x.span,
463 Ast::ClassUnicode(ref x) => &x.span,
464 Ast::ClassPerl(ref x) => &x.span,
465 Ast::ClassBracketed(ref x) => &x.span,
466 Ast::Repetition(ref x) => &x.span,
467 Ast::Group(ref x) => &x.span,
468 Ast::Alternation(ref x) => &x.span,
469 Ast::Concat(ref x) => &x.span,
470 Ast::Intersection(ref x) => &x.span,
471 Ast::Complement(ref x) => &x.span,
472 Ast::Lookaround(ref x) => &x.span,
473 }
474 }
475
476 /// Return true if and only if this Ast is empty.
477 pub fn is_empty(&self) -> bool {
478 matches!(*self, Ast::Empty(_))
479 }
480
481 /// Returns true if and only if this AST has any (including possibly empty)
482 /// subexpressions.
483 fn has_subexprs(&self) -> bool {
484 match *self {
485 Ast::Empty(_)
486 | Ast::Flags(_)
487 | Ast::Literal(_)
488 | Ast::Dot(_)
489 | Ast::Top(_)
490 | Ast::Assertion(_)
491 | Ast::ClassUnicode(_)
492 | Ast::ClassPerl(_) => false,
493 Ast::ClassBracketed(_)
494 | Ast::Repetition(_)
495 | Ast::Group(_)
496 | Ast::Alternation(_)
497 | Ast::Intersection(_)
498 | Ast::Lookaround(_)
499 | Ast::Complement(_)
500 | Ast::Concat(_) => true,
501 }
502 }
503}
504
505/// An alternation of regular expressions.
506#[derive(Clone, Debug, Eq, PartialEq)]
507pub struct Alternation {
508 /// The span of this alternation.
509 pub span: Span,
510 /// The alternate regular expressions.
511 pub asts: Vec<Ast>,
512}
513
514impl Alternation {
515 /// Return this alternation as an AST.
516 ///
517 /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
518 /// If this alternation contains exactly 1 AST, then the corresponding AST
519 /// is returned. Otherwise, `Ast::alternation` is returned.
520 pub fn into_ast(mut self) -> Ast {
521 match self.asts.len() {
522 0 => Ast::empty(self.span),
523 1 => self.asts.pop().unwrap(),
524 _ => Ast::alternation(self),
525 }
526 }
527}
528
529/// A concatenation of regular expressions.
530#[derive(Clone, Debug, Eq, PartialEq)]
531pub struct Concat {
532 /// The span of this concatenation.
533 pub span: Span,
534 /// The concatenation regular expressions.
535 pub asts: Vec<Ast>,
536}
537
538impl Concat {
539 /// Return this concatenation as an AST.
540 ///
541 /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
542 /// If this alternation contains exactly 1 AST, then the corresponding AST
543 /// is returned. Otherwise, `Ast::concat` is returned.
544 pub fn into_ast(mut self) -> Ast {
545 match self.asts.len() {
546 0 => Ast::empty(self.span),
547 1 => self.asts.pop().unwrap(),
548 _ => Ast::concat(self),
549 }
550 }
551}
552
553/// A single zero-width assertion.
554#[derive(Clone, Debug, Eq, PartialEq)]
555pub struct Assertion {
556 /// The span of this assertion.
557 pub span: Span,
558 /// The assertion kind, e.g., `\b` or `^`.
559 pub kind: AssertionKind,
560}
561
/// The kinds of zero-width assertion. Each variant lists its syntax.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AssertionKind {
    /// Written `^`.
    StartLine,
    /// Written `$`.
    EndLine,
    /// Written `\A`.
    StartText,
    /// Written `\z`.
    EndText,
    /// Written `\b`.
    WordBoundary,
    /// Written `\B`.
    NotWordBoundary,
    /// Written `\b{start}`.
    WordBoundaryStart,
    /// Written `\b{end}`.
    WordBoundaryEnd,
    /// Written `\<`; an alias for `\b{start}`.
    WordBoundaryStartAngle,
    /// Written `\>`; an alias for `\b{end}`.
    WordBoundaryEndAngle,
    /// Written `\b{start-half}`.
    WordBoundaryStartHalf,
    /// Written `\b{end-half}`.
    WordBoundaryEndHalf,
}
590
591/// A repetition operation applied to a regular expression.
592#[derive(Clone, Debug, Eq, PartialEq)]
593pub struct Repetition {
594 /// The span of this operation.
595 pub span: Span,
596 /// The actual operation.
597 pub op: RepetitionOp,
598 /// Whether this operation was applied greedily or not.
599 pub greedy: bool,
600 /// The regular expression under repetition.
601 pub ast: Box<Ast>,
602}
603
604/// The repetition operator itself.
605#[derive(Clone, Debug, Eq, PartialEq)]
606pub struct RepetitionOp {
607 /// The span of this operator. This includes things like `+`, `*?` and
608 /// `{m,n}`.
609 pub span: Span,
610 /// The type of operation.
611 pub kind: RepetitionKind,
612}
613
614/// The kind of a repetition operator.
615#[derive(Clone, Debug, Eq, PartialEq)]
616pub enum RepetitionKind {
617 /// `?`
618 ZeroOrOne,
619 /// `*`
620 ZeroOrMore,
621 /// `+`
622 OneOrMore,
623 /// `{m,n}`
624 Range(RepetitionRange),
625}
626
/// The counts of a range repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionRange {
    /// Written `{m}`.
    Exactly(u32),
    /// Written `{m,}`.
    AtLeast(u32),
    /// Written `{m,n}`.
    Bounded(u32, u32),
}

impl RepetitionRange {
    /// Reports whether this repetition range is valid.
    ///
    /// A range is invalid only when it is `Bounded` and its start is greater
    /// than its end. `Exactly` and `AtLeast` are always valid.
    pub fn is_valid(&self) -> bool {
        match *self {
            RepetitionRange::Bounded(start, end) => start <= end,
            RepetitionRange::Exactly(_) | RepetitionRange::AtLeast(_) => true,
        }
    }
}
647
648/// A grouped regular expression.
649///
650/// This includes both capturing and non-capturing groups. This does **not**
651/// include flag-only groups like `(?is)`, but does contain any group that
652/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
653/// `(?is:a)`.
654#[derive(Clone, Debug, Eq, PartialEq)]
655pub struct Group {
656 /// The span of this group.
657 pub span: Span,
658 /// The kind of this group.
659 pub kind: GroupKind,
660 /// The regular expression in this group.
661 pub ast: Box<Ast>,
662}
663
664impl Group {
665 /// If this group is non-capturing, then this returns the (possibly empty)
666 /// set of flags. Otherwise, `None` is returned.
667 pub fn flags(&self) -> Option<&Flags> {
668 match self.kind {
669 GroupKind::NonCapturing(ref flags) => Some(flags),
670 _ => None,
671 }
672 }
673
674 /// Returns true if and only if this group is capturing.
675 pub fn is_capturing(&self) -> bool {
676 match self.kind {
677 GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true,
678 GroupKind::NonCapturing(_) => false,
679 GroupKind::Lookaround(_) => false,
680 GroupKind::Complement => false,
681 }
682 }
683
684 /// Returns the capture index of this group, if this is a capturing group.
685 ///
686 /// This returns a capture index precisely when `is_capturing` is `true`.
687 pub fn capture_index(&self) -> Option<u32> {
688 match self.kind {
689 GroupKind::CaptureIndex(i) => Some(i),
690 GroupKind::CaptureName { ref name, .. } => Some(name.index),
691 GroupKind::NonCapturing(_) => None,
692 GroupKind::Lookaround(_) => None,
693 GroupKind::Complement => None,
694 }
695 }
696}
697
698/// The kind of a group.
699#[derive(Clone, Debug, Eq, PartialEq)]
700pub enum GroupKind {
701 /// `(a)`
702 CaptureIndex(u32),
703 /// `(?<name>a)` or `(?P<name>a)`
704 CaptureName {
705 /// True if the `?P<` syntax is used and false if the `?<` syntax is used.
706 starts_with_p: bool,
707 /// The capture name.
708 name: CaptureName,
709 },
710 /// `(?:a)` and `(?i:a)`
711 NonCapturing(Flags),
712 Lookaround(LookaroundKind),
713 Complement,
714}
715
716/// A capture name.
717///
718/// This corresponds to the name itself between the angle brackets in, e.g.,
719/// `(?P<foo>expr)`.
720#[derive(Clone, Debug, Eq, PartialEq)]
721pub struct CaptureName {
722 /// The span of this capture name.
723 pub span: Span,
724 /// The capture name.
725 pub name: String,
726 /// The capture index.
727 pub index: u32,
728}
729
730/// A group of flags that is not applied to a particular regular expression.
731#[derive(Clone, Debug, Eq, PartialEq)]
732pub struct SetFlags {
733 /// The span of these flags, including the grouping parentheses.
734 pub span: Span,
735 /// The actual sequence of flags.
736 pub flags: Flags,
737}
738
739/// A group of flags.
740///
741/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
742#[derive(Clone, Debug, Eq, PartialEq)]
743pub struct Flags {
744 /// The span of this group of flags.
745 pub span: Span,
746 /// A sequence of flag items. Each item is either a flag or a negation
747 /// operator.
748 pub items: Vec<FlagsItem>,
749}
750
751impl Flags {
752 /// Add the given item to this sequence of flags.
753 ///
754 /// If the item was added successfully, then `None` is returned. If the
755 /// given item is a duplicate, then `Some(i)` is returned, where
756 /// `items[i].kind == item.kind`.
757 pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
758 for (i, x) in self.items.iter().enumerate() {
759 if x.kind == item.kind {
760 return Some(i);
761 }
762 }
763 self.items.push(item);
764 None
765 }
766
767 /// Returns the state of the given flag in this set.
768 ///
769 /// If the given flag is in the set but is negated, then `Some(false)` is
770 /// returned.
771 ///
772 /// If the given flag is in the set and is not negated, then `Some(true)`
773 /// is returned.
774 ///
775 /// Otherwise, `None` is returned.
776 pub fn flag_state(&self, flag: Flag) -> Option<bool> {
777 let mut negated = false;
778 for x in &self.items {
779 match x.kind {
780 FlagsItemKind::Negation => {
781 negated = true;
782 }
783 FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
784 return Some(!negated);
785 }
786 _ => {}
787 }
788 }
789 None
790 }
791}
792
793/// A single item in a group of flags.
794#[derive(Clone, Debug, Eq, PartialEq)]
795pub struct FlagsItem {
796 /// The span of this item.
797 pub span: Span,
798 /// The kind of this item.
799 pub kind: FlagsItemKind,
800}
801
802/// The kind of an item in a group of flags.
803#[derive(Clone, Debug, Eq, PartialEq)]
804pub enum FlagsItemKind {
805 /// A negation operator applied to all subsequent flags in the enclosing
806 /// group.
807 Negation,
808 /// A single flag in a group.
809 Flag(Flag),
810}
811
812impl FlagsItemKind {
813 /// Returns true if and only if this item is a negation operator.
814 pub fn is_negation(&self) -> bool {
815 matches!(*self, FlagsItemKind::Negation)
816 }
817}
818
/// One flag. Each variant lists the character that spells it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flag {
    /// Written `i`.
    CaseInsensitive,
    /// Written `m`.
    MultiLine,
    /// Written `s`.
    DotMatchesNewLine,
    /// Written `U`.
    SwapGreed,
    /// Written `u`.
    Unicode,
    /// Written `R`.
    CRLF,
    /// Written `x`.
    IgnoreWhitespace,
}
837
838// START RE#
839
840impl Ast {
841 pub fn intersection(e: Intersection) -> Ast {
842 Ast::Intersection(Box::new(e))
843 }
844 pub fn complement(e: Complement) -> Ast {
845 Ast::Complement(Box::new(e))
846 }
847 pub fn lookaround(e: Lookaround) -> Ast {
848 Ast::Lookaround(Box::new(e))
849 }
850}
851
852/// An alternation of regular expressions.
853#[derive(Clone, Debug, Eq, PartialEq)]
854pub struct Intersection {
855 /// The span of this alternation.
856 pub span: Span,
857 /// The alternate regular expressions.
858 pub asts: Vec<Ast>,
859}
860
861impl Intersection {
862 pub fn into_ast(mut self) -> Ast {
863 match self.asts.len() {
864 0 => Ast::empty(self.span),
865 1 => self.asts.pop().unwrap(),
866 _ => Ast::intersection(self),
867 }
868 }
869}
870
871#[derive(Clone, Debug, Eq, PartialEq)]
872pub struct Complement {
873 pub span: Span,
874 pub ast: Box<Ast>,
875}
876
877impl Complement {
878 pub fn into_ast(self) -> Ast {
879 Ast::complement(self)
880 }
881}
882
/// The kinds of look-around: ahead or behind, each positive or negative.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LookaroundKind {
    /// A positive look-ahead.
    PositiveLookahead,
    /// A negative look-ahead.
    NegativeLookahead,
    /// A positive look-behind.
    PositiveLookbehind,
    /// A negative look-behind.
    NegativeLookbehind,
}
890
891/// An alternation of regular expressions.
892#[derive(Clone, Debug, Eq, PartialEq)]
893pub struct Lookaround {
894 pub kind: LookaroundKind,
895 /// The span of this alternation.
896 pub span: Span,
897 /// The regular expression in this group.
898 pub ast: Box<Ast>,
899}
900
901impl Lookaround {
902 pub fn into_ast(self) -> Ast {
903 Ast::lookaround(self)
904 }
905}