resharp_parser/ast.rs
1#![allow(dead_code)]
2/*!
3RE# AST based on the regex_syntax crate.
4*/
5
6use regex_syntax::ast::{ClassBracketed, ClassPerl, ClassUnicode, Literal, Span};
7
/// An error that occurred while building an AST.
///
/// Bundles what went wrong (`kind`) with the pattern text and the span the
/// error refers to. The fields are private; read them through the accessor
/// methods on this type.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
    /// The kind of error.
    kind: ErrorKind,
    /// The original pattern string in which the error occurred.
    pattern: String,
    /// The span at which the error occurred.
    span: Span,
}
14
15impl Error {
16 /// Return the type of this error.
17 pub fn kind(&self) -> &ErrorKind {
18 &self.kind
19 }
20
21 /// The original pattern string in which this error occurred.
22 ///
23 /// Every span reported by this error is reported in terms of this string.
24 pub fn pattern(&self) -> &str {
25 &self.pattern
26 }
27
28 /// Return the span at which this error occurred.
29 pub fn span(&self) -> &Span {
30 &self.span
31 }
32
33 /// Return an auxiliary span. This span exists only for some errors that
34 /// benefit from being able to point to two locations in the original
35 /// regular expression. For example, "duplicate" errors will have the
36 /// main error position set to the duplicate occurrence while its
37 /// auxiliary span will be set to the initial occurrence.
38 pub fn auxiliary_span(&self) -> Option<&Span> {
39 use self::ErrorKind::*;
40 match self.kind {
41 FlagDuplicate { ref original } => Some(original),
42 FlagRepeatedNegation { ref original, .. } => Some(original),
43 GroupNameDuplicate { ref original, .. } => Some(original),
44 _ => None,
45 }
46 }
47}
48
/// The type of an error that occurred while building an AST.
///
/// This error type is marked as `non_exhaustive`. This means that adding a
/// new variant is not considered a breaking change.
#[non_exhaustive]
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// The capturing group limit was exceeded.
    ///
    /// Note that this represents a limit on the total number of capturing
    /// groups in a regex and not necessarily the number of nested capturing
    /// groups. That is, the nest limit can be low and it is still possible for
    /// this error to occur.
    CaptureLimitExceeded,
    /// An invalid escape sequence was found in a character class set.
    ClassEscapeInvalid,
    /// An invalid character class range was found. An invalid range is any
    /// range where the start is greater than the end.
    ClassRangeInvalid,
    /// An invalid range boundary was found in a character class. Range
    /// boundaries must be a single literal codepoint, but this error indicates
    /// that something else was found, such as a nested class.
    ClassRangeLiteral,
    /// An opening `[` was found with no corresponding closing `]`.
    ClassUnclosed,
    /// Note that this error variant is no longer used. Namely, a decimal
    /// number can only appear as a repetition quantifier. When the number
    /// in a repetition quantifier is empty, then it gets its own specialized
    /// error, `RepetitionCountDecimalEmpty`.
    DecimalEmpty,
    /// An invalid decimal number was given where one was expected.
    DecimalInvalid,
    /// A bracketed hex literal was empty.
    EscapeHexEmpty,
    /// A bracketed hex literal did not correspond to a Unicode scalar value.
    EscapeHexInvalid,
    /// An invalid hexadecimal digit was found.
    EscapeHexInvalidDigit,
    /// EOF was found before an escape sequence was completed.
    EscapeUnexpectedEof,
    /// An unrecognized escape sequence.
    EscapeUnrecognized,
    /// A dangling negation was used when setting flags, e.g., `i-`.
    FlagDanglingNegation,
    /// A flag was used twice, e.g., `i-i`.
    FlagDuplicate {
        /// The position of the original flag. The error position
        /// points to the duplicate flag.
        original: Span,
    },
    /// The negation operator was used twice, e.g., `-i-s`.
    FlagRepeatedNegation {
        /// The position of the original negation operator. The error position
        /// points to the duplicate negation operator.
        original: Span,
    },
    /// Expected a flag but got EOF, e.g., `(?`.
    FlagUnexpectedEof,
    /// Unrecognized flag, e.g., `a`.
    FlagUnrecognized,
    /// A duplicate capture name was found.
    GroupNameDuplicate {
        /// The position of the initial occurrence of the capture name. The
        /// error position itself points to the duplicate occurrence.
        original: Span,
    },
    /// A capture group name is empty, e.g., `(?P<>abc)`.
    GroupNameEmpty,
    /// An invalid character was seen for a capture group name. This includes
    /// errors where the first character is a digit (even though subsequent
    /// characters are allowed to be digits).
    GroupNameInvalid,
    /// A closing `>` could not be found for a capture group name.
    GroupNameUnexpectedEof,
    /// An unclosed group, e.g., `(ab`.
    ///
    /// The span of this error corresponds to the unclosed parenthesis.
    GroupUnclosed,
    /// An unopened group, e.g., `ab)`.
    GroupUnopened,
    /// The nest limit was exceeded. The limit stored here is the limit
    /// configured in the parser.
    NestLimitExceeded(u32),
    /// The range provided in a counted repetition operator is invalid. The
    /// range is invalid if the start is greater than the end.
    RepetitionCountInvalid,
    /// An opening `{` was not followed by a valid decimal value.
    /// For example, `x{}` or `x{]}` would fail.
    RepetitionCountDecimalEmpty,
    /// An opening `{` was found with no corresponding closing `}`.
    RepetitionCountUnclosed,
    /// A repetition operator was applied to a missing sub-expression. This
    /// occurs, for example, in the regex consisting of just a `*` or even
    /// `(?i)*`. It is, however, possible to create a repetition operating on
    /// an empty sub-expression. For example, `()*` is still considered valid.
    RepetitionMissing,
    /// The special word boundary syntax, `\b{something}`, was used, but
    /// either EOF without `}` was seen, or an invalid character in the
    /// braces was seen.
    SpecialWordBoundaryUnclosed,
    /// The special word boundary syntax, `\b{something}`, was used, but
    /// `something` was not recognized as a valid word boundary kind.
    SpecialWordBoundaryUnrecognized,
    /// The syntax `\b{` was observed, but afterwards the end of the pattern
    /// was observed without being able to tell whether it was meant to be a
    /// bounded repetition on the `\b` or the beginning of a special word
    /// boundary assertion.
    SpecialWordOrRepetitionUnexpectedEof,
    /// The Unicode class is not valid. This typically occurs when a `\p` is
    /// followed by something other than a `{`.
    UnicodeClassInvalid,
    /// When octal support is disabled, this error is produced when an octal
    /// escape is used. The octal escape is assumed to be an invocation of
    /// a backreference, which is the common case.
    UnsupportedBackreference,
    /// When syntax similar to PCRE's look-around is used, this error is
    /// returned. Some example syntaxes that are rejected include, but are
    /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
    /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
    /// error is used to improve the user experience.
    // NOTE(review): this file also defines `Ast::Lookaround`, so this text
    // (inherited from regex_syntax) may be stale for RE# -- confirm whether
    // the parser still emits this kind.
    UnsupportedLookAround,
    /// Unsupported RE# regex construct.
    UnsupportedResharpRegex,
    /// Lazy quantifiers (e.g., `*?`, `+?`, `??`, `{n,m}?`) are not supported.
    UnsupportedLazyQuantifier,
    /// A `~` was seen that is not followed by the `(` needed to open a
    /// complement group.
    ComplementGroupExpected,
}
176
177impl core::fmt::Display for ErrorKind {
178 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
179 use self::ErrorKind::*;
180 match *self {
181 CaptureLimitExceeded => write!(
182 f,
183 "exceeded the maximum number of \
184 capturing groups ({})",
185 u32::MAX
186 ),
187 ClassEscapeInvalid => {
188 write!(f, "invalid escape sequence found in character class")
189 }
190 ClassRangeInvalid => write!(
191 f,
192 "invalid character class range, \
193 the start must be <= the end"
194 ),
195 ClassRangeLiteral => {
196 write!(f, "invalid range boundary, must be a literal")
197 }
198 ClassUnclosed => write!(f, "unclosed character class"),
199 DecimalEmpty => write!(f, "decimal literal empty"),
200 DecimalInvalid => write!(f, "decimal literal invalid"),
201 EscapeHexEmpty => write!(f, "hexadecimal literal empty"),
202 EscapeHexInvalid => {
203 write!(f, "hexadecimal literal is not a Unicode scalar value")
204 }
205 EscapeHexInvalidDigit => write!(f, "invalid hexadecimal digit"),
206 EscapeUnexpectedEof => write!(
207 f,
208 "incomplete escape sequence, \
209 reached end of pattern prematurely"
210 ),
211 EscapeUnrecognized => write!(f, "unrecognized escape sequence"),
212 FlagDanglingNegation => {
213 write!(f, "dangling flag negation operator")
214 }
215 FlagDuplicate { .. } => write!(f, "duplicate flag"),
216 FlagRepeatedNegation { .. } => {
217 write!(f, "flag negation operator repeated")
218 }
219 FlagUnexpectedEof => {
220 write!(f, "expected flag but got end of regex")
221 }
222 FlagUnrecognized => write!(f, "unrecognized flag"),
223 GroupNameDuplicate { .. } => {
224 write!(f, "duplicate capture group name")
225 }
226 GroupNameEmpty => write!(f, "empty capture group name"),
227 GroupNameInvalid => write!(f, "invalid capture group character"),
228 GroupNameUnexpectedEof => write!(f, "unclosed capture group name"),
229 GroupUnclosed => write!(f, "unclosed group"),
230 GroupUnopened => write!(f, "unopened group"),
231 NestLimitExceeded(limit) => write!(
232 f,
233 "exceed the maximum number of \
234 nested parentheses/brackets ({})",
235 limit
236 ),
237 RepetitionCountInvalid => write!(
238 f,
239 "invalid repetition count range, \
240 the start must be <= the end"
241 ),
242 RepetitionCountDecimalEmpty => {
243 write!(f, "repetition quantifier expects a valid decimal")
244 }
245 RepetitionCountUnclosed => {
246 write!(f, "unclosed counted repetition")
247 }
248 RepetitionMissing => {
249 write!(f, "repetition operator missing expression")
250 }
251 SpecialWordBoundaryUnclosed => {
252 write!(
253 f,
254 "special word boundary assertion is either \
255 unclosed or contains an invalid character",
256 )
257 }
258 SpecialWordBoundaryUnrecognized => {
259 write!(
260 f,
261 "unrecognized special word boundary assertion, \
262 valid choices are: start, end, start-half \
263 or end-half",
264 )
265 }
266 SpecialWordOrRepetitionUnexpectedEof => {
267 write!(
268 f,
269 "found either the beginning of a special word \
270 boundary or a bounded repetition on a \\b with \
271 an opening brace, but no closing brace",
272 )
273 }
274 UnicodeClassInvalid => {
275 write!(f, "invalid Unicode character class")
276 }
277 UnsupportedBackreference => {
278 write!(f, "backreferences are not supported")
279 }
280 UnsupportedLookAround => write!(
281 f,
282 "look-around, including look-ahead and look-behind, \
283 is not supported"
284 ),
285 UnsupportedResharpRegex => write!(f, "this pattern is not supported"),
286 UnsupportedLazyQuantifier => {
287 write!(f, "lazy quantifiers are not supported")
288 }
289 ComplementGroupExpected => write!(f, "expected ( after ~ for complement group"),
290 }
291 }
292}
293
/// An abstract syntax tree for a single expression, paired with the
/// comments found alongside it.
///
/// Comments are not stored in the tree itself to avoid complexity. Each
/// comment contains a span of precisely where it occurred in the original
/// regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct WithComments {
    /// The actual ast.
    pub ast: Ast,
    /// All comments found in the original regular expression.
    pub comments: Vec<Comment>,
}
307
/// A comment from a regular expression with an associated span.
///
/// A regular expression can only contain comments when the `x` flag
/// (`Flag::IgnoreWhitespace`) is enabled.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Comment {
    /// The span of this comment, including the beginning `#` and ending `\n`.
    pub span: Span,
    /// The comment text, starting with the first character following the `#`
    /// and ending with the last character preceding the `\n`.
    pub comment: String,
}
320
/// An abstract syntax tree for a single regular expression.
///
/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
/// space proportional to the size of the `Ast`.
///
/// This type defines its own destructor that uses constant stack space and
/// heap space proportional to the size of the `Ast`.
// NOTE(review): the two paragraphs above come from regex_syntax. No `Display`
// or `Drop` impl for `Ast` is visible in this file -- confirm they exist
// elsewhere, otherwise these claims are stale.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Ast {
    /// An empty regex that matches everything.
    Empty(Box<Span>),
    /// A set of flags, e.g., `(?is)`.
    Flags(Box<SetFlags>),
    /// A single character literal, which includes escape sequences.
    Literal(Box<Literal>),
    /// The "any character" class.
    Dot(Box<Span>),
    /// The "top" node; like `Dot` it carries only a span.
    // NOTE(review): presumably RE#'s `_` wildcard, which (unlike `.`) matches
    // every character including newlines -- confirm against the parser. The
    // previous doc text was a verbatim copy of `Dot`'s.
    Top(Box<Span>),
    /// A single zero-width assertion.
    Assertion(Box<Assertion>),
    /// A single Unicode character class, e.g., `\pL` or `\p{Greek}`.
    ClassUnicode(Box<ClassUnicode>),
    /// A single perl character class, e.g., `\d` or `\W`.
    ClassPerl(Box<ClassPerl>),
    /// A single bracketed character class set, which may contain zero or more
    /// character ranges and/or zero or more nested classes. e.g.,
    /// `[a-zA-Z\pL]`.
    ClassBracketed(Box<ClassBracketed>),
    /// A repetition operator applied to an arbitrary regular expression.
    Repetition(Box<Repetition>),
    /// A grouped regular expression.
    Group(Box<Group>),
    /// An alternation of regular expressions.
    Alternation(Box<Alternation>),
    /// A concatenation of regular expressions.
    Concat(Box<Concat>),
    /// An intersection of regular expressions (RE# extension).
    Intersection(Box<Intersection>),
    /// The complement of a regular expression (RE# extension). `Ast::group`
    /// produces this node for groups of kind `GroupKind::Complement`.
    Complement(Box<Complement>),
    /// A lookaround applied to a regular expression (RE# extension).
    /// `Ast::group` produces this node for groups of kind
    /// `GroupKind::Lookaround`.
    Lookaround(Box<Lookaround>),
}
362
363impl Ast {
364 /// Create an "empty" AST item.
365 pub fn empty(span: Span) -> Ast {
366 Ast::Empty(Box::new(span))
367 }
368
369 /// Create a "flags" AST item.
370 pub fn flags(e: SetFlags) -> Ast {
371 Ast::Flags(Box::new(e))
372 }
373
374 /// Create a "literal" AST item.
375 pub fn literal(e: Literal) -> Ast {
376 Ast::Literal(Box::new(e))
377 }
378
379 /// Create a "dot" AST item.
380 pub fn dot(span: Span) -> Ast {
381 Ast::Dot(Box::new(span))
382 }
383
384 pub fn top(span: Span) -> Ast {
385 Ast::Top(Box::new(span))
386 }
387
388 /// Create a "assertion" AST item.
389 pub fn assertion(e: Assertion) -> Ast {
390 Ast::Assertion(Box::new(e))
391 }
392
393 /// Create a "Unicode class" AST item.
394 pub fn class_unicode(e: ClassUnicode) -> Ast {
395 Ast::ClassUnicode(Box::new(e))
396 }
397
398 /// Create a "Perl class" AST item.
399 pub fn class_perl(e: ClassPerl) -> Ast {
400 Ast::ClassPerl(Box::new(e))
401 }
402
403 /// Create a "bracketed class" AST item.
404 pub fn class_bracketed(e: ClassBracketed) -> Ast {
405 Ast::ClassBracketed(Box::new(e))
406 }
407
408 /// Create a "repetition" AST item.
409 pub fn repetition(e: Repetition) -> Ast {
410 Ast::Repetition(Box::new(e))
411 }
412
413 /// Create a "group" AST item.
414 pub fn group(e: Group) -> Ast {
415 match &e.kind {
416 GroupKind::CaptureIndex(_) => Ast::Group(Box::new(e)),
417 GroupKind::CaptureName {
418 starts_with_p: _,
419 name: _,
420 } => Ast::Group(Box::new(e)),
421 GroupKind::NonCapturing(_flags) => Ast::Group(Box::new(e)),
422 GroupKind::Lookaround(kind) => {
423 let look = Lookaround {
424 kind: kind.clone(),
425 span: e.span,
426 ast: e.ast,
427 };
428 Ast::lookaround(look)
429 }
430 GroupKind::Complement => {
431 let g = Complement {
432 span: e.span,
433 ast: e.ast,
434 };
435 Ast::complement(g)
436 }
437 }
438 }
439
440 /// Create a "alternation" AST item.
441 pub fn alternation(e: Alternation) -> Ast {
442 Ast::Alternation(Box::new(e))
443 }
444
445 /// Create a "concat" AST item.
446 pub fn concat(e: Concat) -> Ast {
447 Ast::Concat(Box::new(e))
448 }
449
450 /// Return the span of this abstract syntax tree.
451 pub fn span(&self) -> &Span {
452 match *self {
453 Ast::Empty(ref span) => span,
454 Ast::Flags(ref x) => &x.span,
455 Ast::Literal(ref x) => &x.span,
456 Ast::Dot(ref span) => span,
457 Ast::Top(ref span) => span,
458 Ast::Assertion(ref x) => &x.span,
459 Ast::ClassUnicode(ref x) => &x.span,
460 Ast::ClassPerl(ref x) => &x.span,
461 Ast::ClassBracketed(ref x) => &x.span,
462 Ast::Repetition(ref x) => &x.span,
463 Ast::Group(ref x) => &x.span,
464 Ast::Alternation(ref x) => &x.span,
465 Ast::Concat(ref x) => &x.span,
466 Ast::Intersection(ref x) => &x.span,
467 Ast::Complement(ref x) => &x.span,
468 Ast::Lookaround(ref x) => &x.span,
469 }
470 }
471
472 /// Return true if and only if this Ast is empty.
473 pub fn is_empty(&self) -> bool {
474 matches!(*self, Ast::Empty(_))
475 }
476
477 /// Returns true if and only if this AST has any (including possibly empty)
478 /// subexpressions.
479 fn has_subexprs(&self) -> bool {
480 match *self {
481 Ast::Empty(_)
482 | Ast::Flags(_)
483 | Ast::Literal(_)
484 | Ast::Dot(_)
485 | Ast::Top(_)
486 | Ast::Assertion(_)
487 | Ast::ClassUnicode(_)
488 | Ast::ClassPerl(_) => false,
489 Ast::ClassBracketed(_)
490 | Ast::Repetition(_)
491 | Ast::Group(_)
492 | Ast::Alternation(_)
493 | Ast::Intersection(_)
494 | Ast::Lookaround(_)
495 | Ast::Complement(_)
496 | Ast::Concat(_) => true,
497 }
498 }
499}
500
/// An alternation of regular expressions.
///
/// Use `Alternation::into_ast` to turn this into an `Ast`; it collapses the
/// zero- and one-branch cases.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Alternation {
    /// The span of this alternation.
    pub span: Span,
    /// The alternate regular expressions.
    pub asts: Vec<Ast>,
}
509
510impl Alternation {
511 /// Return this alternation as an AST.
512 ///
513 /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
514 /// If this alternation contains exactly 1 AST, then the corresponding AST
515 /// is returned. Otherwise, `Ast::alternation` is returned.
516 pub fn into_ast(mut self) -> Ast {
517 match self.asts.len() {
518 0 => Ast::empty(self.span),
519 1 => self.asts.pop().unwrap(),
520 _ => Ast::alternation(self),
521 }
522 }
523}
524
/// A concatenation of regular expressions.
///
/// Use `Concat::into_ast` to turn this into an `Ast`; it collapses the
/// zero- and one-element cases.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Concat {
    /// The span of this concatenation.
    pub span: Span,
    /// The concatenation regular expressions.
    pub asts: Vec<Ast>,
}
533
534impl Concat {
535 /// Return this concatenation as an AST.
536 ///
537 /// If this alternation contains zero ASTs, then `Ast::empty` is returned.
538 /// If this alternation contains exactly 1 AST, then the corresponding AST
539 /// is returned. Otherwise, `Ast::concat` is returned.
540 pub fn into_ast(mut self) -> Ast {
541 match self.asts.len() {
542 0 => Ast::empty(self.span),
543 1 => self.asts.pop().unwrap(),
544 _ => Ast::concat(self),
545 }
546 }
547}
548
/// A single zero-width assertion.
///
/// The concrete assertion is identified by `kind`; see `AssertionKind` for
/// the supported forms.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Assertion {
    /// The span of this assertion.
    pub span: Span,
    /// The assertion kind, e.g., `\b` or `^`.
    pub kind: AssertionKind,
}
557
/// An assertion kind.
///
/// Each variant's documentation shows the pattern syntax that denotes it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum AssertionKind {
    /// `^`
    StartLine,
    /// `$`
    EndLine,
    /// `\A`
    StartText,
    /// `\z`
    EndText,
    /// `\b`
    WordBoundary,
    /// `\B`
    NotWordBoundary,
    /// `\b{start}`
    WordBoundaryStart,
    /// `\b{end}`
    WordBoundaryEnd,
    /// `\<` (alias for `\b{start}`)
    WordBoundaryStartAngle,
    /// `\>` (alias for `\b{end}`)
    WordBoundaryEndAngle,
    /// `\b{start-half}`
    WordBoundaryStartHalf,
    /// `\b{end-half}`
    WordBoundaryEndHalf,
}
586
/// A repetition operation applied to a regular expression.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Repetition {
    /// The span of this operation.
    pub span: Span,
    /// The actual operation.
    pub op: RepetitionOp,
    /// Whether this operation was applied greedily or not.
    // NOTE(review): `ErrorKind::UnsupportedLazyQuantifier` documents lazy
    // quantifiers as unsupported, so this is presumably always `true` for
    // parsed patterns -- confirm against the parser.
    pub greedy: bool,
    /// The regular expression under repetition.
    pub ast: Box<Ast>,
}
599
/// The repetition operator itself, i.e. the operator without the expression
/// it is applied to.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct RepetitionOp {
    /// The span of this operator. This includes things like `+`, `*?` and
    /// `{m,n}`.
    pub span: Span,
    /// The type of operation.
    pub kind: RepetitionKind,
}
609
/// The kind of a repetition operator.
///
/// Each variant's documentation shows the pattern syntax that denotes it.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionKind {
    /// `?`
    ZeroOrOne,
    /// `*`
    ZeroOrMore,
    /// `+`
    OneOrMore,
    /// `{m,n}`, and the other counted forms described by `RepetitionRange`.
    Range(RepetitionRange),
}
622
/// A range repetition operator.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum RepetitionRange {
    /// `{m}`
    Exactly(u32),
    /// `{m,}`
    AtLeast(u32),
    /// `{m,n}`
    Bounded(u32, u32),
}

impl RepetitionRange {
    /// Reports whether this repetition range is well-formed.
    ///
    /// `Exactly` and `AtLeast` are always valid. A `Bounded` range is valid
    /// only when its start does not exceed its end.
    pub fn is_valid(&self) -> bool {
        match *self {
            RepetitionRange::Bounded(start, end) => start <= end,
            RepetitionRange::Exactly(_) | RepetitionRange::AtLeast(_) => true,
        }
    }
}
643
/// A grouped regular expression.
///
/// This includes both capturing and non-capturing groups. This does **not**
/// include flag-only groups like `(?is)`, but does contain any group that
/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
/// `(?is:a)`.
///
/// A `Group` may also carry the RE# kinds `GroupKind::Lookaround` and
/// `GroupKind::Complement`; `Ast::group` converts those into
/// `Ast::Lookaround` and `Ast::Complement` rather than `Ast::Group`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Group {
    /// The span of this group.
    pub span: Span,
    /// The kind of this group.
    pub kind: GroupKind,
    /// The regular expression in this group.
    pub ast: Box<Ast>,
}
659
660impl Group {
661 /// If this group is non-capturing, then this returns the (possibly empty)
662 /// set of flags. Otherwise, `None` is returned.
663 pub fn flags(&self) -> Option<&Flags> {
664 match self.kind {
665 GroupKind::NonCapturing(ref flags) => Some(flags),
666 _ => None,
667 }
668 }
669
670 /// Returns true if and only if this group is capturing.
671 pub fn is_capturing(&self) -> bool {
672 match self.kind {
673 GroupKind::CaptureIndex(_) | GroupKind::CaptureName { .. } => true,
674 GroupKind::NonCapturing(_) => false,
675 GroupKind::Lookaround(_) => false,
676 GroupKind::Complement => false,
677 }
678 }
679
680 /// Returns the capture index of this group, if this is a capturing group.
681 ///
682 /// This returns a capture index precisely when `is_capturing` is `true`.
683 pub fn capture_index(&self) -> Option<u32> {
684 match self.kind {
685 GroupKind::CaptureIndex(i) => Some(i),
686 GroupKind::CaptureName { ref name, .. } => Some(name.index),
687 GroupKind::NonCapturing(_) => None,
688 GroupKind::Lookaround(_) => None,
689 GroupKind::Complement => None,
690 }
691 }
692}
693
/// The kind of a group.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum GroupKind {
    /// `(a)`
    CaptureIndex(u32),
    /// `(?<name>a)` or `(?P<name>a)`
    CaptureName {
        /// True if the `?P<` syntax is used and false if the `?<` syntax is used.
        starts_with_p: bool,
        /// The capture name.
        name: CaptureName,
    },
    /// `(?:a)` and `(?i:a)`
    NonCapturing(Flags),
    /// A lookaround group of the given kind (RE# extension). `Ast::group`
    /// turns a group of this kind into `Ast::Lookaround`.
    Lookaround(LookaroundKind),
    /// A complement group (RE# extension), presumably written `~(a)` --
    /// the `ComplementGroupExpected` error message expects `(` after `~`.
    /// `Ast::group` turns a group of this kind into `Ast::Complement`.
    Complement,
}
711
/// A capture name.
///
/// This corresponds to the name itself between the angle brackets in, e.g.,
/// `(?P<foo>expr)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct CaptureName {
    /// The span of this capture name.
    pub span: Span,
    /// The capture name.
    pub name: String,
    /// The capture index; this is the value `Group::capture_index` reports
    /// for a named group.
    pub index: u32,
}
725
/// A group of flags that is not applied to a particular regular expression,
/// e.g., `(?is)`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct SetFlags {
    /// The span of these flags, including the grouping parentheses.
    pub span: Span,
    /// The actual sequence of flags.
    pub flags: Flags,
}
734
/// A group of flags.
///
/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Flags {
    /// The span of this group of flags.
    pub span: Span,
    /// A sequence of flag items, in the order they were added. Each item is
    /// either a flag or a negation operator.
    pub items: Vec<FlagsItem>,
}
746
747impl Flags {
748 /// Add the given item to this sequence of flags.
749 ///
750 /// If the item was added successfully, then `None` is returned. If the
751 /// given item is a duplicate, then `Some(i)` is returned, where
752 /// `items[i].kind == item.kind`.
753 pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
754 for (i, x) in self.items.iter().enumerate() {
755 if x.kind == item.kind {
756 return Some(i);
757 }
758 }
759 self.items.push(item);
760 None
761 }
762
763 /// Returns the state of the given flag in this set.
764 ///
765 /// If the given flag is in the set but is negated, then `Some(false)` is
766 /// returned.
767 ///
768 /// If the given flag is in the set and is not negated, then `Some(true)`
769 /// is returned.
770 ///
771 /// Otherwise, `None` is returned.
772 pub fn flag_state(&self, flag: Flag) -> Option<bool> {
773 let mut negated = false;
774 for x in &self.items {
775 match x.kind {
776 FlagsItemKind::Negation => {
777 negated = true;
778 }
779 FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
780 return Some(!negated);
781 }
782 _ => {}
783 }
784 }
785 None
786 }
787}
788
/// A single item in a group of flags: either one flag or a negation
/// operator, together with where it appeared.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FlagsItem {
    /// The span of this item.
    pub span: Span,
    /// The kind of this item.
    pub kind: FlagsItemKind,
}
797
/// The kind of an item in a group of flags.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum FlagsItemKind {
    /// A negation operator applied to all subsequent flags in the enclosing
    /// group. The examples in `ErrorKind` (e.g. `i-`) write it as `-`.
    Negation,
    /// A single flag in a group.
    Flag(Flag),
}
807
808impl FlagsItemKind {
809 /// Returns true if and only if this item is a negation operator.
810 pub fn is_negation(&self) -> bool {
811 matches!(*self, FlagsItemKind::Negation)
812 }
813}
814
/// A single flag.
///
/// Each variant's documentation shows the letter that denotes it inside a
/// flag group.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Flag {
    /// `i`
    CaseInsensitive,
    /// `m`
    MultiLine,
    /// `s`
    DotMatchesNewLine,
    /// `U`
    SwapGreed,
    /// `u`
    Unicode,
    /// `R`
    CRLF,
    /// `x`
    IgnoreWhitespace,
}
833
834// START RE#
835
836impl Ast {
837 pub fn intersection(e: Intersection) -> Ast {
838 Ast::Intersection(Box::new(e))
839 }
840 pub fn complement(e: Complement) -> Ast {
841 Ast::Complement(Box::new(e))
842 }
843 pub fn lookaround(e: Lookaround) -> Ast {
844 Ast::Lookaround(Box::new(e))
845 }
846}
847
/// An intersection of regular expressions (RE# extension).
// NOTE(review): presumably written with the `&` operator -- confirm against
// the parser. The previous docs were copied verbatim from `Alternation`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Intersection {
    /// The span of this intersection.
    pub span: Span,
    /// The regular expressions being intersected.
    pub asts: Vec<Ast>,
}
856
857impl Intersection {
858 pub fn into_ast(mut self) -> Ast {
859 match self.asts.len() {
860 0 => Ast::empty(self.span),
861 1 => self.asts.pop().unwrap(),
862 _ => Ast::intersection(self),
863 }
864 }
865}
866
/// The complement of a regular expression (RE# extension).
///
/// `Ast::group` builds this from a group of kind `GroupKind::Complement`,
/// reusing the group's span and inner AST.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Complement {
    /// The span of this complement.
    pub span: Span,
    /// The regular expression being complemented.
    pub ast: Box<Ast>,
}
872
873impl Complement {
874 pub fn into_ast(self) -> Ast {
875 Ast::complement(self)
876 }
877}
878
/// The kind of a lookaround (RE# extension).
// NOTE(review): the syntax shown per variant is the PCRE-style form listed in
// the `ErrorKind::UnsupportedLookAround` docs; the mapping is presumed --
// confirm against the parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum LookaroundKind {
    /// Positive lookahead, presumably `(?=re)`.
    PositiveLookahead,
    /// Negative lookahead, presumably `(?!re)`.
    NegativeLookahead,
    /// Positive lookbehind, presumably `(?<=re)`.
    PositiveLookbehind,
    /// Negative lookbehind, presumably `(?<!re)`.
    NegativeLookbehind,
}
886
/// A lookaround applied to a regular expression (RE# extension).
///
/// `Ast::group` builds this from a group of kind `GroupKind::Lookaround`,
/// reusing the group's span and inner AST.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Lookaround {
    /// Which lookaround this is (direction and polarity).
    pub kind: LookaroundKind,
    /// The span of this lookaround.
    pub span: Span,
    /// The regular expression inside the lookaround.
    pub ast: Box<Ast>,
}
893
894impl Lookaround {
895 pub fn into_ast(self) -> Ast {
896 Ast::lookaround(self)
897 }
898}