fancy_regex/lib.rs
1// Copyright 2016 The Fancy Regex Authors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21/*!
22An implementation of regexes, supporting a relatively rich set of features, including backreferences
23and lookaround.
24
25It builds on top of the excellent [regex] crate. If you are not
26familiar with it, make sure you read its documentation and maybe you don't even need fancy-regex.
27
If your regex or parts of it do not use any special features, the matching is delegated to the
29regex crate. That means it has linear runtime. But if you use "fancy" features such as
30backreferences or look-around, an engine with backtracking needs to be used. In that case, the regex
31can be slow and take exponential time to run because of what is called "catastrophic backtracking".
32This depends on the regex and the input.
33
34# Usage
35
36The API should feel very similar to the regex crate, and involves compiling a regex and then using
37it to find matches in text.
38
39## Example: Matching text
40
41An example with backreferences to check if a text consists of two identical words:
42
43```rust
44use fancy_regex::Regex;
45
46let re = Regex::new(r"^(\w+) (\1)$").unwrap();
47let result = re.is_match("foo foo");
48
49assert!(result.is_ok());
50let did_match = result.unwrap();
51assert!(did_match);
52```
53
54Note that like in the regex crate, the regex needs anchors like `^` and `$` to match against the
55entire input text.
56
57## Example: Finding the position of matches
58
59```rust
60use fancy_regex::Regex;
61
62let re = Regex::new(r"(\d)\1").unwrap();
63let result = re.find("foo 22");
64
65assert!(result.is_ok(), "execution was successful");
66let match_option = result.unwrap();
67
68assert!(match_option.is_some(), "found a match");
69let m = match_option.unwrap();
70
71assert_eq!(m.start(), 4);
72assert_eq!(m.end(), 6);
73assert_eq!(m.as_str(), "22");
74```
75
76## Example: Capturing groups
77
78```rust
79use fancy_regex::Regex;
80
81let re = Regex::new(r"(?<!AU)\$(\d+)").unwrap();
82let result = re.captures("AU$10, $20");
83
84let captures = result.expect("Error running regex").expect("No match found");
85let group = captures.get(1).expect("No group");
86assert_eq!(group.as_str(), "20");
87```
88
89## Example: Splitting text
90
91```rust
92use fancy_regex::Regex;
93
94let re = Regex::new(r"[ \t]+").unwrap();
95let target = "a b \t c\td e";
96let fields: Vec<&str> = re.split(target).map(|x| x.unwrap()).collect();
97assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
98
99let fields: Vec<&str> = re.splitn(target, 3).map(|x| x.unwrap()).collect();
100assert_eq!(fields, vec!["a", "b", "c\td e"]);
101```
102
103# Syntax
104
105The regex syntax is based on the [regex] crate's, with some additional supported syntax.
106
107Escapes:
108
109`\h`
110: hex digit (`[0-9A-Fa-f]`) \
111`\H`
112: not hex digit (`[^0-9A-Fa-f]`) \
113`\e`
114: escape control character (`\x1B`) \
115`\K`
116: keep text matched so far out of the overall match ([docs](https://www.regular-expressions.info/keep.html))\
117`\G`
118: anchor to where the previous match ended ([docs](https://www.regular-expressions.info/continue.html))\
119`\Z`
120: anchor to the end of the text before any trailing newlines\
121`\O`
122: any character including newline
123
124Backreferences:
125
126`\1`
127: match the exact string that the first capture group matched \
128`\2`
129: backref to the second capture group, etc
130
131Named capture groups:
132
133`(?<name>exp)`
134: match *exp*, creating capture group named *name* \
135`\k<name>`
136: match the exact string that the capture group named *name* matched \
137`(?P<name>exp)`
138: same as `(?<name>exp)` for compatibility with Python, etc. \
139`(?P=name)`
140: same as `\k<name>` for compatibility with Python, etc.
141
142Look-around assertions for matching without changing the current position:
143
144`(?=exp)`
145: look-ahead, succeeds if *exp* matches to the right of the current position \
146`(?!exp)`
147: negative look-ahead, succeeds if *exp* doesn't match to the right \
148`(?<=exp)`
149: look-behind, succeeds if *exp* matches to the left of the current position \
150`(?<!exp)`
151: negative look-behind, succeeds if *exp* doesn't match to the left
152
153Atomic groups using `(?>exp)` to prevent backtracking within `exp`, e.g.:
154
155```
156# use fancy_regex::Regex;
157let re = Regex::new(r"^a(?>bc|b)c$").unwrap();
158assert!(re.is_match("abcc").unwrap());
159// Doesn't match because `|b` is never tried because of the atomic group
160assert!(!re.is_match("abc").unwrap());
161```
162
163Conditionals - if/then/else:
164
165`(?(1))`
166: continue only if first capture group matched \
167`(?(<name>))`
168: continue only if capture group named *name* matched \
169`(?(1)true_branch|false_branch)`
170: if the first capture group matched then execute the true_branch regex expression, else execute false_branch ([docs](https://www.regular-expressions.info/conditional.html)) \
171`(?(condition)true_branch|false_branch)`
172: if the condition matches then execute the true_branch regex expression, else execute false_branch from the point just before the condition was evaluated
173
174[regex]: https://crates.io/crates/regex
175*/
176
177#![deny(missing_docs)]
178#![deny(missing_debug_implementations)]
179#![cfg_attr(not(feature = "std"), no_std)]
180
181extern crate alloc;
182
183use alloc::borrow::Cow;
184use alloc::boxed::Box;
185use alloc::string::{String, ToString};
186use alloc::sync::Arc;
187use alloc::vec;
188use alloc::vec::Vec;
189
190use core::convert::TryFrom;
191use core::fmt::{Debug, Formatter};
192use core::ops::{Index, Range};
193use core::str::FromStr;
194use core::{fmt, usize};
195use regex_automata::meta::Regex as RaRegex;
196use regex_automata::util::captures::Captures as RaCaptures;
197use regex_automata::util::syntax::Config as SyntaxConfig;
198use regex_automata::Input as RaInput;
199
200mod analyze;
201mod compile;
202mod error;
203mod expand;
204mod flags;
205mod optimize;
206mod parse;
207mod replacer;
208mod vm;
209
210use crate::analyze::analyze;
211use crate::analyze::can_compile_as_anchored;
212use crate::compile::compile;
213use crate::flags::*;
214use crate::optimize::optimize;
215use crate::parse::{ExprTree, NamedGroups, Parser};
216use crate::vm::{Prog, OPTION_SKIPPED_EMPTY_MATCH};
217
218pub use crate::error::{CompileError, Error, ParseError, Result, RuntimeError};
219pub use crate::expand::Expander;
220pub use crate::replacer::{NoExpand, Replacer, ReplacerRef};
221
// NOTE(review): presumably an upper bound on recursion depth; the use site is not visible
// in this part of the file, so confirm what it limits before changing the value.
const MAX_RECURSION: usize = 64;
223
224// the public API
225
/// A builder for a `Regex` to allow configuring options.
///
/// The builder wraps the internal `RegexOptions`; the pattern is only parsed and compiled
/// when `build` is called.
#[derive(Debug)]
pub struct RegexBuilder(RegexOptions);
229
/// A compiled regular expression.
#[derive(Clone)]
pub struct Regex {
    // The compiled matcher: either a delegated `regex-automata` regex or a VM program.
    inner: RegexImpl,
    // Named capture groups from the parsed pattern; the `Arc` is cloned into every
    // `Captures` value produced by this regex.
    named_groups: Arc<NamedGroups>,
}
236
// Separate enum because we don't want to expose any of this
#[derive(Clone)]
enum RegexImpl {
    // Do we want to box this? It's pretty big...
    // Used when analysis reports the pattern is not "hard": matching is delegated to
    // `regex-automata`.
    Wrap {
        // The delegate regex that performs the matching.
        inner: RaRegex,
        // The options this regex was built with; `as_str` reads the pattern from here.
        options: RegexOptions,
        /// Some optimizations avoid the VM, but need to use an extra capture group to represent the match boundaries
        explicit_capture_group_0: bool,
    },
    // Used when the pattern needs fancy features: matching runs through `vm::run`.
    Fancy {
        // The compiled VM program.
        prog: Prog,
        // Number of capture groups; the VM's save slots are truncated to `n_groups * 2`.
        n_groups: usize,
        // The options this regex was built with; `as_str` reads the pattern from here.
        options: RegexOptions,
    },
}
253
/// A single match of a regex or group in an input text
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Match<'t> {
    // The whole text that was searched, not only the matched part.
    text: &'t str,
    // Offset into `text` where the match starts.
    start: usize,
    // Offset into `text` where the match ends.
    end: usize,
}
261
/// An iterator over all non-overlapping matches for a particular string.
///
/// The iterator yields a `Result<Match>`. The iterator stops when no more
/// matches can be found.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
#[derive(Debug)]
pub struct Matches<'r, 't> {
    // The regex being applied.
    re: &'r Regex,
    // The text being searched.
    text: &'t str,
    // Position at which the next search starts. Set to `text.len() + 1` after an error so
    // that the iterator terminates.
    last_end: usize,
    // End position of the most recently yielded match, if any; used to skip an empty match
    // that directly follows a previous match.
    last_match: Option<usize>,
}
276
277impl<'r, 't> Matches<'r, 't> {
278 /// Return the text being searched.
279 pub fn text(&self) -> &'t str {
280 self.text
281 }
282
283 /// Return the underlying regex.
284 pub fn regex(&self) -> &'r Regex {
285 &self.re
286 }
287}
288
289impl<'r, 't> Iterator for Matches<'r, 't> {
290 type Item = Result<Match<'t>>;
291
292 /// Adapted from the `regex` crate. Calls `find_from_pos` repeatedly.
293 /// Ignores empty matches immediately after a match.
294 fn next(&mut self) -> Option<Self::Item> {
295 if self.last_end > self.text.len() {
296 return None;
297 }
298
299 let option_flags = if let Some(last_match) = self.last_match {
300 if self.last_end > last_match {
301 OPTION_SKIPPED_EMPTY_MATCH
302 } else {
303 0
304 }
305 } else {
306 0
307 };
308 let mat =
309 match self
310 .re
311 .find_from_pos_with_option_flags(self.text, self.last_end, option_flags)
312 {
313 Err(error) => {
314 // Stop on first error: If an error is encountered, return it, and set the "last match position"
315 // to the string length, so that the next next() call will return None, to prevent an infinite loop.
316 self.last_end = self.text.len() + 1;
317 return Some(Err(error));
318 }
319 Ok(None) => return None,
320 Ok(Some(mat)) => mat,
321 };
322
323 if mat.start == mat.end {
324 // This is an empty match. To ensure we make progress, start
325 // the next search at the smallest possible starting position
326 // of the next match following this one.
327 self.last_end = next_utf8(self.text, mat.end);
328 // Don't accept empty matches immediately following a match.
329 // Just move on to the next match.
330 if Some(mat.end) == self.last_match {
331 return self.next();
332 }
333 } else {
334 self.last_end = mat.end;
335 }
336
337 self.last_match = Some(mat.end);
338
339 Some(Ok(mat))
340 }
341}
342
/// An iterator that yields all non-overlapping capture groups matching a
/// particular regular expression.
///
/// The iterator stops when no more matches can be found.
///
/// `'r` is the lifetime of the compiled regular expression and `'t` is the
/// lifetime of the matched string.
///
/// Internally this reuses the state of a [`Matches`] iterator (regex, text and positions).
#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(Matches<'r, 't>);
352
353impl<'r, 't> CaptureMatches<'r, 't> {
354 /// Return the text being searched.
355 pub fn text(&self) -> &'t str {
356 self.0.text
357 }
358
359 /// Return the underlying regex.
360 pub fn regex(&self) -> &'r Regex {
361 &self.0.re
362 }
363}
364
365impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
366 type Item = Result<Captures<'t>>;
367
368 /// Adapted from the `regex` crate. Calls `captures_from_pos` repeatedly.
369 /// Ignores empty matches immediately after a match.
370 fn next(&mut self) -> Option<Self::Item> {
371 if self.0.last_end > self.0.text.len() {
372 return None;
373 }
374
375 let captures = match self.0.re.captures_from_pos(self.0.text, self.0.last_end) {
376 Err(error) => {
377 // Stop on first error: If an error is encountered, return it, and set the "last match position"
378 // to the string length, so that the next next() call will return None, to prevent an infinite loop.
379 self.0.last_end = self.0.text.len() + 1;
380 return Some(Err(error));
381 }
382 Ok(None) => return None,
383 Ok(Some(captures)) => captures,
384 };
385
386 let mat = captures
387 .get(0)
388 .expect("`Captures` is expected to have entire match at 0th position");
389 if mat.start == mat.end {
390 self.0.last_end = next_utf8(self.0.text, mat.end);
391 if Some(mat.end) == self.0.last_match {
392 return self.next();
393 }
394 } else {
395 self.0.last_end = mat.end;
396 }
397
398 self.0.last_match = Some(mat.end);
399
400 Some(Ok(captures))
401 }
402}
403
/// A set of capture groups found for a regex.
#[derive(Debug)]
pub struct Captures<'t> {
    // Capture positions, stored in the form produced by whichever engine ran the match.
    inner: CapturesImpl<'t>,
    // Named capture groups, shared with the `Regex` that produced these captures.
    named_groups: Arc<NamedGroups>,
}
410
// Engine-specific storage of capture positions.
#[derive(Debug)]
enum CapturesImpl<'t> {
    // Produced by a delegated (`regex-automata`) match.
    Wrap {
        // The text that was searched.
        text: &'t str,
        // Capture locations as filled in by the delegate regex.
        locations: RaCaptures,
        /// Some optimizations avoid the VM but need an extra capture group to represent the match boundaries.
        /// Therefore what is actually capture group 1 should be treated as capture group 0, and all other
        /// capture groups should have their index reduced by one as well to line up with what the pattern specifies.
        explicit_capture_group_0: bool,
    },
    // Produced by a VM match.
    Fancy {
        // The text that was searched.
        text: &'t str,
        // Save slots returned by the VM, truncated to two entries per capture group.
        saves: Vec<usize>,
    },
}
426
/// Iterator for captured groups in order in which they appear in the regex.
#[derive(Debug)]
pub struct SubCaptureMatches<'c, 't> {
    // The captures being iterated.
    caps: &'c Captures<'t>,
    // NOTE(review): presumably the index of the next group to yield; the `Iterator` impl is
    // not visible in this part of the file, so confirm there.
    i: usize,
}
433
/// An iterator over all substrings delimited by a regex.
///
/// This iterator yields `Result<&'h str>`, where each item is a substring of the
/// target string that is delimited by matches of the regular expression. It stops when there
/// are no more substrings to yield.
///
/// `'r` is the lifetime of the compiled regular expression, and `'h` is the
/// lifetime of the target string being split.
///
/// This iterator can be created by the [`Regex::split`] method.
#[derive(Debug)]
pub struct Split<'r, 'h> {
    // Iterator over the delimiter matches.
    matches: Matches<'r, 'h>,
    // Start of the next substring to yield; set to `target.len() + 1` once the final
    // substring has been returned.
    next_start: usize,
    // The string being split.
    target: &'h str,
}
450
451impl<'r, 'h> Iterator for Split<'r, 'h> {
452 type Item = Result<&'h str>;
453
454 /// Returns the next substring that results from splitting the target string by the regex.
455 ///
456 /// If no more matches are found, returns the remaining part of the string,
457 /// or `None` if all substrings have been yielded.
458 fn next(&mut self) -> Option<Result<&'h str>> {
459 match self.matches.next() {
460 None => {
461 let len = self.target.len();
462 if self.next_start > len {
463 // No more substrings to return
464 None
465 } else {
466 // Return the last part of the target string
467 // Next call will return None
468 let part = &self.target[self.next_start..len];
469 self.next_start = len + 1;
470 Some(Ok(part))
471 }
472 }
473 // Return the next substring
474 Some(Ok(m)) => {
475 let part = &self.target[self.next_start..m.start()];
476 self.next_start = m.end();
477 Some(Ok(part))
478 }
479 Some(Err(e)) => Some(Err(e)),
480 }
481 }
482}
483
484impl<'r, 'h> core::iter::FusedIterator for Split<'r, 'h> {}
485
/// An iterator over at most `N` substrings delimited by a regex.
///
/// This iterator yields `Result<&'h str>`, where each item is a substring of the
/// target that is delimited by matches of the regular expression. It stops either when
/// there are no more substrings to yield, or after `N` substrings have been yielded.
///
/// The `N`th substring is the remaining part of the target.
///
/// `'r` is the lifetime of the compiled regular expression, and `'h` is the
/// lifetime of the target string being split.
///
/// This iterator can be created by the [`Regex::splitn`] method.
#[derive(Debug)]
pub struct SplitN<'r, 'h> {
    // The underlying unlimited split iterator.
    splits: Split<'r, 'h>,
    // How many more substrings may still be yielded.
    limit: usize,
}
503
504impl<'r, 'h> Iterator for SplitN<'r, 'h> {
505 type Item = Result<&'h str>;
506
507 /// Returns the next substring resulting from splitting the target by the regex,
508 /// limited to `N` splits.
509 ///
510 /// Returns `None` if no more matches are found or if the limit is reached after yielding
511 /// the remaining part of the target.
512 fn next(&mut self) -> Option<Result<&'h str>> {
513 if self.limit == 0 {
514 // Limit reached. No more substrings available.
515 return None;
516 }
517
518 // Decrement the limit for each split.
519 self.limit -= 1;
520 if self.limit > 0 {
521 return self.splits.next();
522 }
523
524 // Nth split
525 let len = self.splits.target.len();
526 if self.splits.next_start > len {
527 // No more substrings available.
528 return None;
529 } else {
530 // Return the remaining part of the target
531 let start = self.splits.next_start;
532 self.splits.next_start = len + 1;
533 return Some(Ok(&self.splits.target[start..len]));
534 }
535 }
536
537 fn size_hint(&self) -> (usize, Option<usize>) {
538 (0, Some(self.limit))
539 }
540}
541
542impl<'r, 'h> core::iter::FusedIterator for SplitN<'r, 'h> {}
543
// Everything needed to build a `Regex`: the pattern plus the configurable options.
#[derive(Clone, Debug)]
struct RegexOptions {
    // The regex pattern to compile.
    pattern: String,
    // Syntax flags (case-insensitive, multi-line, ignore-whitespace, dot-matches-newline,
    // unicode), stored in `regex-automata`'s syntax config.
    syntaxc: SyntaxConfig,
    // Backtracking limit for fancy regexes; defaults to 1_000_000.
    backtrack_limit: usize,
    // Optional size limit set through `RegexBuilder::delegate_size_limit`.
    delegate_size_limit: Option<usize>,
    // Optional DFA size limit set through `RegexBuilder::delegate_dfa_size_limit`.
    delegate_dfa_size_limit: Option<usize>,
}
552
553impl RegexOptions {
554 fn get_flag_value(flag_value: bool, enum_value: u32) -> u32 {
555 if flag_value {
556 enum_value
557 } else {
558 0
559 }
560 }
561
562 fn compute_flags(&self) -> u32 {
563 let insensitive = Self::get_flag_value(self.syntaxc.get_case_insensitive(), FLAG_CASEI);
564 let multiline = Self::get_flag_value(self.syntaxc.get_multi_line(), FLAG_MULTI);
565 let whitespace =
566 Self::get_flag_value(self.syntaxc.get_ignore_whitespace(), FLAG_IGNORE_SPACE);
567 let dotnl = Self::get_flag_value(self.syntaxc.get_dot_matches_new_line(), FLAG_DOTNL);
568 let unicode = Self::get_flag_value(self.syntaxc.get_unicode(), FLAG_UNICODE);
569
570 let all_flags = insensitive | multiline | whitespace | dotnl | unicode | unicode;
571 all_flags
572 }
573}
574
575impl Default for RegexOptions {
576 fn default() -> Self {
577 RegexOptions {
578 pattern: String::new(),
579 syntaxc: SyntaxConfig::default(),
580 backtrack_limit: 1_000_000,
581 delegate_size_limit: None,
582 delegate_dfa_size_limit: None,
583 }
584 }
585}
586
587impl RegexBuilder {
588 /// Create a new regex builder with a regex pattern.
589 ///
590 /// If the pattern is invalid, the call to `build` will fail later.
591 pub fn new(pattern: &str) -> Self {
592 let mut builder = RegexBuilder(RegexOptions::default());
593 builder.0.pattern = pattern.to_string();
594 builder
595 }
596
597 /// Build the `Regex`.
598 ///
599 /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
600 pub fn build(&self) -> Result<Regex> {
601 Regex::new_options(self.0.clone())
602 }
603
604 fn set_config(&mut self, func: impl Fn(SyntaxConfig) -> SyntaxConfig) -> &mut Self {
605 self.0.syntaxc = func(self.0.syntaxc);
606 self
607 }
608
609 /// Override default case insensitive
610 /// this is to enable/disable casing via builder instead of a flag within
611 /// the raw string provided to the regex builder
612 ///
613 /// Default is false
614 pub fn case_insensitive(&mut self, yes: bool) -> &mut Self {
615 self.set_config(|x| x.case_insensitive(yes))
616 }
617
618 /// Enable multi-line regex
619 pub fn multi_line(&mut self, yes: bool) -> &mut Self {
620 self.set_config(|x| x.multi_line(yes))
621 }
622
623 /// Allow ignore whitespace
624 pub fn ignore_whitespace(&mut self, yes: bool) -> &mut Self {
625 self.set_config(|x| x.ignore_whitespace(yes))
626 }
627
628 /// Enable or disable the "dot matches any character" flag.
629 /// When this is enabled, `.` will match any character. When it's disabled, then `.` will match any character
630 /// except for a new line character.
631 pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut Self {
632 self.set_config(|x| x.dot_matches_new_line(yes))
633 }
634
635 /// Enable verbose mode in the regular expression.
636 ///
637 /// The same as ignore_whitespace
638 ///
639 /// When enabled, verbose mode permits insigificant whitespace in many
640 /// places in the regular expression, as well as comments. Comments are
641 /// started using `#` and continue until the end of the line.
642 ///
643 /// By default, this is disabled. It may be selectively enabled in the
644 /// regular expression by using the `x` flag regardless of this setting.
645 pub fn verbose_mode(&mut self, yes: bool) -> &mut Self {
646 self.set_config(|x| x.ignore_whitespace(yes))
647 }
648
649 /// Enable or disable the Unicode flag (`u`) by default.
650 ///
651 /// By default this is **enabled**. It may alternatively be selectively
652 /// disabled in the regular expression itself via the `u` flag.
653 ///
654 /// Note that unless "allow invalid UTF-8" is enabled (it's disabled by
655 /// default), a regular expression will fail to parse if Unicode mode is
656 /// disabled and a sub-expression could possibly match invalid UTF-8.
657 ///
658 /// **WARNING**: Unicode mode can greatly increase the size of the compiled
659 /// DFA, which can noticeably impact both memory usage and compilation
660 /// time. This is especially noticeable if your regex contains character
661 /// classes like `\w` that are impacted by whether Unicode is enabled or
662 /// not. If Unicode is not necessary, you are encouraged to disable it.
663 pub fn unicode_mode(&mut self, yes: bool) -> &mut Self {
664 self.set_config(|x| x.unicode(yes))
665 }
666
667 /// Limit for how many times backtracking should be attempted for fancy regexes (where
668 /// backtracking is used). If this limit is exceeded, execution returns an error with
669 /// [`Error::BacktrackLimitExceeded`](enum.Error.html#variant.BacktrackLimitExceeded).
670 /// This is for preventing a regex with catastrophic backtracking to run for too long.
671 ///
672 /// Default is `1_000_000` (1 million).
673 pub fn backtrack_limit(&mut self, limit: usize) -> &mut Self {
674 self.0.backtrack_limit = limit;
675 self
676 }
677
678 /// Set the approximate size limit of the compiled regular expression.
679 ///
680 /// This option is forwarded from the wrapped `regex` crate. Note that depending on the used
681 /// regex features there may be multiple delegated sub-regexes fed to the `regex` crate. As
682 /// such the actual limit is closer to `<number of delegated regexes> * delegate_size_limit`.
683 pub fn delegate_size_limit(&mut self, limit: usize) -> &mut Self {
684 self.0.delegate_size_limit = Some(limit);
685 self
686 }
687
688 /// Set the approximate size of the cache used by the DFA.
689 ///
690 /// This option is forwarded from the wrapped `regex` crate. Note that depending on the used
691 /// regex features there may be multiple delegated sub-regexes fed to the `regex` crate. As
692 /// such the actual limit is closer to `<number of delegated regexes> *
693 /// delegate_dfa_size_limit`.
694 pub fn delegate_dfa_size_limit(&mut self, limit: usize) -> &mut Self {
695 self.0.delegate_dfa_size_limit = Some(limit);
696 self
697 }
698}
699
700impl fmt::Debug for Regex {
701 /// Shows the original regular expression.
702 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
703 write!(f, "{}", self.as_str())
704 }
705}
706
707impl fmt::Display for Regex {
708 /// Shows the original regular expression
709 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
710 write!(f, "{}", self.as_str())
711 }
712}
713
714impl FromStr for Regex {
715 type Err = Error;
716
717 /// Attempts to parse a string into a regular expression
718 fn from_str(s: &str) -> Result<Regex> {
719 Regex::new(s)
720 }
721}
722
723impl Regex {
724 /// Parse and compile a regex with default options, see `RegexBuilder`.
725 ///
726 /// Returns an [`Error`](enum.Error.html) if the pattern could not be parsed.
727 pub fn new(re: &str) -> Result<Regex> {
728 let options = RegexOptions {
729 pattern: re.to_string(),
730 ..RegexOptions::default()
731 };
732 Self::new_options(options)
733 }
734
735 fn new_options(options: RegexOptions) -> Result<Regex> {
736 let mut tree = Expr::parse_tree_with_flags(&options.pattern, options.compute_flags())?;
737
738 // try to optimize the expression tree
739 let requires_capture_group_fixup = optimize(&mut tree);
740 let info = analyze(&tree, if requires_capture_group_fixup { 0 } else { 1 })?;
741
742 if !info.hard {
743 // easy case, wrap regex
744
745 // we do our own to_str because escapes are different
746 // NOTE: there is a good opportunity here to use Hir to avoid regex-automata re-parsing it
747 let mut re_cooked = String::new();
748 tree.expr.to_str(&mut re_cooked, 0);
749 let inner = compile::compile_inner(&re_cooked, &options)?;
750 return Ok(Regex {
751 inner: RegexImpl::Wrap {
752 inner,
753 options: RegexOptions {
754 pattern: re_cooked.clone(),
755 ..options
756 },
757 explicit_capture_group_0: requires_capture_group_fixup,
758 },
759 named_groups: Arc::new(tree.named_groups),
760 });
761 }
762
763 let prog = compile(&info, can_compile_as_anchored(&tree.expr))?;
764 Ok(Regex {
765 inner: RegexImpl::Fancy {
766 prog,
767 n_groups: info.end_group,
768 options,
769 },
770 named_groups: Arc::new(tree.named_groups),
771 })
772 }
773
774 /// Returns the original string of this regex.
775 pub fn as_str(&self) -> &str {
776 match &self.inner {
777 RegexImpl::Wrap { options, .. } => &options.pattern,
778 RegexImpl::Fancy { options, .. } => &options.pattern,
779 }
780 }
781
782 /// Check if the regex matches the input text.
783 ///
784 /// # Example
785 ///
786 /// Test if some text contains the same word twice:
787 ///
788 /// ```rust
789 /// # use fancy_regex::Regex;
790 ///
791 /// let re = Regex::new(r"(\w+) \1").unwrap();
792 /// assert!(re.is_match("mirror mirror on the wall").unwrap());
793 /// ```
794 pub fn is_match(&self, text: &str) -> Result<bool> {
795 match &self.inner {
796 RegexImpl::Wrap { ref inner, .. } => Ok(inner.is_match(text)),
797 RegexImpl::Fancy {
798 ref prog, options, ..
799 } => {
800 let result = vm::run(prog, text, 0, 0, options)?;
801 Ok(result.is_some())
802 }
803 }
804 }
805
806 /// Returns an iterator for each successive non-overlapping match in `text`.
807 ///
808 /// If you have capturing groups in your regex that you want to extract, use the [Regex::captures_iter()]
809 /// method.
810 ///
811 /// # Example
812 ///
813 /// Find all words followed by an exclamation point:
814 ///
815 /// ```rust
816 /// # use fancy_regex::Regex;
817 ///
818 /// let re = Regex::new(r"\w+(?=!)").unwrap();
819 /// let mut matches = re.find_iter("so fancy! even with! iterators!");
820 /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "fancy");
821 /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "with");
822 /// assert_eq!(matches.next().unwrap().unwrap().as_str(), "iterators");
823 /// assert!(matches.next().is_none());
824 /// ```
825 pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
826 Matches {
827 re: &self,
828 text,
829 last_end: 0,
830 last_match: None,
831 }
832 }
833
834 /// Find the first match in the input text.
835 ///
836 /// If you have capturing groups in your regex that you want to extract, use the [Regex::captures()]
837 /// method.
838 ///
839 /// # Example
840 ///
841 /// Find a word that is followed by an exclamation point:
842 ///
843 /// ```rust
844 /// # use fancy_regex::Regex;
845 ///
846 /// let re = Regex::new(r"\w+(?=!)").unwrap();
847 /// assert_eq!(re.find("so fancy!").unwrap().unwrap().as_str(), "fancy");
848 /// ```
849 pub fn find<'t>(&self, text: &'t str) -> Result<Option<Match<'t>>> {
850 self.find_from_pos(text, 0)
851 }
852
    /// Returns the first match in `text`, starting from the specified byte position `pos`.
    ///
    /// # Examples
    ///
    /// Finding match starting at a position:
    ///
    /// ```
    /// # use fancy_regex::Regex;
    /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
    /// let text = "1 test 123\n2 foo";
    /// let mat = re.find_from_pos(text, 7).unwrap().unwrap();
    ///
    /// assert_eq!(mat.start(), 11);
    /// assert_eq!(mat.end(), 12);
    /// ```
    ///
    /// Note that in some cases this is not the same as using the `find`
    /// method and passing a slice of the string, see [Regex::captures_from_pos()] for details.
    ///
    /// # Errors
    ///
    /// For fancy regexes, any error returned by the VM run is passed on. Regexes that are
    /// delegated to `regex-automata` always return `Ok`.
    pub fn find_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Match<'t>>> {
        // No option flags: this is a plain search, not a continuation of an iteration.
        self.find_from_pos_with_option_flags(text, pos, 0)
    }
874
875 fn find_from_pos_with_option_flags<'t>(
876 &self,
877 text: &'t str,
878 pos: usize,
879 option_flags: u32,
880 ) -> Result<Option<Match<'t>>> {
881 match &self.inner {
882 RegexImpl::Wrap {
883 inner,
884 explicit_capture_group_0,
885 ..
886 } => {
887 if !*explicit_capture_group_0 {
888 Ok(inner
889 .search(&RaInput::new(text).span(pos..text.len()))
890 .map(|m| Match::new(text, m.start(), m.end())))
891 } else {
892 let mut locations = inner.create_captures();
893 inner.captures(RaInput::new(text).span(pos..text.len()), &mut locations);
894 Ok(locations.is_match().then(|| {
895 Match::new(
896 text,
897 locations.get_group(1).unwrap().start,
898 locations.get_group(1).unwrap().end,
899 )
900 }))
901 }
902 }
903 RegexImpl::Fancy { prog, options, .. } => {
904 let result = vm::run(prog, text, pos, option_flags, options)?;
905 Ok(result.map(|saves| Match::new(text, saves[0], saves[1])))
906 }
907 }
908 }
909
910 /// Returns an iterator over all the non-overlapping capture groups matched in `text`.
911 ///
912 /// # Examples
913 ///
914 /// Finding all matches and capturing parts of each:
915 ///
916 /// ```rust
917 /// # use fancy_regex::Regex;
918 ///
919 /// let re = Regex::new(r"(\d{4})-(\d{2})").unwrap();
920 /// let text = "It was between 2018-04 and 2020-01";
921 /// let mut all_captures = re.captures_iter(text);
922 ///
923 /// let first = all_captures.next().unwrap().unwrap();
924 /// assert_eq!(first.get(1).unwrap().as_str(), "2018");
925 /// assert_eq!(first.get(2).unwrap().as_str(), "04");
926 /// assert_eq!(first.get(0).unwrap().as_str(), "2018-04");
927 ///
928 /// let second = all_captures.next().unwrap().unwrap();
929 /// assert_eq!(second.get(1).unwrap().as_str(), "2020");
930 /// assert_eq!(second.get(2).unwrap().as_str(), "01");
931 /// assert_eq!(second.get(0).unwrap().as_str(), "2020-01");
932 ///
933 /// assert!(all_captures.next().is_none());
934 /// ```
935 pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CaptureMatches<'r, 't> {
936 CaptureMatches(self.find_iter(text))
937 }
938
    /// Returns the capture groups for the first match in `text`.
    ///
    /// If no match is found, then `Ok(None)` is returned.
    ///
    /// This is the same as calling `captures_from_pos` with position 0.
    ///
    /// # Examples
    ///
    /// Finding matches and capturing parts of the match:
    ///
    /// ```rust
    /// # use fancy_regex::Regex;
    ///
    /// let re = Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap();
    /// let text = "The date was 2018-04-07";
    /// let captures = re.captures(text).unwrap().unwrap();
    ///
    /// assert_eq!(captures.get(1).unwrap().as_str(), "2018");
    /// assert_eq!(captures.get(2).unwrap().as_str(), "04");
    /// assert_eq!(captures.get(3).unwrap().as_str(), "07");
    /// assert_eq!(captures.get(0).unwrap().as_str(), "2018-04-07");
    /// ```
    pub fn captures<'t>(&self, text: &'t str) -> Result<Option<Captures<'t>>> {
        self.captures_from_pos(text, 0)
    }
962
963 /// Returns the capture groups for the first match in `text`, starting from
964 /// the specified byte position `pos`.
965 ///
966 /// # Examples
967 ///
968 /// Finding captures starting at a position:
969 ///
970 /// ```
971 /// # use fancy_regex::Regex;
972 /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
973 /// let text = "1 test 123\n2 foo";
974 /// let captures = re.captures_from_pos(text, 7).unwrap().unwrap();
975 ///
976 /// let group = captures.get(1).unwrap();
977 /// assert_eq!(group.as_str(), "2");
978 /// assert_eq!(group.start(), 11);
979 /// assert_eq!(group.end(), 12);
980 /// ```
981 ///
982 /// Note that in some cases this is not the same as using the `captures`
983 /// method and passing a slice of the string, see the capture that we get
984 /// when we do this:
985 ///
986 /// ```
987 /// # use fancy_regex::Regex;
988 /// let re = Regex::new(r"(?m:^)(\d+)").unwrap();
989 /// let text = "1 test 123\n2 foo";
990 /// let captures = re.captures(&text[7..]).unwrap().unwrap();
991 /// assert_eq!(captures.get(1).unwrap().as_str(), "123");
992 /// ```
993 ///
994 /// This matched the number "123" because it's at the beginning of the text
995 /// of the string slice.
996 ///
997 pub fn captures_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Captures<'t>>> {
998 let named_groups = self.named_groups.clone();
999 match &self.inner {
1000 RegexImpl::Wrap {
1001 inner,
1002 explicit_capture_group_0,
1003 ..
1004 } => {
1005 let mut locations = inner.create_captures();
1006 inner.captures(RaInput::new(text).span(pos..text.len()), &mut locations);
1007 Ok(locations.is_match().then(|| Captures {
1008 inner: CapturesImpl::Wrap {
1009 text,
1010 locations,
1011 explicit_capture_group_0: *explicit_capture_group_0,
1012 },
1013 named_groups,
1014 }))
1015 }
1016 RegexImpl::Fancy {
1017 prog,
1018 n_groups,
1019 options,
1020 ..
1021 } => {
1022 let result = vm::run(prog, text, pos, 0, options)?;
1023 Ok(result.map(|mut saves| {
1024 saves.truncate(n_groups * 2);
1025 Captures {
1026 inner: CapturesImpl::Fancy { text, saves },
1027 named_groups,
1028 }
1029 }))
1030 }
1031 }
1032 }
1033
1034 /// Returns the number of captures, including the implicit capture of the entire expression.
1035 pub fn captures_len(&self) -> usize {
1036 match &self.inner {
1037 RegexImpl::Wrap {
1038 inner,
1039 explicit_capture_group_0,
1040 ..
1041 } => inner.captures_len() - if *explicit_capture_group_0 { 1 } else { 0 },
1042 RegexImpl::Fancy { n_groups, .. } => *n_groups,
1043 }
1044 }
1045
1046 /// Returns an iterator over the capture names.
1047 pub fn capture_names(&self) -> CaptureNames {
1048 let mut names = Vec::new();
1049 names.resize(self.captures_len(), None);
1050 for (name, &i) in self.named_groups.iter() {
1051 names[i] = Some(name.as_str());
1052 }
1053 CaptureNames(names.into_iter())
1054 }
1055
1056 // for debugging only
1057 #[doc(hidden)]
1058 pub fn debug_print(&self, writer: &mut Formatter<'_>) -> fmt::Result {
1059 match &self.inner {
1060 RegexImpl::Wrap {
1061 options,
1062 explicit_capture_group_0,
1063 ..
1064 } => {
1065 write!(
1066 writer,
1067 "wrapped Regex {:?}, explicit_capture_group_0: {:}",
1068 options.pattern, *explicit_capture_group_0
1069 )
1070 }
1071 RegexImpl::Fancy { prog, .. } => prog.debug_print(writer),
1072 }
1073 }
1074
1075 /// Replaces the leftmost-first match with the replacement provided.
1076 /// The replacement can be a regular string (where `$N` and `$name` are
1077 /// expanded to match capture groups) or a function that takes the matches'
1078 /// `Captures` and returns the replaced string.
1079 ///
1080 /// If no match is found, then a copy of the string is returned unchanged.
1081 ///
1082 /// # Replacement string syntax
1083 ///
/// All instances of `$name` in the replacement text are replaced with the
/// corresponding capture group `name`.
1086 ///
1087 /// `name` may be an integer corresponding to the index of the
1088 /// capture group (counted by order of opening parenthesis where `0` is the
1089 /// entire match) or it can be a name (consisting of letters, digits or
1090 /// underscores) corresponding to a named capture group.
1091 ///
1092 /// If `name` isn't a valid capture group (whether the name doesn't exist
1093 /// or isn't a valid index), then it is replaced with the empty string.
1094 ///
1095 /// The longest possible name is used. e.g., `$1a` looks up the capture
1096 /// group named `1a` and not the capture group at index `1`. To exert more
1097 /// precise control over the name, use braces, e.g., `${1}a`.
1098 ///
1099 /// To write a literal `$` use `$$`.
1100 ///
1101 /// # Examples
1102 ///
1103 /// Note that this function is polymorphic with respect to the replacement.
1104 /// In typical usage, this can just be a normal string:
1105 ///
1106 /// ```rust
1107 /// # use fancy_regex::Regex;
1108 /// let re = Regex::new("[^01]+").unwrap();
1109 /// assert_eq!(re.replace("1078910", ""), "1010");
1110 /// ```
1111 ///
1112 /// But anything satisfying the `Replacer` trait will work. For example,
1113 /// a closure of type `|&Captures| -> String` provides direct access to the
1114 /// captures corresponding to a match. This allows one to access
1115 /// capturing group matches easily:
1116 ///
1117 /// ```rust
1118 /// # use fancy_regex::{Regex, Captures};
1119 /// let re = Regex::new(r"([^,\s]+),\s+(\S+)").unwrap();
1120 /// let result = re.replace("Springsteen, Bruce", |caps: &Captures| {
1121 /// format!("{} {}", &caps[2], &caps[1])
1122 /// });
1123 /// assert_eq!(result, "Bruce Springsteen");
1124 /// ```
1125 ///
1126 /// But this is a bit cumbersome to use all the time. Instead, a simple
1127 /// syntax is supported that expands `$name` into the corresponding capture
1128 /// group. Here's the last example, but using this expansion technique
1129 /// with named capture groups:
1130 ///
1131 /// ```rust
1132 /// # use fancy_regex::Regex;
1133 /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(?P<first>\S+)").unwrap();
1134 /// let result = re.replace("Springsteen, Bruce", "$first $last");
1135 /// assert_eq!(result, "Bruce Springsteen");
1136 /// ```
1137 ///
1138 /// Note that using `$2` instead of `$first` or `$1` instead of `$last`
1139 /// would produce the same result. To write a literal `$` use `$$`.
1140 ///
1141 /// Sometimes the replacement string requires use of curly braces to
1142 /// delineate a capture group replacement and surrounding literal text.
1143 /// For example, if we wanted to join two words together with an
1144 /// underscore:
1145 ///
1146 /// ```rust
1147 /// # use fancy_regex::Regex;
1148 /// let re = Regex::new(r"(?P<first>\w+)\s+(?P<second>\w+)").unwrap();
1149 /// let result = re.replace("deep fried", "${first}_$second");
1150 /// assert_eq!(result, "deep_fried");
1151 /// ```
1152 ///
1153 /// Without the curly braces, the capture group name `first_` would be
1154 /// used, and since it doesn't exist, it would be replaced with the empty
1155 /// string.
1156 ///
1157 /// Finally, sometimes you just want to replace a literal string with no
1158 /// regard for capturing group expansion. This can be done by wrapping a
1159 /// byte string with `NoExpand`:
1160 ///
1161 /// ```rust
1162 /// # use fancy_regex::Regex;
1163 /// use fancy_regex::NoExpand;
1164 ///
1165 /// let re = Regex::new(r"(?P<last>[^,\s]+),\s+(\S+)").unwrap();
1166 /// let result = re.replace("Springsteen, Bruce", NoExpand("$2 $last"));
1167 /// assert_eq!(result, "$2 $last");
1168 /// ```
1169 pub fn replace<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
1170 self.replacen(text, 1, rep)
1171 }
1172
1173 /// Replaces all non-overlapping matches in `text` with the replacement
1174 /// provided. This is the same as calling `replacen` with `limit` set to
1175 /// `0`.
1176 ///
1177 /// See the documentation for `replace` for details on how to access
1178 /// capturing group matches in the replacement string.
1179 pub fn replace_all<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
1180 self.replacen(text, 0, rep)
1181 }
1182
1183 /// Replaces at most `limit` non-overlapping matches in `text` with the
1184 /// replacement provided. If `limit` is 0, then all non-overlapping matches
1185 /// are replaced.
1186 ///
1187 /// Will panic if any errors are encountered. Use `try_replacen`, which this
1188 /// function unwraps, if you want to handle errors.
1189 ///
1190 /// See the documentation for `replace` for details on how to access
1191 /// capturing group matches in the replacement string.
1192 ///
1193 pub fn replacen<'t, R: Replacer>(&self, text: &'t str, limit: usize, rep: R) -> Cow<'t, str> {
1194 self.try_replacen(text, limit, rep).unwrap()
1195 }
1196
1197 /// Replaces at most `limit` non-overlapping matches in `text` with the
1198 /// replacement provided. If `limit` is 0, then all non-overlapping matches
1199 /// are replaced.
1200 ///
1201 /// Propagates any errors encountered, such as `RuntimeError::BacktrackLimitExceeded`.
1202 ///
1203 /// See the documentation for `replace` for details on how to access
1204 /// capturing group matches in the replacement string.
1205 pub fn try_replacen<'t, R: Replacer>(
1206 &self,
1207 text: &'t str,
1208 limit: usize,
1209 mut rep: R,
1210 ) -> Result<Cow<'t, str>> {
1211 // If we know that the replacement doesn't have any capture expansions,
1212 // then we can fast path. The fast path can make a tremendous
1213 // difference:
1214 //
1215 // 1) We use `find_iter` instead of `captures_iter`. Not asking for
1216 // captures generally makes the regex engines faster.
1217 // 2) We don't need to look up all of the capture groups and do
1218 // replacements inside the replacement string. We just push it
1219 // at each match and be done with it.
1220 if let Some(rep) = rep.no_expansion() {
1221 let mut it = self.find_iter(text).enumerate().peekable();
1222 if it.peek().is_none() {
1223 return Ok(Cow::Borrowed(text));
1224 }
1225 let mut new = String::with_capacity(text.len());
1226 let mut last_match = 0;
1227 for (i, m) in it {
1228 let m = m?;
1229
1230 if limit > 0 && i >= limit {
1231 break;
1232 }
1233 new.push_str(&text[last_match..m.start()]);
1234 new.push_str(&rep);
1235 last_match = m.end();
1236 }
1237 new.push_str(&text[last_match..]);
1238 return Ok(Cow::Owned(new));
1239 }
1240
1241 // The slower path, which we use if the replacement needs access to
1242 // capture groups.
1243 let mut it = self.captures_iter(text).enumerate().peekable();
1244 if it.peek().is_none() {
1245 return Ok(Cow::Borrowed(text));
1246 }
1247 let mut new = String::with_capacity(text.len());
1248 let mut last_match = 0;
1249 for (i, cap) in it {
1250 let cap = cap?;
1251
1252 if limit > 0 && i >= limit {
1253 break;
1254 }
1255 // unwrap on 0 is OK because captures only reports matches
1256 let m = cap.get(0).unwrap();
1257 new.push_str(&text[last_match..m.start()]);
1258 rep.replace_append(&cap, &mut new);
1259 last_match = m.end();
1260 }
1261 new.push_str(&text[last_match..]);
1262 Ok(Cow::Owned(new))
1263 }
1264
1265 /// Splits the string by matches of the regex.
1266 ///
1267 /// Returns an iterator over the substrings of the target string
1268 /// that *aren't* matched by the regex.
1269 ///
1270 /// # Example
1271 ///
1272 /// To split a string delimited by arbitrary amounts of spaces or tabs:
1273 ///
1274 /// ```rust
1275 /// # use fancy_regex::Regex;
1276 /// let re = Regex::new(r"[ \t]+").unwrap();
1277 /// let target = "a b \t c\td e";
1278 /// let fields: Vec<&str> = re.split(target).map(|x| x.unwrap()).collect();
1279 /// assert_eq!(fields, vec!["a", "b", "c", "d", "e"]);
1280 /// ```
1281 pub fn split<'r, 'h>(&'r self, target: &'h str) -> Split<'r, 'h> {
1282 Split {
1283 matches: self.find_iter(target),
1284 next_start: 0,
1285 target,
1286 }
1287 }
1288
1289 /// Splits the string by matches of the regex at most `limit` times.
1290 ///
1291 /// Returns an iterator over the substrings of the target string
1292 /// that *aren't* matched by the regex.
1293 ///
1294 /// The `N`th substring is the remaining part of the target.
1295 ///
1296 /// # Example
1297 ///
1298 /// To split a string delimited by arbitrary amounts of spaces or tabs
1299 /// 3 times:
1300 ///
1301 /// ```rust
1302 /// # use fancy_regex::Regex;
1303 /// let re = Regex::new(r"[ \t]+").unwrap();
1304 /// let target = "a b \t c\td e";
1305 /// let fields: Vec<&str> = re.splitn(target, 3).map(|x| x.unwrap()).collect();
1306 /// assert_eq!(fields, vec!["a", "b", "c\td e"]);
1307 /// ```
1308 pub fn splitn<'r, 'h>(&'r self, target: &'h str, limit: usize) -> SplitN<'r, 'h> {
1309 SplitN {
1310 splits: self.split(target),
1311 limit: limit,
1312 }
1313 }
1314}
1315
1316impl TryFrom<&str> for Regex {
1317 type Error = Error;
1318
1319 /// Attempts to parse a string into a regular expression
1320 fn try_from(s: &str) -> Result<Self> {
1321 Self::new(s)
1322 }
1323}
1324
1325impl TryFrom<String> for Regex {
1326 type Error = Error;
1327
1328 /// Attempts to parse a string into a regular expression
1329 fn try_from(s: String) -> Result<Self> {
1330 Self::new(&s)
1331 }
1332}
1333
1334impl<'t> Match<'t> {
1335 /// Returns the starting byte offset of the match in the text.
1336 #[inline]
1337 pub fn start(&self) -> usize {
1338 self.start
1339 }
1340
1341 /// Returns the ending byte offset of the match in the text.
1342 #[inline]
1343 pub fn end(&self) -> usize {
1344 self.end
1345 }
1346
1347 /// Returns the range over the starting and ending byte offsets of the match in text.
1348 #[inline]
1349 pub fn range(&self) -> Range<usize> {
1350 self.start..self.end
1351 }
1352
1353 /// Returns the matched text.
1354 #[inline]
1355 pub fn as_str(&self) -> &'t str {
1356 &self.text[self.start..self.end]
1357 }
1358
1359 /// Creates a new match from the given text and byte offsets.
1360 fn new(text: &'t str, start: usize, end: usize) -> Match<'t> {
1361 Match { text, start, end }
1362 }
1363}
1364
1365impl<'t> From<Match<'t>> for &'t str {
1366 fn from(m: Match<'t>) -> &'t str {
1367 m.as_str()
1368 }
1369}
1370
1371impl<'t> From<Match<'t>> for Range<usize> {
1372 fn from(m: Match<'t>) -> Range<usize> {
1373 m.range()
1374 }
1375}
1376
1377#[allow(clippy::len_without_is_empty)] // follow regex's API
1378impl<'t> Captures<'t> {
1379 /// Get the capture group by its index in the regex.
1380 ///
1381 /// If there is no match for that group or the index does not correspond to a group, `None` is
1382 /// returned. The index 0 returns the whole match.
1383 pub fn get(&self, i: usize) -> Option<Match<'t>> {
1384 match &self.inner {
1385 CapturesImpl::Wrap {
1386 text,
1387 locations,
1388 explicit_capture_group_0,
1389 } => locations
1390 .get_group(i + if *explicit_capture_group_0 { 1 } else { 0 })
1391 .map(|span| Match {
1392 text,
1393 start: span.start,
1394 end: span.end,
1395 }),
1396 CapturesImpl::Fancy { text, ref saves } => {
1397 let slot = i * 2;
1398 if slot >= saves.len() {
1399 return None;
1400 }
1401 let lo = saves[slot];
1402 if lo == usize::MAX {
1403 return None;
1404 }
1405 let hi = saves[slot + 1];
1406 Some(Match {
1407 text,
1408 start: lo,
1409 end: hi,
1410 })
1411 }
1412 }
1413 }
1414
1415 /// Returns the match for a named capture group. Returns `None` the capture
1416 /// group did not match or if there is no group with the given name.
1417 pub fn name(&self, name: &str) -> Option<Match<'t>> {
1418 self.named_groups.get(name).and_then(|i| self.get(*i))
1419 }
1420
1421 /// Expands all instances of `$group` in `replacement` to the corresponding
1422 /// capture group `name`, and writes them to the `dst` buffer given.
1423 ///
1424 /// `group` may be an integer corresponding to the index of the
1425 /// capture group (counted by order of opening parenthesis where `\0` is the
1426 /// entire match) or it can be a name (consisting of letters, digits or
1427 /// underscores) corresponding to a named capture group.
1428 ///
1429 /// If `group` isn't a valid capture group (whether the name doesn't exist
1430 /// or isn't a valid index), then it is replaced with the empty string.
1431 ///
1432 /// The longest possible name is used. e.g., `$1a` looks up the capture
1433 /// group named `1a` and not the capture group at index `1`. To exert more
1434 /// precise control over the name, use braces, e.g., `${1}a`.
1435 ///
1436 /// To write a literal `$`, use `$$`.
1437 ///
1438 /// For more control over expansion, see [`Expander`].
1439 ///
1440 /// [`Expander`]: expand/struct.Expander.html
1441 pub fn expand(&self, replacement: &str, dst: &mut String) {
1442 Expander::default().append_expansion(dst, replacement, self);
1443 }
1444
1445 /// Iterate over the captured groups in order in which they appeared in the regex. The first
1446 /// capture corresponds to the whole match.
1447 pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
1448 SubCaptureMatches { caps: self, i: 0 }
1449 }
1450
1451 /// How many groups were captured. This is always at least 1 because group 0 returns the whole
1452 /// match.
1453 pub fn len(&self) -> usize {
1454 match &self.inner {
1455 CapturesImpl::Wrap {
1456 locations,
1457 explicit_capture_group_0,
1458 ..
1459 } => locations.group_len() - if *explicit_capture_group_0 { 1 } else { 0 },
1460 CapturesImpl::Fancy { saves, .. } => saves.len() / 2,
1461 }
1462 }
1463}
1464
1465/// Get a group by index.
1466///
1467/// `'t` is the lifetime of the matched text.
1468///
1469/// The text can't outlive the `Captures` object if this method is
1470/// used, because of how `Index` is defined (normally `a[i]` is part
1471/// of `a` and can't outlive it); to do that, use `get()` instead.
1472///
1473/// # Panics
1474///
1475/// If there is no group at the given index.
1476impl<'t> Index<usize> for Captures<'t> {
1477 type Output = str;
1478
1479 fn index(&self, i: usize) -> &str {
1480 self.get(i)
1481 .map(|m| m.as_str())
1482 .unwrap_or_else(|| panic!("no group at index '{}'", i))
1483 }
1484}
1485
1486/// Get a group by name.
1487///
1488/// `'t` is the lifetime of the matched text and `'i` is the lifetime
1489/// of the group name (the index).
1490///
1491/// The text can't outlive the `Captures` object if this method is
1492/// used, because of how `Index` is defined (normally `a[i]` is part
1493/// of `a` and can't outlive it); to do that, use `name` instead.
1494///
1495/// # Panics
1496///
1497/// If there is no group named by the given value.
1498impl<'t, 'i> Index<&'i str> for Captures<'t> {
1499 type Output = str;
1500
1501 fn index<'a>(&'a self, name: &'i str) -> &'a str {
1502 self.name(name)
1503 .map(|m| m.as_str())
1504 .unwrap_or_else(|| panic!("no group named '{}'", name))
1505 }
1506}
1507
1508impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
1509 type Item = Option<Match<'t>>;
1510
1511 fn next(&mut self) -> Option<Option<Match<'t>>> {
1512 if self.i < self.caps.len() {
1513 let result = self.caps.get(self.i);
1514 self.i += 1;
1515 Some(result)
1516 } else {
1517 None
1518 }
1519 }
1520}
1521
1522// TODO: might be nice to implement ExactSizeIterator etc for SubCaptures
1523
/// Regular expression AST. This is public for now but may change.
///
/// `Expr::to_str` can render the variants that the regex crate can handle back into a regex
/// string; for the remaining ("hard") variants it panics.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum Expr {
    /// An empty expression, e.g. the last branch in `(a|b|)`
    Empty,
    /// Any character, regex `.`
    Any {
        /// Whether it also matches newlines or not (rendered as `(?s:.)` instead of `.` by
        /// `to_str`)
        newline: bool,
    },
    /// An assertion
    Assertion(Assertion),
    /// The string as a literal, e.g. `a`
    Literal {
        /// The string to match
        val: String,
        /// Whether match is case-insensitive or not
        casei: bool,
    },
    /// Concatenation of multiple expressions, must match in order, e.g. `a.` is a concatenation of
    /// the literal `a` and `.` for any character
    Concat(Vec<Expr>),
    /// Alternative of multiple expressions, one of them must match, e.g. `a|b` is an alternative
    /// where either the literal `a` or `b` must match
    Alt(Vec<Expr>),
    /// Capturing group of expression, e.g. `(a.)` matches `a` and any character and "captures"
    /// (remembers) the match
    Group(Box<Expr>),
    /// Look-around (e.g. positive/negative look-ahead or look-behind) with an expression, e.g.
    /// `(?=a)` means the next character must be `a` (but the match is not consumed)
    LookAround(Box<Expr>, LookAround),
    /// Repeat of an expression, e.g. `a*` or `a+` or `a{1,3}`
    Repeat {
        /// The expression that is being repeated
        child: Box<Expr>,
        /// The minimum number of repetitions
        lo: usize,
        /// The maximum number of repetitions; `usize::MAX` stands for "no upper bound", as in
        /// `a*`, `a+` or `a{2,}`
        hi: usize,
        /// Greedy means as much as possible is matched, e.g. `.*b` would match all of `abab`.
        /// Non-greedy means as little as possible, e.g. `.*?b` would match only `ab` in `abab`.
        greedy: bool,
    },
    /// Delegate a regex to the regex crate. This is used as a simplification so that we don't have
    /// to represent all the expressions in the AST, e.g. character classes.
    Delegate {
        /// The regex, copied as-is into the output of `to_str`
        inner: String,
        /// How many characters the regex matches
        size: usize, // TODO: move into analysis result
        /// Whether the matching is case-insensitive or not
        casei: bool,
    },
    /// Back reference to a capture group, e.g. `\1` in `(abc|def)\1` references the captured group
    /// and the whole regex matches either `abcabc` or `defdef`.
    Backref {
        /// The capture group number being referenced
        group: usize,
        /// Whether the matching is case-insensitive or not
        casei: bool,
    },
    /// Back reference to a capture group at the given specified relative recursion level.
    BackrefWithRelativeRecursionLevel {
        /// The capture group number being referenced
        group: usize,
        /// Relative recursion level
        relative_level: isize,
        /// Whether the matching is case-insensitive or not
        casei: bool,
    },
    /// Atomic non-capturing group, e.g. `(?>ab|a)` in text that contains `ab` will match `ab` and
    /// never backtrack and try `a`, even if matching fails after the atomic group.
    AtomicGroup(Box<Expr>),
    /// Keep matched text so far out of overall match
    KeepOut,
    /// Anchor to match at the position where the previous match ended
    ContinueFromPreviousMatchEnd,
    /// Conditional expression based on whether the numbered capture group matched or not
    BackrefExistsCondition(usize),
    /// If/Then/Else Condition. If there is no Then/Else, these will just be empty expressions.
    Conditional {
        /// The conditional expression to evaluate
        condition: Box<Expr>,
        /// What to execute if the condition is true
        true_branch: Box<Expr>,
        /// What to execute if the condition is false
        false_branch: Box<Expr>,
    },
    /// Subroutine call to the specified group number
    SubroutineCall(usize),
    /// Unresolved subroutine call to the specified group name
    UnresolvedNamedSubroutineCall {
        /// The capture group name
        name: String,
        /// The position in the original regex pattern where the subroutine call is made
        ix: usize,
    },
}
1622
/// Type of look-around assertion as used for a look-around expression.
///
/// This is the second field of `Expr::LookAround`; the first field is the expression that is
/// looked for.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum LookAround {
    /// Look-ahead assertion, e.g. `(?=a)`
    LookAhead,
    /// Negative look-ahead assertion, e.g. `(?!a)`
    LookAheadNeg,
    /// Look-behind assertion, e.g. `(?<=a)`
    LookBehind,
    /// Negative look-behind assertion, e.g. `(?<!a)`
    LookBehindNeg,
}
1635
/// An iterator over capture names in a [Regex]. The iterator
/// returns the name of each group, or [None] if the group has
/// no name. Because capture group 0 cannot have a name, the
/// first item returned is always [None].
///
/// The wrapped value is the by-value iterator over the vector of names
/// that `Regex::capture_names` builds.
pub struct CaptureNames<'r>(vec::IntoIter<Option<&'r str>>);
1641
1642impl Debug for CaptureNames<'_> {
1643 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
1644 f.write_str("<CaptureNames>")
1645 }
1646}
1647
1648impl<'r> Iterator for CaptureNames<'r> {
1649 type Item = Option<&'r str>;
1650
1651 fn next(&mut self) -> Option<Self::Item> {
1652 self.0.next()
1653 }
1654}
1655
1656// silly to write my own, but this is super-fast for the common 1-digit
1657// case.
fn push_usize(s: &mut String, x: usize) {
    // Emit the leading digits first (if there are any), then the last one.
    let leading = x / 10;
    let last_digit = (x % 10) as u8;
    if leading != 0 {
        push_usize(s, leading);
    }
    s.push((b'0' + last_digit) as char);
}
1666
fn is_special(c: char) -> bool {
    // The characters that `push_quoted` prefixes with a backslash.
    matches!(
        c,
        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$' | '#'
    )
}
1674
1675fn push_quoted(buf: &mut String, s: &str) {
1676 for c in s.chars() {
1677 if is_special(c) {
1678 buf.push('\\');
1679 }
1680 buf.push(c);
1681 }
1682}
1683
1684/// Escapes special characters in `text` with '\\'. Returns a string which, when interpreted
1685/// as a regex, matches exactly `text`.
1686pub fn escape(text: &str) -> Cow<str> {
1687 // Using bytes() is OK because all special characters are single bytes.
1688 match text.bytes().filter(|&b| is_special(b as char)).count() {
1689 0 => Cow::Borrowed(text),
1690 n => {
1691 // The capacity calculation is exact because '\\' is a single byte.
1692 let mut buf = String::with_capacity(text.len() + n);
1693 push_quoted(&mut buf, text);
1694 Cow::Owned(buf)
1695 }
1696 }
1697}
1698
/// Type of assertions
///
/// The text and line anchors can be rendered by `Expr::to_str`; the four word boundary variants
/// are the ones `Assertion::is_hard` reports as hard.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Assertion {
    /// Start of input text (rendered as `^` by `Expr::to_str`)
    StartText,
    /// End of input text (rendered as `$` by `Expr::to_str`)
    EndText,
    /// Start of a line (rendered as `(?m:^)`, or `(?Rm:^)` in CRLF mode, by `Expr::to_str`)
    StartLine {
        /// CRLF mode
        crlf: bool,
    },
    /// End of a line (rendered as `(?m:$)`, or `(?Rm:$)` in CRLF mode, by `Expr::to_str`)
    EndLine {
        /// CRLF mode
        crlf: bool,
    },
    /// Left word boundary
    LeftWordBoundary,
    /// Right word boundary
    RightWordBoundary,
    /// Both word boundaries
    WordBoundary,
    /// Not word boundary
    NotWordBoundary,
}
1725
1726impl Assertion {
1727 pub(crate) fn is_hard(&self) -> bool {
1728 use Assertion::*;
1729 matches!(
1730 self,
1731 // these will make regex-automata use PikeVM
1732 LeftWordBoundary | RightWordBoundary | WordBoundary | NotWordBoundary
1733 )
1734 }
1735}
1736
1737impl Expr {
1738 /// Parse the regex and return an expression (AST) and a bit set with the indexes of groups
1739 /// that are referenced by backrefs.
1740 pub fn parse_tree(re: &str) -> Result<ExprTree> {
1741 Parser::parse(re)
1742 }
1743
1744 /// Parse the regex and return an expression (AST)
1745 /// Flags should be bit based based on flags
1746 pub fn parse_tree_with_flags(re: &str, flags: u32) -> Result<ExprTree> {
1747 Parser::parse_with_flags(re, flags)
1748 }
1749
1750 /// Convert expression to a regex string in the regex crate's syntax.
1751 ///
1752 /// # Panics
1753 ///
1754 /// Panics for expressions that are hard, i.e. can not be handled by the regex crate.
1755 pub fn to_str(&self, buf: &mut String, precedence: u8) {
1756 match *self {
1757 Expr::Empty => (),
1758 Expr::Any { newline } => buf.push_str(if newline { "(?s:.)" } else { "." }),
1759 Expr::Literal { ref val, casei } => {
1760 if casei {
1761 buf.push_str("(?i:");
1762 }
1763 push_quoted(buf, val);
1764 if casei {
1765 buf.push_str(")");
1766 }
1767 }
1768 Expr::Assertion(Assertion::StartText) => buf.push('^'),
1769 Expr::Assertion(Assertion::EndText) => buf.push('$'),
1770 Expr::Assertion(Assertion::StartLine { crlf: false }) => buf.push_str("(?m:^)"),
1771 Expr::Assertion(Assertion::EndLine { crlf: false }) => buf.push_str("(?m:$)"),
1772 Expr::Assertion(Assertion::StartLine { crlf: true }) => buf.push_str("(?Rm:^)"),
1773 Expr::Assertion(Assertion::EndLine { crlf: true }) => buf.push_str("(?Rm:$)"),
1774 Expr::Concat(ref children) => {
1775 if precedence > 1 {
1776 buf.push_str("(?:");
1777 }
1778 for child in children {
1779 child.to_str(buf, 2);
1780 }
1781 if precedence > 1 {
1782 buf.push(')')
1783 }
1784 }
1785 Expr::Alt(ref children) => {
1786 if precedence > 0 {
1787 buf.push_str("(?:");
1788 }
1789 for (i, child) in children.iter().enumerate() {
1790 if i != 0 {
1791 buf.push('|');
1792 }
1793 child.to_str(buf, 1);
1794 }
1795 if precedence > 0 {
1796 buf.push(')');
1797 }
1798 }
1799 Expr::Group(ref child) => {
1800 buf.push('(');
1801 child.to_str(buf, 0);
1802 buf.push(')');
1803 }
1804 Expr::Repeat {
1805 ref child,
1806 lo,
1807 hi,
1808 greedy,
1809 } => {
1810 if precedence > 2 {
1811 buf.push_str("(?:");
1812 }
1813 child.to_str(buf, 3);
1814 match (lo, hi) {
1815 (0, 1) => buf.push('?'),
1816 (0, usize::MAX) => buf.push('*'),
1817 (1, usize::MAX) => buf.push('+'),
1818 (lo, hi) => {
1819 buf.push('{');
1820 push_usize(buf, lo);
1821 if lo != hi {
1822 buf.push(',');
1823 if hi != usize::MAX {
1824 push_usize(buf, hi);
1825 }
1826 }
1827 buf.push('}');
1828 }
1829 }
1830 if !greedy {
1831 buf.push('?');
1832 }
1833 if precedence > 2 {
1834 buf.push(')');
1835 }
1836 }
1837 Expr::Delegate {
1838 ref inner, casei, ..
1839 } => {
1840 // at the moment, delegate nodes are just atoms
1841 if casei {
1842 buf.push_str("(?i:");
1843 }
1844 buf.push_str(inner);
1845 if casei {
1846 buf.push_str(")");
1847 }
1848 }
1849 _ => panic!("attempting to format hard expr {:?}", self),
1850 }
1851 }
1852}
1853
1854// precondition: ix > 0
fn prev_codepoint_ix(s: &str, mut ix: usize) -> usize {
    let bytes = s.as_bytes();
    // Step back one byte, then keep stepping back while standing on a UTF-8
    // continuation byte (bit pattern 10xxxxxx, i.e. 0x80..=0xbf).
    ix -= 1;
    while bytes[ix] & 0xc0 == 0x80 {
        ix -= 1;
    }
    ix
}
1866
fn codepoint_len(b: u8) -> usize {
    // Length of the UTF-8 sequence, decided by the value of its first byte.
    match b {
        0x00..=0x7f => 1,
        0x80..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
1875
1876/// Returns the smallest possible index of the next valid UTF-8 sequence
1877/// starting after `i`.
1878/// Adapted from a function with the same name in the `regex` crate.
1879fn next_utf8(text: &str, i: usize) -> usize {
1880 let b = match text.as_bytes().get(i) {
1881 None => return i + 1,
1882 Some(&b) => b,
1883 };
1884 i + codepoint_len(b)
1885}
1886
1887// If this returns false, then there is no possible backref in the re
1888
1889// Both potential implementations are turned off, because we currently
1890// always need to do a deeper analysis because of 1-character
1891// look-behind. If we could call a find_from_pos method of regex::Regex,
1892// it would make sense to bring this back.
1893/*
1894pub fn detect_possible_backref(re: &str) -> bool {
1895 let mut last = b'\x00';
1896 for b in re.as_bytes() {
1897 if b'0' <= *b && *b <= b'9' && last == b'\\' { return true; }
1898 last = *b;
1899 }
1900 false
1901}
1902
1903pub fn detect_possible_backref(re: &str) -> bool {
1904 let mut bytes = re.as_bytes();
1905 loop {
1906 match memchr::memchr(b'\\', &bytes[..bytes.len() - 1]) {
1907 Some(i) => {
1908 bytes = &bytes[i + 1..];
1909 let c = bytes[0];
1910 if b'0' <= c && c <= b'9' { return true; }
1911 }
1912 None => return false
1913 }
1914 }
1915}
1916*/
1917
/// The internal module only exists so that the toy example can access internals for debugging and
/// experimenting.
#[doc(hidden)]
pub mod internal {
    // Re-exports from the crate's `analyze`, `compile`, `optimize` and `vm` modules.
    pub use crate::analyze::{analyze, can_compile_as_anchored};
    pub use crate::compile::compile;
    pub use crate::optimize::optimize;
    pub use crate::vm::{run_default, run_trace, Insn, Prog};
}
1927
1928#[cfg(test)]
1929mod tests {
1930 use alloc::borrow::Cow;
1931 use alloc::boxed::Box;
1932 use alloc::string::String;
1933 use alloc::{format, vec};
1934
1935 use crate::parse::make_literal;
1936 use crate::{Expr, Regex, RegexImpl};
1937
1938 //use detect_possible_backref;
1939
1940 // tests for to_str
1941
1942 fn to_str(e: Expr) -> String {
1943 let mut s = String::new();
1944 e.to_str(&mut s, 0);
1945 s
1946 }
1947
#[test]
fn to_str_concat_alt() {
    // An alternation inside a concatenation has to come out grouped.
    let alternation = Expr::Alt(vec![make_literal("a"), make_literal("b")]);
    let e = Expr::Concat(vec![alternation, make_literal("c")]);
    let rendered = to_str(e);
    assert_eq!(rendered, "(?:a|b)c");
}
1956
#[test]
fn to_str_rep_concat() {
    // A repeated concatenation has to come out grouped.
    let concatenation = Expr::Concat(vec![make_literal("a"), make_literal("b")]);
    let e = Expr::Repeat {
        child: Box::new(concatenation),
        lo: 2,
        hi: 3,
        greedy: true,
    };
    let rendered = to_str(e);
    assert_eq!(rendered, "(?:ab){2,3}");
}
1967
#[test]
fn to_str_group_alt() {
    // Directly inside a capturing group an alternation needs no extra group.
    let alternation = Expr::Alt(vec![make_literal("a"), make_literal("b")]);
    let e = Expr::Group(Box::new(alternation));
    let rendered = to_str(e);
    assert_eq!(rendered, "(a|b)");
}
1976
1977 #[test]
1978 fn as_str_debug() {
1979 let s = r"(a+)b\1";
1980 let regex = Regex::new(s).unwrap();
1981 assert_eq!(s, regex.as_str());
1982 assert_eq!(s, format!("{:?}", regex));
1983 }
1984
1985 #[test]
1986 fn display() {
1987 let s = r"(a+)b\1";
1988 let regex = Regex::new(s).unwrap();
1989 assert_eq!(s, format!("{}", regex));
1990 }
1991
1992 #[test]
1993 fn from_str() {
1994 let s = r"(a+)b\1";
1995 let regex = s.parse::<Regex>().unwrap();
1996 assert_eq!(regex.as_str(), s);
1997 }
1998
1999 #[test]
2000 fn to_str_repeat() {
2001 fn repeat(lo: usize, hi: usize, greedy: bool) -> Expr {
2002 Expr::Repeat {
2003 child: Box::new(make_literal("a")),
2004 lo,
2005 hi,
2006 greedy,
2007 }
2008 }
2009
2010 assert_eq!(to_str(repeat(2, 2, true)), "a{2}");
2011 assert_eq!(to_str(repeat(2, 2, false)), "a{2}?");
2012 assert_eq!(to_str(repeat(2, 3, true)), "a{2,3}");
2013 assert_eq!(to_str(repeat(2, 3, false)), "a{2,3}?");
2014 assert_eq!(to_str(repeat(2, usize::MAX, true)), "a{2,}");
2015 assert_eq!(to_str(repeat(2, usize::MAX, false)), "a{2,}?");
2016 assert_eq!(to_str(repeat(0, 1, true)), "a?");
2017 assert_eq!(to_str(repeat(0, 1, false)), "a??");
2018 assert_eq!(to_str(repeat(0, usize::MAX, true)), "a*");
2019 assert_eq!(to_str(repeat(0, usize::MAX, false)), "a*?");
2020 assert_eq!(to_str(repeat(1, usize::MAX, true)), "a+");
2021 assert_eq!(to_str(repeat(1, usize::MAX, false)), "a+?");
2022 }
2023
2024 #[test]
2025 fn escape() {
2026 // Check that strings that need no quoting are borrowed, and that non-special punctuation
2027 // is not quoted.
2028 match crate::escape("@foo") {
2029 Cow::Borrowed(s) => assert_eq!(s, "@foo"),
2030 _ => panic!("Value should be borrowed."),
2031 }
2032
2033 // Check typical usage.
2034 assert_eq!(crate::escape("fo*o").into_owned(), "fo\\*o");
2035
2036 // Check that multibyte characters are handled correctly.
2037 assert_eq!(crate::escape("fø*ø").into_owned(), "fø\\*ø");
2038 }
2039
2040 #[test]
2041 fn trailing_positive_lookahead_wrap_capture_group_fixup() {
2042 let s = r"(a+)(?=c)";
2043 let regex = s.parse::<Regex>().unwrap();
2044 assert!(matches!(regex.inner,
2045 RegexImpl::Wrap { explicit_capture_group_0: true, .. }),
2046 "trailing positive lookahead for an otherwise easy pattern should avoid going through the VM");
2047 }
2048
2049 #[test]
2050 fn easy_regex() {
2051 let s = r"(a+)b";
2052 let regex = s.parse::<Regex>().unwrap();
2053 assert!(
2054 matches!(regex.inner, RegexImpl::Wrap { explicit_capture_group_0: false, .. }),
2055 "easy pattern should avoid going through the VM, and capture group 0 should be implicit"
2056 );
2057 }
2058
2059 #[test]
2060 fn hard_regex() {
2061 let s = r"(a+)(?>c)";
2062 let regex = s.parse::<Regex>().unwrap();
2063 assert!(
2064 matches!(regex.inner, RegexImpl::Fancy { .. }),
2065 "hard regex should be compiled into a VM"
2066 );
2067 }
2068
2069 /*
2070 #[test]
2071 fn detect_backref() {
2072 assert_eq!(detect_possible_backref("a0a1a2"), false);
2073 assert_eq!(detect_possible_backref("a0a1\\a2"), false);
2074 assert_eq!(detect_possible_backref("a0a\\1a2"), true);
2075 assert_eq!(detect_possible_backref("a0a1a2\\"), false);
2076 }
2077 */
2078}