#![doc(html_root_url = "https://docs.rs/fancy-regex/0.11.0")]
#![deny(missing_docs)]
#![deny(missing_debug_implementations)]
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::ops::{Index, Range};
use std::str::FromStr;
use std::sync::Arc;
use std::usize;
mod analyze;
mod compile;
mod error;
mod expand;
mod parse;
mod replacer;
mod vm;
use crate::analyze::analyze;
use crate::compile::compile;
use crate::parse::{ExprTree, NamedGroups, Parser};
use crate::vm::{Prog, OPTION_SKIPPED_EMPTY_MATCH};
pub use crate::error::{CompileError, Error, ParseError, Result, RuntimeError};
pub use crate::expand::Expander;
pub use crate::replacer::{NoExpand, Replacer, ReplacerRef};
use std::borrow::Cow;
/// Recursion depth cap (64).
///
/// Not referenced anywhere in the crate root itself; presumably consumed by the
/// submodules (parser / analyzer / compiler) to bound nesting — TODO confirm.
const MAX_RECURSION: usize = 64;
/// A builder for a [`Regex`] that lets options be configured before the
/// pattern is compiled.
///
/// It is a thin wrapper around the private `RegexOptions`; the setters mutate
/// those options and `build` compiles a `Regex` from a clone of them.
#[derive(Debug)]
pub struct RegexBuilder(RegexOptions);
/// A compiled regular expression.
///
/// Cloning copies the chosen implementation and shares the named-group table
/// through the `Arc`.
#[derive(Clone)]
pub struct Regex {
    /// The matching backend selected at compile time (see `RegexImpl`).
    inner: RegexImpl,
    /// Table of named groups (looked up by name to obtain a group index);
    /// shared with every `Captures` this regex produces.
    named_groups: Arc<NamedGroups>,
}
/// The two backends a [`Regex`] can run on.
#[derive(Clone)]
enum RegexImpl {
    /// The pattern was not flagged as "hard" by analysis, so matching is
    /// delegated entirely to a `regex::Regex`.
    Wrap {
        /// The delegated regex, compiled from the re-serialised pattern.
        inner: regex::Regex,
        /// The options (including the original pattern text) used to build it.
        options: RegexOptions,
    },
    /// The pattern needs the crate's own VM.
    Fancy {
        /// The compiled program executed by `vm::run`.
        prog: Prog,
        /// Number of capture groups; `2 * n_groups` save slots are kept for captures.
        n_groups: usize,
        /// The options (including the original pattern text) used to build it.
        options: RegexOptions,
    },
}
/// A single match of a regex (or of one capture group) within a text.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct Match<'t> {
    /// The full text that was searched, not just the matched part.
    text: &'t str,
    /// Start of the match as an index into `text` (used for string slicing, so a byte offset).
    start: usize,
    /// End (exclusive) of the match as an index into `text`.
    end: usize,
}
/// Iterator over successive matches of a regex in a text.
///
/// Created by `Regex::find_iter`. Each item is a `Result` because a search
/// can fail.
#[derive(Debug)]
pub struct Matches<'r, 't> {
    /// The regex being matched.
    re: &'r Regex,
    /// The text being searched.
    text: &'t str,
    /// Position in `text` at which the next search starts.
    last_end: usize,
    /// End position of the most recently yielded match, if any.
    last_match: Option<usize>,
}
impl<'r, 't> Matches<'r, 't> {
    /// Returns the text this iterator is searching.
    pub fn text(&self) -> &'t str {
        self.text
    }

    /// Returns the regex this iterator is matching with.
    pub fn regex(&self) -> &'r Regex {
        // `re` is already a `&'r Regex`; copying it out avoids re-borrowing.
        self.re
    }
}
impl<'r, 't> Iterator for Matches<'r, 't> {
    type Item = Result<Match<'t>>;

    /// Searches for the next match starting at `last_end`.
    ///
    /// Yields `Some(Err(_))` when the search fails and `None` once the start
    /// position is past the end of the text or no further match exists. An
    /// empty match that ends exactly where the previous match ended is skipped.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if self.last_end > self.text.len() {
                return None;
            }

            // The flag is only set when the start position was moved beyond the
            // end of the previous match, i.e. after stepping over an empty match.
            let option_flags = match self.last_match {
                Some(prev_end) if self.last_end > prev_end => OPTION_SKIPPED_EMPTY_MATCH,
                _ => 0,
            };

            let found =
                self.re
                    .find_from_pos_with_option_flags(self.text, self.last_end, option_flags);
            let mat = match found {
                Ok(Some(mat)) => mat,
                Ok(None) => return None,
                Err(error) => return Some(Err(error)),
            };

            if mat.start == mat.end {
                // Empty match: step to the next code point so the search makes progress.
                self.last_end = next_utf8(self.text, mat.end);
                // Do not report an empty match directly after the previous match;
                // search again from the advanced position instead.
                if Some(mat.end) == self.last_match {
                    continue;
                }
            } else {
                self.last_end = mat.end;
            }

            self.last_match = Some(mat.end);
            return Some(Ok(mat));
        }
    }
}
/// Iterator over the capture groups of successive matches of a regex in a text.
///
/// Created by `Regex::captures_iter`. It wraps a `Matches` and reuses its
/// regex, text and position bookkeeping.
#[derive(Debug)]
pub struct CaptureMatches<'r, 't>(Matches<'r, 't>);
impl<'r, 't> CaptureMatches<'r, 't> {
pub fn text(&self) -> &'t str {
self.0.text
}
pub fn regex(&self) -> &'r Regex {
&self.0.re
}
}
impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
    type Item = Result<Captures<'t>>;

    /// Runs `captures_from_pos` from the current position and yields the captures.
    ///
    /// Yields `Some(Err(_))` when the search fails and `None` once the start
    /// position is past the end of the text or no further match exists. An
    /// empty match that ends exactly where the previous match ended is skipped.
    ///
    /// NOTE(review): unlike the `Matches` iterator, no option flags are handed
    /// to the search here — confirm this difference is intentional.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let state = &mut self.0;
            if state.last_end > state.text.len() {
                return None;
            }

            let captures = match state.re.captures_from_pos(state.text, state.last_end) {
                Ok(Some(captures)) => captures,
                Ok(None) => return None,
                Err(error) => return Some(Err(error)),
            };

            let mat = captures
                .get(0)
                .expect("`Captures` is expected to have entire match at 0th position");

            if mat.start == mat.end {
                // Empty match: step to the next code point so the search makes progress.
                state.last_end = next_utf8(state.text, mat.end);
                // Do not report an empty match directly after the previous match.
                if Some(mat.end) == state.last_match {
                    continue;
                }
            } else {
                state.last_end = mat.end;
            }

            state.last_match = Some(mat.end);
            return Some(Ok(captures));
        }
    }
}
/// The capture groups of one match.
///
/// Group 0 is the entire match; groups can be read by index or by name.
#[derive(Debug)]
pub struct Captures<'t> {
    /// Backend-specific storage of the group positions.
    inner: CapturesImpl<'t>,
    /// Name-to-index table, shared with the `Regex` that produced this value.
    named_groups: Arc<NamedGroups>,
}
/// Backend-specific storage behind [`Captures`].
#[derive(Debug)]
enum CapturesImpl<'t> {
    /// Captures produced by the delegated `regex::Regex`.
    Wrap {
        /// The text that was searched.
        text: &'t str,
        /// Group positions as filled in by `regex::Regex::captures_read_at`.
        locations: regex::CaptureLocations,
    },
    /// Captures produced by the crate's own VM.
    Fancy {
        /// The text that was searched.
        text: &'t str,
        /// Flat list of positions: slot `2 * i` is the start and `2 * i + 1` the
        /// end of group `i`. A start of `usize::MAX` means the group has no match.
        saves: Vec<usize>,
    },
}
/// Iterator over the groups of a [`Captures`], in index order.
///
/// Each item is `None` when the corresponding group has no match.
#[derive(Debug)]
pub struct SubCaptureMatches<'c, 't> {
    /// The captures being iterated.
    caps: &'c Captures<'t>,
    /// Index of the next group to yield.
    i: usize,
}
/// Options a [`Regex`] is compiled with; configured through `RegexBuilder`.
#[derive(Clone, Debug)]
struct RegexOptions {
    /// The pattern source text, returned verbatim by `Regex::as_str`.
    pattern: String,
    /// Backtracking budget handed to the VM on every run (default 1,000,000).
    /// Presumably the run fails with an error once it is exceeded — confirm in `vm`.
    backtrack_limit: usize,
    /// Optional size limit for the delegated regex; `None` by default.
    /// Consumed by `compile::compile_inner` (not visible here) — presumably
    /// forwarded to the `regex` builder's `size_limit`, TODO confirm.
    delegate_size_limit: Option<usize>,
    /// Optional DFA size limit for the delegated regex; `None` by default.
    /// Consumed by `compile::compile_inner` (not visible here) — presumably
    /// forwarded to the `regex` builder's `dfa_size_limit`, TODO confirm.
    delegate_dfa_size_limit: Option<usize>,
}
impl Default for RegexOptions {
fn default() -> Self {
RegexOptions {
pattern: String::new(),
backtrack_limit: 1_000_000,
delegate_size_limit: None,
delegate_dfa_size_limit: None,
}
}
}
impl RegexBuilder {
pub fn new(pattern: &str) -> Self {
let mut builder = RegexBuilder(RegexOptions::default());
builder.0.pattern = pattern.to_string();
builder
}
pub fn build(&self) -> Result<Regex> {
Regex::new_options(self.0.clone())
}
pub fn backtrack_limit(&mut self, limit: usize) -> &mut Self {
self.0.backtrack_limit = limit;
self
}
pub fn delegate_size_limit(&mut self, limit: usize) -> &mut Self {
self.0.delegate_size_limit = Some(limit);
self
}
pub fn delegate_dfa_size_limit(&mut self, limit: usize) -> &mut Self {
self.0.delegate_dfa_size_limit = Some(limit);
self
}
}
impl fmt::Debug for Regex {
    /// Writes the original pattern text, unquoted.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Written directly: formatter flags such as width are not applied.
        f.write_str(self.as_str())
    }
}
impl fmt::Display for Regex {
    /// Writes the original pattern text.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // Written directly: formatter flags such as width are not applied.
        f.write_str(self.as_str())
    }
}
impl FromStr for Regex {
    type Err = Error;

    /// Compiles `s` with the default options; equivalent to `Regex::new(s)`.
    fn from_str(s: &str) -> Result<Regex> {
        Self::new(s)
    }
}
impl Regex {
    /// Parses and compiles `re` with the default options.
    ///
    /// # Errors
    ///
    /// Propagates any error from parsing, analysing or compiling the pattern.
    pub fn new(re: &str) -> Result<Regex> {
        let options = RegexOptions {
            pattern: re.to_string(),
            ..RegexOptions::default()
        };
        Self::new_options(options)
    }

    /// Compiles the pattern stored in `options`.
    ///
    /// If analysis does not flag the pattern as "hard", it is re-serialised and
    /// delegated to the `regex` crate; otherwise it is compiled for the VM.
    fn new_options(options: RegexOptions) -> Result<Regex> {
        let raw_tree = Expr::parse_tree(&options.pattern)?;

        // Wrap the pattern as `<lazy any-char repeat>(<pattern>)`: the lazy
        // prefix lets a match begin at any position and the group records the
        // bounds of the overall match (read back as save slots 0 and 1).
        let tree = ExprTree {
            expr: Expr::Concat(vec![
                Expr::Repeat {
                    child: Box::new(Expr::Any { newline: true }),
                    lo: 0,
                    hi: usize::MAX,
                    greedy: false,
                },
                Expr::Group(Box::new(raw_tree.expr)),
            ]),
            ..raw_tree
        };

        let info = analyze(&tree)?;

        // Analysis info for the user's own expression: second child of the
        // concat (the group), then that group's only child.
        let inner_info = &info.children[1].children[0];
        if !inner_info.hard {
            // Easy case: hand the pattern to the `regex` crate. It is rendered
            // with `to_str` rather than reusing the original pattern text.
            let mut re_cooked = String::new();
            // `raw_tree.expr` was moved into the wrapper above, so dig it back out.
            let raw_e = match tree.expr {
                Expr::Concat(ref v) => match v[1] {
                    Expr::Group(ref child) => child,
                    _ => unreachable!(),
                },
                _ => unreachable!(),
            };
            raw_e.to_str(&mut re_cooked, 0);
            let inner = compile::compile_inner(&re_cooked, &options)?;
            return Ok(Regex {
                inner: RegexImpl::Wrap { inner, options },
                named_groups: Arc::new(tree.named_groups),
            });
        }

        let prog = compile(&info)?;
        Ok(Regex {
            inner: RegexImpl::Fancy {
                prog,
                n_groups: info.end_group,
                options,
            },
            named_groups: Arc::new(tree.named_groups),
        })
    }

    /// Returns the original pattern text this regex was compiled from.
    pub fn as_str(&self) -> &str {
        match &self.inner {
            RegexImpl::Wrap { options, .. } | RegexImpl::Fancy { options, .. } => {
                &options.pattern
            }
        }
    }

    /// Reports whether the regex matches anywhere in `text`.
    ///
    /// # Errors
    ///
    /// Returns an error if running the VM fails; the delegated backend never fails.
    pub fn is_match(&self, text: &str) -> Result<bool> {
        match &self.inner {
            RegexImpl::Wrap { inner, .. } => Ok(inner.is_match(text)),
            RegexImpl::Fancy { prog, options, .. } => {
                let result = vm::run(prog, text, 0, 0, options)?;
                Ok(result.is_some())
            }
        }
    }

    /// Returns an iterator over the successive matches in `text`.
    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
        Matches {
            re: self,
            text,
            last_end: 0,
            last_match: None,
        }
    }

    /// Finds the first match in `text`, searching from the start.
    ///
    /// # Errors
    ///
    /// Returns an error if running the VM fails.
    pub fn find<'t>(&self, text: &'t str) -> Result<Option<Match<'t>>> {
        self.find_from_pos(text, 0)
    }

    /// Finds the first match in `text`, starting the search at position `pos`.
    ///
    /// The whole `text` is still passed to the backend, and the returned
    /// positions are relative to `text`, not to `pos`.
    ///
    /// # Errors
    ///
    /// Returns an error if running the VM fails.
    pub fn find_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Match<'t>>> {
        self.find_from_pos_with_option_flags(text, pos, 0)
    }

    /// Like `find_from_pos`, additionally passing `option_flags` to the VM.
    ///
    /// The flags are ignored by the delegated (`Wrap`) backend.
    fn find_from_pos_with_option_flags<'t>(
        &self,
        text: &'t str,
        pos: usize,
        option_flags: u32,
    ) -> Result<Option<Match<'t>>> {
        match &self.inner {
            RegexImpl::Wrap { inner, .. } => Ok(inner
                .find_at(text, pos)
                .map(|m| Match::new(text, m.start(), m.end()))),
            RegexImpl::Fancy { prog, options, .. } => {
                let result = vm::run(prog, text, pos, option_flags, options)?;
                // Save slots 0 and 1 hold the bounds of the overall match.
                Ok(result.map(|saves| Match::new(text, saves[0], saves[1])))
            }
        }
    }

    /// Returns an iterator over the captures of the successive matches in `text`.
    pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CaptureMatches<'r, 't> {
        CaptureMatches(self.find_iter(text))
    }

    /// Returns the capture groups of the first match in `text`, if any.
    ///
    /// # Errors
    ///
    /// Returns an error if running the VM fails.
    pub fn captures<'t>(&self, text: &'t str) -> Result<Option<Captures<'t>>> {
        self.captures_from_pos(text, 0)
    }

    /// Returns the capture groups of the first match in `text`, starting the
    /// search at position `pos`.
    ///
    /// # Errors
    ///
    /// Returns an error if running the VM fails.
    pub fn captures_from_pos<'t>(&self, text: &'t str, pos: usize) -> Result<Option<Captures<'t>>> {
        let named_groups = self.named_groups.clone();
        match &self.inner {
            RegexImpl::Wrap { inner, .. } => {
                let mut locations = inner.capture_locations();
                let result = inner.captures_read_at(&mut locations, text, pos);
                Ok(result.map(|_| Captures {
                    inner: CapturesImpl::Wrap { text, locations },
                    named_groups,
                }))
            }
            RegexImpl::Fancy {
                prog,
                n_groups,
                options,
                ..
            } => {
                let result = vm::run(prog, text, pos, 0, options)?;
                Ok(result.map(|mut saves| {
                    // Keep only the slots that belong to capture groups.
                    saves.truncate(n_groups * 2);
                    Captures {
                        inner: CapturesImpl::Fancy { text, saves },
                        named_groups,
                    }
                }))
            }
        }
    }

    /// Returns the number of capture groups in the pattern.
    pub fn captures_len(&self) -> usize {
        match &self.inner {
            RegexImpl::Wrap { inner, .. } => inner.captures_len(),
            RegexImpl::Fancy { n_groups, .. } => *n_groups,
        }
    }

    /// Returns an iterator over the capture group names, in group index order.
    ///
    /// Groups without a name yield `None`.
    pub fn capture_names(&self) -> CaptureNames<'_> {
        let mut names = vec![None; self.captures_len()];
        for (name, &i) in self.named_groups.iter() {
            names[i] = Some(name.as_str());
        }
        CaptureNames(names.into_iter())
    }

    /// Prints a debug representation of the compiled regex to standard output.
    #[doc(hidden)]
    pub fn debug_print(&self) {
        match &self.inner {
            RegexImpl::Wrap { inner, .. } => println!("wrapped {:?}", inner),
            RegexImpl::Fancy { prog, .. } => prog.debug_print(),
        }
    }

    /// Replaces the first match in `text` with `rep`.
    ///
    /// Returns `Cow::Borrowed(text)` when there is no match.
    ///
    /// # Panics
    ///
    /// Panics if searching for the match fails.
    pub fn replace<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
        self.replacen(text, 1, rep)
    }

    /// Replaces every match in `text` with `rep`.
    ///
    /// Returns `Cow::Borrowed(text)` when there is no match.
    ///
    /// # Panics
    ///
    /// Panics if searching for a match fails.
    pub fn replace_all<'t, R: Replacer>(&self, text: &'t str, rep: R) -> Cow<'t, str> {
        self.replacen(text, 0, rep)
    }

    /// Replaces at most `limit` matches in `text` with `rep`; a `limit` of 0
    /// replaces all matches.
    ///
    /// Returns `Cow::Borrowed(text)` when there is no match.
    ///
    /// # Panics
    ///
    /// Panics if searching for one of the replaced matches fails.
    pub fn replacen<'t, R: Replacer>(
        &self,
        text: &'t str,
        limit: usize,
        mut rep: R,
    ) -> Cow<'t, str> {
        // Fast path: when the replacer reports that it needs no expansion, the
        // replacement is a fixed string and plain matches suffice (no captures).
        if let Some(rep) = rep.no_expansion() {
            let mut it = self.find_iter(text).enumerate().peekable();
            if it.peek().is_none() {
                return Cow::Borrowed(text);
            }
            let mut new = String::with_capacity(text.len());
            let mut last_match = 0;
            for (i, m) in it {
                let m = m.unwrap();
                new.push_str(&text[last_match..m.start()]);
                new.push_str(&rep);
                last_match = m.end();
                // Stop right after the `limit`-th replacement so the iterator
                // is not advanced to a match that would be thrown away (and
                // whose search error would otherwise cause a needless panic).
                if limit > 0 && i >= limit - 1 {
                    break;
                }
            }
            new.push_str(&text[last_match..]);
            return Cow::Owned(new);
        }

        // General path: the replacer needs the captures of every match.
        let mut it = self.captures_iter(text).enumerate().peekable();
        if it.peek().is_none() {
            return Cow::Borrowed(text);
        }
        let mut new = String::with_capacity(text.len());
        let mut last_match = 0;
        for (i, cap) in it {
            let cap = cap.unwrap();
            // Group 0 is the overall match.
            let m = cap.get(0).unwrap();
            new.push_str(&text[last_match..m.start()]);
            rep.replace_append(&cap, &mut new);
            last_match = m.end();
            // See the fast path above for why the check sits after the replacement.
            if limit > 0 && i >= limit - 1 {
                break;
            }
        }
        new.push_str(&text[last_match..]);
        Cow::Owned(new)
    }
}
impl<'t> Match<'t> {
    /// Returns the start position of the match within the searched text.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Returns the end position (exclusive) of the match within the searched text.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns the matched span as a `start..end` range.
    #[inline]
    pub fn range(&self) -> Range<usize> {
        Range {
            start: self.start,
            end: self.end,
        }
    }

    /// Returns the matched part of the text.
    #[inline]
    pub fn as_str(&self) -> &'t str {
        let Match { text, start, end } = *self;
        &text[start..end]
    }

    /// Creates a match covering `start..end` of `text`.
    fn new(text: &'t str, start: usize, end: usize) -> Match<'t> {
        Self { text, start, end }
    }
}
/// Converts a match into the matched part of the text.
impl<'t> From<Match<'t>> for &'t str {
    fn from(m: Match<'t>) -> &'t str {
        &m.text[m.start..m.end]
    }
}
/// Converts a match into its `start..end` range within the searched text.
impl<'t> From<Match<'t>> for Range<usize> {
    fn from(m: Match<'t>) -> Range<usize> {
        m.start..m.end
    }
}
// No `is_empty` is provided — presumably because a `Captures` always contains
// at least group 0; TODO confirm the rationale for silencing the lint.
#[allow(clippy::len_without_is_empty)]
impl<'t> Captures<'t> {
    /// Returns the match of group `i`, or `None` if there is no such group or
    /// the group has no match.
    pub fn get(&self, i: usize) -> Option<Match<'t>> {
        match &self.inner {
            CapturesImpl::Wrap { text, locations } => {
                let (start, end) = locations.get(i)?;
                Some(Match { text, start, end })
            }
            CapturesImpl::Fancy { text, saves } => {
                // Group `i` occupies the slot pair (2i, 2i + 1).
                let slot = i * 2;
                if slot >= saves.len() {
                    return None;
                }
                match saves[slot] {
                    // A start of `usize::MAX` marks a group without a match.
                    std::usize::MAX => None,
                    lo => Some(Match {
                        text,
                        start: lo,
                        end: saves[slot + 1],
                    }),
                }
            }
        }
    }

    /// Returns the match of the group called `name`, or `None` if no group has
    /// that name or the group has no match.
    pub fn name(&self, name: &str) -> Option<Match<'t>> {
        let index = *self.named_groups.get(name)?;
        self.get(index)
    }

    /// Expands `replacement` against these captures with the default
    /// `Expander` and appends the result to `dst`.
    pub fn expand(&self, replacement: &str, dst: &mut String) {
        let expander = Expander::default();
        expander.append_expansion(dst, replacement, self);
    }

    /// Returns an iterator over all groups, starting with group 0.
    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
        SubCaptureMatches { caps: self, i: 0 }
    }

    /// Returns the number of groups, whether or not they have a match.
    pub fn len(&self) -> usize {
        match &self.inner {
            CapturesImpl::Wrap { locations, .. } => locations.len(),
            // Two save slots per group.
            CapturesImpl::Fancy { saves, .. } => saves.len() / 2,
        }
    }
}
/// Indexes the captures by group number.
///
/// # Panics
///
/// Panics if there is no group `i` or the group has no match.
impl<'t> Index<usize> for Captures<'t> {
    type Output = str;

    fn index(&self, i: usize) -> &str {
        match self.get(i) {
            Some(m) => m.as_str(),
            None => panic!("no group at index '{}'", i),
        }
    }
}
/// Indexes the captures by group name.
///
/// # Panics
///
/// Panics if no group is called `name` or the group has no match.
impl<'t, 'i> Index<&'i str> for Captures<'t> {
    type Output = str;

    fn index<'a>(&'a self, name: &'i str) -> &'a str {
        match self.name(name) {
            Some(m) => m.as_str(),
            None => panic!("no group named '{}'", name),
        }
    }
}
impl<'c, 't> Iterator for SubCaptureMatches<'c, 't> {
    type Item = Option<Match<'t>>;

    /// Yields the next group; the inner `Option` is `None` for a group that
    /// has no match. Returns `None` once all groups have been visited.
    fn next(&mut self) -> Option<Option<Match<'t>>> {
        if self.i >= self.caps.len() {
            return None;
        }
        let index = self.i;
        self.i += 1;
        Some(self.caps.get(index))
    }
}
/// Node of a parsed regular expression tree.
///
/// `Expr::to_str` can render `Empty`, `Any`, `Literal`, the text/line anchors,
/// `Concat`, `Alt`, `Group`, `Repeat` and `Delegate` back to pattern syntax; it
/// panics for every other variant ("hard" expressions).
#[derive(Debug, PartialEq, Eq)]
pub enum Expr {
    /// The empty expression; rendered as nothing.
    Empty,
    /// Any character; rendered as `(?s:.)` or `.` depending on `newline`.
    Any {
        /// Selects the `(?s:.)` rendering, i.e. the dot also matches newlines.
        newline: bool,
    },
    /// Rendered as `^`.
    StartText,
    /// Rendered as `$`.
    EndText,
    /// Rendered as `(?m:^)`.
    StartLine,
    /// Rendered as `(?m:$)`.
    EndLine,
    /// A literal string; rendered with special characters backslash-escaped.
    Literal {
        /// The literal text.
        val: String,
        /// When set, the rendering is wrapped in `(?i:` … `)`.
        casei: bool,
    },
    /// A sequence of expressions, rendered one after another.
    Concat(Vec<Expr>),
    /// Alternatives, rendered separated by `|`.
    Alt(Vec<Expr>),
    /// A capturing group, rendered as `(` … `)`.
    Group(Box<Expr>),
    /// A look-around assertion of the given kind on the boxed expression.
    LookAround(Box<Expr>, LookAround),
    /// Repetition of `child`.
    Repeat {
        /// The repeated expression.
        child: Box<Expr>,
        /// Minimum number of repetitions.
        lo: usize,
        /// Maximum number of repetitions; `usize::MAX` is rendered as unbounded.
        hi: usize,
        /// When false, a `?` is appended to the quantifier in the rendering.
        greedy: bool,
    },
    /// Pattern text that is copied verbatim into the rendered pattern.
    Delegate {
        /// The pattern text.
        inner: String,
        /// Not used in the crate root; presumably the number of characters the
        /// pattern matches — TODO confirm against the analyzer.
        size: usize,
        /// When set, the rendering is wrapped in `(?i:` … `)`.
        casei: bool,
    },
    /// Judging by the name, a back reference to the capture group with this
    /// index — confirm against the parser.
    Backref(usize),
    /// Judging by the name, an atomic (non-backtracking) group — confirm
    /// against the parser.
    AtomicGroup(Box<Expr>),
    /// Judging by the name, the "keep out" operator (`\K` in other engines) —
    /// confirm against the parser.
    KeepOut,
    /// Judging by the name, an anchor at the end of the previous match (`\G`
    /// in other engines) — confirm against the parser.
    ContinueFromPreviousMatchEnd,
    /// Judging by the name, a condition testing whether the capture group with
    /// this index has matched — confirm against the parser.
    BackrefExistsCondition(usize),
    /// Judging by the field names, an if/then/else construct — confirm against
    /// the parser.
    Conditional {
        /// The condition expression.
        condition: Box<Expr>,
        /// Expression used when the condition holds.
        true_branch: Box<Expr>,
        /// Expression used when the condition does not hold.
        false_branch: Box<Expr>,
    },
}
/// The kind of look-around assertion carried by `Expr::LookAround`.
///
/// The descriptions below follow the usual meaning of the variant names; the
/// matching behaviour itself is implemented outside the crate root.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum LookAround {
    /// Positive look-ahead.
    LookAhead,
    /// Negative look-ahead.
    LookAheadNeg,
    /// Positive look-behind.
    LookBehind,
    /// Negative look-behind.
    LookBehindNeg,
}
/// Iterator over the capture group names of a regex, in group index order.
///
/// Groups without a name yield `None`.
pub struct CaptureNames<'r>(std::vec::IntoIter<Option<&'r str>>);

impl Debug for CaptureNames<'_> {
    /// Writes a fixed placeholder instead of the remaining names.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str("<CaptureNames>")
    }
}

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    /// Yields the name of the next group, or `None` inside the item when that
    /// group is unnamed.
    fn next(&mut self) -> Option<Self::Item> {
        self.0.next()
    }

    /// Forwards the exact remaining length known to the underlying vector
    /// iterator, so that callers such as `collect` can pre-allocate.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
/// Appends the decimal representation of `x` to `s`.
fn push_usize(s: &mut String, x: usize) {
    let digits = x.to_string();
    s.push_str(&digits);
}
/// Reports whether `c` is one of the characters that get a backslash in front
/// of them when text is quoted for use in a pattern.
fn is_special(c: char) -> bool {
    matches!(
        c,
        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$' | '#'
    )
}
/// Appends `s` to `buf`, putting a backslash in front of every special character.
fn push_quoted(buf: &mut String, s: &str) {
    for ch in s.chars() {
        match ch {
            special if is_special(special) => {
                buf.push('\\');
                buf.push(special);
            }
            plain => buf.push(plain),
        }
    }
}
/// Escapes every special character in `text` with a backslash so that the
/// result can be used as a literal inside a pattern.
///
/// Returns the input borrowed when nothing needs escaping, otherwise a newly
/// allocated string.
pub fn escape(text: &str) -> Cow<str> {
    // All special characters are ASCII, so a byte-wise scan finds each of them;
    // the count doubles as the number of extra bytes the escaped string needs.
    let specials = text.bytes().filter(|&b| is_special(b as char)).count();
    if specials == 0 {
        return Cow::Borrowed(text);
    }

    let mut escaped = String::with_capacity(text.len() + specials);
    push_quoted(&mut escaped, text);
    Cow::Owned(escaped)
}
impl Expr {
    /// Parses `re` into an expression tree.
    ///
    /// # Errors
    ///
    /// Returns the parser's error if the pattern cannot be parsed.
    pub fn parse_tree(re: &str) -> Result<ExprTree> {
        Parser::parse(re)
    }

    /// Appends this expression, rendered as pattern text, to `buf`.
    ///
    /// `precedence` describes the context the expression is rendered in: an
    /// alternation is wrapped in `(?:…)` above 0, a concatenation above 1 and a
    /// repetition above 2.
    ///
    /// # Panics
    ///
    /// Panics for variants that have no rendering here ("hard" expressions).
    pub fn to_str(&self, buf: &mut String, precedence: u8) {
        match self {
            Expr::Empty => {}
            Expr::Any { newline } => {
                let dot = if *newline { "(?s:.)" } else { "." };
                buf.push_str(dot);
            }
            Expr::Literal { val, casei } => {
                if *casei {
                    buf.push_str("(?i:");
                }
                push_quoted(buf, val);
                if *casei {
                    buf.push(')');
                }
            }
            Expr::StartText => buf.push('^'),
            Expr::EndText => buf.push('$'),
            Expr::StartLine => buf.push_str("(?m:^)"),
            Expr::EndLine => buf.push_str("(?m:$)"),
            Expr::Concat(children) => {
                let wrap = precedence > 1;
                if wrap {
                    buf.push_str("(?:");
                }
                children.iter().for_each(|child| child.to_str(buf, 2));
                if wrap {
                    buf.push(')');
                }
            }
            Expr::Alt(children) => {
                let wrap = precedence > 0;
                if wrap {
                    buf.push_str("(?:");
                }
                let mut branches = children.iter();
                if let Some(first) = branches.next() {
                    first.to_str(buf, 1);
                }
                for branch in branches {
                    buf.push('|');
                    branch.to_str(buf, 1);
                }
                if wrap {
                    buf.push(')');
                }
            }
            Expr::Group(child) => {
                buf.push('(');
                child.to_str(buf, 0);
                buf.push(')');
            }
            Expr::Repeat {
                child,
                lo,
                hi,
                greedy,
            } => {
                let wrap = precedence > 2;
                if wrap {
                    buf.push_str("(?:");
                }
                child.to_str(buf, 3);
                // Use the short quantifiers where they exist, `{lo,hi}` otherwise.
                match (*lo, *hi) {
                    (0, 1) => buf.push('?'),
                    (0, usize::MAX) => buf.push('*'),
                    (1, usize::MAX) => buf.push('+'),
                    (min, max) => {
                        buf.push('{');
                        push_usize(buf, min);
                        if min != max {
                            buf.push(',');
                            // An unbounded maximum is written as an open range.
                            if max != usize::MAX {
                                push_usize(buf, max);
                            }
                        }
                        buf.push('}');
                    }
                }
                if !*greedy {
                    buf.push('?');
                }
                if wrap {
                    buf.push(')');
                }
            }
            Expr::Delegate { inner, casei, .. } => {
                if *casei {
                    buf.push_str("(?i:");
                }
                buf.push_str(inner);
                if *casei {
                    buf.push(')');
                }
            }
            _ => panic!("attempting to format hard expr"),
        }
    }
}
/// Returns the index of the first byte of the code point that ends right
/// before `ix`.
///
/// Precondition: `ix > 0` (and within `s`); otherwise the function panics.
fn prev_codepoint_ix(s: &str, mut ix: usize) -> usize {
    let bytes = s.as_bytes();
    ix -= 1;
    // UTF-8 continuation bytes have the form 0b10xx_xxxx; keep stepping back
    // until a byte that starts a code point is reached.
    while bytes[ix] & 0xC0 == 0x80 {
        ix -= 1;
    }
    ix
}
/// Returns the length in bytes of a UTF-8 sequence given its first byte `b`.
///
/// Bytes that cannot start a sequence are not rejected: anything in
/// `0x80..=0xdf` reports 2 and anything from `0xf0` upwards reports 4.
fn codepoint_len(b: u8) -> usize {
    match b {
        0x00..=0x7f => 1,
        0x80..=0xdf => 2,
        0xe0..=0xef => 3,
        0xf0..=0xff => 4,
    }
}
/// Returns the index just past the code point that starts at byte `i` of `text`.
///
/// When `i` is at or beyond the end of `text`, returns `i + 1`.
fn next_utf8(text: &str, i: usize) -> usize {
    match text.as_bytes().get(i) {
        Some(&lead) => i + codepoint_len(lead),
        None => i + 1,
    }
}
// Re-exports of the analysis, compilation and VM entry points. The module is
// public but excluded from the rendered documentation via `#[doc(hidden)]`.
#[doc(hidden)]
pub mod internal {
    pub use crate::analyze::analyze;
    pub use crate::compile::compile;
    pub use crate::vm::{run_default, run_trace, Insn, Prog};
}
// Unit tests for pattern rendering (`Expr::to_str`), the `Regex` string
// conversions and `escape`.
#[cfg(test)]
mod tests {
    use crate::parse::make_literal;
    use crate::Expr;
    use crate::Regex;
    use std::borrow::Cow;
    use std::usize;

    // Renders `e` at the lowest precedence and returns the resulting pattern text.
    fn to_str(e: Expr) -> String {
        let mut s = String::new();
        e.to_str(&mut s, 0);
        s
    }

    // An alternation inside a concatenation must be wrapped in a non-capturing group.
    #[test]
    fn to_str_concat_alt() {
        let e = Expr::Concat(vec![
            Expr::Alt(vec![make_literal("a"), make_literal("b")]),
            make_literal("c"),
        ]);
        assert_eq!(to_str(e), "(?:a|b)c");
    }

    // A repeated concatenation must be wrapped in a non-capturing group.
    #[test]
    fn to_str_rep_concat() {
        let e = Expr::Repeat {
            child: Box::new(Expr::Concat(vec![make_literal("a"), make_literal("b")])),
            lo: 2,
            hi: 3,
            greedy: true,
        };
        assert_eq!(to_str(e), "(?:ab){2,3}");
    }

    // An alternation directly inside a capturing group needs no extra wrapping.
    #[test]
    fn to_str_group_alt() {
        let e = Expr::Group(Box::new(Expr::Alt(vec![
            make_literal("a"),
            make_literal("b"),
        ])));
        assert_eq!(to_str(e), "(a|b)");
    }

    // `as_str` and the `Debug` output both give back the original pattern.
    #[test]
    fn as_str_debug() {
        let s = r"(a+)b\1";
        let regex = Regex::new(s).unwrap();
        assert_eq!(s, regex.as_str());
        assert_eq!(s, format!("{:?}", regex));
    }

    // The `Display` output is the original pattern.
    #[test]
    fn display() {
        let s = r"(a+)b\1";
        let regex = Regex::new(s).unwrap();
        assert_eq!(s, format!("{}", regex));
    }

    // `str::parse` goes through `FromStr` and yields an equivalent regex.
    #[test]
    fn from_str() {
        let s = r"(a+)b\1";
        let regex = s.parse::<Regex>().unwrap();
        assert_eq!(regex.as_str(), s);
    }

    // Every quantifier form, greedy and lazy.
    #[test]
    fn to_str_repeat() {
        fn repeat(lo: usize, hi: usize, greedy: bool) -> Expr {
            Expr::Repeat {
                child: Box::new(make_literal("a")),
                lo,
                hi,
                greedy,
            }
        }
        assert_eq!(to_str(repeat(2, 2, true)), "a{2}");
        assert_eq!(to_str(repeat(2, 2, false)), "a{2}?");
        assert_eq!(to_str(repeat(2, 3, true)), "a{2,3}");
        assert_eq!(to_str(repeat(2, 3, false)), "a{2,3}?");
        assert_eq!(to_str(repeat(2, usize::MAX, true)), "a{2,}");
        assert_eq!(to_str(repeat(2, usize::MAX, false)), "a{2,}?");
        assert_eq!(to_str(repeat(0, 1, true)), "a?");
        assert_eq!(to_str(repeat(0, 1, false)), "a??");
        assert_eq!(to_str(repeat(0, usize::MAX, true)), "a*");
        assert_eq!(to_str(repeat(0, usize::MAX, false)), "a*?");
        assert_eq!(to_str(repeat(1, usize::MAX, true)), "a+");
        assert_eq!(to_str(repeat(1, usize::MAX, false)), "a+?");
    }

    // Text without special characters is returned borrowed; special characters
    // are escaped, also when the text contains multi-byte characters.
    #[test]
    fn escape() {
        match crate::escape("@foo") {
            Cow::Borrowed(s) => assert_eq!(s, "@foo"),
            _ => panic!("Value should be borrowed."),
        }
        assert_eq!(crate::escape("fo*o").into_owned(), "fo\\*o");
        assert_eq!(crate::escape("fø*ø").into_owned(), "fø\\*ø");
    }
}