#![warn(missing_docs)]
#![warn(rust_2018_idioms)]
pub mod dfa;
pub mod engine;
pub mod error;
pub mod hir;
pub mod literal;
pub mod nfa;
pub mod parser;
pub mod vm;
#[cfg(feature = "jit")]
pub mod jit;
#[cfg(feature = "simd")]
pub mod simd;
pub use error::{Error, Result};
use engine::CompiledRegex;
use std::collections::HashMap;
use std::sync::Arc;
/// Configurable constructor for [`Regex`] values.
///
/// Holds the pattern text plus two opt-in compilation switches. Both switches
/// are `false` when the builder is created through `RegexBuilder::new`.
#[derive(Debug, Clone, Default)]
pub struct RegexBuilder {
/// Pattern source text handed to `parser::parse` when `build` runs.
pattern: String,
/// When `true`, `build` compiles through `engine::compile_with_jit`
/// instead of `engine::compile_from_hir`.
jit: bool,
/// When `true`, `build` passes the translated HIR through
/// `hir::optimize_prefixes` before compiling.
optimize_prefixes: bool,
}
impl RegexBuilder {
    /// Starts a builder for `pattern` with JIT compilation and prefix
    /// optimization both switched off.
    pub fn new(pattern: &str) -> Self {
        let pattern = String::from(pattern);
        Self {
            pattern,
            jit: false,
            optimize_prefixes: false,
        }
    }

    /// Chooses whether `build` compiles through `engine::compile_with_jit`
    /// (`true`) or `engine::compile_from_hir` (`false`).
    pub fn jit(self, enabled: bool) -> Self {
        Self {
            jit: enabled,
            ..self
        }
    }

    /// Chooses whether `build` runs `hir::optimize_prefixes` on the
    /// translated HIR before compiling.
    pub fn optimize_prefixes(self, enabled: bool) -> Self {
        Self {
            optimize_prefixes: enabled,
            ..self
        }
    }

    /// Consumes the builder and compiles the pattern into a [`Regex`].
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by parsing, HIR translation, or
    /// engine compilation.
    pub fn build(self) -> Result<Regex> {
        let Self {
            pattern,
            jit,
            optimize_prefixes,
        } = self;

        let ast = parser::parse(&pattern)?;
        let translated = hir::translate(&ast)?;
        let lowered = if optimize_prefixes {
            hir::optimize_prefixes(translated)
        } else {
            translated
        };

        // The name table is copied out of the HIR so the finished regex (and
        // every `Captures` it produces) can share it behind an `Arc`.
        let named_groups = Arc::new(lowered.props.named_groups.clone());

        let inner = if jit {
            engine::compile_with_jit(&lowered)?
        } else {
            engine::compile_from_hir(&lowered)?
        };

        Ok(Regex {
            inner,
            pattern,
            named_groups,
        })
    }
}
/// A compiled regular expression.
///
/// Bundles the compiled engine with the pattern text it was built from and
/// the name-to-index table for named capture groups.
#[derive(Debug)]
pub struct Regex {
/// Compiled matcher that the search methods delegate to.
inner: CompiledRegex,
/// Pattern text this regex was built from; returned by `as_str`.
pattern: String,
/// Named-group table copied from the translated HIR's `props.named_groups`.
/// The `u32` value is used as a slot index by `Captures::name`.
named_groups: Arc<HashMap<String, u32>>,
}
impl Regex {
    /// Compiles `pattern` with the default pipeline: parse, translate to HIR,
    /// then `engine::compile_from_hir` (no JIT, no prefix optimization).
    ///
    /// # Errors
    ///
    /// Propagates the first error returned by parsing, HIR translation, or
    /// engine compilation.
    pub fn new(pattern: &str) -> Result<Regex> {
        let ast = parser::parse(pattern)?;
        let hir = hir::translate(&ast)?;
        // Copy the name table out of the HIR so `Captures` values can share it.
        let named_groups = Arc::new(hir.props.named_groups.clone());
        let inner = engine::compile_from_hir(&hir)?;
        Ok(Regex {
            inner,
            pattern: pattern.to_string(),
            named_groups,
        })
    }

    /// Iterates over the names of the pattern's named capture groups.
    ///
    /// The names are the keys of a `HashMap`, so the order is unspecified.
    pub fn capture_names(&self) -> impl Iterator<Item = &str> {
        self.named_groups.keys().map(String::as_str)
    }

    /// Returns the pattern text this regex was compiled from.
    pub fn as_str(&self) -> &str {
        &self.pattern
    }

    /// Reports whether the engine finds a match anywhere in `text`.
    pub fn is_match(&self, text: &str) -> bool {
        self.inner.is_match(text.as_bytes())
    }

    /// Returns the match the engine reports for `text`, or `None` when the
    /// engine finds nothing.
    pub fn find<'t>(&self, text: &'t str) -> Option<Match<'t>> {
        let (start, end) = self.inner.find(text.as_bytes())?;
        Some(Match { text, start, end })
    }

    /// Returns an iterator over successive matches in `text`.
    ///
    /// The regex borrow and the text share the single lifetime `'a`, so the
    /// yielded [`Match`] values cannot outlive the borrow of `self`.
    pub fn find_iter<'a>(&'a self, text: &'a str) -> Matches<'a> {
        Matches::new(self, text)
    }

    /// Returns the capture-group positions for the match the engine reports
    /// in `text`, or `None` when the engine finds nothing.
    pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
        let slots = self.inner.captures(text.as_bytes())?;
        Some(Captures {
            text,
            slots,
            named_groups: Arc::clone(&self.named_groups),
        })
    }

    /// Returns an iterator over successive capture sets in `text`.
    pub fn captures_iter<'r, 't>(&'r self, text: &'t str) -> CapturesIter<'r, 't> {
        CapturesIter {
            regex: self,
            text,
            last_end: 0,
        }
    }

    /// Replaces the match returned by [`Regex::find`] with `rep`.
    ///
    /// `rep` is inserted verbatim; no group references are expanded. When
    /// there is no match the input is returned borrowed, without allocating.
    pub fn replace<'t>(&self, text: &'t str, rep: &str) -> std::borrow::Cow<'t, str> {
        use std::borrow::Cow;

        match self.find(text) {
            None => Cow::Borrowed(text),
            Some(m) => {
                let mut result = String::with_capacity(text.len() + rep.len());
                result.push_str(&text[..m.start()]);
                result.push_str(rep);
                result.push_str(&text[m.end()..]);
                Cow::Owned(result)
            }
        }
    }

    /// Returns the name reported by the compiled engine.
    pub fn engine_name(&self) -> &'static str {
        self.inner.engine_name()
    }

    /// Replaces every match yielded by [`Regex::find_iter`] with `rep`.
    ///
    /// `rep` is inserted verbatim; no group references are expanded. When
    /// there is no match the input is returned borrowed, without allocating.
    pub fn replace_all<'t>(&self, text: &'t str, rep: &str) -> std::borrow::Cow<'t, str> {
        use std::borrow::Cow;

        // Peek first so the no-match path stays allocation-free.
        let mut matches = self.find_iter(text).peekable();
        if matches.peek().is_none() {
            return Cow::Borrowed(text);
        }

        // Sizing the buffer to the input avoids repeated growth from empty;
        // it is a starting estimate, not an exact bound.
        let mut result = String::with_capacity(text.len());
        let mut last_end = 0;
        for m in matches {
            // Copy the untouched gap before this match, then the replacement.
            result.push_str(&text[last_end..m.start()]);
            result.push_str(rep);
            last_end = m.end();
        }
        result.push_str(&text[last_end..]);
        Cow::Owned(result)
    }
}
/// A single match: a byte range inside the text that was searched.
#[derive(Debug, Clone, Copy)]
pub struct Match<'t> {
    /// The full text the offsets index into.
    text: &'t str,
    /// Byte offset of the first byte of the match.
    start: usize,
    /// Byte offset one past the last byte of the match.
    end: usize,
}

impl<'t> Match<'t> {
    /// Byte offset at which the match begins.
    pub fn start(&self) -> usize {
        self.start
    }

    /// Byte offset just past the end of the match.
    pub fn end(&self) -> usize {
        self.end
    }

    /// The matched portion of the searched text.
    ///
    /// # Panics
    ///
    /// Panics if the stored offsets are not a valid `str` slice range of the
    /// searched text.
    pub fn as_str(&self) -> &'t str {
        let haystack: &'t str = self.text;
        &haystack[self.range()]
    }

    /// The half-open byte range `start..end` covered by the match.
    pub fn range(&self) -> std::ops::Range<usize> {
        std::ops::Range {
            start: self.start,
            end: self.end,
        }
    }

    /// Length of the match in bytes.
    pub fn len(&self) -> usize {
        let (from, to) = (self.start, self.end);
        to - from
    }

    /// `true` when the match covers zero bytes.
    pub fn is_empty(&self) -> bool {
        self.end == self.start
    }
}
/// Iterator over successive matches of a [`Regex`] in a piece of text.
///
/// Created by `Regex::find_iter`; yields [`Match`] values.
pub struct Matches<'a> {
/// Strategy-specific iteration state (see `MatchesInner`).
inner: MatchesInner<'a>,
/// The full text being searched; yielded matches borrow from it.
text: &'a str,
}
/// Abbreviated `Debug` output: only the length of the searched text is
/// shown, and the remaining state is elided as non-exhaustive.
impl std::fmt::Debug for Matches<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text_len = self.text.len();
        let mut out = f.debug_struct("Matches");
        out.field("text_len", &text_len);
        out.finish_non_exhaustive()
    }
}
/// Iteration strategy backing a `Matches` iterator.
enum MatchesInner<'a> {
/// Wraps the iterator returned by the engine's `find_full_matches`;
/// selected when the engine's `is_full_match_prefilter()` returns `true`.
TeddyFull(literal::FullMatchIter<'a, 'a>),
/// Repeatedly searches the remainder of the text with `regex`;
/// `last_end` is the absolute byte offset where the next search starts.
Generic { regex: &'a Regex, last_end: usize },
}
impl<'a> Matches<'a> {
    /// Builds the iterator for `regex` over `text`.
    ///
    /// Uses the engine's full-match iterator when the engine reports
    /// `is_full_match_prefilter()`, and otherwise falls back to the generic
    /// repeated search starting at offset 0.
    fn new(regex: &'a Regex, text: &'a str) -> Self {
        if regex.inner.is_full_match_prefilter() {
            let iter = regex.inner.find_full_matches(text.as_bytes());
            return Matches {
                inner: MatchesInner::TeddyFull(iter),
                text,
            };
        }

        Matches {
            inner: MatchesInner::Generic { regex, last_end: 0 },
            text,
        }
    }
}
impl<'a> Iterator for Matches<'a> {
    type Item = Match<'a>;

    /// Yields the next match, or `None` once the text is exhausted.
    ///
    /// In the generic strategy each search runs on the tail of the text that
    /// starts at `last_end`, and the relative offsets are shifted back to
    /// absolute ones. After an empty match the cursor steps over one
    /// character (or one byte at the end of the text) so iteration always
    /// makes progress.
    ///
    /// NOTE(review): because only the tail slice is handed to the engine, it
    /// presumably cannot see the text before `last_end`; confirm that anchors
    /// and other context-dependent assertions behave as intended on matches
    /// after the first.
    fn next(&mut self) -> Option<Match<'a>> {
        let text = self.text;

        let (regex, last_end) = match &mut self.inner {
            MatchesInner::TeddyFull(iter) => {
                let (start, end) = iter.next()?;
                return Some(Match { text, start, end });
            }
            MatchesInner::Generic { regex, last_end } => (regex, last_end),
        };

        // The cursor sits past the end only after an empty match at the end.
        if *last_end > text.len() {
            return None;
        }

        let tail = &text[*last_end..];
        let (rel_start, rel_end) = regex.inner.find(tail.as_bytes())?;
        let start = *last_end + rel_start;
        let end = *last_end + rel_end;

        *last_end = if start == end {
            // Empty match: skip one whole character to avoid looping forever.
            let step = text[end..].chars().next().map_or(1, char::len_utf8);
            end + step
        } else {
            end
        };

        Some(Match { text, start, end })
    }
}
/// Iterator over successive capture sets of a [`Regex`] in a piece of text.
///
/// Created by `Regex::captures_iter`; yields [`Captures`] values.
#[derive(Debug)]
pub struct CapturesIter<'r, 't> {
/// Regex whose engine performs each search.
regex: &'r Regex,
/// The full text being searched; yielded captures borrow from it.
text: &'t str,
/// Absolute byte offset at which the next search starts.
last_end: usize,
}
impl<'r, 't> Iterator for CapturesIter<'r, 't> {
    type Item = Captures<'t>;

    /// Yields the capture set for the next match, or `None` when the engine
    /// finds nothing more (or reports no span in slot 0).
    ///
    /// Each search runs on the tail of the text starting at `last_end`; the
    /// engine's relative slot offsets are shifted to absolute offsets before
    /// being returned. After an empty match the cursor steps over one
    /// character (or one byte at the end of the text) to guarantee progress.
    ///
    /// NOTE(review): because only the tail slice is handed to the engine, it
    /// presumably cannot see the text before `last_end`; confirm that
    /// context-dependent assertions behave as intended on later matches.
    fn next(&mut self) -> Option<Captures<'t>> {
        let base = self.last_end;
        if base > self.text.len() {
            return None;
        }

        let tail = &self.text[base..];
        let relative = self.regex.inner.captures(tail.as_bytes())?;

        // Slot 0 is used as the overall match span to advance the cursor.
        let (whole_start, whole_end) = relative.first().copied().flatten()?;
        let match_end = base + whole_end;

        self.last_end = if whole_start == whole_end {
            let step = self.text[match_end..]
                .chars()
                .next()
                .map_or(1, char::len_utf8);
            match_end + step
        } else {
            match_end
        };

        let mut slots = Vec::new();
        slots.extend(
            relative
                .into_iter()
                .map(|slot| slot.map(|(s, e)| (base + s, base + e))),
        );

        Some(Captures {
            text: self.text,
            slots,
            named_groups: Arc::clone(&self.regex.named_groups),
        })
    }
}
/// Capture-group positions recorded for one match.
///
/// Produced by `Regex::captures` and by `CapturesIter`.
#[derive(Debug, Clone)]
pub struct Captures<'t> {
/// Text the slot offsets index into.
text: &'t str,
/// One entry per slot: `Some((start, end))` byte offsets into `text`, or
/// `None` when no span was recorded. `CapturesIter` treats entry 0 as the
/// overall match.
slots: Vec<Option<(usize, usize)>>,
/// Maps group names to slot indices; shared with the originating `Regex`.
named_groups: Arc<HashMap<String, u32>>,
}
impl<'t> Captures<'t> {
    /// Number of slots recorded, including ones with no span.
    pub fn len(&self) -> usize {
        self.slots.len()
    }

    /// `true` when no slots were recorded at all.
    pub fn is_empty(&self) -> bool {
        self.slots.is_empty()
    }

    /// Returns the span stored in slot `i` as a [`Match`].
    ///
    /// Yields `None` when `i` is out of range or when the slot holds no span.
    pub fn get(&self, i: usize) -> Option<Match<'t>> {
        let (start, end) = self.slots.get(i).copied().flatten()?;
        Some(Match {
            text: self.text,
            start,
            end,
        })
    }

    /// Returns the span for the group called `name`.
    ///
    /// Yields `None` when the name is unknown or when its slot holds no span.
    pub fn name(&self, name: &str) -> Option<Match<'t>> {
        let idx = *self.named_groups.get(name)?;
        self.get(idx as usize)
    }
}
/// Indexes captures by slot number, yielding the matched text.
///
/// # Panics
///
/// Panics when `Captures::get` returns `None` for the index, i.e. when the
/// index is out of range or the slot holds no span.
impl<'t> std::ops::Index<usize> for Captures<'t> {
    type Output = str;

    fn index(&self, i: usize) -> &str {
        match self.get(i) {
            Some(found) => found.as_str(),
            None => panic!("no capture group at index {}", i),
        }
    }
}
/// Indexes captures by group name, yielding the matched text.
///
/// # Panics
///
/// Panics when `Captures::name` returns `None` for the name, i.e. when the
/// name is unknown or its slot holds no span.
impl<'t> std::ops::Index<&str> for Captures<'t> {
    type Output = str;

    fn index(&self, name: &str) -> &str {
        match self.name(name) {
            Some(found) => found.as_str(),
            None => panic!("no capture group named '{}'", name),
        }
    }
}