use alloc::{
borrow::Cow, boxed::Box, string::String, string::ToString, sync::Arc, vec,
vec::Vec,
};
use crate::{
error::Error,
hir::{self, Hir},
int::NonMaxUsize,
interpolate,
nfa::{self, NFA},
pikevm::{self, Cache, PikeVM},
pool::CachePool,
};
/// A compiled regular expression.
///
/// Bundles the matching engine with a pool of the scratch caches that each
/// search borrows for its duration.
pub struct Regex {
/// The matching engine. Held in an `Arc` so that the cache-constructor
/// closure and every `Captures` value can keep their own handle to it.
pikevm: Arc<PikeVM>,
/// Pool from which searches obtain a `Cache` via `pool.get()`.
pool: CachePool,
}
impl Clone for Regex {
    /// Creates a `Regex` that shares this one's `PikeVM` but owns a newly
    /// constructed cache pool.
    fn clone(&self) -> Regex {
        let pikevm = Arc::clone(&self.pikevm);
        // The pool's constructor closure needs its own handle to the engine,
        // separate from the one stored in the struct.
        let engine_for_pool = Arc::clone(&self.pikevm);
        let make_cache = Box::new(move || Cache::new(&engine_for_pool));
        let pool = CachePool::new(make_cache);
        Regex { pikevm, pool }
    }
}
impl core::fmt::Display for Regex {
    /// Writes the pattern string returned by `as_str`, ignoring any
    /// width/precision flags on the formatter.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str(self.as_str())
    }
}
impl core::fmt::Debug for Regex {
    /// Formats as a tuple struct named `Regex` wrapping the pattern string.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let pattern = self.as_str();
        let mut tuple = f.debug_tuple("Regex");
        tuple.field(&pattern);
        tuple.finish()
    }
}
impl core::str::FromStr for Regex {
type Err = Error;
fn from_str(s: &str) -> Result<Regex, Error> {
Regex::new(s)
}
}
impl TryFrom<&str> for Regex {
type Error = Error;
fn try_from(s: &str) -> Result<Regex, Error> {
Regex::new(s)
}
}
impl TryFrom<String> for Regex {
type Error = Error;
fn try_from(s: String) -> Result<Regex, Error> {
Regex::new(&s)
}
}
impl Regex {
    /// Compiles `pattern` using a `RegexBuilder` left at its defaults.
    ///
    /// # Errors
    ///
    /// Returns whatever error `RegexBuilder::build` reports.
    pub fn new(pattern: &str) -> Result<Regex, Error> {
        let builder = RegexBuilder::new(pattern);
        builder.build()
    }

    /// Reports whether the regex matches `haystack`, searching from offset 0.
    #[inline]
    pub fn is_match(&self, haystack: &str) -> bool {
        let from = 0;
        self.is_match_at(haystack, from)
    }

    /// Returns the first match in `haystack`, searching from offset 0.
    #[inline]
    pub fn find<'h>(&self, haystack: &'h str) -> Option<Match<'h>> {
        let from = 0;
        self.find_at(haystack, from)
    }

    /// Returns an iterator over the matches in `haystack`.
    ///
    /// A cache is taken from the pool up front and handed to the engine's
    /// own match iterator.
    #[inline]
    pub fn find_iter<'r, 'h>(&'r self, haystack: &'h str) -> Matches<'r, 'h> {
        let cache = self.pool.get();
        let it = self.pikevm.find_iter(cache, haystack.as_bytes());
        Matches { haystack, it }
    }

    /// Returns the capture groups of the first match in `haystack`,
    /// searching from offset 0.
    #[inline]
    pub fn captures<'h>(&self, haystack: &'h str) -> Option<Captures<'h>> {
        let from = 0;
        self.captures_at(haystack, from)
    }

    /// Returns an iterator yielding a `Captures` for each match in
    /// `haystack`.
    #[inline]
    pub fn captures_iter<'r, 'h>(
        &'r self,
        haystack: &'h str,
    ) -> CaptureMatches<'r, 'h> {
        let cache = self.pool.get();
        let it = self.pikevm.captures_iter(cache, haystack.as_bytes());
        CaptureMatches { haystack, re: self, it }
    }

    /// Returns an iterator over the substrings of `haystack` that lie
    /// between matches of this regex.
    #[inline]
    pub fn split<'r, 'h>(&'r self, haystack: &'h str) -> Split<'r, 'h> {
        let finder = self.find_iter(haystack);
        Split { haystack, finder, last: 0 }
    }

    /// Like `split`, but yields at most `limit` substrings; the final one is
    /// the unsplit remainder of `haystack`.
    #[inline]
    pub fn splitn<'r, 'h>(
        &'r self,
        haystack: &'h str,
        limit: usize,
    ) -> SplitN<'r, 'h> {
        let splits = self.split(haystack);
        SplitN { splits, limit }
    }

    /// Replaces the first match only; shorthand for `replacen` with a limit
    /// of 1.
    #[inline]
    pub fn replace<'h, R: Replacer>(
        &self,
        haystack: &'h str,
        rep: R,
    ) -> Cow<'h, str> {
        const FIRST_ONLY: usize = 1;
        self.replacen(haystack, FIRST_ONLY, rep)
    }

    /// Replaces every match; shorthand for `replacen` with a limit of 0,
    /// which `replacen` treats as "no limit".
    #[inline]
    pub fn replace_all<'h, R: Replacer>(
        &self,
        haystack: &'h str,
        rep: R,
    ) -> Cow<'h, str> {
        const UNLIMITED: usize = 0;
        self.replacen(haystack, UNLIMITED, rep)
    }

    /// Replaces up to `limit` matches in `haystack` with `rep`; a `limit` of
    /// zero replaces all of them.
    ///
    /// When there is no match at all, `haystack` is returned borrowed and
    /// nothing is allocated. Otherwise a new `String` is built.
    ///
    /// # Panics
    ///
    /// Panics if a `Captures` produced by the iterator lacks group 0.
    #[inline]
    pub fn replacen<'h, R: Replacer>(
        &self,
        haystack: &'h str,
        limit: usize,
        mut rep: R,
    ) -> Cow<'h, str> {
        // `nth` is the zero-based index of the match just replaced.
        let limit_reached = |nth: usize| limit > 0 && nth >= limit - 1;

        // Fast path: the replacer says its text is a fixed literal, so plain
        // match spans suffice and capture groups never need resolving.
        if let Some(literal) = rep.no_expansion() {
            let mut matches = self.find_iter(haystack).enumerate().peekable();
            if matches.peek().is_none() {
                return Cow::Borrowed(haystack);
            }
            let mut out = String::with_capacity(haystack.len());
            let mut copied_upto = 0;
            for (nth, m) in matches {
                out.push_str(&haystack[copied_upto..m.start()]);
                out.push_str(&literal);
                copied_upto = m.end();
                if limit_reached(nth) {
                    break;
                }
            }
            out.push_str(&haystack[copied_upto..]);
            return Cow::Owned(out);
        }

        // General path: hand each match's capture groups to the replacer.
        let mut all_caps = self.captures_iter(haystack).enumerate().peekable();
        if all_caps.peek().is_none() {
            return Cow::Borrowed(haystack);
        }
        let mut out = String::with_capacity(haystack.len());
        let mut copied_upto = 0;
        for (nth, caps) in all_caps {
            let whole = caps.get(0).unwrap();
            out.push_str(&haystack[copied_upto..whole.start()]);
            rep.replace_append(&caps, &mut out);
            copied_upto = whole.end();
            if limit_reached(nth) {
                break;
            }
        }
        out.push_str(&haystack[copied_upto..]);
        Cow::Owned(out)
    }
}
impl Regex {
    /// Same as `shortest_match_at` with a start offset of 0.
    #[inline]
    pub fn shortest_match(&self, haystack: &str) -> Option<usize> {
        let from = 0;
        self.shortest_match_at(haystack, from)
    }

    /// Searches `haystack[start..]` and returns the end offset the engine
    /// recorded for the match, or `None` when nothing matched.
    ///
    /// The search passes `true` for the engine's boolean flag, as
    /// `is_match_at` does (`find_at` passes `false`).
    /// NOTE(review): presumably "stop at the earliest match" — confirm
    /// against `PikeVM::search`.
    ///
    /// # Panics
    ///
    /// Panics if the engine reports a match without filling the end slot.
    #[inline]
    pub fn shortest_match_at(
        &self,
        haystack: &str,
        start: usize,
    ) -> Option<usize> {
        let mut cache = self.pool.get();
        // Slot 0 holds the match start, slot 1 the match end.
        let mut slots = [None, None];
        let found = self.pikevm.search(
            &mut cache,
            haystack.as_bytes(),
            start,
            haystack.len(),
            true,
            &mut slots,
        );
        if found {
            Some(slots[1].unwrap().get())
        } else {
            None
        }
    }

    /// Reports whether the regex matches within `haystack[start..]`.
    ///
    /// No slots are supplied, so the engine records no offsets.
    #[inline]
    pub fn is_match_at(&self, haystack: &str, start: usize) -> bool {
        let mut cache = self.pool.get();
        let bytes = haystack.as_bytes();
        let end = haystack.len();
        self.pikevm.search(&mut cache, bytes, start, end, true, &mut [])
    }

    /// Searches `haystack[start..]` and returns the match as a `Match`
    /// whose offsets are relative to the whole `haystack`.
    ///
    /// # Panics
    ///
    /// Panics if the engine reports a match without filling both slots.
    #[inline]
    pub fn find_at<'h>(
        &self,
        haystack: &'h str,
        start: usize,
    ) -> Option<Match<'h>> {
        let mut cache = self.pool.get();
        let mut slots = [None, None];
        let found = self.pikevm.search(
            &mut cache,
            haystack.as_bytes(),
            start,
            haystack.len(),
            false,
            &mut slots,
        );
        if !found {
            return None;
        }
        let span_start = slots[0].unwrap().get();
        let span_end = slots[1].unwrap().get();
        Some(Match::new(haystack, span_start, span_end))
    }

    /// Searches `haystack[start..]` and returns every capture group of the
    /// match.
    ///
    /// The slot storage is allocated before the search, whether or not a
    /// match is found.
    #[inline]
    pub fn captures_at<'h>(
        &self,
        haystack: &'h str,
        start: usize,
    ) -> Option<Captures<'h>> {
        let slots = self.capture_locations();
        let pikevm = Arc::clone(&self.pikevm);
        let mut caps = Captures { haystack, slots, pikevm };
        let mut cache = self.pool.get();
        let found = self.pikevm.search(
            &mut cache,
            haystack.as_bytes(),
            start,
            haystack.len(),
            false,
            &mut caps.slots.0,
        );
        if found {
            Some(caps)
        } else {
            None
        }
    }

    /// Same as `captures_read_at` with a start offset of 0.
    #[inline]
    pub fn captures_read<'h>(
        &self,
        locs: &mut CaptureLocations,
        haystack: &'h str,
    ) -> Option<Match<'h>> {
        let from = 0;
        self.captures_read_at(locs, haystack, from)
    }

    /// Searches `haystack[start..]`, writing group offsets into the
    /// caller-provided `locs`, and returns the overall match.
    ///
    /// # Panics
    ///
    /// Panics if the engine reports a match but `locs` holds no span for
    /// group 0.
    #[inline]
    pub fn captures_read_at<'h>(
        &self,
        locs: &mut CaptureLocations,
        haystack: &'h str,
        start: usize,
    ) -> Option<Match<'h>> {
        let mut cache = self.pool.get();
        let found = self.pikevm.search(
            &mut cache,
            haystack.as_bytes(),
            start,
            haystack.len(),
            false,
            &mut locs.0,
        );
        if !found {
            return None;
        }
        let (span_start, span_end) = locs.get(0).unwrap();
        Some(Match::new(haystack, span_start, span_end))
    }
}
impl Regex {
    /// Returns the pattern string stored in the compiled NFA.
    #[inline]
    pub fn as_str(&self) -> &str {
        let nfa = self.pikevm.nfa();
        &nfa.pattern()
    }

    /// Returns an iterator over the capture group names recorded in the
    /// NFA.
    #[inline]
    pub fn capture_names(&self) -> CaptureNames<'_> {
        let names = self.pikevm.nfa().capture_names();
        CaptureNames(names)
    }

    /// Returns the NFA's group count.
    #[inline]
    pub fn captures_len(&self) -> usize {
        let nfa = self.pikevm.nfa();
        nfa.group_len()
    }

    /// Returns the NFA's static explicit-capture count plus one
    /// (saturating), or `None` when the NFA reports no static count.
    #[inline]
    pub fn static_captures_len(&self) -> Option<usize> {
        let explicit = self.pikevm.nfa().static_explicit_captures_len()?;
        Some(explicit.saturating_add(1))
    }

    /// Allocates empty capture storage: two slots (start and end) per
    /// group, all initialised to `None`.
    ///
    /// # Panics
    ///
    /// Panics if doubling the group count overflows `usize`.
    #[inline]
    pub fn capture_locations(&self) -> CaptureLocations {
        let groups = self.pikevm.nfa().group_len();
        let slot_count = groups.checked_mul(2).unwrap();
        CaptureLocations(vec![None; slot_count])
    }
}
/// A single match: the byte span `start..end` of the haystack it was found
/// in.
///
/// The `'h` lifetime is that of the haystack, so strings obtained from a
/// `Match` stay valid for as long as the haystack does.
#[derive(Copy, Clone, Eq, PartialEq)]
pub struct Match<'h> {
    /// The full haystack that was searched.
    haystack: &'h str,
    /// Byte offset where the match begins.
    start: usize,
    /// Byte offset one past where the match ends.
    end: usize,
}

impl<'h> Match<'h> {
    /// Builds a match for `haystack[start..end]`.
    ///
    /// The offsets are stored unchecked; `as_str` panics later if they do
    /// not form a valid range on char boundaries.
    #[inline]
    fn new(haystack: &'h str, start: usize, end: usize) -> Match<'h> {
        Match { haystack, start, end }
    }

    /// Byte offset in the haystack where the match begins.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Byte offset in the haystack one past the end of the match.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Returns `true` when the match covers no bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }

    /// Length of the match in bytes.
    #[inline]
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// The match's byte span as a `Range`.
    #[inline]
    pub fn range(&self) -> core::ops::Range<usize> {
        self.start..self.end
    }

    /// The matched text, borrowed from the haystack rather than from this
    /// `Match`.
    #[inline]
    pub fn as_str(&self) -> &'h str {
        &self.haystack[self.range()]
    }
}

// Uses `core::fmt` like every other formatting impl in this file, so the
// impl does not depend on `std` being linked.
impl<'h> core::fmt::Debug for Match<'h> {
    /// Shows the offsets and the matched text; the rest of the haystack is
    /// left out.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("Match")
            .field("start", &self.start)
            .field("end", &self.end)
            .field("string", &self.as_str())
            .finish()
    }
}

impl<'h> From<Match<'h>> for &'h str {
    /// Converts a match into the text it matched.
    fn from(m: Match<'h>) -> &'h str {
        m.as_str()
    }
}

impl<'h> From<Match<'h>> for core::ops::Range<usize> {
    /// Converts a match into its byte span.
    fn from(m: Match<'h>) -> core::ops::Range<usize> {
        m.range()
    }
}
/// The capture groups recorded for a single match.
///
/// `'h` is the lifetime of the haystack; `Match` values handed out by this
/// type borrow from the haystack, not from the `Captures` itself.
pub struct Captures<'h> {
/// The haystack that was searched.
haystack: &'h str,
/// Start/end offsets for every group, as filled in by the search.
slots: CaptureLocations,
/// Handle to the engine, used to reach the NFA's group names and counts.
pikevm: Arc<PikeVM>,
}
impl<'h> Captures<'h> {
    /// Returns the match for group `i`, or `None` when the slots hold no
    /// span for that index.
    #[inline]
    pub fn get(&self, i: usize) -> Option<Match<'h>> {
        let (start, end) = self.slots.get(i)?;
        Some(Match::new(self.haystack, start, end))
    }

    /// Returns the match for the group called `name`, or `None` when the
    /// NFA knows no such name or the group holds no span.
    #[inline]
    pub fn name(&self, name: &str) -> Option<Match<'h>> {
        match self.pikevm.nfa().to_index(name) {
            Some(index) => self.get(index),
            None => None,
        }
    }

    /// Returns the whole match together with exactly `N` group strings.
    ///
    /// # Panics
    ///
    /// Panics when the NFA reports no static explicit-capture count, when
    /// `N` differs from that count, or when fewer than `N + 1` groups hold
    /// a span.
    pub fn extract<const N: usize>(&self) -> (&'h str, [&'h str; N]) {
        let nfa = self.pikevm.nfa();
        let len = nfa
            .static_explicit_captures_len()
            .expect("number of capture groups can vary in a match");
        assert_eq!(N, len, "asked for {N} groups, but must ask for {len}");
        // Groups without a span are skipped by `flatten`.
        let mut present = self.iter().flatten();
        let whole_match = present.next().expect("a match").as_str();
        let group_matches = [(); N].map(|()| {
            present.next().expect("too few matching groups").as_str()
        });
        (whole_match, group_matches)
    }

    /// Appends `replacement` to `dst`, letting `interpolate::string`
    /// substitute group references.
    ///
    /// A referenced group that holds no span contributes nothing.
    #[inline]
    pub fn expand(&self, replacement: &str, dst: &mut String) {
        interpolate::string(
            replacement,
            |index, out| {
                if let Some(m) = self.get(index) {
                    out.push_str(m.as_str());
                }
            },
            |name| self.pikevm.nfa().to_index(name),
            dst,
        );
    }

    /// Returns an iterator yielding one `Option<Match>` per group, in
    /// group-index order.
    #[inline]
    pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 'h> {
        let it = self.pikevm.nfa().capture_names().enumerate();
        SubCaptureMatches { caps: self, it }
    }

    /// Returns the NFA's group count.
    #[inline]
    pub fn len(&self) -> usize {
        let nfa = self.pikevm.nfa();
        nfa.group_len()
    }
}
impl<'h> core::fmt::Debug for Captures<'h> {
    /// Formats as `Captures({..})`, where the map has one entry per group:
    /// the key is `index` or `index/"name"`, the value is `None` or
    /// `start..end/"text"`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        /// Map key: the group index, followed by `/"name"` for named groups.
        struct Key<'a>(usize, Option<&'a str>);

        impl<'a> core::fmt::Debug for Key<'a> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                write!(f, "{}", self.0)?;
                match self.1 {
                    Some(name) => write!(f, "/{name:?}"),
                    None => Ok(()),
                }
            }
        }

        /// Map value for a group that holds a span.
        struct Value<'a>(Match<'a>);

        impl<'a> core::fmt::Debug for Value<'a> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                let m = &self.0;
                write!(f, "{}..{}/{:?}", m.start(), m.end(), m.as_str())
            }
        }

        /// Renders all groups as a debug map.
        struct CapturesDebugMap<'a> {
            caps: &'a Captures<'a>,
        }

        impl<'a> core::fmt::Debug for CapturesDebugMap<'a> {
            fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                let caps = self.caps;
                let mut map = f.debug_map();
                let names = caps.pikevm.nfa().capture_names();
                for (group_index, maybe_name) in names.enumerate() {
                    let key = Key(group_index, maybe_name);
                    if let Some(mat) = caps.get(group_index) {
                        map.entry(&key, &Value(mat));
                    } else {
                        map.entry(&key, &None::<()>);
                    }
                }
                map.finish()
            }
        }

        let groups = CapturesDebugMap { caps: self };
        f.debug_tuple("Captures").field(&groups).finish()
    }
}
impl<'h> core::ops::Index<usize> for Captures<'h> {
    type Output = str;

    /// Returns the text of group `i`.
    ///
    /// The returned `&str` is tied to the borrow of `self`, as the `Index`
    /// signature requires.
    ///
    /// # Panics
    ///
    /// Panics when group `i` holds no span.
    fn index(&self, i: usize) -> &str {
        match self.get(i) {
            Some(m) => m.as_str(),
            None => panic!("no group at index '{i}'"),
        }
    }
}
impl<'h, 'n> core::ops::Index<&'n str> for Captures<'h> {
    type Output = str;

    /// Returns the text of the group called `name`.
    ///
    /// The result borrows from `self` (`'a`), not from the lookup key
    /// (`'n`).
    ///
    /// # Panics
    ///
    /// Panics when no group has that name or the group holds no span.
    fn index<'a>(&'a self, name: &'n str) -> &'a str {
        match self.name(name) {
            Some(m) => m.as_str(),
            None => panic!("no group named '{name}'"),
        }
    }
}
/// Flat storage of capture offsets: group `i` keeps its start in slot `2*i`
/// and its end in slot `2*i + 1`. A `None` slot means no offset is
/// recorded.
#[derive(Clone, Debug)]
pub struct CaptureLocations(Vec<Option<NonMaxUsize>>);

impl CaptureLocations {
    /// Returns the `(start, end)` offsets of group `i`.
    ///
    /// Yields `None` when the index arithmetic overflows, when either slot
    /// is out of range, or when either slot is unset.
    #[inline]
    pub fn get(&self, i: usize) -> Option<(usize, usize)> {
        let start_slot = i.checked_mul(2)?;
        let end_slot = start_slot.checked_add(1)?;
        match (self.0.get(start_slot), self.0.get(end_slot)) {
            (Some(&Some(start)), Some(&Some(end))) => {
                Some((start.get(), end.get()))
            }
            _ => None,
        }
    }

    /// Number of groups this storage covers, which is half the slot count.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len() / 2
    }
}
/// Iterator over the matches of a regex in a haystack.
///
/// Wraps the engine's offset iterator and turns each `(start, end)` pair
/// into a `Match` borrowing the haystack.
#[derive(Debug)]
pub struct Matches<'r, 'h> {
    haystack: &'h str,
    it: pikevm::FindMatches<'r, 'h>,
}

impl<'r, 'h> Iterator for Matches<'r, 'h> {
    type Item = Match<'h>;

    #[inline]
    fn next(&mut self) -> Option<Match<'h>> {
        let (start, end) = self.it.next()?;
        Some(Match::new(self.haystack, start, end))
    }

    /// Counts through the inner iterator directly, without building a
    /// `Match` per item.
    #[inline]
    fn count(self) -> usize {
        let Matches { it, .. } = self;
        it.count()
    }
}

impl<'r, 'h> core::iter::FusedIterator for Matches<'r, 'h> {}
/// Iterator yielding the `Captures` of each match in a haystack.
#[derive(Debug)]
pub struct CaptureMatches<'r, 'h> {
    haystack: &'h str,
    re: &'r Regex,
    it: pikevm::CapturesMatches<'r, 'h>,
}

impl<'r, 'h> Iterator for CaptureMatches<'r, 'h> {
    type Item = Captures<'h>;

    /// Wraps the next slot vector from the engine in a `Captures` that
    /// carries its own handle to the engine.
    #[inline]
    fn next(&mut self) -> Option<Captures<'h>> {
        let slots = self.it.next()?;
        Some(Captures {
            haystack: self.haystack,
            slots: CaptureLocations(slots),
            pikevm: Arc::clone(&self.re.pikevm),
        })
    }

    /// Counts through the inner iterator directly, without building a
    /// `Captures` per item.
    #[inline]
    fn count(self) -> usize {
        let CaptureMatches { it, .. } = self;
        it.count()
    }
}

impl<'r, 'h> core::iter::FusedIterator for CaptureMatches<'r, 'h> {}
/// Iterator over the pieces of a haystack that lie between regex matches.
#[derive(Debug)]
pub struct Split<'r, 'h> {
    haystack: &'h str,
    finder: Matches<'r, 'h>,
    /// End offset of the previous match, i.e. where the next piece starts.
    /// Set to `haystack.len() + 1` once the trailing piece has been yielded.
    last: usize,
}

impl<'r, 'h> Iterator for Split<'r, 'h> {
    type Item = &'h str;

    #[inline]
    fn next(&mut self) -> Option<&'h str> {
        if let Some(m) = self.finder.next() {
            // Yield the text between the previous match and this one.
            let piece = self.last..m.start();
            self.last = m.end();
            return Some(&self.haystack[piece]);
        }

        let len = self.haystack.len();
        if self.last > len {
            // The trailing piece was already handed out.
            return None;
        }
        let tail = self.last..len;
        // Push `last` past the end so the next call returns `None`.
        self.last = len + 1;
        Some(&self.haystack[tail])
    }
}

impl<'r, 't> core::iter::FusedIterator for Split<'r, 't> {}
/// Iterator over at most `limit` pieces of a haystack split by a regex.
///
/// The final piece, if reached, is the whole unsplit remainder of the
/// haystack.
#[derive(Debug)]
pub struct SplitN<'r, 'h> {
    splits: Split<'r, 'h>,
    /// How many more items this iterator may still yield.
    limit: usize,
}

impl<'r, 'h> Iterator for SplitN<'r, 'h> {
    type Item = &'h str;

    #[inline]
    fn next(&mut self) -> Option<&'h str> {
        if self.limit == 0 {
            return None;
        }

        self.limit -= 1;
        if self.limit > 0 {
            return self.splits.next();
        }

        // This is the last permitted item: return everything after the
        // previous match, without splitting further.
        let len = self.splits.haystack.len();
        if self.splits.last > len {
            // The underlying splitter already yielded its trailing piece.
            None
        } else {
            Some(&self.splits.haystack[self.splits.last..len])
        }
    }

    /// Reports that at most `limit` items remain.
    ///
    /// Each call to `next` that yields an item first decrements `limit`,
    /// and a `limit` of zero yields nothing, so `limit` is a valid upper
    /// bound. `Split` has no `size_hint` of its own, so delegating to it
    /// would only ever report the default `(0, None)`.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        (0, Some(self.limit))
    }
}

impl<'r, 't> core::iter::FusedIterator for SplitN<'r, 't> {}
/// Iterator over a regex's capture group names; a thin wrapper that
/// forwards everything to the NFA's own name iterator.
///
/// Each item is `Some(name)` or `None`.
/// NOTE(review): `None` presumably marks an unnamed group — confirm against
/// `nfa::CaptureNames`.
#[derive(Clone, Debug)]
pub struct CaptureNames<'r>(nfa::CaptureNames<'r>);

impl<'r> Iterator for CaptureNames<'r> {
    type Item = Option<&'r str>;

    #[inline]
    fn next(&mut self) -> Option<Option<&'r str>> {
        let inner = &mut self.0;
        inner.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let inner = &self.0;
        inner.size_hint()
    }

    #[inline]
    fn count(self) -> usize {
        let CaptureNames(inner) = self;
        inner.count()
    }
}

// NOTE(review): relies on `nfa::CaptureNames` reporting an exact
// `size_hint` — confirm.
impl<'r> ExactSizeIterator for CaptureNames<'r> {}

impl<'r> core::iter::FusedIterator for CaptureNames<'r> {}
/// Iterator over every group of one `Captures`, in group-index order.
///
/// The group-name iterator is used only to enumerate the indices; the names
/// themselves are ignored.
#[derive(Clone, Debug)]
pub struct SubCaptureMatches<'c, 'h> {
    caps: &'c Captures<'h>,
    it: core::iter::Enumerate<nfa::CaptureNames<'c>>,
}

impl<'c, 'h> Iterator for SubCaptureMatches<'c, 'h> {
    type Item = Option<Match<'h>>;

    /// Yields `Some(match)` for a group with a span and `None` for one
    /// without.
    #[inline]
    fn next(&mut self) -> Option<Option<Match<'h>>> {
        match self.it.next() {
            Some((group_index, _name)) => Some(self.caps.get(group_index)),
            None => None,
        }
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let indices = &self.it;
        indices.size_hint()
    }

    #[inline]
    fn count(self) -> usize {
        let SubCaptureMatches { it, .. } = self;
        it.count()
    }
}

// NOTE(review): relies on `nfa::CaptureNames` reporting an exact
// `size_hint` — confirm.
impl<'c, 'h> ExactSizeIterator for SubCaptureMatches<'c, 'h> {}

impl<'c, 'h> core::iter::FusedIterator for SubCaptureMatches<'c, 'h> {}
/// A source of replacement text for `Regex::replace`, `replace_all` and
/// `replacen`.
pub trait Replacer {
/// Appends the replacement for the match described by `caps` to `dst`.
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String);
/// Returns the replacement as a fixed string when it is the same for
/// every match, or `None` (the default) otherwise.
///
/// When this returns `Some`, `Regex::replacen` inserts that string
/// verbatim for each match and never calls `replace_append`.
fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
None
}
/// Wraps a mutable borrow of this replacer in a `ReplacerRef`, which is
/// itself a `Replacer`, so the original stays usable after a call that
/// takes a replacer by value.
fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
ReplacerRef(self)
}
}
impl<'a> Replacer for &'a str {
    /// Expands the string as a replacement template via
    /// `Captures::expand`.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let template: &str = *self;
        caps.expand(template, dst);
    }

    /// Returns the string itself when it contains no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for &'a String {
    /// Delegates to the `&str` replacer on the string's contents.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let mut template: &str = self.as_str();
        template.replace_append(caps, dst)
    }

    /// Returns the string itself when it contains no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(&*self)
    }
}
impl Replacer for String {
    /// Delegates to the `&str` replacer on the string's contents.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let mut template: &str = self.as_str();
        template.replace_append(caps, dst)
    }

    /// Returns the string itself when it contains no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for Cow<'a, str> {
    /// Delegates to the `&str` replacer on the `Cow`'s contents.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let mut template: &str = self.as_ref();
        template.replace_append(caps, dst)
    }

    /// Returns the contents when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(&*self)
    }
}
impl<'a> Replacer for &'a Cow<'a, str> {
    /// Delegates to the `&str` replacer on the `Cow`'s contents.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let mut template: &str = self.as_ref();
        template.replace_append(caps, dst)
    }

    /// Returns the contents when they contain no `$`, else `None`.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        no_expansion(&*self)
    }
}
impl<F, T> Replacer for F
where
    F: FnMut(&Captures<'_>) -> T,
    T: AsRef<str>,
{
    /// Calls the closure with the match's captures and appends the returned
    /// text verbatim; no `$` expansion is applied to it.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let produced = (*self)(caps);
        dst.push_str(produced.as_ref());
    }
}
/// A `Replacer` that forwards to a mutably borrowed replacer; created by
/// `Replacer::by_ref`.
#[derive(Debug)]
pub struct ReplacerRef<'a, R: ?Sized>(&'a mut R);

impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
    /// Forwards to the borrowed replacer.
    fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
        let inner: &mut R = &mut *self.0;
        inner.replace_append(caps, dst)
    }

    /// Forwards to the borrowed replacer.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        let inner: &mut R = &mut *self.0;
        inner.no_expansion()
    }
}
/// A replacer that inserts its string verbatim, never treating `$` as a
/// group reference.
#[derive(Clone, Debug)]
pub struct NoExpand<'t>(pub &'t str);

impl<'t> Replacer for NoExpand<'t> {
    /// Appends the wrapped string unchanged; the captures are ignored.
    fn replace_append(&mut self, _: &Captures<'_>, dst: &mut String) {
        let NoExpand(literal) = *self;
        dst.push_str(literal);
    }

    /// Always returns the wrapped string.
    fn no_expansion(&mut self) -> Option<Cow<'_, str>> {
        let NoExpand(literal) = *self;
        Some(Cow::Borrowed(literal))
    }
}
/// Returns `t`'s text borrowed when it contains no `$` anywhere, and `None`
/// as soon as a `$` is present.
///
/// Any `$` disqualifies the text, whether or not it forms a valid group
/// reference.
fn no_expansion<T: AsRef<str>>(t: &T) -> Option<Cow<'_, str>> {
    let text = t.as_ref();
    if text.contains('$') {
        None
    } else {
        Some(Cow::Borrowed(text))
    }
}
/// A configurable builder for `Regex`.
///
/// Collects the pattern plus parser and NFA settings; `build` performs the
/// actual compilation.
#[derive(Debug)]
pub struct RegexBuilder {
/// The pattern to compile, stored as an owned copy.
pattern: String,
/// Settings handed to `Hir::parse` (flags and nest limit).
hir_config: hir::Config,
/// Settings handed to `NFA::new` (size limit).
nfa_config: nfa::Config,
}
impl RegexBuilder {
    /// Starts a builder for `pattern` with default parser and NFA settings.
    ///
    /// The pattern is only stored here; nothing is parsed until `build`.
    pub fn new(pattern: &str) -> RegexBuilder {
        let hir_config = hir::Config::default();
        let nfa_config = nfa::Config::default();
        RegexBuilder { pattern: pattern.to_string(), hir_config, nfa_config }
    }

    /// Parses the pattern, compiles it to an NFA and wraps it in a `Regex`.
    ///
    /// # Errors
    ///
    /// Returns the error from `Hir::parse` or from `NFA::new`.
    pub fn build(&self) -> Result<Regex, Error> {
        let hir = Hir::parse(self.hir_config, &self.pattern)?;
        let nfa = NFA::new(self.nfa_config, self.pattern.clone(), &hir)?;
        let pikevm = Arc::new(PikeVM::new(nfa));
        // The pool's constructor closure keeps its own handle to the engine.
        let engine_for_pool = Arc::clone(&pikevm);
        let make_cache = Box::new(move || Cache::new(&engine_for_pool));
        let pool = CachePool::new(make_cache);
        Ok(Regex { pikevm, pool })
    }

    /// Sets the parser's `case_insensitive` flag.
    pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
        let flags = &mut self.hir_config.flags;
        flags.case_insensitive = yes;
        self
    }

    /// Sets the parser's `multi_line` flag.
    pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
        let flags = &mut self.hir_config.flags;
        flags.multi_line = yes;
        self
    }

    /// Sets the parser's `dot_matches_new_line` flag.
    pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut RegexBuilder {
        let flags = &mut self.hir_config.flags;
        flags.dot_matches_new_line = yes;
        self
    }

    /// Sets the parser's `crlf` flag.
    pub fn crlf(&mut self, yes: bool) -> &mut RegexBuilder {
        let flags = &mut self.hir_config.flags;
        flags.crlf = yes;
        self
    }

    /// Sets the parser's `swap_greed` flag.
    pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
        let flags = &mut self.hir_config.flags;
        flags.swap_greed = yes;
        self
    }

    /// Sets the parser's `ignore_whitespace` flag.
    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
        let flags = &mut self.hir_config.flags;
        flags.ignore_whitespace = yes;
        self
    }

    /// Sets the NFA size limit to `Some(limit)`.
    pub fn size_limit(&mut self, limit: usize) -> &mut RegexBuilder {
        let config = &mut self.nfa_config;
        config.size_limit = Some(limit);
        self
    }

    /// Sets the parser's nesting limit.
    pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
        let config = &mut self.hir_config;
        config.nest_limit = limit;
        self
    }
}