use crate::classicalbacktrack;
use crate::emit;
use crate::exec;
use crate::indexing;
use crate::insn::CompiledRegex;
use crate::optimizer;
use crate::parse;
use crate::types::MAX_CAPTURE_GROUPS;
use std::iter::FusedIterator;
#[cfg(feature = "utf16")]
use crate::{
classicalbacktrack::MatchAttempter,
indexing::{InputIndexer, Ucs2Input, Utf16Input},
};
#[cfg(feature = "backend-pikevm")]
use crate::pikevm;
use crate::util::to_char_sat;
use core::{fmt, str::FromStr};
#[cfg(feature = "std")]
#[cfg(not(feature = "std"))]
use {
alloc::{string::String, vec::Vec},
hashbrown::{HashMap, hash_map::Iter},
};
pub use parse::Error;
/// Options that control how a regular expression is compiled.
///
/// Every field is `false` by default (`Default` is derived).
#[derive(Debug, Copy, Clone, Default)]
pub struct Flags {
/// Set by the `i` flag character; presumably enables case-insensitive matching.
pub icase: bool,
/// Set by the `m` flag character; presumably enables multiline mode.
pub multiline: bool,
/// Set by the `s` flag character; presumably lets `.` match line terminators.
pub dot_all: bool,
/// When set, `Regex::from_unicode` skips the optimizer pass.
/// No flag character sets this; it must be assigned directly.
pub no_opt: bool,
/// Set by the `u` flag character; presumably enables Unicode mode.
pub unicode: bool,
/// Set by the `v` flag character; presumably enables Unicode-sets mode.
pub unicode_sets: bool,
}
impl Flags {
    /// Builds a `Flags` value from a sequence of code points, one flag per character.
    ///
    /// Recognized characters are `m` (multiline), `i` (icase), `s` (dot_all),
    /// `u` (unicode) and `v` (unicode_sets). Every other code point is ignored.
    #[inline]
    pub fn new<T: Iterator<Item = u32>>(chars: T) -> Self {
        chars.fold(Self::default(), |mut flags, code_point| {
            match to_char_sat(code_point) {
                'm' => flags.multiline = true,
                'i' => flags.icase = true,
                's' => flags.dot_all = true,
                'u' => flags.unicode = true,
                'v' => flags.unicode_sets = true,
                // Unrecognized flag characters are deliberately skipped.
                _ => {}
            }
            flags
        })
    }
}
impl From<&str> for Flags {
    /// Parses a flag string such as `"im"`, treating each character as one flag.
    #[inline]
    fn from(s: &str) -> Self {
        let code_points = s.chars().map(|ch| ch as u32);
        Self::new(code_points)
    }
}
impl fmt::Display for Flags {
    /// Writes one character per enabled flag, in the order `m`, `i`, `s`, `u`, `v`.
    ///
    /// The characters are the same ones `Flags::new` accepts, so the output can
    /// be parsed back into an equal set of flags. `no_opt` has no flag character
    /// and is never written.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let flag_table = [
            (self.multiline, "m"),
            (self.icase, "i"),
            (self.dot_all, "s"),
            (self.unicode, "u"),
            // Previously omitted: `v` is parsed by `Flags::new`, so it must be emitted too.
            (self.unicode_sets, "v"),
        ];
        for (enabled, symbol) in flag_table {
            if enabled {
                f.write_str(symbol)?;
            }
        }
        Ok(())
    }
}
/// Index range into the searched text; an alias for `core::ops::Range<usize>`.
pub type Range = core::ops::Range<usize>;
/// Match iterator type returned by `Regex::find_iter` and `Regex::find_from`.
pub type Matches<'r, 't> = exec::Matches<backends::DefaultExecutor<'r, 't>>;
/// Match iterator type returned by `Regex::find_iter_ascii` and `Regex::find_from_ascii`.
pub type AsciiMatches<'r, 't> = exec::Matches<backends::DefaultAsciiExecutor<'r, 't>>;
/// The result of one successful match: the overall range plus capture group ranges.
#[derive(Debug, Clone)]
pub struct Match {
/// Range of the whole match; this is what `group(0)` returns.
pub range: Range,
/// Capture group ranges. `captures[i]` belongs to group number `i + 1`;
/// `None` presumably means the group did not participate in the match.
pub captures: Vec<Option<Range>>,
// Group names, indexed like `captures`. An empty string marks an unnamed group
// (name lookups reject empty names and the named-group iterator skips them).
pub(crate) group_names: Box<[Box<str>]>,
}
impl Match {
    /// Returns the range of capture group `idx`.
    ///
    /// Group 0 is the whole match and is always `Some`. For `idx >= 1` the
    /// result is the stored capture for that group, or `None` when `idx` is
    /// larger than the number of capture groups.
    #[inline]
    pub fn group(&self, idx: usize) -> Option<Range> {
        if idx == 0 {
            Some(self.range.clone())
        } else if idx <= self.captures.len() {
            self.captures[idx - 1].clone()
        } else {
            None
        }
    }

    /// Returns the range captured by the group called `name`.
    ///
    /// When several groups share the same name, the first one that holds a
    /// capture wins, which is the same rule the `named_groups` iterator uses.
    /// Returns `None` when `name` is empty, when no group has that name, or
    /// when none of the groups with that name hold a capture.
    #[inline]
    pub fn named_group(&self, name: &str) -> Option<Range> {
        // Empty names mark unnamed groups and must never match a lookup.
        if name.is_empty() {
            return None;
        }
        self.group_names
            .iter()
            .zip(&self.captures)
            .find_map(|(group_name, capture)| {
                if &**group_name == name {
                    capture.clone()
                } else {
                    None
                }
            })
    }

    /// Returns an iterator over the named groups as `(name, range)` pairs.
    #[inline]
    pub fn named_groups(&self) -> NamedGroups<'_> {
        NamedGroups::new(self)
    }

    /// Returns the range of the whole match.
    #[inline]
    pub fn range(&self) -> Range {
        self.range.clone()
    }

    /// Returns the start index of the whole match.
    #[inline]
    pub fn start(&self) -> usize {
        self.range.start
    }

    /// Returns the end index (exclusive) of the whole match.
    #[inline]
    pub fn end(&self) -> usize {
        self.range.end
    }

    /// Returns the matched slice of `text`.
    ///
    /// `text` should be the string this match was produced from.
    ///
    /// # Panics
    /// Panics if the match range is out of bounds for `text` or does not fall
    /// on character boundaries.
    #[inline]
    pub fn as_str<'t>(&self, text: &'t str) -> &'t str {
        &text[self.range.clone()]
    }

    /// Returns an iterator over all groups, starting with group 0 (the whole match).
    #[inline]
    pub fn groups(&self) -> Groups<'_> {
        Groups::new(self)
    }
}
/// Iterator over the groups of a `Match`, beginning with group 0 (the whole match).
#[derive(Clone)]
pub struct Groups<'m> {
// The match whose groups are being iterated.
mat: &'m Match,
// Number of the group that the next call to `next` will return.
next_group_idx: usize,
// One past the last group number: the capture count plus one for group 0.
max: usize,
}
impl<'m> Groups<'m> {
#[inline]
fn new(mat: &'m Match) -> Self {
Self {
mat,
next_group_idx: 0,
max: mat.captures.len() + 1, }
}
}
impl Iterator for Groups<'_> {
    type Item = Option<Range>;

    /// Yields the range of the next group, or `None` once every group was visited.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        if self.next_group_idx >= self.max {
            return None;
        }
        let current = self.next_group_idx;
        self.next_group_idx = current + 1;
        Some(self.mat.group(current))
    }

    /// Reports exactly how many groups are still to be yielded.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.max.saturating_sub(self.next_group_idx);
        (remaining, Some(remaining))
    }
}
// `Groups::size_hint` reports the exact number of groups left.
impl ExactSizeIterator for Groups<'_> {}
// `Groups::next` keeps returning `None` once the last group has been yielded.
impl FusedIterator for Groups<'_> {}
/// Iterator over the named groups of a `Match`, yielding `(name, range)` pairs.
#[derive(Clone)]
pub struct NamedGroups<'m> {
// The match whose named groups are being iterated.
mat: &'m Match,
// Index into the match's group-name list at which the next scan starts.
next_group_idx: usize,
}
impl<'m> NamedGroups<'m> {
#[inline]
fn new(mat: &'m Match) -> Self {
Self {
mat,
next_group_idx: 0,
}
}
}
impl<'m> Iterator for NamedGroups<'m> {
type Item = (&'m str, Option<Range>);
#[inline]
fn next(&mut self) -> Option<Self::Item> {
debug_assert!(self.next_group_idx <= self.mat.group_names.len());
let end = self.mat.group_names.len();
loop {
let mut idx = self.next_group_idx;
while idx < end && self.mat.group_names[idx].is_empty() {
idx += 1;
}
if idx == end {
return None;
}
let name = self.mat.group_names[idx].as_ref();
let already_seen = self.mat.group_names[..idx]
.iter()
.any(|n| n.as_ref() == name);
if already_seen {
self.next_group_idx = idx + 1;
continue;
}
let mut best_range = self.mat.captures[idx].clone();
for check_idx in (idx + 1)..end {
if self.mat.group_names[check_idx].as_ref() == name {
if best_range.is_none() && self.mat.captures[check_idx].is_some() {
best_range = self.mat.captures[check_idx].clone();
break; }
}
}
self.next_group_idx = idx + 1;
return Some((name, best_range));
}
}
fn size_hint(&self) -> (usize, Option<usize>) {
let size = self.mat.group_names[self.next_group_idx..]
.iter()
.filter(|s| !s.is_empty())
.count();
(size, Some(size))
}
}
// Length queries are answered from `NamedGroups::size_hint`.
impl ExactSizeIterator for NamedGroups<'_> {}
// `NamedGroups::next` keeps returning `None` once the group names are exhausted.
impl FusedIterator for NamedGroups<'_> {}
/// A compiled regular expression that can be matched against text.
#[derive(Debug, Clone)]
pub struct Regex {
// Compiled form of the pattern; produced by `emit::emit` in `Regex::from_unicode`
// or supplied directly through `From<CompiledRegex>`.
cr: CompiledRegex,
}
impl From<CompiledRegex> for Regex {
fn from(cr: CompiledRegex) -> Self {
Self { cr }
}
}
impl Regex {
    /// Compiles `pattern` using the default [`Flags`].
    ///
    /// # Errors
    /// Returns an [`Error`] when the pattern cannot be parsed.
    #[inline]
    pub fn new(pattern: &str) -> Result<Regex, Error> {
        Self::with_flags(pattern, Flags::default())
    }

    /// Compiles `pattern` with the given flags.
    ///
    /// `flags` may be anything convertible into [`Flags`], e.g. a flag string
    /// such as `"im"`.
    ///
    /// # Errors
    /// Returns an [`Error`] when the pattern cannot be parsed.
    #[inline]
    pub fn with_flags<F>(pattern: &str, flags: F) -> Result<Regex, Error>
    where
        F: Into<Flags>,
    {
        Self::from_unicode(pattern.chars().map(u32::from), flags)
    }

    /// Compiles a pattern given as a sequence of code points.
    ///
    /// The pattern is parsed, optimized (unless `flags.no_opt` is set) and
    /// then emitted into its compiled form.
    ///
    /// # Errors
    /// Returns an [`Error`] when the pattern cannot be parsed.
    pub fn from_unicode<I, F>(pattern: I, flags: F) -> Result<Regex, Error>
    where
        I: Iterator<Item = u32> + Clone,
        F: Into<Flags>,
    {
        let flags = flags.into();
        let mut ire = parse::try_parse(pattern, flags)?;
        if !flags.no_opt {
            optimizer::optimize(&mut ire);
        }
        let cr = emit::emit(&ire);
        Ok(Regex { cr })
    }

    /// Returns the first match in `text`, or `None` when there is none.
    #[inline]
    pub fn find(&self, text: &str) -> Option<Match> {
        self.find_iter(text).next()
    }

    /// Returns an iterator over the matches in `text`, searching from offset 0.
    #[inline]
    pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
        self.find_from(text, 0)
    }

    /// Returns an iterator over the matches in `text`, with `start` passed to
    /// the matcher as the initial search position (presumably a byte offset
    /// into `text`).
    #[inline]
    pub fn find_from<'r, 't>(&'r self, text: &'t str, start: usize) -> Matches<'r, 't> {
        backends::find(self, text, start)
    }

    /// Like [`Regex::find`], but runs the executor's ASCII variant.
    ///
    /// NOTE(review): presumably only meaningful for ASCII input — confirm
    /// against the executor implementation.
    #[inline]
    pub fn find_ascii(&self, text: &str) -> Option<Match> {
        self.find_iter_ascii(text).next()
    }

    /// Like [`Regex::find_iter`], but runs the executor's ASCII variant.
    #[inline]
    pub fn find_iter_ascii<'r, 't>(&'r self, text: &'t str) -> AsciiMatches<'r, 't> {
        self.find_from_ascii(text, 0)
    }

    /// Like [`Regex::find_from`], but runs the executor's ASCII variant.
    #[inline]
    pub fn find_from_ascii<'r, 't>(&'r self, text: &'t str, start: usize) -> AsciiMatches<'r, 't> {
        // The executor type is inferred from the `AsciiMatches` return type.
        backends::find(self, text, start)
    }

    /// Returns an iterator over the matches in a UTF-16 encoded `text`,
    /// with `start` passed to the matcher as the initial search position.
    ///
    /// Always uses the backtracking executor. The regex's `unicode` flag is
    /// forwarded to the input adapter.
    #[cfg(feature = "utf16")]
    pub fn find_from_utf16<'r, 't>(
        &'r self,
        text: &'t [u16],
        start: usize,
    ) -> exec::Matches<super::classicalbacktrack::BacktrackExecutor<'r, indexing::Utf16Input<'t>>>
    {
        let input = Utf16Input::new(text, self.cr.flags.unicode);
        exec::Matches::new(
            super::classicalbacktrack::BacktrackExecutor::new(
                input,
                MatchAttempter::new(&self.cr, input.left_end()),
            ),
            start,
        )
    }

    /// Returns an iterator over the matches in a UCS-2 encoded `text`,
    /// with `start` passed to the matcher as the initial search position.
    ///
    /// Always uses the backtracking executor. The regex's `unicode` flag is
    /// forwarded to the input adapter.
    #[cfg(feature = "utf16")]
    pub fn find_from_ucs2<'r, 't>(
        &'r self,
        text: &'t [u16],
        start: usize,
    ) -> exec::Matches<super::classicalbacktrack::BacktrackExecutor<'r, indexing::Ucs2Input<'t>>>
    {
        let input = Ucs2Input::new(text, self.cr.flags.unicode);
        exec::Matches::new(
            super::classicalbacktrack::BacktrackExecutor::new(
                input,
                MatchAttempter::new(&self.cr, input.left_end()),
            ),
            start,
        )
    }

    /// Replaces the first match in `text` with the expansion of `replacement`.
    ///
    /// In `replacement`, `$$` produces a literal `$`, `$N` (decimal digits)
    /// inserts numbered group `N`, and `${name}` inserts the named group
    /// `name`. Groups that do not exist or did not capture expand to nothing.
    /// Returns a copy of `text` when there is no match.
    pub fn replace(&self, text: &str, replacement: &str) -> String {
        match self.find(text) {
            Some(m) => {
                let mut result = String::with_capacity(text.len());
                result.push_str(&text[..m.start()]);
                self.expand_replacement(&m, text, replacement, &mut result);
                result.push_str(&text[m.end()..]);
                result
            }
            None => String::from(text),
        }
    }

    /// Replaces every match in `text` with the expansion of `replacement`.
    ///
    /// The replacement syntax is the same as for [`Regex::replace`].
    pub fn replace_all(&self, text: &str, replacement: &str) -> String {
        let mut result = String::with_capacity(text.len());
        let mut last_end = 0;
        for m in self.find_iter(text) {
            // Copy the unmatched text between the previous match and this one.
            result.push_str(&text[last_end..m.start()]);
            self.expand_replacement(&m, text, replacement, &mut result);
            last_end = m.end();
        }
        result.push_str(&text[last_end..]);
        result
    }

    /// Replaces the first match in `text` with the string returned by `replacement`.
    ///
    /// The closure is called at most once. Returns a copy of `text` when there
    /// is no match.
    pub fn replace_with<F>(&self, text: &str, replacement: F) -> String
    where
        F: FnOnce(&Match) -> String,
    {
        match self.find(text) {
            Some(m) => {
                let mut result = String::with_capacity(text.len());
                result.push_str(&text[..m.start()]);
                result.push_str(&replacement(&m));
                result.push_str(&text[m.end()..]);
                result
            }
            None => String::from(text),
        }
    }

    /// Replaces every match in `text` with the string returned by `replacement`.
    ///
    /// The closure is called once per match, in match order. It may mutate
    /// captured state (for example a running counter), since only `FnMut` is
    /// required.
    pub fn replace_all_with<F>(&self, text: &str, mut replacement: F) -> String
    where
        F: FnMut(&Match) -> String,
    {
        let mut result = String::with_capacity(text.len());
        let mut last_end = 0;
        for m in self.find_iter(text) {
            result.push_str(&text[last_end..m.start()]);
            result.push_str(&replacement(&m));
            last_end = m.end();
        }
        result.push_str(&text[last_end..]);
        result
    }

    /// Appends `replacement` to `output`, expanding `$` references against `m`.
    ///
    /// Recognized forms: `$$` (literal `$`), `$` followed by decimal digits
    /// (numbered group) and `${name}` (named group). A `$` followed by anything
    /// else is copied through unchanged, and an unterminated `${...` is copied
    /// through literally.
    fn expand_replacement(&self, m: &Match, text: &str, replacement: &str, output: &mut String) {
        let mut chars = replacement.chars().peekable();
        while let Some(ch) = chars.next() {
            if ch == '$' {
                match chars.peek() {
                    Some('$') => {
                        chars.next();
                        output.push('$');
                    }
                    Some(&digit) if digit.is_ascii_digit() => {
                        let mut group_num = 0;
                        while let Some(&digit) = chars.peek() {
                            if digit.is_ascii_digit() {
                                chars.next();
                                group_num = group_num * 10 + (digit as u32 - '0' as u32) as usize;
                                // Stop early so a long digit run cannot overflow `group_num`.
                                if group_num > MAX_CAPTURE_GROUPS {
                                    break;
                                }
                            } else {
                                break;
                            }
                        }
                        if let Some(range) = m.group(group_num) {
                            output.push_str(&text[range]);
                        }
                    }
                    Some('{') => {
                        // Consume the '{' and collect the name up to the closing '}'.
                        chars.next();
                        let mut name = String::new();
                        let mut found_closing_brace = false;
                        for ch in chars.by_ref() {
                            if ch == '}' {
                                found_closing_brace = true;
                                break;
                            }
                            name.push(ch);
                        }
                        if found_closing_brace {
                            if let Some(range) = m.named_group(&name) {
                                output.push_str(&text[range]);
                            }
                        } else {
                            // No closing brace: emit what was consumed verbatim.
                            output.push_str("${");
                            output.push_str(&name);
                        }
                    }
                    _ => {
                        output.push('$');
                    }
                }
            } else {
                output.push(ch);
            }
        }
    }
}
impl FromStr for Regex {
type Err = Error;
#[inline]
fn from_str(s: &str) -> Result<Self, Error> {
Self::new(s)
}
}
// Integration with the `core::str::pattern` API so that `&Regex` can be used as a
// string pattern. Compiled only when the `pattern` feature is enabled.
#[cfg(feature = "pattern")]
mod pattern_impl {
use super::*;
use core::str::pattern::{Pattern, ReverseSearcher, SearchStep, Searcher};
/// Searcher that walks a haystack with a `Regex`, forwards and backwards.
///
/// The forward and reverse directions keep separate cursors and completion flags.
pub struct RegexSearcher<'r, 't> {
// Text being searched.
haystack: &'t str,
// Regex used to find matches.
regex: &'r Regex,
// Forward cursor: offset at which the next forward search starts.
current_pos: usize,
// Set once the forward direction has nothing more to report.
done: bool,
// Reverse cursor: matches must end at or before this offset.
reverse_pos: usize,
// Set once the reverse direction has nothing more to report.
reverse_done: bool,
}
impl<'r, 't> RegexSearcher<'r, 't> {
/// Creates a searcher with the forward cursor at 0 and the reverse cursor at
/// the end of `haystack`.
fn new(regex: &'r Regex, haystack: &'t str) -> Self {
Self {
haystack,
regex,
current_pos: 0,
done: false,
reverse_pos: haystack.len(),
reverse_done: false,
}
}
/// Returns the last match whose end is at or before `pos`.
///
/// Matches are enumerated forwards from offset 0 on every call; the scan stops
/// at the first match that ends beyond `pos`.
fn find_last_match_before(&self, pos: usize) -> Option<super::Match> {
let mut last_match = None;
for m in self.regex.find_from(self.haystack, 0) {
if m.end() <= pos {
last_match = Some(m);
} else {
break;
}
}
last_match
}
}
// NOTE(review): `Searcher` is an unsafe trait; its documented contract concerns the
// indices returned from `next` — verify the implementation below against it.
unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
fn haystack(&self) -> &'t str {
self.haystack
}
/// Reports the next forward step: a rejected gap, a match, or `Done`.
fn next(&mut self) -> SearchStep {
if self.done {
return SearchStep::Done;
}
if let Some(m) = self.regex.find_from(self.haystack, self.current_pos).next() {
let match_start = m.start();
let match_end = m.end();
// Text before the match is reported as a reject first; the cursor moves to the
// match start, so the same match is found again on the following call.
if self.current_pos < match_start {
let reject_end = match_start;
let reject_start = self.current_pos;
self.current_pos = match_start;
return SearchStep::Reject(reject_start, reject_end);
}
self.current_pos = match_end;
// For an empty match, move the cursor to the next char boundary past the match
// position so the next search starts beyond it.
// NOTE(review): the bytes stepped over here are never reported as a `Reject`;
// confirm this is acceptable for the `Searcher` contract.
if match_start == match_end {
if match_end < self.haystack.len() {
let mut next_pos = match_end + 1;
while next_pos < self.haystack.len()
&& !self.haystack.is_char_boundary(next_pos)
{
next_pos += 1;
}
self.current_pos = next_pos;
} else {
// Empty match at the very end: nothing further can follow.
self.done = true;
}
}
SearchStep::Match(match_start, match_end)
} else {
// No further match: reject whatever remains, then finish.
if self.current_pos < self.haystack.len() {
let reject_start = self.current_pos;
let reject_end = self.haystack.len();
self.current_pos = self.haystack.len();
self.done = true;
SearchStep::Reject(reject_start, reject_end)
} else {
self.done = true;
SearchStep::Done
}
}
}
}
// NOTE(review): `ReverseSearcher` is an unsafe trait as well — verify against its contract.
unsafe impl<'r, 't> ReverseSearcher<'t> for RegexSearcher<'r, 't> {
/// Reports the next step when walking from the end of the haystack towards the start.
fn next_back(&mut self) -> SearchStep {
if self.reverse_done {
return SearchStep::Done;
}
if let Some(m) = self.find_last_match_before(self.reverse_pos) {
let match_start = m.start();
let match_end = m.end();
// Text after the match (up to the reverse cursor) is rejected first; the match
// itself is found again on the following call.
if match_end < self.reverse_pos {
let reject_start = match_end;
let reject_end = self.reverse_pos;
self.reverse_pos = match_end;
return SearchStep::Reject(reject_start, reject_end);
}
self.reverse_pos = match_start;
// For an empty match, move the reverse cursor back to the previous char boundary
// so the next lookup ends before this position.
if match_start == match_end {
if match_start > 0 {
let mut prev_pos = match_start - 1;
while prev_pos > 0 && !self.haystack.is_char_boundary(prev_pos) {
prev_pos -= 1;
}
self.reverse_pos = prev_pos;
} else {
// Empty match at offset 0: nothing further can precede it.
self.reverse_done = true;
}
}
SearchStep::Match(match_start, match_end)
} else {
// No earlier match: reject the remaining prefix, then finish.
if self.reverse_pos > 0 {
let reject_start = 0;
let reject_end = self.reverse_pos;
self.reverse_pos = 0;
self.reverse_done = true;
SearchStep::Reject(reject_start, reject_end)
} else {
self.reverse_done = true;
SearchStep::Done
}
}
}
}
/// Lets a `&Regex` be passed wherever a string pattern is accepted.
impl<'r> Pattern for &'r Regex {
type Searcher<'a> = RegexSearcher<'r, 'a>;
fn into_searcher(self, haystack: &str) -> Self::Searcher<'_> {
RegexSearcher::new(self, haystack)
}
}
}
#[cfg(feature = "pattern")]
pub use pattern_impl::*;
#[doc(hidden)]
// Lower-level access to the compilation stages and executors; hidden from the
// generated documentation.
pub mod backends {
use super::Regex;
use super::exec;
use super::indexing;
// Re-exports of the individual compilation stages (parse, optimize, emit).
pub use crate::emit::emit;
pub use crate::optimizer::optimize;
pub use crate::parse::try_parse;
/// Backtracking executor over UTF-8 input.
pub type BacktrackExecutor<'r, 't> =
super::classicalbacktrack::BacktrackExecutor<'r, indexing::Utf8Input<'t>>;
/// Pike VM executor over UTF-8 input; available with the `backend-pikevm` feature.
#[cfg(feature = "backend-pikevm")]
pub type PikeVMExecutor<'r, 't> = super::pikevm::PikeVMExecutor<'r, indexing::Utf8Input<'t>>;
/// Executor used by the `Regex::find*` methods: the backtracking executor.
pub type DefaultExecutor<'r, 't> = BacktrackExecutor<'r, 't>;
/// The ASCII variant (`AsAscii`) of the default executor.
pub type DefaultAsciiExecutor<'r, 't> =
<DefaultExecutor<'r, 't> as exec::Executor<'r, 't>>::AsAscii;
/// Builds an `Executor` for `re` over `text` and returns its match iterator,
/// passing `start` through as the initial search position.
pub fn find<'r, 't, Executor: exec::Executor<'r, 't>>(
re: &'r Regex,
text: &'t str,
start: usize,
) -> exec::Matches<Executor> {
exec::Matches::new(Executor::new(&re.cr, text), start)
}
/// Same as `find`, but runs the executor's `AsAscii` variant.
pub fn find_ascii<'r, 't, Executor: exec::Executor<'r, 't>>(
re: &'r Regex,
text: &'t str,
start: usize,
) -> exec::Matches<Executor::AsAscii> {
find::<Executor::AsAscii>(re, text, start)
}
}
/// Returns `text` with every regex metacharacter prefixed by a backslash.
///
/// The escaped characters are `\ ^ $ . | ? * + ( ) [ ] { }`; all other
/// characters are copied through unchanged.
pub fn escape(text: &str) -> String {
    const METACHARACTERS: &str = r"\^$.|?*+()[]{}";
    let mut escaped = String::with_capacity(text.len());
    for ch in text.chars() {
        if METACHARACTERS.contains(ch) {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}