#![warn(missing_docs)]
#![deny(rustdoc::missing_crate_level_docs)]
#![doc(test(attr(deny(warnings))))]
pub(crate) use re2_sys::{re2, re2_c};
pub mod error;
pub use error::RE2Error;
use error::{CompileError, RE2ErrorCode, RewriteError};
pub mod options;
pub use options::{Anchor, CannedOptions, Options};
pub mod string;
use string::{StringView, StringWrapper};
pub mod set;
pub mod filtered;
use std::{
cmp, fmt, hash,
marker::PhantomData,
mem::{self, MaybeUninit},
ops, ptr, str,
};
/// Creates an array of `N` uninitialized slots for values of type `T`.
///
/// None of the slots is initialized; every element must be written before the array is
/// converted to `[T; N]`.
fn uninit_array<T, const N: usize>() -> [MaybeUninit<T>; N] {
    let storage = MaybeUninit::<[MaybeUninit<T>; N]>::uninit();
    // SAFETY: an array whose elements are `MaybeUninit<T>` places no validity requirement on
    // its bytes, so an uninitialized array of them is already a valid value.
    unsafe { storage.assume_init() }
}
/// Converts a fully written `[MaybeUninit<T>; N]` into `[T; N]`.
///
/// # Safety
/// Every element of `x` must have been initialized before calling this function.
unsafe fn array_assume_init<T: Sized, const N: usize>(x: [MaybeUninit<T>; N]) -> [T; N] {
    // Read the storage back through a pointer typed as the initialized array. `x` itself is
    // left in place afterwards; `MaybeUninit` never drops its contents, so nothing is freed twice.
    let initialized = (&x as *const [MaybeUninit<T>; N]).cast::<[T; N]>();
    ptr::read(initialized)
}
/// Applies `f` to every element of `argv`, in order, and returns the results as an array of
/// the same length.
///
/// This delegates to the standard library's array `map`, so no `unsafe` bookkeeping is needed
/// and already-produced results are dropped properly if `f` panics part-way through.
fn map_array<T, U, const N: usize, F: Fn(T) -> U>(argv: [T; N], f: F) -> [U; N] {
    argv.map(f)
}
/// A compiled regular expression.
///
/// This is a transparent newtype around the native `re2_c::RE2Wrapper` handle.
#[repr(transparent)]
pub struct RE2(re2_c::RE2Wrapper);
impl RE2 {
pub fn compile(pattern: &str, options: Options) -> Result<Self, CompileError> {
let pattern = StringView::from_str(pattern);
let s = Self(unsafe { re2_c::RE2Wrapper::new(pattern.into_native(), &options.into_native()) });
s.check_error()?;
Ok(s)
}
fn check_error_code(&self) -> Result<(), RE2ErrorCode> {
RE2ErrorCode::from_native(unsafe { self.0.error_code() })
}
pub fn pattern(&self) -> &str { unsafe { StringView::from_native(self.0.pattern()).as_str() } }
pub fn options(&self) -> Options { unsafe { *self.0.options() }.into() }
pub fn expensive_clone(&self) -> Self { Self::compile(self.pattern(), self.options()).unwrap() }
fn error(&self) -> StringView { unsafe { StringView::from_native(self.0.error()) } }
fn error_arg(&self) -> StringView { unsafe { StringView::from_native(self.0.error_arg()) } }
fn check_error(&self) -> Result<(), CompileError> {
self.check_error_code().map_err(|code| CompileError {
message: String::from_utf8_lossy(self.error().as_slice()).to_string(),
arg: String::from_utf8_lossy(self.error_arg().as_slice()).to_string(),
code,
})
}
pub fn quote_meta(pattern: &str) -> StringWrapper {
let pattern = StringView::from_str(pattern);
let mut out = StringWrapper::from_view(pattern);
unsafe { re2_c::RE2Wrapper::quote_meta(pattern.into_native(), out.as_mut_native()) };
out
}
}
impl str::FromStr for RE2 {
    type Err = CompileError;

    /// Compiles `s` using `Options::default()`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let default_options = Options::default();
        Self::compile(s, default_options)
    }
}
impl ops::Drop for RE2 {
fn drop(&mut self) {
unsafe {
self.0.clear();
}
}
}
impl RE2 {
    /// Builds the zero-length result array used when no capture groups are requested.
    ///
    /// # Panics
    /// Panics if `N != 0`.
    fn empty_result<'a, const N: usize>() -> [StringView<'a>; N] {
        assert_eq!(N, 0);
        let nothing: [MaybeUninit<StringView<'a>>; N] = uninit_array();
        // SAFETY: `N == 0` was asserted above, so there is no element left to initialize.
        unsafe { array_assume_init(nothing) }
    }

    /// Wraps each native string view in the crate's [`StringView`].
    fn convert_string_views<'a, const N: usize>(argv: [re2_c::StringView; N]) -> [StringView<'a>; N] {
        map_array(argv, |native| StringView::from_native(native))
    }

    /// Converts each view into a `&str` without validating its contents.
    fn convert_strings<const N: usize>(argv: [StringView; N]) -> [&str; N] {
        map_array(argv, |view| unsafe { view.as_str() })
    }

    /// Converts each `&str` into a [`StringView`].
    fn convert_from_strings<const N: usize>(argv: [&str; N]) -> [StringView; N] {
        map_array(argv, StringView::from_str)
    }

    /// View-based form of [`Self::full_match`].
    pub(crate) fn full_match_view(&self, text: StringView) -> bool {
        let native_text = text.into_native();
        unsafe { self.0.full_match(native_text) }
    }

    /// Reports the result of the native `full_match` routine for `text`.
    pub fn full_match(&self, text: &str) -> bool {
        let view = StringView::from_str(text);
        self.full_match_view(view)
    }

    /// View-based form of [`Self::full_match_capturing`].
    pub(crate) fn full_match_capturing_view<'a, const N: usize>(
        &self,
        text_view: StringView<'a>,
    ) -> Option<[StringView<'a>; N]> {
        if N == 0 {
            // Nothing to capture: fall back to the plain boolean match.
            return self.full_match_view(text_view).then(|| Self::empty_result());
        }
        if N > self.num_captures() {
            return None;
        }
        let mut argv: [MaybeUninit<re2_c::StringView>; N] = uninit_array();
        let matched = unsafe {
            self.0.full_match_n(
                text_view.into_native(),
                mem::transmute(argv.as_mut_ptr()),
                argv.len(),
            )
        };
        // Relies on the native call having written all `N` slots when it reports success.
        matched.then(|| Self::convert_string_views(unsafe { array_assume_init(argv) }))
    }

    /// Runs the native full match and returns the first `N` capture groups.
    ///
    /// Returns `None` when the match fails or when `N` exceeds [`Self::num_captures`].
    pub fn full_match_capturing<'a, const N: usize>(&self, text: &'a str) -> Option<[&'a str; N]> {
        let captures = self.full_match_capturing_view(StringView::from_str(text))?;
        Some(Self::convert_strings(captures))
    }

    /// View-based form of [`Self::partial_match`].
    pub(crate) fn partial_match_view(&self, text: StringView) -> bool {
        let native_text = text.into_native();
        unsafe { self.0.partial_match(native_text) }
    }

    /// Reports the result of the native `partial_match` routine for `text`.
    pub fn partial_match(&self, text: &str) -> bool {
        let view = StringView::from_str(text);
        self.partial_match_view(view)
    }

    /// View-based form of [`Self::partial_match_capturing`].
    pub(crate) fn partial_match_capturing_view<'a, const N: usize>(
        &self,
        text_view: StringView<'a>,
    ) -> Option<[StringView<'a>; N]> {
        if N == 0 {
            return self.partial_match_view(text_view).then(|| Self::empty_result());
        }
        if N > self.num_captures() {
            return None;
        }
        let mut argv: [MaybeUninit<re2_c::StringView>; N] = uninit_array();
        let matched = unsafe {
            self.0.partial_match_n(
                text_view.into_native(),
                mem::transmute(argv.as_mut_ptr()),
                argv.len(),
            )
        };
        // Relies on the native call having written all `N` slots when it reports success.
        matched.then(|| Self::convert_string_views(unsafe { array_assume_init(argv) }))
    }

    /// Runs the native partial match and returns the first `N` capture groups.
    ///
    /// Returns `None` when the match fails or when `N` exceeds [`Self::num_captures`].
    pub fn partial_match_capturing<'a, const N: usize>(&self, text: &'a str) -> Option<[&'a str; N]> {
        let captures = self.partial_match_capturing_view(StringView::from_str(text))?;
        Some(Self::convert_strings(captures))
    }

    /// View-based form of [`Self::consume`]; the native call may update `text_view` in place.
    pub(crate) fn consume_view(&self, text_view: &mut StringView) -> bool {
        unsafe { self.0.consume(text_view.as_mut_native()) }
    }

    /// Runs the native `consume` routine on `text`.
    ///
    /// On success `*text` is replaced by the view left behind by the native call; on failure
    /// `*text` is left untouched.
    pub fn consume(&self, text: &mut &str) -> bool {
        let mut remaining = StringView::from_str(text);
        if !self.consume_view(&mut remaining) {
            return false;
        }
        *text = unsafe { remaining.as_str() };
        true
    }

    /// View-based form of [`Self::consume_capturing`].
    pub(crate) fn consume_capturing_view<'a, const N: usize>(
        &self,
        text_view: &mut StringView<'a>,
    ) -> Option<[StringView<'a>; N]> {
        if N == 0 {
            return self.consume_view(text_view).then(|| Self::empty_result());
        }
        if N > self.num_captures() {
            return None;
        }
        let mut argv: [MaybeUninit<re2_c::StringView>; N] = uninit_array();
        let matched = unsafe {
            self.0.consume_n(
                text_view.as_mut_native(),
                mem::transmute(argv.as_mut_ptr()),
                argv.len(),
            )
        };
        // Relies on the native call having written all `N` slots when it reports success.
        matched.then(|| Self::convert_string_views(unsafe { array_assume_init(argv) }))
    }

    /// Like [`Self::consume`], additionally returning the first `N` capture groups.
    ///
    /// `*text` is only updated when a match is found.
    pub fn consume_capturing<'a, const N: usize>(&self, text: &mut &'a str) -> Option<[&'a str; N]> {
        let mut remaining = StringView::from_str(text);
        let captures = self.consume_capturing_view(&mut remaining)?;
        *text = unsafe { remaining.as_str() };
        Some(Self::convert_strings(captures))
    }

    /// View-based form of [`Self::find_and_consume`]; the native call may update `text_view`.
    pub(crate) fn find_and_consume_view(&self, text_view: &mut StringView) -> bool {
        unsafe { self.0.find_and_consume(text_view.as_mut_native()) }
    }

    /// Runs the native `find_and_consume` routine on `text`.
    ///
    /// On success `*text` is replaced by the view left behind by the native call; on failure
    /// `*text` is left untouched.
    pub fn find_and_consume(&self, text: &mut &str) -> bool {
        let mut remaining = StringView::from_str(text);
        if !self.find_and_consume_view(&mut remaining) {
            return false;
        }
        *text = unsafe { remaining.as_str() };
        true
    }

    /// View-based form of [`Self::find_and_consume_capturing`].
    pub(crate) fn find_and_consume_capturing_view<'a, const N: usize>(
        &self,
        text_view: &mut StringView<'a>,
    ) -> Option<[StringView<'a>; N]> {
        if N == 0 {
            return self.find_and_consume_view(text_view).then(|| Self::empty_result());
        }
        if N > self.num_captures() {
            return None;
        }
        let mut argv: [MaybeUninit<re2_c::StringView>; N] = uninit_array();
        let matched = unsafe {
            self.0.find_and_consume_n(
                text_view.as_mut_native(),
                mem::transmute(argv.as_mut_ptr()),
                argv.len(),
            )
        };
        // Relies on the native call having written all `N` slots when it reports success.
        matched.then(|| Self::convert_string_views(unsafe { array_assume_init(argv) }))
    }

    /// Like [`Self::find_and_consume`], additionally returning the first `N` capture groups.
    ///
    /// `*text` is only updated when a match is found.
    pub fn find_and_consume_capturing<'a, const N: usize>(
        &self,
        text: &mut &'a str,
    ) -> Option<[&'a str; N]> {
        let mut remaining = StringView::from_str(text);
        let captures = self.find_and_consume_capturing_view(&mut remaining)?;
        *text = unsafe { remaining.as_str() };
        Some(Self::convert_strings(captures))
    }

    /// View-based form of [`Self::match_no_captures`].
    pub(crate) fn match_no_captures_view(
        &self,
        text: StringView,
        range: ops::Range<usize>,
        anchor: Anchor,
    ) -> bool {
        unsafe {
            self.0.match_single(
                text.into_native(),
                range.start,
                range.end,
                anchor.into_native(),
            )
        }
    }

    /// Runs the native `match_single` routine over `range` of `text` with the given `anchor`,
    /// without extracting any submatches.
    pub fn match_no_captures(&self, text: &str, range: ops::Range<usize>, anchor: Anchor) -> bool {
        let view = StringView::from_str(text);
        self.match_no_captures_view(view, range, anchor)
    }

    /// View-based form of [`Self::match_routine`].
    pub(crate) fn match_routine_view<'a, const N: usize>(
        &self,
        text_view: StringView<'a>,
        range: ops::Range<usize>,
        anchor: Anchor,
    ) -> Option<[StringView<'a>; N]> {
        if N == 0 {
            return self
                .match_no_captures_view(text_view, range, anchor)
                .then(|| Self::empty_result());
        }
        let mut submatches: [MaybeUninit<re2_c::StringView>; N] = uninit_array();
        let matched = unsafe {
            self.0.match_routine(
                text_view.into_native(),
                range.start,
                range.end,
                anchor.into_native(),
                mem::transmute(submatches.as_mut_ptr()),
                submatches.len(),
            )
        };
        // Relies on the native call having written all `N` slots when it reports success.
        matched.then(|| Self::convert_string_views(unsafe { array_assume_init(submatches) }))
    }

    /// Runs the native `match_routine` over `range` of `text` with the given `anchor`,
    /// returning `N` submatch slots on success and `None` otherwise.
    ///
    /// Unlike the `*_capturing` methods, `N` is not checked against [`Self::num_captures`].
    pub fn match_routine<'a, const N: usize>(
        &self,
        text: &'a str,
        range: ops::Range<usize>,
        anchor: Anchor,
    ) -> Option<[&'a str; N]> {
        let submatches = self.match_routine_view(StringView::from_str(text), range, anchor)?;
        Some(Self::convert_strings(submatches))
    }
}
impl RE2 {
    /// View-based form of [`Self::replace`].
    pub(crate) fn replace_view(&self, text: &mut StringWrapper, rewrite: StringView) -> bool {
        unsafe { self.0.replace(text.as_mut_native(), rewrite.into_native()) }
    }

    /// Performs one native `replace` on `text` using `rewrite`.
    ///
    /// Returns the boolean reported by the native call.
    pub fn replace(&self, text: &mut StringWrapper, rewrite: &str) -> bool {
        self.replace_view(text, StringView::from_str(rewrite))
    }

    /// View-based form of [`Self::replace_n`].
    pub(crate) fn replace_n_view(
        &self,
        text: &mut StringWrapper,
        rewrite: StringView,
        limit: usize,
    ) -> usize {
        if limit == 0 {
            return self.global_replace_view(text, rewrite);
        }
        let mut num_replacements_made: usize = 0;
        // The counter is checked first so that no replacement is attempted once `limit` has
        // been reached; without this check the loop would ignore `limit` entirely and could
        // spin forever when the rewritten text matches the pattern again.
        while num_replacements_made < limit && self.replace_view(text, rewrite) {
            num_replacements_made += 1;
        }
        num_replacements_made
    }

    /// Applies at most `limit` replacements to `text` and returns how many were made.
    ///
    /// A `limit` of `0` means "no limit" and delegates to [`Self::global_replace`]. Otherwise
    /// [`Self::replace`] is called repeatedly on the current contents of `text` until it
    /// reports no replacement or `limit` replacements have been made.
    pub fn replace_n(&self, text: &mut StringWrapper, rewrite: &str, limit: usize) -> usize {
        self.replace_n_view(text, StringView::from_str(rewrite), limit)
    }

    /// View-based form of [`Self::global_replace`].
    pub(crate) fn global_replace_view(&self, text: &mut StringWrapper, rewrite: StringView) -> usize {
        unsafe {
            self.0
                .global_replace(text.as_mut_native(), rewrite.into_native())
        }
    }

    /// Runs the native `global_replace` on `text` using `rewrite` and returns the count it
    /// reports.
    pub fn global_replace(&self, text: &mut StringWrapper, rewrite: &str) -> usize {
        self.global_replace_view(text, StringView::from_str(rewrite))
    }

    /// View-based form of [`Self::extract`].
    pub(crate) fn extract_view(
        &self,
        text: StringView,
        rewrite: StringView,
        out: &mut StringWrapper,
    ) -> bool {
        unsafe {
            self.0.extract(
                text.into_native(),
                rewrite.into_native(),
                out.as_mut_native(),
            )
        }
    }

    /// Runs the native `extract` routine with `text` and `rewrite`, writing into `out`.
    ///
    /// Returns the boolean reported by the native call.
    pub fn extract(&self, text: &str, rewrite: &str, out: &mut StringWrapper) -> bool {
        self.extract_view(
            StringView::from_str(text),
            StringView::from_str(rewrite),
            out,
        )
    }
}
impl RE2 {
pub(crate) fn find_iter_view<'r, 'h: 'r, const N: usize>(
&'r self,
hay: StringView<'h>,
) -> impl Iterator<Item=[StringView<'h>; N]>+'r {
assert_ne!(
N, 0,
"N must be at least 1 to capture the match text for non-overlapping matches"
);
MatchIter {
remaining_input: hay,
pattern: self,
}
}
pub fn find_iter<'r, 'h: 'r, const N: usize>(
&'r self,
hay: &'h str,
) -> impl Iterator<Item=[&'h str; N]>+'r {
self
.find_iter_view(StringView::from_str(hay))
.map(Self::convert_strings)
}
pub(crate) fn split_view<'r, 'h: 'r>(
&'r self,
hay: StringView<'h>,
) -> impl Iterator<Item=StringView<'h>>+'r {
SplitIter {
remaining_input: Some(hay),
pattern: self,
}
}
pub fn split<'r, 'h: 'r>(&'r self, hay: &'h str) -> impl Iterator<Item=&'h str>+'r {
self
.split_view(StringView::from_str(hay))
.map(|s| unsafe { s.as_str() })
}
}
impl RE2 {
pub fn max_submatch(rewrite: &str) -> usize {
let rewrite = StringView::from_str(rewrite);
unsafe { re2_c::RE2Wrapper::max_submatch(rewrite.into_native()) }
}
pub fn num_captures(&self) -> usize { unsafe { self.0.num_captures() } }
pub fn named_groups(&self) -> impl Iterator<Item=NamedGroup<'_>>+'_ { self.make_named_groups() }
fn make_named_groups(&self) -> NamedCapturingGroups<'_> {
unsafe { NamedCapturingGroups::from_native(self.0.named_groups()) }
}
pub fn named_and_numbered_groups(&self) -> impl ExactSizeIterator<Item=Option<&str>> {
NamedAndNumberedGroups::new(self.num_captures(), self.make_named_groups())
}
pub(crate) fn check_rewrite_view(&self, rewrite: StringView) -> Result<(), RewriteError> {
let mut sw = StringWrapper::blank();
if unsafe {
self
.0
.check_rewrite_string(rewrite.into_native(), sw.as_mut_native())
} {
Ok(())
} else {
Err(RewriteError {
message: String::from_utf8_lossy(sw.as_view().as_slice()).to_string(),
})
}
}
pub fn check_rewrite(&self, rewrite: &str) -> Result<(), RewriteError> {
self.check_rewrite_view(StringView::from_str(rewrite))
}
pub(crate) fn vector_rewrite_view<const N: usize>(
&self,
out: &mut StringWrapper,
rewrite: StringView,
inputs: [StringView; N],
) -> bool {
let mut input_views: [MaybeUninit<re2_c::StringView>; N] = uninit_array();
for (sv, s) in input_views.iter_mut().zip(inputs.into_iter()) {
sv.write(s.into_native());
}
let input_views = unsafe { array_assume_init(input_views) };
unsafe {
self.0.vector_rewrite(
out.as_mut_native(),
rewrite.into_native(),
input_views.as_ptr(),
input_views.len(),
)
}
}
pub fn vector_rewrite<const N: usize>(
&self,
out: &mut StringWrapper,
rewrite: &str,
inputs: [&str; N],
) -> bool {
let rewrite = StringView::from_str(rewrite);
let inputs = Self::convert_from_strings(inputs);
self.vector_rewrite_view(out, rewrite, inputs)
}
}
impl fmt::Debug for RE2 {
    /// Formats the pattern and options in their `Debug` representations.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let pattern = self.pattern();
        let options = self.options();
        write!(f, "RE2(pattern={:?}, options={:?})", pattern, options)
    }
}
impl fmt::Display for RE2 {
    /// Writes `/pattern/` for default options, otherwise a longer form that also shows the
    /// options.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let options = self.options();
        let pattern = self.pattern();
        if options == Options::default() {
            return write!(f, "/{}/", pattern);
        }
        write!(f, "RE2(/{}/, options={:?})", pattern, options)
    }
}
impl cmp::PartialEq for RE2 {
    /// Two expressions are equal when both their pattern strings and their options are equal.
    fn eq(&self, other: &Self) -> bool {
        if self.pattern() != other.pattern() {
            return false;
        }
        self.options() == other.options()
    }
}
// Marker impl: declares the pattern-and-options equality of `RE2` to be a full equivalence.
impl cmp::Eq for RE2 {}
impl cmp::PartialOrd for RE2 {
    /// Always returns `Some`, deferring to the total order defined by `Ord`.
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        let ordering = cmp::Ord::cmp(self, other);
        Some(ordering)
    }
}
impl cmp::Ord for RE2 {
    /// Orders by pattern string first; options are only compared to break ties.
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.pattern()
            .cmp(other.pattern())
            .then_with(|| self.options().cmp(&other.options()))
    }
}
impl hash::Hash for RE2 {
    /// Feeds the pattern string and then the options into `state`, mirroring the fields used
    /// for equality.
    fn hash<H>(&self, state: &mut H)
    where
        H: hash::Hasher,
    {
        let pattern = self.pattern();
        let options = self.options();
        pattern.hash(state);
        options.hash(state);
    }
}
/// A named capture group: a name paired with an index, as reported by the native library.
///
/// The `'a` parameter is carried only through `PhantomData`; it ties the value to a borrow
/// chosen by whoever constructs it.
#[derive(Copy, Clone)]
#[repr(transparent)]
pub struct NamedGroup<'a> {
inner: re2_c::NamedGroup,
_ph: PhantomData<&'a u8>,
}
impl<'a> NamedGroup<'a> {
/// Wraps a native named-group record.
///
/// # Safety
/// NOTE(review): presumably the data referenced by `inner` must stay valid for `'a`; nothing
/// here checks that, so confirm against the callers.
pub(crate) const unsafe fn from_native(inner: re2_c::NamedGroup) -> Self {
Self {
inner,
_ph: PhantomData,
}
}
/// Returns the group's name as a string slice labelled with lifetime `'a`.
pub const fn name(&self) -> &'a str {
// The transmute re-labels the lifetime of the string obtained from the native view as `'a`.
unsafe { mem::transmute(StringView::from_native(self.inner.name_).as_str()) }
}
/// Returns a reference to the group's index.
// NOTE(review): the reference is taken to a field stored inside `self` and then transmuted to
// `'a`; verify it can never outlive this `NamedGroup` value.
pub const fn index(&self) -> &'a usize { unsafe { mem::transmute(&self.inner.index_) } }
}
impl<'a> fmt::Debug for NamedGroup<'a> {
    /// Formats the group as `NamedGroup(i=<index>, name=<name>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let index = self.index();
        let name = self.name();
        write!(f, "NamedGroup(i={}, name={:?})", index, name)
    }
}
/// Cursor over the native collection of named capture groups.
///
/// The `'a` parameter is carried only through `PhantomData`.
#[repr(transparent)]
struct NamedCapturingGroups<'a> {
inner: re2_c::NamedCapturingGroups,
_ph: PhantomData<&'a u8>,
}
impl<'a> NamedCapturingGroups<'a> {
    /// Wraps a native named-groups cursor.
    ///
    /// # Safety
    /// NOTE(review): presumably the native cursor must stay valid for `'a`; confirm against
    /// the caller.
    pub(crate) const unsafe fn from_native(inner: re2_c::NamedCapturingGroups) -> Self {
        Self {
            _ph: PhantomData,
            inner,
        }
    }

    /// Reads the group at the current cursor position without advancing.
    fn deref(&self) -> NamedGroup<'a> {
        let mut slot = MaybeUninit::<re2_c::NamedGroup>::uninit();
        unsafe {
            // The native call writes the current entry into `slot`.
            self.inner.deref(slot.as_mut_ptr());
            NamedGroup::from_native(slot.assume_init())
        }
    }

    /// Moves the native cursor to the next entry.
    fn advance(&mut self) {
        unsafe {
            self.inner.advance();
        }
    }

    /// Reports whether the native cursor has run past the last entry.
    fn completed(&self) -> bool {
        let done = unsafe { self.inner.completed() };
        done
    }
}
impl<'a> Iterator for NamedCapturingGroups<'a> {
    type Item = NamedGroup<'a>;

    /// Yields the group under the cursor and advances, or `None` once the cursor is completed.
    fn next(&mut self) -> Option<Self::Item> {
        (!self.completed()).then(|| {
            let current = self.deref();
            self.advance();
            current
        })
    }
}
/// Iterator state that walks group 0 and every capture group, pairing each index with its
/// name when one exists.
struct NamedAndNumberedGroups<'a> {
// True until the entry for group 0 has been yielded.
at_start: bool,
// Number of capture groups (group 0 not included).
total_num_captures: usize,
// Remaining named groups; set to `None` once the cursor has been exhausted.
groups_iter: Option<NamedCapturingGroups<'a>>,
// Lookahead: the next named group that has been read but not yet yielded.
next_named_group: Option<NamedGroup<'a>>,
// Index of the group that the next call to `next` will describe.
cur_index: usize,
}
impl<'a> NamedAndNumberedGroups<'a> {
    /// Creates an iterator positioned before group 0.
    ///
    /// `total_num_captures` is the number of capture groups (group 0 excluded) and
    /// `groups_iter` supplies the named groups that are merged in by index.
    pub fn new(total_num_captures: usize, groups_iter: NamedCapturingGroups<'a>) -> Self {
        Self {
            at_start: true,
            total_num_captures,
            groups_iter: Some(groups_iter),
            next_named_group: None,
            cur_index: 0,
        }
    }

    /// Number of items the iterator will still yield.
    ///
    /// Once the iterator is exhausted `cur_index` equals `total_num_captures + 1`, so the
    /// addition must happen before the subtraction; subtracting first underflows `usize`
    /// (a panic in debug builds) when `size_hint` or `len` is queried after the last item.
    const fn remaining(&self) -> usize {
        if self.at_start {
            self.total_num_captures + 1
        } else {
            (self.total_num_captures + 1).saturating_sub(self.cur_index)
        }
    }
}
impl<'a> Iterator for NamedAndNumberedGroups<'a> {
type Item = Option<&'a str>;
fn next(&mut self) -> Option<Self::Item> {
let Self {
ref mut at_start,
ref total_num_captures,
ref mut groups_iter,
ref mut next_named_group,
ref mut cur_index,
} = self;
if *at_start {
*at_start = false;
*cur_index = 1;
return Some(None);
}
if *cur_index > *total_num_captures {
return None;
}
if next_named_group.is_none() {
let reset_groups_iter = if let Some(ref mut g) = groups_iter {
if g.completed() {
true
} else {
*next_named_group = Some(g.deref());
g.advance();
false
}
} else {
false
};
if reset_groups_iter {
*groups_iter = None;
}
}
let ret = if let Some(named_group) = next_named_group {
if *cur_index < *named_group.index() {
None
} else {
debug_assert_eq!(cur_index, named_group.index());
Some(named_group.name())
}
} else {
None
};
if ret.is_some() {
*next_named_group = None;
}
*cur_index += 1;
Some(ret)
}
fn size_hint(&self) -> (usize, Option<usize>) { (self.remaining(), Some(self.remaining())) }
}
// Marker impl: `size_hint` returns the same value for its lower and upper bound.
impl<'a> ExactSizeIterator for NamedAndNumberedGroups<'a> {}
/// Iterator state for successive matches of `pattern`, each yielding `N` submatch views.
struct MatchIter<'r, 'h, const N: usize> {
// The part of the haystack that has not been searched yet.
remaining_input: StringView<'h>,
// The compiled expression used for every search.
pattern: &'r RE2,
}
impl<'r, 'h, const N: usize> Iterator for MatchIter<'r, 'h, N> {
    type Item = [StringView<'h>; N];

    /// Searches the remaining input for the next unanchored match and advances past it.
    ///
    /// Iteration stops when no match is found or when the match ends at offset 0 of the
    /// remaining input (nothing would be consumed).
    fn next(&mut self) -> Option<Self::Item> {
        let haystack = self.remaining_input;
        let captures =
            self.pattern
                .match_routine_view(haystack, 0..haystack.len(), Anchor::Unanchored)?;
        // Offset, relative to the start of the remaining input, of the end of the whole match
        // (slot 0). Assumes slot 0 points into `haystack`.
        let match_end = unsafe {
            let whole = captures.get_unchecked(0).as_slice();
            let end_ptr = whole.as_ptr().add(whole.len());
            let consumed = end_ptr.offset_from(haystack.as_slice().as_ptr());
            debug_assert!(consumed >= 0);
            consumed as usize
        };
        if match_end == 0 {
            return None;
        }
        self.remaining_input = haystack.index_range(match_end..).unwrap();
        Some(captures)
    }
}
/// Iterator state for splitting a haystack on matches of `pattern`.
struct SplitIter<'r, 'h> {
// The part of the haystack still to be split; `None` once iteration has finished.
remaining_input: Option<StringView<'h>>,
// The compiled expression whose matches act as separators.
pattern: &'r RE2,
}
impl<'r, 'h> Iterator for SplitIter<'r, 'h> {
type Item = StringView<'h>;
fn next(&mut self) -> Option<Self::Item> {
let remaining = self.remaining_input?;
if let Some([m]) =
self
.pattern
.match_routine_view(remaining, 0..remaining.len(), Anchor::Unanchored)
{
let m = m.as_slice();
let prev_start = remaining.as_slice().as_ptr();
let consumed = unsafe { m.as_ptr().offset_from(prev_start) };
debug_assert!(consumed >= 0);
let consumed = consumed as usize;
let ret = remaining.index_range(..consumed).unwrap();
let consumed_with_match = consumed + m.len();
self.remaining_input = if consumed_with_match == 0 {
None
} else {
Some(remaining.index_range(consumed_with_match..).unwrap())
};
Some(ret)
} else {
mem::take(&mut self.remaining_input)
}
}
}