use crate::{
error::{RE2ErrorCode, SetError},
options::{Anchor, Options},
re2, re2_c,
set::{ExpressionIndex, MatchedSetInfo, Set, SetBuilder},
string::{StringView, StringWrapper},
RE2,
};
use indexmap::IndexMap;
use std::{marker::PhantomData, mem, ops, os::raw::c_int, slice};
/// Position of an atom, stored as the native `c_int` used across the FFI boundary.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub struct AtomIndex(pub(crate) c_int);

impl AtomIndex {
  /// Returns the wrapped value converted to `u16`.
  ///
  /// The conversion is a plain `as` cast: values outside the `u16` range wrap
  /// instead of producing an error.
  pub const fn as_index(self) -> u16 {
    let Self(raw) = self;
    raw as u16
  }

  /// Builds an index from a `u16` by widening it to `c_int`.
  pub const fn from_index(x: u16) -> Self {
    let raw = x as c_int;
    Self(raw)
  }
}
/// Collection of atom strings held in a native `re2_c::StringSet`.
#[derive(Debug)]
#[repr(transparent)]
pub struct AtomSet(re2_c::StringSet);

impl AtomSet {
  /// Wraps an already-populated native string set.
  pub(crate) const fn from_native(s: re2_c::StringSet) -> Self { Self(s) }

  /// Raw pointer to the first element, exactly as reported by the native `cdata()`.
  fn as_ptr(&self) -> *const re2_c::StringWrapper { unsafe { self.0.cdata() } }

  /// Number of atoms, as reported by the native `size()`.
  pub fn len(&self) -> usize { unsafe { self.0.size() } }

  /// Returns `true` when [`Self::len`] is zero.
  pub fn is_empty(&self) -> bool { self.len() == 0 }

  /// Borrows the atoms as a slice of [`StringWrapper`].
  ///
  /// An empty set (or a null data pointer) yields an empty slice without
  /// touching the native pointer: `slice::from_raw_parts` requires a non-null
  /// pointer even for a length of zero, and the native side is not known to
  /// guarantee one for an empty container.
  pub fn as_slice(&self) -> &[StringWrapper] {
    let data = self.as_ptr();
    let len = self.len();
    if len == 0 || data.is_null() {
      return &[];
    }
    // SAFETY: `data` is non-null and `len` is the element count reported by the
    // same native set. The cast assumes `StringWrapper` has the same layout as
    // `re2_c::StringWrapper` (the assumption the previous slice transmute also
    // relied on) -- TODO confirm it stays `#[repr(transparent)]`.
    unsafe { slice::from_raw_parts(data.cast::<StringWrapper>(), len) }
  }

  /// Iterates over every atom together with its position as an [`AtomIndex`].
  pub fn indexed_atoms(&self) -> impl ExactSizeIterator<Item=(AtomIndex, StringView)>+'_ {
    self
      .as_slice()
      .iter()
      .enumerate()
      .map(|(position, wrapper)| (AtomIndex(position as c_int), wrapper.as_view()))
  }

  /// Looks up the atoms named by the atom indices stored in `m`.
  ///
  /// # Panics
  /// The returned iterator panics when it reaches an index that is not smaller
  /// than [`Self::len`].
  pub fn index_by<'a>(
    &'a self,
    m: &'a MatchedSetInfo,
  ) -> impl ExactSizeIterator<Item=&'a StringWrapper>+'a {
    let atoms = self.as_slice();
    m.as_atom_slice()
      .iter()
      .map(move |index| &atoms[index.as_index() as usize])
  }
}
impl ops::Drop for AtomSet {
fn drop(&mut self) {
unsafe {
self.0.clear();
}
}
}
/// Insertion-ordered map from an atom's text to its [`AtomIndex`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct SelectedAtoms<'a>(pub IndexMap<StringView<'a>, AtomIndex>);

impl<'a> SelectedAtoms<'a> {
  /// Collects every atom of `atom_set`, keyed by its string view.
  pub fn from_atom_set(atom_set: &'a AtomSet) -> Self {
    let by_view: IndexMap<StringView<'a>, AtomIndex> = atom_set
      .indexed_atoms()
      .map(|(index, view)| (view, index))
      .collect();
    Self(by_view)
  }

  /// Creates a fresh [`MatchedSetInfo`] and fills it via
  /// [`Self::allocate_into_match_set`].
  pub fn allocate_match_set(&self) -> MatchedSetInfo {
    let mut fresh = MatchedSetInfo::empty();
    self.allocate_into_match_set(&mut fresh);
    fresh
  }

  /// Resizes `ret` to the number of selected atoms and writes their indices
  /// into it, in map order.
  pub fn allocate_into_match_set(&self, ret: &mut MatchedSetInfo) {
    let Self(selected) = self;
    ret.set_len(selected.len());
    ret
      .as_mut_atom_slice()
      .iter_mut()
      .zip(selected.values().copied())
      .for_each(|(slot, atom)| *slot = atom);
  }
}
/// Accumulates patterns in a native `re2_c::FilteredRE2Wrapper` until
/// [`FilteredRE2Builder::compile`] is called.
#[derive(Debug)]
#[repr(transparent)]
pub struct FilteredRE2Builder(re2_c::FilteredRE2Wrapper);

impl FilteredRE2Builder {
  /// Creates a builder through the native default constructor.
  #[allow(clippy::new_without_default)]
  pub fn new() -> Self {
    let native = unsafe { re2_c::FilteredRE2Wrapper::new() };
    Self(native)
  }

  /// Creates a builder, forwarding `min_atom_len` to the native constructor.
  ///
  /// The value is converted to `c_int` with a plain `as` cast.
  pub fn with_min_atom_length(min_atom_len: usize) -> Self {
    let native = unsafe { re2_c::FilteredRE2Wrapper::new1(min_atom_len as c_int) };
    Self(native)
  }

  /// Adds `pattern` with the given `options`.
  ///
  /// # Errors
  /// Returns whatever error `RE2ErrorCode::from_native` derives from the
  /// status reported by the native `add` call.
  pub(crate) fn add_view(
    &mut self,
    pattern: StringView,
    options: Options,
  ) -> Result<ExpressionIndex, RE2ErrorCode> {
    let mut slot = mem::MaybeUninit::<c_int>::uninit();
    let native_pattern = pattern.into_native();
    let native_options = options.into_native();
    let status = unsafe { self.0.add(native_pattern, &native_options, slot.as_mut_ptr()) };
    RE2ErrorCode::from_native(status)?;
    // Only reached once the status was accepted; assumes the native call has
    // written the id in that case -- TODO confirm against the C wrapper.
    let id = unsafe { slot.assume_init() };
    Ok(ExpressionIndex(id))
  }

  /// String-slice convenience wrapper around the view-based `add_view`.
  pub fn add(&mut self, pattern: &str, options: Options) -> Result<ExpressionIndex, RE2ErrorCode> {
    let view = StringView::from_str(pattern);
    self.add_view(view, options)
  }

  /// Compiles the accumulated patterns, returning the filter and its atoms.
  pub fn compile(self) -> (FilteredRE2, AtomSet) {
    // `ManuallyDrop` keeps this builder's `Drop` impl from running: the native
    // handle is carried over into the returned `FilteredRE2` below.
    let mut builder: mem::ManuallyDrop<Self> = mem::ManuallyDrop::new(self);
    let mut atoms = mem::MaybeUninit::<re2_c::StringSet>::uninit();
    unsafe {
      builder.0.compile(atoms.as_mut_ptr());
    }
    let atom_set = AtomSet::from_native(unsafe { atoms.assume_init() });
    let handle = re2_c::FilteredRE2Wrapper {
      inner_: builder.0.inner_,
    };
    (FilteredRE2::from_native(handle), atom_set)
  }

  /// Calls the native `slow_first_match` on `text`.
  ///
  /// Returns `None` when the native call reports `-1`.
  pub(crate) fn slow_first_match_view(&self, text: StringView) -> Option<ExpressionIndex> {
    match unsafe { self.0.slow_first_match(text.into_native()) } {
      -1 => None,
      found => Some(ExpressionIndex(found)),
    }
  }

  /// String-slice convenience wrapper around the view-based
  /// `slow_first_match_view`.
  pub fn slow_first_match(&self, text: &str) -> Option<ExpressionIndex> {
    let view = StringView::from_str(text);
    self.slow_first_match_view(view)
  }

  /// Number of regexps, as reported by the native `num_regexps()`.
  pub fn num_regexps(&self) -> usize {
    let Self(native) = self;
    unsafe { native.num_regexps() }
  }
}
impl ops::Drop for FilteredRE2Builder {
fn drop(&mut self) {
unsafe {
self.0.clear();
}
}
}
/// Wrapper around an [`RE2`] constructed from a raw `re2::RE2` pointer.
///
/// The `RE2` is held in `ManuallyDrop`, so dropping an `InnerRE2` does not run
/// `RE2`'s destructor (presumably because the pointee belongs to whoever
/// handed out the pointer -- confirm against the callers of `new`).
#[derive(Debug)]
#[repr(transparent)]
pub struct InnerRE2<'o> {
// The wrapped regex; never dropped through this field.
inner: mem::ManuallyDrop<RE2>,
// Carries the `'o` lifetime without storing any data.
_ph: PhantomData<&'o u8>,
}
impl<'o> InnerRE2<'o> {
/// Builds the wrapper from a raw pointer to a native `re2::RE2`.
///
/// The pointer is transmuted into the type of `RE2Wrapper::re_`; nothing is
/// checked here, so the caller is responsible for passing a valid pointer.
pub(crate) fn new(re2_ptr: *const re2::RE2) -> Self {
let inner = RE2(re2_c::RE2Wrapper {
re_: unsafe { mem::transmute(re2_ptr) },
});
Self {
inner: mem::ManuallyDrop::new(inner),
_ph: PhantomData,
}
}
/// Returns the wrapped [`RE2`] by reference.
///
/// NOTE(review): the reference is taken from `&self.inner` but is returned
/// with lifetime `'o`, which is not tied to the borrow of `self`. It looks
/// like the result could therefore be held after this `InnerRE2` value
/// itself is gone -- confirm that callers keep the wrapper alive for as long
/// as they use the reference.
pub const fn as_re2(&self) -> &'o RE2 { unsafe { mem::transmute(&self.inner) } }
}
/// Compiled filter backed by a native `re2_c::FilteredRE2Wrapper`.
#[derive(Debug)]
#[repr(transparent)]
pub struct FilteredRE2(re2_c::FilteredRE2Wrapper);

impl FilteredRE2 {
  /// Wraps a native filter handle.
  pub(crate) const fn from_native(w: re2_c::FilteredRE2Wrapper) -> Self { Self(w) }

  /// Calls the native `first_match` with `text` and the given `atoms`.
  ///
  /// Returns `None` when the native call reports `-1`.
  pub(crate) fn first_match_view(
    &self,
    text: StringView,
    atoms: &MatchedSetInfo,
  ) -> Option<ExpressionIndex> {
    let native_text = text.into_native();
    let native_atoms = atoms.as_ref_native();
    match unsafe { self.0.first_match(native_text, native_atoms) } {
      -1 => None,
      found => Some(ExpressionIndex(found)),
    }
  }

  /// String-slice convenience wrapper around the view-based `first_match_view`.
  pub fn first_match(&self, text: &str, atoms: &MatchedSetInfo) -> Option<ExpressionIndex> {
    let view = StringView::from_str(text);
    self.first_match_view(view, atoms)
  }

  /// Calls the native `all_matches`, which writes into `matching_regexps`,
  /// and returns the boolean it reports.
  pub(crate) fn all_matches_view(
    &self,
    text: StringView,
    atoms: &MatchedSetInfo,
    matching_regexps: &mut MatchedSetInfo,
  ) -> bool {
    let native_text = text.into_native();
    let native_atoms = atoms.as_ref_native();
    let native_out = matching_regexps.as_mut_native();
    unsafe { self.0.all_matches(native_text, native_atoms, native_out) }
  }

  /// String-slice convenience wrapper around the view-based `all_matches_view`.
  pub fn all_matches(
    &self,
    text: &str,
    atoms: &MatchedSetInfo,
    matching_regexps: &mut MatchedSetInfo,
  ) -> bool {
    let view = StringView::from_str(text);
    self.all_matches_view(view, atoms, matching_regexps)
  }

  /// Calls the native `all_potentials` for `atoms`, writing into
  /// `potential_regexps`.
  ///
  /// Returns `true` when `potential_regexps` is non-empty afterwards.
  pub fn all_potentials(
    &self,
    atoms: &MatchedSetInfo,
    potential_regexps: &mut MatchedSetInfo,
  ) -> bool {
    let native_atoms = atoms.as_ref_native();
    unsafe {
      self
        .0
        .all_potentials(native_atoms, potential_regexps.as_mut_native());
    }
    let nothing_found = potential_regexps.is_empty();
    !nothing_found
  }

  /// Number of regexps, as reported by the native `num_regexps()`.
  pub fn num_regexps(&self) -> usize {
    let Self(native) = self;
    unsafe { native.num_regexps() }
  }

  /// Fetches the regexp at `index` from the native filter. The index is
  /// converted with an `as` cast and is not range-checked here.
  fn get_re2(&self, index: usize) -> InnerRE2 {
    let raw: *const re2::RE2 = unsafe { self.0.get_re2(index as c_int) };
    InnerRE2::new(raw)
  }

  /// Iterates over all regexps, from index `0` up to [`Self::num_regexps`].
  pub fn inner_regexps(&self) -> impl ExactSizeIterator<Item=InnerRE2> {
    let count = self.num_regexps();
    (0..count).map(move |position| self.get_re2(position))
  }

  /// Looks up the regexps named by the expression indices stored in `m`.
  ///
  /// # Panics
  /// The returned iterator panics when it reaches an index that is not smaller
  /// than [`Self::num_regexps`].
  pub fn index_by<'o>(
    &'o self,
    m: &'o MatchedSetInfo,
  ) -> impl ExactSizeIterator<Item=InnerRE2<'o>> {
    let n = self.num_regexps();
    let selected = m.as_expression_slice();
    selected.iter().map(move |entry| {
      let i = entry.as_index() as usize;
      assert!(i < n);
      self.get_re2(i)
    })
  }
}
impl ops::Drop for FilteredRE2 {
fn drop(&mut self) {
unsafe {
self.0.clear();
}
}
}
/// Bundles a compiled [`FilteredRE2`] with its atoms and a [`Set`] built from
/// those atoms, so atoms can be located in a text and fed to the filter in
/// one call.
pub struct Filter {
  /// The compiled filter that is consulted with the located atoms.
  filter: FilteredRE2,
  /// Atoms produced when the filter was compiled.
  atom_set: AtomSet,
  /// Literal, case-insensitive, unanchored set over `atom_set`.
  set: Set,
}

impl Filter {
  /// Compiles `builder` and builds the atom-matching set.
  ///
  /// # Errors
  /// Propagates any `SetError` raised while adding an atom to the set builder
  /// or while compiling the set.
  ///
  /// # Panics
  /// Panics if the index assigned by the set builder differs from the atom's
  /// position in the atom set.
  pub fn compile(builder: FilteredRE2Builder) -> Result<Self, SetError> {
    let (filter, atom_set) = builder.compile();
    let options = Options {
      literal: true,
      case_sensitive: false,
      ..Default::default()
    };
    let mut set_builder = SetBuilder::new(options, Anchor::Unanchored);
    for (i, atom) in atom_set.indexed_atoms() {
      let j = set_builder.add_view(atom)?;
      // Atom indices reported later by the set must line up with `atom_set`.
      assert_eq!(i.as_index(), j.as_index());
    }
    let set = set_builder.compile()?;
    Ok(Self {
      filter,
      atom_set,
      set,
    })
  }

  /// Runs the atom set over `text`, leaving the located atoms in `atoms`.
  ///
  /// When the set reports no match, `atoms` is explicitly emptied so that the
  /// filter is never handed indices left over from an earlier call.
  fn locate_atoms(&self, text: StringView, atoms: &mut MatchedSetInfo) {
    if !self.set.match_routine_view(text, atoms) {
      atoms.set_len(0);
    }
  }

  /// Finds the atoms present in `text` and stores in `matches` the regexps
  /// that the filter reports as matching.
  ///
  /// The filter is consulted even when no atom was found: regexps that do not
  /// depend on any atom can still match, and this also makes sure `matches`
  /// is rewritten on every call rather than keeping stale contents.
  ///
  /// Returns the boolean reported by the filter's `all_matches`.
  pub(crate) fn all_matches_view(
    &self,
    text: StringView,
    atoms: &mut MatchedSetInfo,
    matches: &mut MatchedSetInfo,
  ) -> bool {
    self.locate_atoms(text, atoms);
    self.filter.all_matches_view(text, atoms, matches)
  }

  /// String-slice convenience wrapper around the view-based `all_matches_view`.
  pub fn all_matches(
    &self,
    text: &str,
    atoms: &mut MatchedSetInfo,
    matches: &mut MatchedSetInfo,
  ) -> bool {
    self.all_matches_view(StringView::from_str(text), atoms, matches)
  }

  /// Finds the atoms present in `text` and stores in `matches` the regexps
  /// that the filter considers potential matches for those atoms.
  ///
  /// As with `all_matches_view`, the filter is consulted even when no atom
  /// was found.
  ///
  /// Returns `true` when `matches` is non-empty afterwards.
  pub(crate) fn potential_matches_view(
    &self,
    text: StringView,
    atoms: &mut MatchedSetInfo,
    matches: &mut MatchedSetInfo,
  ) -> bool {
    self.locate_atoms(text, atoms);
    self.filter.all_potentials(atoms, matches)
  }

  /// String-slice convenience wrapper around the view-based
  /// `potential_matches_view`.
  pub fn potential_matches(
    &self,
    text: &str,
    atoms: &mut MatchedSetInfo,
    matches: &mut MatchedSetInfo,
  ) -> bool {
    self.potential_matches_view(StringView::from_str(text), atoms, matches)
  }

  /// Resolves the atom indices stored in `atoms` to their string views.
  ///
  /// # Panics
  /// The returned iterator panics on an index outside the atom set.
  pub fn get_atoms<'a>(
    &'a self,
    atoms: &'a MatchedSetInfo,
  ) -> impl ExactSizeIterator<Item=StringView<'a>> {
    self.atom_set.index_by(atoms).map(|sw| sw.as_view())
  }

  /// Resolves the expression indices stored in `matches` to their regexps.
  ///
  /// # Panics
  /// The returned iterator panics on an index outside the filter's regexps.
  pub fn get_matches<'a>(
    &'a self,
    matches: &'a MatchedSetInfo,
  ) -> impl ExactSizeIterator<Item=InnerRE2<'a>> {
    self.filter.index_by(matches)
  }
}