use aho_corasick::AhoCorasick;
#[cfg(feature = "dfa")]
use aho_corasick::{AhoCorasickBuilder, AhoCorasickKind, MatchKind as AhoCorasickMatchKind};
#[cfg(not(feature = "dfa"))]
use daachorse::CharwiseDoubleArrayAhoCorasick;
#[cfg(all(not(feature = "dfa"), feature = "runtime_build"))]
use daachorse::{
CharwiseDoubleArrayAhoCorasickBuilder, MatchKind as DoubleArrayAhoCorasickMatchKind,
};
#[cfg(feature = "runtime_build")]
use std::collections::HashMap;
/// Backend automaton that performs the multi-pattern search.
///
/// The set of variants depends on the `dfa` cargo feature: `DAAC` is only
/// compiled when `dfa` is disabled, while `AC` is always available.
#[derive(Clone)]
enum MultiCharEngine {
/// Character-wise double-array Aho-Corasick automaton from `daachorse`,
/// storing a `u32` value per pattern.
#[cfg(not(feature = "dfa"))]
DAAC(CharwiseDoubleArrayAhoCorasick<u32>),
/// Automaton from the `aho_corasick` crate.
AC(AhoCorasick),
}
/// Multi-pattern matcher: a search automaton bundled with a list of
/// replacement strings.
#[derive(Clone)]
pub(crate) struct MultiCharMatcher {
/// Automaton used by `find_iter` to locate pattern occurrences.
engine: MultiCharEngine,
/// Replacement strings. Empty unless set through `with_replace_list`;
/// `new_from_dict` fills it in the same (key-sorted) order as the patterns,
/// so entry `i` is the replacement for the `i`-th pattern.
replace_list: Vec<&'static str>,
}
/// Iterator over the matches of a `MultiCharMatcher`, wrapping the
/// engine-specific iterator so callers see a single type.
///
/// Each variant mirrors the `MultiCharEngine` variant of the same name.
pub(crate) enum MultiCharFindIter<'a> {
/// Leftmost-match iterator of the `daachorse` character-wise automaton.
// NOTE(review): the type name is spelled `Lestmost...` here; presumably this
// mirrors the upstream crate's own spelling -- confirm before "fixing" it.
#[cfg(not(feature = "dfa"))]
DAAC(daachorse::charwise::iter::LestmostFindIterator<'a, &'a str, u32>),
/// Match iterator of the `aho_corasick` automaton.
AC(aho_corasick::FindIter<'a, 'a>),
}
impl<'a> Iterator for MultiCharFindIter<'a> {
type Item = (usize, usize, usize);
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
match self {
#[cfg(not(feature = "dfa"))]
MultiCharFindIter::DAAC(iter) => iter
.next()
.map(|m| (m.start(), m.end(), m.value() as usize)),
MultiCharFindIter::AC(iter) => iter
.next()
.map(|m| (m.start(), m.end(), m.pattern().as_usize())),
}
}
}
impl MultiCharMatcher {
    /// Returns the replacement strings attached to this matcher.
    ///
    /// The slice is empty unless a list was installed with
    /// [`with_replace_list`](Self::with_replace_list) (which `new_from_dict`
    /// does on the caller's behalf).
    #[inline(always)]
    pub(crate) fn replace_list(&self) -> &[&'static str] {
        &self.replace_list
    }

    /// Returns an iterator over the matches of the stored patterns in `text`.
    ///
    /// Items are `(start, end, index)` tuples; see the `Iterator`
    /// implementation of [`MultiCharFindIter`].
    #[inline(always)]
    pub(crate) fn find_iter<'a>(&'a self, text: &'a str) -> MultiCharFindIter<'a> {
        match &self.engine {
            #[cfg(not(feature = "dfa"))]
            MultiCharEngine::DAAC(ac) => MultiCharFindIter::DAAC(ac.leftmost_find_iter(text)),
            MultiCharEngine::AC(ac) => MultiCharFindIter::AC(ac.find_iter(text)),
        }
    }

    /// Creates a matcher with no patterns and an empty replacement list.
    ///
    /// This always uses the `aho_corasick` engine, regardless of the enabled
    /// cargo features.
    pub(crate) fn new_empty() -> Self {
        Self {
            engine: MultiCharEngine::AC(
                AhoCorasick::new(Vec::<&str>::new())
                    .expect("building an Aho-Corasick automaton without patterns must not fail"),
            ),
            replace_list: Vec::new(),
        }
    }

    /// Builds a leftmost-longest matcher for `patterns` with an empty
    /// replacement list.
    ///
    /// With the `dfa` feature the `aho_corasick` DFA is used; otherwise the
    /// `daachorse` character-wise double-array automaton is used. An empty
    /// pattern set yields an empty matcher in both configurations.
    ///
    /// # Panics
    ///
    /// Panics if the underlying automaton builder rejects the pattern set.
    #[cfg(any(feature = "runtime_build", feature = "dfa"))]
    pub(crate) fn new<I, P>(patterns: I) -> Self
    where
        I: IntoIterator<Item = P>,
        P: AsRef<str> + AsRef<[u8]>,
    {
        #[cfg(not(feature = "dfa"))]
        {
            // The daachorse builder documents an error for an empty pattern
            // set, which used to surface here as a panic. Fall back to the
            // empty matcher so this path agrees with the `dfa` one.
            let mut patterns = patterns.into_iter().peekable();
            if patterns.peek().is_none() {
                return Self::new_empty();
            }
            Self {
                engine: MultiCharEngine::DAAC(
                    CharwiseDoubleArrayAhoCorasickBuilder::new()
                        .match_kind(DoubleArrayAhoCorasickMatchKind::LeftmostLongest)
                        .build(patterns)
                        .expect("failed to build double-array Aho-Corasick automaton from patterns"),
                ),
                replace_list: Vec::new(),
            }
        }
        #[cfg(feature = "dfa")]
        {
            Self {
                engine: MultiCharEngine::AC(
                    AhoCorasickBuilder::new()
                        .kind(Some(AhoCorasickKind::DFA))
                        .match_kind(AhoCorasickMatchKind::LeftmostLongest)
                        .build(patterns)
                        .expect("failed to build Aho-Corasick DFA from patterns"),
                ),
                replace_list: Vec::new(),
            }
        }
    }

    /// Replaces the matcher's replacement list with `replace_list` and returns
    /// the matcher (builder style).
    pub(crate) fn with_replace_list(mut self, replace_list: Vec<&'static str>) -> Self {
        self.replace_list = replace_list;
        self
    }

    /// Reconstructs a `daachorse` matcher from serialized `bytes`, with an
    /// empty replacement list.
    ///
    /// # Safety
    ///
    /// Although this function is not marked `unsafe`, it performs an
    /// unchecked deserialization: callers must pass bytes that were produced
    /// by serializing a `CharwiseDoubleArrayAhoCorasick<u32>` with a
    /// compatible `daachorse` version. Anything else is undefined behavior.
    // NOTE(review): `find_iter` calls `leftmost_find_iter` on this automaton,
    // so the serialized automaton presumably has to be built with a leftmost
    // match kind -- confirm against the code that generates `bytes`.
    #[cfg(not(feature = "dfa"))]
    pub(crate) fn deserialize_from(bytes: &'static [u8]) -> Self {
        // SAFETY: soundness is delegated to the caller, who must supply bytes
        // emitted by the matching daachorse serializer (see `# Safety` above);
        // no validation is performed here.
        let (automaton, _) =
            unsafe { CharwiseDoubleArrayAhoCorasick::<u32>::deserialize_unchecked(bytes) };
        Self {
            engine: MultiCharEngine::DAAC(automaton),
            replace_list: Vec::new(),
        }
    }

    /// Builds a matcher from a `pattern -> replacement` dictionary.
    ///
    /// Entries are sorted by pattern so the pattern order (and therefore the
    /// indices reported by `find_iter`) does not depend on `HashMap` iteration
    /// order; the replacement list is stored in that same order. An empty
    /// dictionary yields an empty matcher.
    #[cfg(feature = "runtime_build")]
    pub(crate) fn new_from_dict(dict: HashMap<&'static str, &'static str>) -> Self {
        let mut pairs: Vec<(&'static str, &'static str)> = dict.into_iter().collect();
        // Keys of a map are unique, so an unstable sort is still deterministic.
        pairs.sort_unstable_by_key(|&(pattern, _)| pattern);
        let (patterns, replace_list): (Vec<&'static str>, Vec<&'static str>) =
            pairs.into_iter().unzip();
        Self::new(patterns).with_replace_list(replace_list)
    }
}