#[cfg(feature = "runtime_build")]
use std::collections::HashMap;
use aho_corasick::{AhoCorasick, FindIter as AhoCorasickFindIter};
#[cfg(feature = "dfa")]
use aho_corasick::{AhoCorasickBuilder, AhoCorasickKind, MatchKind as AhoCorasickMatchKind};
#[cfg(not(feature = "dfa"))]
use daachorse::{
CharwiseDoubleArrayAhoCorasick,
charwise::iter::LestmostFindIterator as DoubleArrayAhoCorasickFindIter,
};
#[cfg(all(not(feature = "dfa"), feature = "runtime_build"))]
use daachorse::{
CharwiseDoubleArrayAhoCorasickBuilder, MatchKind as DoubleArrayAhoCorasickMatchKind,
};
/// Search backend wrapped by `MultiCharMatcher`.
///
/// The set of variants depends on the `dfa` cargo feature: the double-array
/// variant only exists when `dfa` is disabled, while the `aho_corasick`
/// variant is always compiled in.
#[derive(Clone)]
enum MultiCharEngine {
/// Character-wise double-array Aho-Corasick automaton from `daachorse`
/// whose match values are `u32`.
#[cfg(not(feature = "dfa"))]
DoubleArrayAhoCorasick(CharwiseDoubleArrayAhoCorasick<u32>),
/// Automaton provided by the `aho_corasick` crate.
AhoCorasick(AhoCorasick),
}
/// Multi-pattern matcher that pairs a search engine with a list of
/// replacement strings.
#[derive(Clone)]
pub(crate) struct MultiCharMatcher {
/// Automaton that `find_iter` delegates to.
engine: MultiCharEngine,
/// Replacement strings returned by `replace_list()`. `new_from_dict` stores
/// them in the same order as the patterns it builds the engine from; every
/// other constructor in this impl leaves the list empty.
replace_list: Vec<&'static str>,
}
/// Match iterator handed out by `MultiCharMatcher::find_iter`.
///
/// It wraps the native iterator of whichever engine the matcher holds; the
/// lifetime `'a` ties it to both the matcher and the searched text.
pub(crate) enum MultiCharFindIter<'a> {
/// Iterator produced by the `daachorse` automaton (only without the `dfa` feature).
#[cfg(not(feature = "dfa"))]
DoubleArrayAhoCorasick(DoubleArrayAhoCorasickFindIter<'a, &'a str, u32>),
/// Iterator produced by the `aho_corasick` automaton.
AhoCorasick(AhoCorasickFindIter<'a, 'a>),
}
impl<'a> Iterator for MultiCharFindIter<'a> {
type Item = (usize, usize, usize);
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> {
match self {
#[cfg(not(feature = "dfa"))]
MultiCharFindIter::DoubleArrayAhoCorasick(iter) => iter
.next()
.map(|m| (m.start(), m.end(), m.value() as usize)),
MultiCharFindIter::AhoCorasick(iter) => iter
.next()
.map(|m| (m.start(), m.end(), m.pattern().as_usize())),
}
}
}
impl MultiCharMatcher {
    /// Returns the replacement strings attached to this matcher.
    ///
    /// The slice is empty unless a list was supplied through
    /// `with_replace_list` (directly or via `new_from_dict`).
    #[inline(always)]
    pub(crate) fn replace_list(&self) -> &[&'static str] {
        &self.replace_list
    }

    /// Returns an iterator over the matches the engine finds in `text`.
    ///
    /// Each item is a `(start, end, index)` tuple; see the `Iterator`
    /// implementation of `MultiCharFindIter` for how it is derived from the
    /// engine-specific match type.
    #[inline(always)]
    pub(crate) fn find_iter<'a>(&'a self, text: &'a str) -> MultiCharFindIter<'a> {
        match &self.engine {
            #[cfg(not(feature = "dfa"))]
            MultiCharEngine::DoubleArrayAhoCorasick(ac) => {
                MultiCharFindIter::DoubleArrayAhoCorasick(ac.leftmost_find_iter(text))
            }
            MultiCharEngine::AhoCorasick(ac) => MultiCharFindIter::AhoCorasick(ac.find_iter(text)),
        }
    }

    /// Creates a matcher that holds no patterns and no replacements.
    ///
    /// It always uses the `aho_corasick` engine, regardless of enabled features.
    pub(crate) fn new_empty() -> Self {
        let engine = AhoCorasick::new(Vec::<&str>::new())
            .expect("building an automaton from an empty pattern set should not fail");
        Self {
            engine: MultiCharEngine::AhoCorasick(engine),
            replace_list: Vec::new(),
        }
    }

    /// Builds a matcher for `patterns` with leftmost-longest match semantics.
    ///
    /// Without the `dfa` feature the `daachorse` character-wise double-array
    /// automaton is built; with `dfa` an `aho_corasick` DFA is built instead.
    /// The replacement list starts out empty; attach one with
    /// `with_replace_list`.
    ///
    /// # Panics
    ///
    /// Panics if the selected builder rejects `patterns`.
    /// NOTE(review): daachorse documents empty pattern sets, empty patterns
    /// and duplicates as errors — callers should use `new_empty` when there
    /// is nothing to match.
    #[cfg(any(feature = "runtime_build", feature = "dfa"))]
    pub(crate) fn new<I, P>(patterns: I) -> Self
    where
        I: IntoIterator<Item = P>,
        P: AsRef<str> + AsRef<[u8]>,
    {
        #[cfg(not(feature = "dfa"))]
        {
            Self {
                engine: MultiCharEngine::DoubleArrayAhoCorasick(
                    CharwiseDoubleArrayAhoCorasickBuilder::new()
                        .match_kind(DoubleArrayAhoCorasickMatchKind::LeftmostLongest)
                        .build(patterns)
                        .expect("failed to build the double-array Aho-Corasick automaton"),
                ),
                replace_list: Vec::new(),
            }
        }
        #[cfg(feature = "dfa")]
        {
            Self {
                engine: MultiCharEngine::AhoCorasick(
                    AhoCorasickBuilder::new()
                        .kind(Some(AhoCorasickKind::DFA))
                        .match_kind(AhoCorasickMatchKind::LeftmostLongest)
                        .build(patterns)
                        .expect("failed to build the Aho-Corasick DFA"),
                ),
                replace_list: Vec::new(),
            }
        }
    }

    /// Consumes the matcher and returns it with `replace_list` installed,
    /// discarding any list it held before.
    pub(crate) fn with_replace_list(mut self, replace_list: Vec<&'static str>) -> Self {
        self.replace_list = replace_list;
        self
    }

    /// Reconstructs a double-array matcher from serialized automaton bytes.
    ///
    /// Only available when neither `dfa` nor `runtime_build` is enabled. The
    /// replacement list starts out empty; attach one with
    /// `with_replace_list`. Any bytes left over after the automaton are
    /// ignored.
    #[cfg(all(not(feature = "dfa"), not(feature = "runtime_build")))]
    pub(crate) fn deserialize_from(bytes: &'static [u8]) -> Self {
        // SAFETY: `deserialize_unchecked` performs no validation, so this is
        // only sound if `bytes` is the unmodified output of daachorse's
        // serializer for a `CharwiseDoubleArrayAhoCorasick<u32>`.
        // NOTE(review): presumably a build-time artifact — confirm at call sites.
        let (automaton, _rest) =
            unsafe { CharwiseDoubleArrayAhoCorasick::<u32>::deserialize_unchecked(bytes) };
        Self {
            engine: MultiCharEngine::DoubleArrayAhoCorasick(automaton),
            replace_list: Vec::new(),
        }
    }

    /// Builds a matcher from a `pattern -> replacement` dictionary.
    ///
    /// Entries are sorted by pattern so that the pattern order (and therefore
    /// the index reported for each match) does not depend on `HashMap`
    /// iteration order. `replace_list()[i]` is the replacement for the `i`-th
    /// pattern in that sorted order.
    ///
    /// An empty dictionary yields the same matcher as `new_empty` instead of
    /// being handed to the automaton builder, which may reject an empty
    /// pattern set.
    ///
    /// # Panics
    ///
    /// Panics if the automaton builder rejects the patterns (see `new`).
    #[cfg(feature = "runtime_build")]
    pub(crate) fn new_from_dict(dict: HashMap<&'static str, &'static str>) -> Self {
        if dict.is_empty() {
            return Self::new_empty();
        }

        let mut pairs: Vec<(&'static str, &'static str)> = dict.into_iter().collect();
        // Keys are unique (they come from a map), so an unstable sort is enough.
        pairs.sort_unstable_by_key(|&(pattern, _)| pattern);

        // Split into two parallel vectors that share the sorted order.
        let (patterns, replace_list): (Vec<&'static str>, Vec<&'static str>) =
            pairs.into_iter().unzip();
        Self::new(patterns).with_replace_list(replace_list)
    }
}