use aho_corasick::{AhoCorasick, FindIter, Input, Match, MatchKind, StreamFindIter};
#[doc(inline)]
pub use aho_corasick::BuildError;
/// A set of byte patterns, each bound to a value of type `V`, searchable
/// through an Aho-Corasick automaton.
///
/// The automaton reports matches by pattern index; that index is used to look
/// up the bound value in `ligatures`.
#[derive(Clone, Debug)]
pub struct LigatureTree<V> {
/// Automaton built from the patterns.
aho: aho_corasick::AhoCorasick,
/// Values bound to the patterns; the entry at index `i` belongs to the
/// pattern with index `i`.
ligatures: Vec<V>,
}
impl<V> LigatureTree<V> {
    /// Builds a tree from `(pattern, value)` pairs.
    ///
    /// Patterns and values are split into two parallel lists in iteration
    /// order, so the value of the `n`-th pair is the one looked up for pattern
    /// index `n`. The automaton is configured with
    /// [`MatchKind::LeftmostLongest`].
    ///
    /// # Errors
    ///
    /// Returns the [`BuildError`] produced by the Aho-Corasick builder.
    pub fn try_from_bindings<S: AsRef<[u8]>>(
        bindings: impl IntoIterator<Item = (S, V)>,
    ) -> Result<Self, BuildError> {
        let (patterns, ligatures): (Vec<S>, Vec<V>) = bindings.into_iter().unzip();
        AhoCorasick::builder()
            .match_kind(MatchKind::LeftmostLongest)
            .build(patterns)
            .map(|aho| Self { aho, ligatures })
    }

    /// Returns the value stored at `pattern_index`, or `None` when the index
    /// is out of range.
    pub fn get_by_pattern_index(&self, pattern_index: usize) -> Option<&V> {
        self.ligatures.as_slice().get(pattern_index)
    }

    /// Returns the value bound to the first match the automaton reports in
    /// `input`, or `None` when nothing matches.
    pub fn find(&self, input: impl AsRef<str>) -> Option<&V> {
        let haystack: &str = input.as_ref();
        self.aho
            .find(haystack)
            .and_then(|found| self.get_by_pattern_index(found.pattern().as_usize()))
    }

    /// Returns an iterator over the values bound to the successive matches the
    /// automaton's `find_iter` reports in `input`.
    #[inline]
    pub fn valid<'i>(
        &'_ self,
        input: impl Into<Input<'i>>,
    ) -> LigatureIter<'_, V, FindIter<'_, 'i>> {
        let aho_iter = self.aho.find_iter(input);
        let ligatures = self.ligatures.as_slice();
        LigatureIter {
            aho_iter,
            ligatures,
        }
    }

    /// Streaming counterpart of [`LigatureTree::valid`]: searches the bytes
    /// read from `stream` and yields the bound value for each match, or the
    /// I/O error produced by the underlying stream search.
    #[inline]
    pub fn valid_stream<R: std::io::Read>(
        &'_ self,
        stream: R,
    ) -> LigatureStreamIter<'_, V, StreamFindIter<'_, R>> {
        let aho_iter = self.aho.stream_find_iter(stream);
        let ligatures = self.ligatures.as_slice();
        LigatureStreamIter {
            aho_iter,
            ligatures,
        }
    }
}
/// Iterator adapter that turns matches into the values bound to the matched
/// patterns.
///
/// `aho_iter` supplies the matches and `ligatures` is the table in which the
/// pattern index of each match is looked up.
///
/// The struct itself places no bound on `I`; the `Iterator` implementation
/// states the bound it needs.
#[derive(Debug)]
pub struct LigatureIter<'l, V, I> {
    /// Source of matches.
    aho_iter: I,
    /// Values indexed by pattern index.
    ligatures: &'l [V],
}
/// Yields, for every match produced by the wrapped iterator, the value stored
/// at the match's pattern index.
impl<'l, V, I> Iterator for LigatureIter<'l, V, I>
where
    I: Iterator<Item = Match>,
{
    type Item = &'l V;

    /// Advances the wrapped iterator and looks up the value for the match.
    ///
    /// Returns `None` when the wrapped iterator is exhausted, or when the
    /// pattern index has no entry in the table.
    fn next(&mut self) -> Option<Self::Item> {
        let found = self.aho_iter.next()?;
        let index = found.pattern().as_usize();
        self.ligatures.get(index)
    }
}
/// Iterator adapter for stream searches: turns each successfully found match
/// into the value bound to the matched pattern and passes errors through.
///
/// `aho_iter` supplies the match results and `ligatures` is the table in which
/// the pattern index of each match is looked up.
///
/// The struct itself places no bound on `I`; the `Iterator` implementation
/// states the bound it needs.
#[derive(Debug)]
pub struct LigatureStreamIter<'l, V, I> {
    /// Source of match results.
    aho_iter: I,
    /// Values indexed by pattern index.
    ligatures: &'l [V],
}
/// Yields, for every item of the wrapped iterator, either the value stored at
/// the match's pattern index or the I/O error that was reported.
impl<'l, V, I> Iterator for LigatureStreamIter<'l, V, I>
where
    I: Iterator<Item = Result<Match, std::io::Error>>,
{
    type Item = Result<&'l V, std::io::Error>;

    /// Advances the wrapped iterator.
    ///
    /// Returns `None` when the wrapped iterator is exhausted, or when a match
    /// carries a pattern index with no entry in the table. Errors are passed
    /// through unchanged as `Some(Err(_))`.
    fn next(&mut self) -> Option<Self::Item> {
        match self.aho_iter.next()? {
            Ok(found) => self.ligatures.get(found.pattern().as_usize()).map(Ok),
            Err(err) => Some(Err(err)),
        }
    }
}
#[cfg(test)]
#[test]
fn test_ligature_tree() {
    let tree = LigatureTree::try_from_bindings([
        ("abc", "lig1"),
        ("ab", "lig2"),
        ("bc", "lig3"),
        ("c", "lig4"),
        ("y", "lig5"),
    ])
    .unwrap();

    // Each entry pairs a haystack with the value `find` is expected to return.
    let expectations = [
        ("abc", Some("lig1")),
        ("ab", Some("lig2")),
        ("bc", Some("lig3")),
        ("c", Some("lig4")),
        ("d", None),
        ("y", Some("lig5")),
        // The leading "d" matches nothing; "abc" is the first match.
        ("dabcy", Some("lig1")),
        // The leading "y" is the first match.
        ("ydabcy", Some("lig5")),
    ];
    for (haystack, expected) in expectations {
        assert_eq!(tree.find(haystack), expected.as_ref());
    }

    // `valid` reports every match in order: "abc", then "y".
    let valid = tree.valid("xabcdy").copied().collect::<Vec<_>>();
    assert_eq!(valid, ["lig1", "lig5"]);
}
/// Access to an underlying [`LigatureTree`] whose values are of type `T`.
pub trait AsLigatureTree<T> {
/// Returns a shared reference to the underlying tree.
fn as_ligature_tree(&self) -> &LigatureTree<T>;
}
/// Every type that can be borrowed as a `LigatureTree<T>` through `AsRef`
/// gets `AsLigatureTree<T>` for free.
impl<T, U> AsLigatureTree<T> for U
where
    U: AsRef<LigatureTree<T>>,
{
    /// Delegates to the type's `AsRef<LigatureTree<T>>` implementation.
    #[inline]
    fn as_ligature_tree(&self) -> &LigatureTree<T> {
        <U as AsRef<LigatureTree<T>>>::as_ref(self)
    }
}
/// Converts a value stored in the underlying tree into the implementor's own
/// output type.
pub trait MapLigature<T>: AsLigatureTree<T> {
/// Type produced from a stored value.
type Output;
/// Maps a borrowed tree value to [`MapLigature::Output`].
fn map_ligature(&self, value: &T) -> Self::Output;
}
/// Lookup helpers layered over [`AsLigatureTree`] and [`MapLigature`].
///
/// Every method performs the corresponding lookup on the underlying tree and
/// passes each value it finds through `map_ligature`.
pub trait InputResolver<T>: AsLigatureTree<T> + MapLigature<T> {
    /// Maps the value stored at `pattern_index`, or returns `None` when the
    /// tree has no value at that index.
    fn get_by_pattern_index(&self, pattern_index: usize) -> Option<Self::Output> {
        self.as_ligature_tree()
            .get_by_pattern_index(pattern_index)
            .map(|value| self.map_ligature(value))
    }

    /// Maps the value the tree's `find` returns for `input`, or returns `None`
    /// when the tree finds nothing.
    fn find(&self, input: impl AsRef<str>) -> Option<Self::Output> {
        self.as_ligature_tree()
            .find(input)
            .map(|value| self.map_ligature(value))
    }

    /// Returns an iterator that maps every value yielded by the tree's `valid`
    /// for `input`.
    fn valid<'a, 'i>(&'a self, input: impl Into<Input<'i>>) -> impl Iterator<Item = Self::Output>
    where
        T: 'a,
    {
        let tree = self.as_ligature_tree();
        tree.valid(input).map(|value| self.map_ligature(value))
    }

    /// Returns an iterator that maps every successfully found value yielded by
    /// the tree's `valid_stream` for `stream`; errors are passed through.
    fn valid_stream<'a, R: std::io::Read>(
        &'a self,
        stream: R,
    ) -> impl Iterator<Item = Result<Self::Output, std::io::Error>>
    where
        T: 'a,
    {
        let tree = self.as_ligature_tree();
        tree.valid_stream(stream)
            .map(|found| found.map(|value| self.map_ligature(value)))
    }
}
/// Every type that exposes a tree and can map its values is an
/// `InputResolver`; the default methods supply all behaviour.
impl<T, U> InputResolver<T> for U where U: AsLigatureTree<T> + MapLigature<T> {}