use aho_corasick::{AhoCorasick, Anchored, Input, MatchKind, StartKind};
use std::collections::{HashMap, HashSet};
use crate::style::Style;
/// A suffix stored back-to-front.
///
/// Keeping the bytes in reverse order lets a suffix comparison be carried out
/// as a prefix comparison against a reversed name.
#[derive(Debug, Clone, Eq, Hash, PartialEq)]
struct SuffixKey {
    /// The suffix bytes, last byte first.
    rev_bytes: Box<[u8]>,
}

impl SuffixKey {
    /// Builds a key from `suffix`, given in its natural (forward) byte order.
    fn new(suffix: &[u8]) -> Self {
        let rev_bytes = suffix.iter().rev().copied().collect();
        Self { rev_bytes }
    }
}

impl AsRef<[u8]> for SuffixKey {
    /// Exposes the stored bytes, i.e. the suffix in *reversed* order.
    fn as_ref(&self) -> &[u8] {
        &self.rev_bytes[..]
    }
}
/// Accumulates suffix/style pairs that are later compiled into a `SuffixMap`
/// by `build`.
#[derive(Debug, Default)]
pub struct SuffixMapBuilder {
/// Reversed suffixes, one entry per `push` call.
keys: Vec<SuffixKey>,
/// Style supplied with each suffix; kept parallel to `keys`.
styles: Vec<Option<Style>>,
/// Length in bytes of the longest suffix pushed so far.
max_len: usize,
}
impl SuffixMapBuilder {
    /// Registers `suffix` with an optional `style`.
    ///
    /// The suffix is stored reversed, and the longest suffix length seen so
    /// far is tracked so lookups know how many trailing bytes to inspect.
    pub fn push(&mut self, suffix: impl AsRef<[u8]>, style: Option<Style>) {
        let bytes = suffix.as_ref();
        self.max_len = self.max_len.max(bytes.len());
        self.keys.push(SuffixKey::new(bytes));
        self.styles.push(style);
    }

    /// Compiles the collected suffixes into a [`SuffixMap`].
    ///
    /// Two anchored matchers are built: a case-sensitive one over every
    /// suffix, and an ASCII-case-insensitive one over the suffixes whose
    /// lowercase form does not clash with a differently-styled entry.
    ///
    /// # Panics
    ///
    /// Panics if either Aho-Corasick automaton fails to build.
    pub fn build(mut self) -> SuffixMap {
        // Put the most recently pushed entries first: both matchers use
        // `LeftmostFirst`, and lookups keep the smallest index, so a lower
        // index means a higher priority.
        self.keys.reverse();
        self.styles.reverse();

        let cs_matcher = AhoCorasick::builder()
            .match_kind(MatchKind::LeftmostFirst)
            .start_kind(StartKind::Anchored)
            .build(&self.keys)
            .unwrap();

        // ASCII-lowercased copy of every key, index-aligned with `self.keys`.
        let lower_keys: Vec<SuffixKey> = self
            .keys
            .iter()
            .map(|key| {
                let mut lowered = key.clone();
                lowered.rev_bytes.make_ascii_lowercase();
                lowered
            })
            .collect();

        // First (highest-priority) index at which each exact key and each
        // lowercased key occurs.
        let mut first_exact: HashMap<&SuffixKey, usize> = HashMap::new();
        let mut first_folded: HashMap<&SuffixKey, usize> = HashMap::new();
        for (idx, (exact, folded)) in self.keys.iter().zip(&lower_keys).enumerate() {
            first_exact.entry(exact).or_insert(idx);
            first_folded.entry(folded).or_insert(idx);
        }

        // A lowercased key must stay case-sensitive when some exact spelling
        // of it carries a different style than the highest-priority entry
        // sharing that lowercase form.
        let case_sensitive: HashSet<&SuffixKey> = first_exact
            .values()
            .filter_map(|&idx| {
                let folded = &lower_keys[idx];
                let winner = first_folded[folded];
                if self.styles[idx] != self.styles[winner] {
                    Some(folded)
                } else {
                    None
                }
            })
            .collect();

        // Everything else may be matched ignoring ASCII case. `ci_ids`
        // translates a pattern id of the reduced matcher back to an index
        // into `styles`.
        let mut ci_ids = Vec::new();
        let mut ci_keys = Vec::new();
        for (idx, folded) in lower_keys.iter().enumerate() {
            if !case_sensitive.contains(folded) {
                ci_ids.push(idx);
                ci_keys.push(folded);
            }
        }

        let ci_matcher = AhoCorasick::builder()
            .ascii_case_insensitive(true)
            .match_kind(MatchKind::LeftmostFirst)
            .start_kind(StartKind::Anchored)
            .build(ci_keys)
            .unwrap();

        SuffixMap {
            cs_matcher,
            ci_matcher,
            styles: self.styles,
            ci_ids,
            max_len: self.max_len,
        }
    }
}
/// Lookup table from name suffixes to optional styles, produced by
/// `SuffixMapBuilder::build`.
#[derive(Clone, Debug)]
pub struct SuffixMap {
/// Case-sensitive anchored matcher over all reversed suffixes; its pattern
/// ids index `styles` directly.
cs_matcher: AhoCorasick,
/// ASCII-case-insensitive anchored matcher over a subset of the lowercased
/// reversed suffixes.
ci_matcher: AhoCorasick,
/// Style for each suffix, in the same order as the `cs_matcher` patterns.
styles: Vec<Option<Style>>,
/// Maps a `ci_matcher` pattern id to the corresponding index in `styles`.
ci_ids: Vec<usize>,
/// Length in bytes of the longest suffix; `get` inspects at most this many
/// trailing bytes of a name.
max_len: usize,
}
impl SuffixMap {
    /// Looks up the style associated with the suffix of `name`.
    ///
    /// Both the case-sensitive and the case-insensitive matcher are
    /// consulted; when both hit, the entry with the smaller index in
    /// `styles` is used. Returns `None` when no suffix matches or when the
    /// selected entry was registered without a style.
    pub fn get(&self, name: impl AsRef<[u8]>) -> Option<&Style> {
        let name = name.as_ref();

        // No suffix is longer than `max_len`, so only that many trailing
        // bytes can take part in a match.
        let keep = self.max_len.min(name.len());
        let tail = &name[name.len() - keep..];

        // Reverse the tail into a scratch buffer: a small stack array for the
        // common case, a heap allocation when the tail does not fit.
        let mut stack_buf = [0u8; 32];
        let mut heap_buf: Box<[u8]>;
        let reversed: &mut [u8] = match stack_buf.get_mut(..keep) {
            Some(slot) => {
                slot.copy_from_slice(tail);
                slot
            }
            None => {
                heap_buf = tail.into();
                &mut heap_buf[..]
            }
        };
        reversed.reverse();
        let reversed: &[u8] = reversed;

        let exact_hit = Self::find(&self.cs_matcher, reversed);
        let folded_hit = Self::find(&self.ci_matcher, reversed).map(|pattern| self.ci_ids[pattern]);

        // Smallest index among the hits wins; bail out if there were none.
        let winner = exact_hit.into_iter().chain(folded_hit).min()?;
        self.styles[winner].as_ref()
    }

    /// Runs an anchored search of `matcher` over `name` and returns the id of
    /// the matching pattern, if any.
    fn find(matcher: &AhoCorasick, name: &[u8]) -> Option<usize> {
        let anchored = Input::new(name).anchored(Anchored::Yes);
        let hit = matcher.find(anchored)?;
        Some(hit.pattern().as_usize())
    }
}