#[cfg(feature = "gxhash")]
type HashMap<K, V> = std::collections::HashMap<K, V, gxhash::GxBuildHasher>;
#[cfg(feature = "ahash")]
use ahash::HashMap;
#[cfg(feature = "rustc-hash")]
use rustc_hash::FxHashMap as HashMap;
#[cfg(all(
not(feature = "ahash"),
not(feature = "gxhash"),
not(feature = "rustc-hash")
))]
use std::collections::HashMap;
use kstring::KString;
use std::collections::BTreeSet;
use std::hash::Hash;
#[derive(Default)]
pub struct FuzzyTopScores<'a, K: Hash + Ord, S: PartialOrd> {
pub(crate) top: HashMap<&'a KString, (&'a BTreeSet<K>, S)>,
pub(crate) bottom: Option<(&'a KString, S)>,
pub(crate) capacity: usize,
}
impl<'a, K: Hash + Ord, S: Clone + PartialOrd> FuzzyTopScores<'a, K, S> {
#[allow(clippy::default_trait_access)]
pub(crate) fn with_capacity(capacity: usize) -> Self {
FuzzyTopScores {
top: HashMap::with_capacity_and_hasher(
capacity,
std::default::Default::default()
),
bottom: None,
capacity,
}
}
pub(crate) fn insert(
&mut self,
keyword: &'a KString,
keys: &'a BTreeSet<K>,
score: S,
) {
let below_threshold = self.top.len() >= self.capacity && self
.bottom
.as_ref()
.is_some_and(|(_, bottom_score)| score <= *bottom_score);
if !below_threshold {
if self.top.len() >= self.capacity {
self.remove_bottom();
}
self.top.insert(keyword, (keys, score.clone()));
let is_new_bottom = self
.bottom
.as_ref()
.is_none_or(|(_, bottom_score)| score < *bottom_score);
if is_new_bottom {
self.bottom = Some((keyword, score));
}
}
}
#[cfg(feature = "rapidfuzz")]
pub(crate) fn min_score(&self) -> Option<&S> {
if self.top.len() >= self.capacity {
self.bottom.as_ref().map(|(_, score)| score)
} else {
None
}
}
fn remove_bottom(&mut self) {
if let Some((bottom_keyword, _)) = &self.bottom {
self.top.remove(bottom_keyword);
}
self.bottom = self
.top
.iter()
.min_by(|(_, (_, a_score)), (_, (_, b_score))| {
a_score.partial_cmp(b_score).unwrap()
})
.map(|(keyword, (_, score))| (*keyword, score.clone()));
}
#[inline]
pub(crate) fn results(
self
) -> impl Iterator<Item = (&'a KString, &'a BTreeSet<K>)> {
let mut vec: Vec<_> = self.top.into_iter().collect();
vec.sort_unstable_by(|(_, (_, a_score)), (_, (_, b_score))| {
b_score.partial_cmp(a_score).unwrap()
});
vec.into_iter().map(|(keyword, (keys, _))| (keyword, keys))
}
}