use std::{collections::HashMap, sync::Arc};
use crate::{CVData, CVSource, CVStructure, CVVersion, text::*};
/// An in-memory index over the entries of a CV.
///
/// Besides the backing collection (`data`) it keeps lookup tables from index, name and
/// synonym to the shared (`Arc`) entries, so lookups do not have to scan the collection.
#[derive(Debug)]
pub struct CVIndex<CV: CVSource> {
/// The backing collection holding all entries.
data: CV::Structure,
/// Entries keyed by their index; only entries whose `index()` is `Some` are stored here.
index: HashMap<<CV::Data as CVData>::Index, Arc<CV::Data>>,
/// Entries keyed by their name, stored trimmed of ASCII whitespace and ASCII-lowercased.
name: HashMap<Box<str>, Arc<CV::Data>>,
/// Entries keyed by each of their synonyms, stored trimmed of ASCII whitespace and
/// ASCII-lowercased.
synonyms: HashMap<Box<str>, Arc<CV::Data>>,
/// Maps every three-byte tag produced by `tags` for a normalised name or synonym to the
/// normalised names/synonyms that produced it. `search` uses it to narrow down the
/// candidates before computing edit distances.
#[cfg(feature = "search-index")]
trigram_index: HashMap<[u8; 3], Vec<Box<str>>>,
/// The version of the data currently held by this index.
version: CVVersion,
}
impl<CV: CVSource> CVIndex<CV> {
/// Returns `true` when the backing collection reports that it holds no entries.
pub fn is_empty(&self) -> bool {
self.data.is_empty()
}
/// Returns the number of entries as reported by the backing collection.
pub fn len(&self) -> usize {
self.data.len()
}
/// Look up an entry by its index.
///
/// Returns a new handle to the shared entry, or `None` if no entry with this index is known.
pub fn get_by_index(&self, index: &<CV::Data as CVData>::Index) -> Option<Arc<CV::Data>> {
    self.index.get(index).map(Arc::clone)
}
/// Look up an entry by its name, ignoring ASCII case.
///
/// The given name is ASCII-lowercased (but not trimmed) before it is compared with the
/// stored names. Synonyms are not considered.
pub fn get_by_name(&self, name: &str) -> Option<Arc<CV::Data>> {
    let key = name.to_ascii_lowercase();
    self.name.get(key.as_str()).map(Arc::clone)
}
/// Look up an entry by its name or, failing that, by one of its synonyms (ignoring ASCII case).
///
/// The returned flag is `true` when the entry was found by name and `false` when it was
/// found by synonym. Names take precedence over synonyms.
pub fn get_by_name_or_synonym(&self, name: &str) -> Option<(bool, Arc<CV::Data>)> {
    let key = name.to_ascii_lowercase();
    if let Some(hit) = self.name.get(key.as_str()) {
        return Some((true, Arc::clone(hit)));
    }
    self.synonyms
        .get(key.as_str())
        .map(|hit| (false, Arc::clone(hit)))
}
/// Search for entries whose name or synonym matches `term`, exactly or approximately.
///
/// The term is ASCII-lowercased before matching.
///
/// * An exact name match yields a single result `(entry, None, 0)`.
/// * Otherwise an exact synonym match yields a single result `(entry, Some(term), 0)`,
///   with the term as given by the caller.
/// * Otherwise all candidate names and synonyms within `max_distance` (as computed by
///   `levenshtein_distance`) are collected, ordered by ascending distance, and cut off at
///   `limit` results. The second tuple element is `None` when the candidate is a name and
///   `Some(synonym)` when it is a synonym.
///
/// With the `search-index` feature only the names/synonyms sharing at least one tag with
/// the term (according to `tags`) are considered; without it every name and synonym is.
pub fn search(
    &self,
    term: &str,
    limit: usize,
    max_distance: usize,
) -> Vec<(Arc<CV::Data>, Option<String>, usize)> {
    let lowercase = term.to_ascii_lowercase().into_boxed_str();

    // Exact matches short-circuit the fuzzy search; names win over synonyms.
    if let Some(hit) = self.name.get(&lowercase) {
        return vec![(Arc::clone(hit), None, 0)];
    }
    if let Some(hit) = self.synonyms.get(&lowercase) {
        return vec![(Arc::clone(hit), Some(term.to_string()), 0)];
    }

    // Never preallocate more than there are keys: a caller may pass a huge `limit`
    // (e.g. `usize::MAX`) to mean "no limit", which would otherwise overflow the capacity.
    let capacity = limit.min(self.name.len().saturating_add(self.synonyms.len()));
    let mut results: Vec<(&str, usize)> = Vec::with_capacity(capacity);
    // A candidate is listed under every tag it contains, so remember the ones already seen.
    #[cfg(feature = "search-index")]
    let mut seen = std::collections::HashSet::new();
    for (distance, t) in {
        #[cfg(feature = "search-index")]
        {
            tags(&lowercase)
                .filter_map(|tag| self.trigram_index.get(&tag))
                .flatten()
                // Key the set on the candidate itself so each candidate is scored once.
                .filter(|candidate| seen.insert(*candidate))
        }
        #[cfg(not(feature = "search-index"))]
        {
            self.name.keys().chain(self.synonyms.keys())
        }
    }
    .map(|t| (levenshtein_distance(&lowercase, t), t))
    .filter(|(distance, _)| *distance <= max_distance)
    {
        // `results` is kept sorted by distance; find where this candidate belongs.
        let (Ok(index) | Err(index)) = results.binary_search_by(|item| item.1.cmp(&distance));
        if index < limit {
            // `results` never grows beyond `limit`, so when full the worst hit is the last.
            if results.len() >= limit {
                results.pop();
            }
            results.insert(index, (t, distance));
        }
    }

    // Resolve the surviving keys back to their entries; names win over synonyms.
    results
        .into_iter()
        .filter_map(|(name, distance)| {
            self.name
                .get(name)
                .map(|m| (Arc::clone(m), None, distance))
                .or_else(|| {
                    self.synonyms
                        .get(name)
                        .map(|m| (Arc::clone(m), Some(name.to_string()), distance))
                })
        })
        .collect()
}
/// Borrow the backing collection holding all entries.
pub const fn data(&self) -> &CV::Structure {
&self.data
}
/// Borrow the version of the data currently held by this index.
pub const fn version(&self) -> &CVVersion {
&self.version
}
/// Replace the full contents of this index with the given entries and version.
///
/// All existing entries and lookup tables are cleared first; afterwards every given entry
/// is added to the backing collection and to the lookup tables.
pub(crate) fn update_skip_rebuilding_cache(
    &mut self,
    data: impl IntoIterator<Item = Arc<CV::Data>>,
    version: CVVersion,
) {
    // Start from a clean slate.
    self.data.clear();
    self.index.clear();
    self.name.clear();
    self.synonyms.clear();
    #[cfg(feature = "search-index")]
    self.trigram_index.clear();
    self.version = version;

    data.into_iter().for_each(|element| {
        self.data.add(Arc::clone(&element));
        self.add_to_indices(element);
    });
}
/// Replace the full contents of this index with an already built collection and version.
///
/// The collection is taken over as is; the lookup tables are cleared and rebuilt from the
/// entries it yields.
pub(crate) fn update_from_structure_skip_rebuilding_cache(
    &mut self,
    data: CV::Structure,
    version: CVVersion,
) {
    self.data = data;
    self.index.clear();
    self.name.clear();
    self.synonyms.clear();
    #[cfg(feature = "search-index")]
    self.trigram_index.clear();
    self.version = version;

    // Collect first so the borrow of `self.data` has ended before `self` is mutated.
    let elements: Vec<_> = self.data.iter_data().collect();
    elements
        .into_iter()
        .for_each(|element| self.add_to_indices(element));
}
pub fn empty() -> Self {
Self {
data: Default::default(),
index: HashMap::new(),
name: HashMap::new(),
synonyms: HashMap::new(),
#[cfg(feature = "search-index")]
trigram_index: HashMap::new(),
version: CVVersion::default(),
}
}
/// Remove the entry with the given index from the collection and from all lookup tables.
///
/// Returns `true` if an entry with this index was found and removed, `false` otherwise.
///
/// Names and synonyms are normalised (trimmed of ASCII whitespace and ASCII-lowercased)
/// exactly like `add_to_indices` does when storing them, so the lookup-table entries are
/// found regardless of the casing or padding of the original name.
pub fn remove(&mut self, index: &<CV::Data as CVData>::Index) -> bool {
    let found = self
        .data
        .iter_indexed()
        .find(|(_, m)| m.index().is_some_and(|id| id == *index));
    let Some((pos, m)) = found else {
        return false;
    };

    if let Some(name) = m.name() {
        // Must match the key format used when the name was inserted.
        let key = name.trim_ascii().to_ascii_lowercase().into_boxed_str();
        self.name.remove(&key);
        #[cfg(feature = "search-index")]
        for tag in tags(&key) {
            if let Some(terms) = self.trigram_index.get_mut(&tag) {
                terms.retain(|term| *term != key);
            }
        }
    }
    for synonym in m.synonyms() {
        // Must match the key format used when the synonym was inserted.
        let key = synonym.trim_ascii().to_ascii_lowercase().into_boxed_str();
        self.synonyms.remove(&key);
        #[cfg(feature = "search-index")]
        for tag in tags(&key) {
            if let Some(terms) = self.trigram_index.get_mut(&tag) {
                terms.retain(|term| *term != key);
            }
        }
    }
    self.index.remove(index);
    // Last, because the entry found above must not be used after this.
    self.data.remove(pos);
    true
}
/// Register an entry in the lookup tables (index, name, synonyms and, with the
/// `search-index` feature, the tag index). The backing collection is not touched.
///
/// Names and synonyms are stored trimmed of ASCII whitespace and ASCII-lowercased. An
/// existing table entry with the same key is overwritten.
#[allow(clippy::needless_pass_by_value)]
pub fn add_to_indices(&mut self, element: Arc<CV::Data>) {
    if let Some(id) = element.index() {
        self.index.insert(id, Arc::clone(&element));
    }

    if let Some(raw_name) = element.name() {
        let key = raw_name.trim_ascii().to_ascii_lowercase().into_boxed_str();
        #[cfg(feature = "search-index")]
        for tag in tags(&key) {
            self.trigram_index.entry(tag).or_default().push(key.clone());
        }
        self.name.insert(key, Arc::clone(&element));
    }

    for raw_synonym in element.synonyms() {
        let key = raw_synonym
            .trim_ascii()
            .to_ascii_lowercase()
            .into_boxed_str();
        #[cfg(feature = "search-index")]
        for tag in tags(&key) {
            self.trigram_index.entry(tag).or_default().push(key.clone());
        }
        self.synonyms.insert(key, Arc::clone(&element));
    }
}
}