use std::collections::{BTreeMap, BTreeSet, HashMap};
use super::entity_index_store::ScopedPropertyKey;
#[derive(Debug, Default, Clone)]
pub(super) struct TrigramRegistry {
pub(super) by_scope: HashMap<ScopedPropertyKey, TrigramScope>,
}
#[derive(Debug, Default, Clone)]
pub(super) struct TrigramScope {
pub(super) grams: BTreeMap<[u8; 3], BTreeSet<u64>>,
refcount: u32,
}
impl TrigramRegistry {
pub(super) fn add_scope(&mut self, label: &str, property: &str) -> bool {
let key = ScopedPropertyKey::new(label, property);
let entry = self.by_scope.entry(key).or_default();
let was_empty = entry.refcount == 0;
entry.refcount = entry.refcount.saturating_add(1);
was_empty
}
pub(super) fn remove_scope(&mut self, label: &str, property: &str) {
let key = ScopedPropertyKey::new(label, property);
if let Some(scope) = self.by_scope.get_mut(&key) {
scope.refcount = scope.refcount.saturating_sub(1);
if scope.refcount == 0 {
self.by_scope.remove(&key);
}
}
}
#[cfg(test)]
pub(super) fn has_scope(&self, label: &str, property: &str) -> bool {
self.by_scope
.contains_key(&ScopedPropertyKey::new(label, property))
}
pub(super) fn insert(&mut self, label: &str, property: &str, id: u64, value: &str) {
if let Some(scope) = self
.by_scope
.get_mut(&ScopedPropertyKey::new(label, property))
{
scope.insert(id, value);
}
}
#[cfg(test)]
pub(super) fn remove(&mut self, label: &str, property: &str, id: u64, value: &str) {
if let Some(scope) = self
.by_scope
.get_mut(&ScopedPropertyKey::new(label, property))
{
scope.remove(id, value);
}
}
pub(super) fn update(
&mut self,
label: &str,
property: &str,
id: u64,
old: Option<&str>,
new: Option<&str>,
) {
let Some(scope) = self
.by_scope
.get_mut(&ScopedPropertyKey::new(label, property))
else {
return;
};
if let Some(old) = old {
scope.remove(id, old);
}
if let Some(new) = new {
scope.insert(id, new);
}
}
pub(super) fn candidates(
&self,
label: &str,
property: &str,
query: &str,
) -> Option<BTreeSet<u64>> {
let scope = self
.by_scope
.get(&ScopedPropertyKey::new(label, property))?;
scope.candidates(query)
}
}
impl TrigramScope {
fn insert(&mut self, id: u64, value: &str) {
for tri in trigrams(value) {
self.grams.entry(tri).or_default().insert(id);
}
}
fn remove(&mut self, id: u64, value: &str) {
for tri in trigrams(value) {
if let Some(set) = self.grams.get_mut(&tri) {
set.remove(&id);
if set.is_empty() {
self.grams.remove(&tri);
}
}
}
}
fn candidates(&self, query: &str) -> Option<BTreeSet<u64>> {
let mut grams: Vec<[u8; 3]> = trigrams(query).collect();
if grams.is_empty() {
return None;
}
grams.sort_by_key(|tri| self.grams.get(tri).map(BTreeSet::len).unwrap_or(usize::MAX));
let first = self.grams.get(&grams[0])?;
let mut out = first.clone();
for tri in &grams[1..] {
let next = self.grams.get(tri)?;
out.retain(|id| next.contains(id));
if out.is_empty() {
return Some(out);
}
}
Some(out)
}
}
fn trigrams(value: &str) -> impl Iterator<Item = [u8; 3]> + '_ {
let bytes = value.as_bytes();
(0..bytes.len().saturating_sub(2)).map(move |i| {
let window = &bytes[i..i + 3];
[window[0], window[1], window[2]]
})
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn empty_query_returns_none() {
let mut reg = TrigramRegistry::default();
reg.add_scope("Person", "name");
reg.insert("Person", "name", 1, "Alice");
assert!(reg.candidates("Person", "name", "ab").is_none());
}
#[test]
fn substring_match_intersects_trigrams() {
let mut reg = TrigramRegistry::default();
reg.add_scope("Person", "name");
reg.insert("Person", "name", 1, "Alexandra");
reg.insert("Person", "name", 2, "Alexander");
reg.insert("Person", "name", 3, "Bob");
let candidates = reg.candidates("Person", "name", "Alex").unwrap();
assert!(candidates.contains(&1));
assert!(candidates.contains(&2));
assert!(!candidates.contains(&3));
}
#[test]
fn remove_after_insert_clears_buckets() {
let mut reg = TrigramRegistry::default();
reg.add_scope("Person", "name");
reg.insert("Person", "name", 1, "Alice");
reg.remove("Person", "name", 1, "Alice");
assert!(reg
.candidates("Person", "name", "Alic")
.map(|s| s.is_empty())
.unwrap_or(true));
}
#[test]
fn refcount_keeps_scope_until_last_remove() {
let mut reg = TrigramRegistry::default();
assert!(reg.add_scope("Person", "name"));
assert!(!reg.add_scope("Person", "name")); reg.insert("Person", "name", 1, "Alice");
reg.remove_scope("Person", "name");
assert!(reg.has_scope("Person", "name"));
reg.remove_scope("Person", "name");
assert!(!reg.has_scope("Person", "name"));
}
}