#![allow(dead_code)]
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use parking_lot::RwLock;
use crate::index::{SequenceNumber, VersionPointer};
/// Thread-safe in-memory inverted index: maps each lowercase token to the
/// set of document keys containing it, remembering for every key the
/// highest-sequence `VersionPointer` that has been indexed so far.
///
/// `Clone` is cheap: clones share the same `Arc`-wrapped map.
#[derive(Clone, Default)]
pub struct FullTextSidecar {
// token -> (document key -> newest VersionPointer recorded for that key)
inner: Arc<RwLock<HashMap<String, HashMap<Vec<u8>, VersionPointer>>>>,
}
impl FullTextSidecar {
    /// Creates an empty sidecar index.
    pub fn new() -> Self {
        Self {
            inner: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    /// Tokenises `text` and records `pointer` under `key` for every distinct
    /// token.
    ///
    /// For a (token, key) pair that is already indexed, the stored pointer is
    /// replaced only when `pointer.sequence` is strictly newer, so a stale
    /// writer can never clobber a more recent version.
    pub fn index_document(&self, text: &str, key: Vec<u8>, pointer: VersionPointer) {
        // Dedupe tokens up front: indexing the same token twice for one
        // document is a no-op, so the repeated postings-map work is wasted.
        let tokens: HashSet<String> = FullTextSidecar::tokenise(text).into_iter().collect();
        let mut guard = self.inner.write();
        for token in tokens {
            let postings = guard.entry(token).or_default();
            // Single-lookup upsert via the entry API; the previous
            // `get_mut` + `insert` pair hashed the key twice on the
            // vacant path (clippy: map_entry).
            postings
                .entry(key.clone())
                .and_modify(|existing| {
                    if existing.sequence < pointer.sequence {
                        *existing = pointer.clone();
                    }
                })
                .or_insert_with(|| pointer.clone());
        }
    }

    /// Returns the keys of documents that contain **all** of `terms` and are
    /// visible at `snapshot` (per `VersionPointer::is_visible_at`).
    ///
    /// An empty `terms` slice yields no results. Result ordering is
    /// unspecified (keys come out of a `HashSet`).
    pub fn search(&self, terms: &[String], snapshot: SequenceNumber) -> Vec<Vec<u8>> {
        // split_first also covers the empty-terms case, removing the
        // previously unreachable `None` arm after the is_empty() check.
        let (first, rest) = match terms.split_first() {
            Some(pair) => pair,
            None => return Vec::new(),
        };
        let guard = self.inner.read();
        // Seed the candidate set from the first term, borrowing the postings
        // map in place (the old code deep-cloned the whole map first).
        let mut keys: HashSet<Vec<u8>> = match guard.get(first.as_str()) {
            Some(postings) => postings
                .iter()
                .filter(|(_, pointer)| pointer.is_visible_at(snapshot))
                .map(|(key, _)| key.clone())
                .collect(),
            None => return Vec::new(),
        };
        // Intersect with each remaining term. `retain` + `get` performs the
        // intersection with O(|keys|) lookups instead of materialising a
        // second HashSet per term, and we bail out as soon as the candidate
        // set can no longer produce a hit.
        for term in rest {
            if keys.is_empty() {
                return Vec::new();
            }
            let postings = match guard.get(term.as_str()) {
                Some(postings) => postings,
                None => return Vec::new(),
            };
            keys.retain(|key| {
                postings
                    .get(key)
                    .map_or(false, |pointer| pointer.is_visible_at(snapshot))
            });
        }
        keys.into_iter().collect()
    }

    /// Splits `text` on non-alphanumeric characters, drops empty fragments,
    /// and lowercases each token. Duplicates are preserved; callers dedupe
    /// as needed.
    fn tokenise(text: &str) -> Vec<String> {
        text.split(|c: char| !c.is_alphanumeric())
            .filter(|token| !token.is_empty())
            .map(str::to_lowercase)
            .collect()
    }
}