use std::collections::HashMap;
use roaring::RoaringBitmap;
use crate::{ExcerptStrategy, FieldPredicate};
#[derive(Debug, Clone)]
pub struct NumericFieldHintIndex {
strategy: ExcerptStrategy,
hints: HashMap<String, HashMap<String, HashMap<String, RoaringBitmap>>>,
hint_count: usize,
postings: u64,
}
impl Default for NumericFieldHintIndex {
fn default() -> Self {
Self {
strategy: ExcerptStrategy::Trigram,
hints: HashMap::new(),
hint_count: 0,
postings: 0,
}
}
}
impl NumericFieldHintIndex {
pub fn new() -> Self {
Self::default()
}
pub fn with_strategy(strategy: ExcerptStrategy) -> Self {
Self {
strategy,
hints: HashMap::new(),
hint_count: 0,
postings: 0,
}
}
pub fn strategy(&self) -> &ExcerptStrategy {
&self.strategy
}
pub fn is_empty(&self) -> bool {
self.hint_count == 0
}
pub fn hint_count(&self) -> usize {
self.hint_count
}
pub fn posting_count(&self) -> u64 {
self.postings
}
pub fn insert_value(&mut self, schema: &str, field: &str, value: &str, child: u32) {
for excerpt in self.strategy.excerpts(value) {
self.insert_excerpt(schema, field, excerpt, child);
}
}
pub fn insert_excerpt(
&mut self,
schema: &str,
field: &str,
excerpt: impl AsRef<str> + Into<String>,
child: u32,
) {
let posting_list = self
.hints
.entry(schema.to_owned())
.or_default()
.entry(field.to_owned())
.or_default()
.entry(excerpt.into())
.or_default();
let was_empty = posting_list.is_empty();
if posting_list.insert(child) {
self.postings += 1;
}
if was_empty {
self.hint_count += 1;
}
}
pub fn candidates(
&self,
schema: &str,
field: &str,
predicate: &FieldPredicate,
) -> RoaringBitmap {
let excerpts = predicate.excerpts(&self.strategy);
if excerpts.is_empty() {
return RoaringBitmap::new();
}
let mut posting_lists = Vec::with_capacity(excerpts.len());
for excerpt in &excerpts {
let Some(children) = self.lookup(schema, field, excerpt) else {
return RoaringBitmap::new();
};
posting_lists.push(children);
}
posting_lists.sort_unstable_by_key(|children| children.len());
let Some((smallest, rest)) = posting_lists.split_first() else {
return RoaringBitmap::new();
};
let mut result = (*smallest).clone();
for children in rest {
result &= *children;
}
result
}
pub fn candidate_union(
&self,
schema: &str,
field: &str,
predicates: &[FieldPredicate],
) -> RoaringBitmap {
let mut result = RoaringBitmap::new();
for predicate in predicates {
result |= self.candidates(schema, field, predicate);
}
result
}
fn lookup(&self, schema: &str, field: &str, excerpt: &str) -> Option<&RoaringBitmap> {
self.hints.get(schema)?.get(field)?.get(excerpt)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn compressed_index_intersects_candidates() {
let mut index = NumericFieldHintIndex::new();
index.insert_value("ComputeTask", "job", "monarch", 1);
index.insert_value("ComputeTask", "job", "monitor", 2);
let candidates = index.candidates(
"ComputeTask",
"job",
&FieldPredicate::Equals("monarch".to_owned()),
);
assert!(candidates.contains(1));
assert!(!candidates.contains(2));
}
#[test]
fn compressed_index_deduplicates_postings() {
let mut index = NumericFieldHintIndex::with_strategy(ExcerptStrategy::Full);
index.insert_value("Metric", "name", "latency", 7);
index.insert_value("Metric", "name", "latency", 7);
assert_eq!(index.hint_count(), 1);
assert_eq!(index.posting_count(), 1);
}
}