scepter 0.1.5

Composable primitives for planet-scale time-series routing, indexing, and aggregation.
Documentation
use std::collections::HashMap;

use roaring::RoaringBitmap;

use crate::{ExcerptStrategy, FieldPredicate};

/// Field-hint index backed by Roaring bitmaps for dense numeric child IDs.
///
/// This is useful when child shards can be represented as stable `u32`
/// ordinals and candidate sets are dense enough to benefit from compressed
/// bitmap operations. The generic [`crate::FieldHintIndex`] is still preferable
/// when callers need arbitrary child identifiers or tiny singleton postings.
#[derive(Debug, Clone)]
pub struct NumericFieldHintIndex {
    strategy: ExcerptStrategy,
    hints: HashMap<String, HashMap<String, HashMap<String, RoaringBitmap>>>,
    hint_count: usize,
    postings: u64,
}

impl Default for NumericFieldHintIndex {
    fn default() -> Self {
        Self {
            strategy: ExcerptStrategy::Trigram,
            hints: HashMap::new(),
            hint_count: 0,
            postings: 0,
        }
    }
}

impl NumericFieldHintIndex {
    /// Creates a trigram numeric field-hint index.
    pub fn new() -> Self {
        Self::default()
    }

    /// Creates a numeric field-hint index with a custom excerpt strategy.
    pub fn with_strategy(strategy: ExcerptStrategy) -> Self {
        Self {
            strategy,
            hints: HashMap::new(),
            hint_count: 0,
            postings: 0,
        }
    }

    /// Returns the excerpt strategy used by this index.
    pub fn strategy(&self) -> &ExcerptStrategy {
        &self.strategy
    }

    /// Returns true when the index contains no hints.
    pub fn is_empty(&self) -> bool {
        self.hint_count == 0
    }

    /// Returns the number of distinct field excerpts in the index.
    pub fn hint_count(&self) -> usize {
        self.hint_count
    }

    /// Returns the total number of unique child postings across all hints.
    pub fn posting_count(&self) -> u64 {
        self.postings
    }

    /// Indexes all excerpts extracted from `value` for `child`.
    pub fn insert_value(&mut self, schema: &str, field: &str, value: &str, child: u32) {
        for excerpt in self.strategy.excerpts(value) {
            self.insert_excerpt(schema, field, excerpt, child);
        }
    }

    /// Indexes one explicit excerpt for `child`.
    pub fn insert_excerpt(
        &mut self,
        schema: &str,
        field: &str,
        excerpt: impl AsRef<str> + Into<String>,
        child: u32,
    ) {
        let posting_list = self
            .hints
            .entry(schema.to_owned())
            .or_default()
            .entry(field.to_owned())
            .or_default()
            .entry(excerpt.into())
            .or_default();

        let was_empty = posting_list.is_empty();
        if posting_list.insert(child) {
            self.postings += 1;
        }
        if was_empty {
            self.hint_count += 1;
        }
    }

    /// Returns children that may match `predicate`.
    pub fn candidates(
        &self,
        schema: &str,
        field: &str,
        predicate: &FieldPredicate,
    ) -> RoaringBitmap {
        let excerpts = predicate.excerpts(&self.strategy);
        if excerpts.is_empty() {
            return RoaringBitmap::new();
        }

        let mut posting_lists = Vec::with_capacity(excerpts.len());
        for excerpt in &excerpts {
            let Some(children) = self.lookup(schema, field, excerpt) else {
                return RoaringBitmap::new();
            };
            posting_lists.push(children);
        }

        posting_lists.sort_unstable_by_key(|children| children.len());
        let Some((smallest, rest)) = posting_lists.split_first() else {
            return RoaringBitmap::new();
        };

        let mut result = (*smallest).clone();
        for children in rest {
            result &= *children;
        }
        result
    }

    /// Returns the union of candidates for several predicates.
    pub fn candidate_union(
        &self,
        schema: &str,
        field: &str,
        predicates: &[FieldPredicate],
    ) -> RoaringBitmap {
        let mut result = RoaringBitmap::new();
        for predicate in predicates {
            result |= self.candidates(schema, field, predicate);
        }
        result
    }

    fn lookup(&self, schema: &str, field: &str, excerpt: &str) -> Option<&RoaringBitmap> {
        self.hints.get(schema)?.get(field)?.get(excerpt)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn compressed_index_intersects_candidates() {
        let mut index = NumericFieldHintIndex::new();
        index.insert_value("ComputeTask", "job", "monarch", 1);
        index.insert_value("ComputeTask", "job", "monitor", 2);

        let candidates = index.candidates(
            "ComputeTask",
            "job",
            &FieldPredicate::Equals("monarch".to_owned()),
        );

        assert!(candidates.contains(1));
        assert!(!candidates.contains(2));
    }

    #[test]
    fn compressed_index_deduplicates_postings() {
        let mut index = NumericFieldHintIndex::with_strategy(ExcerptStrategy::Full);
        index.insert_value("Metric", "name", "latency", 7);
        index.insert_value("Metric", "name", "latency", 7);

        assert_eq!(index.hint_count(), 1);
        assert_eq!(index.posting_count(), 1);
    }
}