kitt_score 0.1.0

Decision engine at the core of Project KITT — in-memory stateful matching with pluggable scoring backends.
Documentation
//! Bi-directional string interner.
//!
//! Interning converts strings into small integer IDs. We use it three times
//! in `Schema`:
//!
//! - Event-kind names (`KindId`)
//! - Attribute names (`AttrId`)
//! - Enum-coded string values (`EnumStrCode`)
//!
//! The interner is append-only during `SchemaBuilder` construction and frozen
//! thereafter. This matters: the frozen invariant means we can hand out `&str`
//! references with the interner's lifetime, and callers never observe stale
//! IDs.
//!
//! Design note: We do _not_ use a generic "interner" crate (`lasso`,
//! `string_interner`) because our usage is tiny and single-lifetime — bringing
//! in a crate for this would obscure the one behavior we actually care about.

use ahash::AHashMap;
use std::marker::PhantomData;

/// A string → `Id` table built at schema construction time and read-only after.
///
/// Raw IDs are dense `u32` positions into the table: the first distinct
/// string interned gets raw ID 0, the next gets 1, and so on. The typed `Id`
/// handed to callers is produced with `Id::from(raw)` and turned back into a
/// table position with `Id::into()`.
#[derive(Debug)]
pub struct Interner<Id: Copy + From<u32> + Into<u32>> {
    /// Interned strings; a string's position is its raw ID.
    table: Vec<String>,
    /// Reverse map from string to raw ID; used for duplicate detection in
    /// `intern` and for `get`.
    index: AHashMap<String, u32>,
    /// Ties the interner to a single ID type without storing an `Id`.
    _marker: PhantomData<fn() -> Id>,
}

impl<Id: Copy + From<u32> + Into<u32>> Interner<Id> {
    /// Create a new empty interner.
    pub fn new() -> Self {
        Self {
            table: Vec::new(),
            index: AHashMap::new(),
            _marker: PhantomData,
        }
    }

    /// Insert a string, returning its ID. Duplicate inserts return the existing ID.
    ///
    /// # Panics
    ///
    /// Panics if the number of interned strings no longer fits in a `u32`, or
    /// if the next raw ID does not survive a round trip through `Id`
    /// (`Id::from(raw).into() != raw`). The second case happens when `Id` is
    /// narrower than `u32` and its range is exhausted; without the check the
    /// new string would receive an ID that resolves to a different entry.
    #[allow(clippy::expect_used)]
    pub fn intern(&mut self, s: &str) -> Id {
        if let Some(&id) = self.index.get(s) {
            return Id::from(id);
        }
        let raw = u32::try_from(self.table.len())
            .expect("interner overflowed u32 — schemas should be small");
        let id = Id::from(raw);
        // `lookup` indexes the table with `id.into()`, so the typed ID must
        // map back to exactly this position. Verify before mutating anything
        // so a failed insert leaves the table and index untouched.
        assert!(
            Into::<u32>::into(id) == raw,
            "interner ID {} does not fit the ID type — schemas should be small",
            raw
        );
        self.table.push(s.to_owned());
        self.index.insert(s.to_owned(), raw);
        id
    }

    /// Look up a string's ID without inserting. `None` if unknown.
    pub fn get(&self, s: &str) -> Option<Id> {
        self.index.get(s).copied().map(Id::from)
    }

    /// Resolve an ID back to its string. Panics on an invalid ID — interner
    /// IDs are obtained from this interner, so an invalid one is a bug.
    ///
    /// # Panics
    ///
    /// Panics if `id` converts to a position at or beyond `len()`.
    #[must_use]
    pub fn lookup(&self, id: Id) -> &str {
        let idx: u32 = id.into();
        &self.table[idx as usize]
    }

    /// Return the number of interned strings.
    #[allow(clippy::len_without_is_empty)]
    pub fn len(&self) -> usize {
        self.table.len()
    }
}

impl<Id: Copy + From<u32> + Into<u32>> Default for Interner<Id> {
    fn default() -> Self {
        Self::new()
    }
}

// Conversion glue for the concrete ID types we use with this interner.
#[allow(clippy::cast_possible_truncation)]
impl From<u32> for crate::KindId {
    fn from(v: u32) -> Self {
        Self(v as u16)
    }
}
impl From<crate::KindId> for u32 {
    fn from(v: crate::KindId) -> Self {
        Self::from(v.0)
    }
}
#[allow(clippy::cast_possible_truncation)]
impl From<u32> for crate::AttrId {
    fn from(v: u32) -> Self {
        Self(v as u16)
    }
}
impl From<crate::AttrId> for u32 {
    fn from(v: crate::AttrId) -> Self {
        Self::from(v.0)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::AttrId;

    /// Interning the same string twice yields one ID and one table entry;
    /// a different string gets a different ID.
    #[test]
    fn interns_and_dedupes() {
        let mut interner = Interner::<AttrId>::new();
        let first = interner.intern("audience");
        let repeat = interner.intern("audience");
        let other = interner.intern("dwell");
        assert_eq!(first, repeat);
        assert_ne!(first, other);
        assert_eq!(interner.len(), 2);
    }

    /// An ID returned by `intern` resolves back to the original string.
    #[test]
    fn lookup_round_trips() {
        let mut interner = Interner::<AttrId>::new();
        let foo_id = interner.intern("foo");
        assert_eq!(interner.lookup(foo_id), "foo");
    }

    /// `get` on an unknown string reports `None` and leaves the table empty.
    #[test]
    fn get_does_not_insert() {
        let interner: Interner<AttrId> = Interner::new();
        assert!(interner.get("missing").is_none());
        assert_eq!(interner.len(), 0);
    }
}