// kitt_score 0.1.0
//
// Decision engine at the core of Project KITT — in-memory stateful matching with pluggable scoring backends.
// Documentation
//! `SchemaBuilder`: accumulates kinds + attributes + enum-value dictionaries,
//! then consumes itself into an immutable `Arc<Schema>`.
//!
//! The consuming `build` ensures nobody can mutate the schema after it's
//! visible to the rest of the crate — the type system enforces the
//! "immutable after construction" invariant that the whole crate relies on.

use crate::schema::attr::AttrType;
use crate::schema::interner::Interner;
use crate::schema::layout::SlotLayoutBuilder;
use crate::schema::schema::Schema;
use crate::{AttrId, KindId};
use ahash::AHashMap;
use std::sync::Arc;

/// Staging area for a schema definition. Consumed by `build`.
///
/// Kinds are registered through `kind` and enum values through `enum_value`;
/// `build` then moves the interners into the resulting `Schema` and turns the
/// per-kind attribute lists into a slot layout.
pub struct SchemaBuilder {
    // Interner that assigns a `KindId` to each declared kind name.
    kind_names: Interner<KindId>,
    // Interner that assigns an `AttrId` to each attribute name. A single
    // interner serves every kind, so attribute names share one ID space.
    attr_names: Interner<AttrId>,
    // For each kind, the declared (AttrId, AttrType) pairs in order.
    // Indexed by the numeric value of the kind's `KindId`.
    kind_attrs: Vec<Vec<(AttrId, AttrType)>>,
    // Value-string interner: every EnumStr value is assigned a u32 code here.
    value_interner: ValueInterner,
}

/// Interner for enum value strings.
///
/// Kept as its own type so that value codes are visibly a different ID space
/// from attribute-name interning wherever they show up in a signature.
#[derive(Default, Debug)]
pub struct ValueInterner {
    // Code -> string; a value's code is its position in this vector.
    table: Vec<String>,
    // String -> code; the reverse direction of `table`.
    index: AHashMap<String, u32>,
}

impl ValueInterner {
    /// Return the code for `s`, assigning the next free code the first time a
    /// string is seen. Repeated calls with the same string return the same code.
    ///
    /// # Panics
    ///
    /// Panics if the number of distinct values no longer fits in a `u32`.
    #[allow(clippy::expect_used)] // bounded by Vec length; overflow means schema bug, not runtime data
    pub fn intern(&mut self, s: &str) -> u32 {
        self.get(s).unwrap_or_else(|| {
            // Unknown string: its code is the slot it is about to occupy.
            let fresh = u32::try_from(self.table.len()).expect("value interner overflow");
            let owned = s.to_owned();
            self.index.insert(owned.clone(), fresh);
            self.table.push(owned);
            fresh
        })
    }

    /// Return the code already assigned to `s`, or `None` if `s` was never
    /// interned. Never inserts.
    #[must_use]
    pub fn get(&self, s: &str) -> Option<u32> {
        let code = self.index.get(s)?;
        Some(*code)
    }

    /// Map a code back to the string it was assigned to.
    ///
    /// # Panics
    ///
    /// Panics if `id` is not a code this interner has handed out.
    #[must_use]
    #[allow(clippy::cast_possible_truncation)] // id came from our own interner, bounded by Vec length
    pub fn lookup(&self, id: u32) -> &str {
        let position = id as usize;
        self.table[position].as_str()
    }

    /// Number of distinct values interned so far.
    #[must_use]
    #[allow(clippy::len_without_is_empty)] // a read-only interner has no meaningful is_empty semantics
    pub fn len(&self) -> usize {
        let Self { table, .. } = self;
        table.len()
    }
}

impl SchemaBuilder {
    /// Create a new empty `SchemaBuilder`.
    #[must_use]
    pub fn new() -> Self {
        Self {
            kind_names: Interner::new(),
            attr_names: Interner::new(),
            kind_attrs: Vec::new(),
            value_interner: ValueInterner::default(),
        }
    }

    /// Declare an event kind with its attribute list. The order of `attrs`
    /// determines slot layout within the kind.
    #[must_use]
    pub fn kind(&mut self, name: &str, attrs: &[(&str, AttrType)]) -> KindId {
        let kid = self.kind_names.intern(name);
        let mut resolved = Vec::with_capacity(attrs.len());
        for &(aname, ty) in attrs {
            let attr_id = self.attr_names.intern(aname);
            resolved.push((attr_id, ty));
        }
        // Grow kind_attrs to fit the index. Kinds should be declared once, so
        // the usual case is push.
        while self.kind_attrs.len() <= usize::from(kid.0) {
            self.kind_attrs.push(Vec::new());
        }
        self.kind_attrs[usize::from(kid.0)] = resolved;
        kid
    }

    /// Pre-register an enum value so its code is stable across startups. Call
    /// this in a fixed order for every `EnumStr` value that should have a
    /// predictable u32 code.
    #[must_use]
    pub fn enum_value(&mut self, s: &str) -> u32 {
        self.value_interner.intern(s)
    }

    /// Consume the builder and produce a frozen `Schema`.
    #[must_use]
    pub fn build(self) -> Arc<Schema> {
        let mut layout = SlotLayoutBuilder::new();
        for attrs in &self.kind_attrs {
            layout.push_kind(attrs);
        }
        Arc::new(Schema {
            kind_names: self.kind_names,
            attr_names: self.attr_names,
            value_interner: self.value_interner,
            slot_layout: layout.build(),
        })
    }
}

/// `Default` is provided so the builder works with `..Default::default()` and
/// generic code; it is equivalent to calling `SchemaBuilder::new()`.
impl Default for SchemaBuilder {
    /// Return an empty builder by delegating to `new`.
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]
    use super::*;

    /// A kind declared on the builder can be resolved through the built
    /// schema, and its first attribute lands at offset 0.
    #[test]
    fn build_produces_resolvable_schema() {
        let mut builder = SchemaBuilder::new();
        let declared = [("male_frac", AttrType::F32), ("dwell_secs", AttrType::Int)];
        let audience = builder.kind("audience", &declared);

        let schema = builder.build();
        let male_frac = schema.attr_names.get("male_frac").unwrap();
        let slot = schema.slot_layout.resolve(audience, male_frac).unwrap();

        assert_eq!(slot.offset, 0);
    }

    /// Registering the same enum value twice yields the same code, and
    /// different values get different codes.
    #[test]
    fn enum_values_get_stable_codes() {
        let mut builder = SchemaBuilder::new();
        let first = builder.enum_value("young_male");
        let second = builder.enum_value("young_female");
        let repeat = builder.enum_value("young_male");

        assert_eq!(first, repeat);
        assert_ne!(first, second);
    }
}