kitt_score 0.1.0

Decision engine at the core of Project KITT — in-memory stateful matching with pluggable scoring backends.
Documentation
//! Attribute type-codes and runtime values.
//!
//! The spec (§3.2, §4.2) mandates these value categories: integer, float,
//! enum-coded string, raw-interned string, and arrays of each. The encoding
//! choices here are optimized for the scoring hot path:
//!
//! - Numeric values go into fixed-width slots (i64, f32, f64) for SIMD friendliness.
//! - Categorical strings become `u32` enum codes, resolved via the Schema's
//!   string interner at `StateUpdate` apply time. Scoring sees `u32`s only.
//! - Array types store an (offset, length) pair in their slot; the elements
//!   live in a per-location side buffer (see `location::state`). Keeping arrays
//!   out of the main fixed-layout buffer avoids variable-size slots in the hot
//!   path.
//!
//! Reference: this tiered encoding (fixed-width hot path, dictionary-coded
//! strings, side buffer for variable-length) mirrors the physical layout of
//! Apache Arrow record batches [Arrow specification, arrow.apache.org].

/// Runtime cap on the dimension of an `F32Arr` embedding written into a
/// location buffer. Writes beyond this cap are rejected by `apply_update`.
///
/// Why 512? Production embedding models in 2025–2026 natively emit anywhere
/// from a few hundred to 1536 dimensions, and the larger ones are commonly
/// truncated before storage (`OpenAI` text-embedding-3-small: 1536,
/// truncatable to 512; Cohere embed-v3-english: 1024, truncatable to 256/384;
/// `SentenceTransformers` models: typically 384 or 768). 512 covers the 95th
/// percentile and bounds worst-case per-location memory at 512 × 4 = 2 KiB per
/// array (one `f32` is 4 bytes). Callers that need larger dims can contribute
/// a PR bumping this constant; it is a deliberate guardrail, not an
/// architectural limit.
pub const MAX_EMBEDDING_DIM: usize = 512;

/// Storage type tag assigned to each (`KindId`, `AttrId`) pair.
///
/// The tag is "compile-time-like": it is an ordinary runtime value, but it
/// determines the width and alignment of the attribute's slot in the
/// per-location buffer (see [`AttrType::slot_width`] and
/// [`AttrType::slot_align`]).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum AttrType {
    /// Signed 64-bit integer slot.
    Int,
    /// Single-precision (32-bit) float slot.
    F32,
    /// Double-precision (64-bit) float slot.
    F64,
    /// 32-bit enum code; turned back into a `&str` through the Schema's value
    /// interner.
    EnumStr,
    /// 32-bit interner code, intended for the rare free-form string.
    RawStr,
    /// Array of `i64` values (length-prefixed).
    IntArr,
    /// Array of `f32` values (length-prefixed).
    F32Arr,
    /// Array of `u32` enum codes (length-prefixed).
    EnumStrArr,
}

impl AttrType {
    /// Number of bytes this type occupies inline in the per-location buffer.
    ///
    /// Scalars take their natural size. Array types do not store their
    /// elements inline: the slot holds a `(u32 offset, u32 len)` descriptor,
    /// 8 bytes in total, that points into a side buffer.
    #[must_use]
    pub const fn slot_width(self) -> usize {
        // Named widths keep the match exhaustive (no catch-all arm) while
        // making it obvious why each group has the size it has.
        const NARROW_SCALAR: usize = 4; // f32 / u32 code
        const WIDE_SCALAR: usize = 8; // i64 / f64
        const ARRAY_DESCRIPTOR: usize = 8; // (u32 offset, u32 len)

        match self {
            Self::F32 | Self::EnumStr | Self::RawStr => NARROW_SCALAR,
            Self::Int | Self::F64 => WIDE_SCALAR,
            Self::IntArr | Self::F32Arr | Self::EnumStrArr => ARRAY_DESCRIPTOR,
        }
    }

    /// Alignment, in bytes, that the slot needs inside the per-location buffer.
    ///
    /// Every slot is aligned to its own width. The value is spelled out here
    /// instead of being taken from `std::mem::align_of` so that the buffer can
    /// be laid out by hand (see `schema::layout`).
    #[must_use]
    pub const fn slot_align(self) -> usize {
        Self::slot_width(self)
    }
}

/// Borrowed value carried by an event.
///
/// `AttrSet` is a sequence of `(AttrId, Value<'a>)` pairs. The engine applies
/// each pair to the location's buffer according to `AttrType`.
///
/// Every variant is either a plain scalar or a borrow tied to `'a`, which is
/// what allows the type to derive `Copy`: duplicating a `Value` never copies
/// the string or array data it refers to. Only `PartialEq` (not `Eq`) is
/// derived because the float variants have no total equality. For data that
/// must outlive the borrow, see [`OwnedValue`].
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Value<'a> {
    /// Signed integer.
    Int(i64),
    /// 32-bit float.
    F32(f32),
    /// 64-bit float.
    F64(f64),
    /// Unresolved string — the engine will intern it on apply.
    Str(&'a str),
    /// Pre-resolved enum code — when the caller wants zero-lookup writes.
    // NOTE(review): presumably the code must already be valid for the target
    // attribute's interner; validation is not visible here — confirm.
    EnumCode(u32),
    /// Array of signed integers.
    IntArr(&'a [i64]),
    /// Array of 32-bit floats.
    F32Arr(&'a [f32]),
    /// Array of strings; both the slice and each element are borrowed for `'a`.
    StrArr(&'a [&'a str]),
}

/// Owned variant for `LocationDef` reference-attribute bundles that outlive
/// the request they came from.
///
/// The variants mirror [`Value`] one-for-one, with every borrowed payload
/// replaced by its heap-owning counterpart (`&str` → `String`,
/// `&[T]` → `Vec<T>`). Because of those owned payloads the type is `Clone`
/// but not `Copy`, and cloning the string/array variants allocates.
#[derive(Clone, Debug, PartialEq)]
pub enum OwnedValue {
    /// Signed integer.
    Int(i64),
    /// 32-bit float.
    F32(f32),
    /// 64-bit float.
    F64(f64),
    /// Owned string.
    Str(String),
    /// Enum code.
    EnumCode(u32),
    /// Array of signed integers.
    IntArr(Vec<i64>),
    /// Array of 32-bit floats.
    F32Arr(Vec<f32>),
    /// Array of owned strings.
    StrArr(Vec<String>),
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Every `AttrType` variant paired with the slot width it must report.
    /// Listing all eight keeps a layout change to any variant from slipping
    /// through unnoticed.
    const EXPECTED_WIDTHS: [(AttrType, usize); 8] = [
        (AttrType::Int, 8),
        (AttrType::F32, 4),
        (AttrType::F64, 8),
        (AttrType::EnumStr, 4),
        (AttrType::RawStr, 4),
        (AttrType::IntArr, 8), // (offset, len)
        (AttrType::F32Arr, 8), // (offset, len)
        (AttrType::EnumStrArr, 8), // (offset, len)
    ];

    #[test]
    fn slot_widths() {
        for &(ty, width) in &EXPECTED_WIDTHS {
            assert_eq!(ty.slot_width(), width, "unexpected slot width for {ty:?}");
        }
    }

    #[test]
    fn slot_align_matches_width_and_is_power_of_two() {
        for &(ty, width) in &EXPECTED_WIDTHS {
            let align = ty.slot_align();
            // Slots are aligned to their own width...
            assert_eq!(align, width, "unexpected slot alignment for {ty:?}");
            // ...and a usable alignment must be a power of two.
            assert!(align.is_power_of_two(), "non-power-of-two alignment for {ty:?}");
        }
    }

    #[test]
    fn value_size_is_sane() {
        // A Value<'a> is borrowed, so it should be at most a fat-pointer + tag.
        // 32 bytes on 64-bit target is the expected upper bound.
        assert!(std::mem::size_of::<Value<'_>>() <= 32);
    }
}