kiromi-ai-memory 0.2.2

Local-first multi-tenant memory store engine: Markdown/text content on object storage, metadata in SQLite, plugin-shaped embedder/storage/metadata, hybrid text+vector search.
Documentation
// SPDX-License-Identifier: Apache-2.0 OR MIT
//! Public `Query` for `Memory::search`.
//!
//! Three modes:
//! - `Query::semantic(text)` — top-K cosine over vector indices.
//! - `Query::text(text)` — Tantivy parsed query over lexical indices.
//! - `Query::hybrid(text)` — RRF fusion of the two; alpha-weighted.

use crate::memory::{MemoryId, MemoryRef};
use crate::partition::PartitionPath;

/// Search mode: selects which index family (or both) answers a [`Query`].
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq)]
#[non_exhaustive]
pub enum QueryMode {
    /// Semantic only: top-K cosine over vector indices.
    Semantic,
    /// Lexical only: Tantivy parsed query over lexical indices.
    Text,
    /// Reciprocal-rank fusion of both. `alpha` weights the semantic side; the
    /// text side gets `1 - alpha`. Default `0.6`.
    Hybrid {
        /// Semantic weight in `[0, 1]`. [`Query::alpha`] clamps its argument
        /// into this range; the variant itself performs no validation.
        alpha: f32,
    },
}

/// A search query.
///
/// Built with one of the mode constructors ([`Query::semantic`],
/// [`Query::text`], [`Query::hybrid`]) and refined with the chainable builder
/// methods. Fields are crate-private; outside callers read them through the
/// accessor methods.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct Query {
    /// Query text exactly as handed to the constructor.
    pub(crate) text: String,
    /// Search mode chosen by the constructor.
    pub(crate) mode: QueryMode,
    /// Partition restriction set by [`Query::within`]; `None` when unset.
    pub(crate) within: Option<PartitionPath>,
    /// Caller-supplied query vector set by [`Query::with_embedding`]; `None`
    /// when the caller did not provide one.
    pub(crate) precomputed_embedding: Option<Vec<f32>>,
    /// Plan 10: opt-in hierarchical descent.
    pub(crate) hierarchical: bool,
    /// Plan 10: minimum cosine score (after dedup) for a child node to be
    /// descended into during hierarchical search. `None` means no pruning.
    pub(crate) prune_threshold: Option<f32>,
    /// Plan 10: at each level, descend into top `k * descend_factor` children.
    pub(crate) descend_factor: u32,
}

impl Query {
    /// Semantic weight a fresh [`Query::hybrid`] starts with.
    const DEFAULT_HYBRID_ALPHA: f32 = 0.6;

    /// Beam multiplier every constructor starts with.
    const DEFAULT_DESCEND_FACTOR: u32 = 4;

    /// Shared constructor: a query in `mode` with no partition scope, no
    /// precomputed embedding, flat (non-hierarchical) search, no pruning and
    /// the default descend factor.
    fn with_mode(text: String, mode: QueryMode) -> Self {
        Self {
            text,
            mode,
            within: None,
            precomputed_embedding: None,
            hierarchical: false,
            prune_threshold: None,
            descend_factor: Self::DEFAULT_DESCEND_FACTOR,
        }
    }

    /// Pure semantic search.
    #[must_use]
    pub fn semantic(text: impl Into<String>) -> Self {
        Self::with_mode(text.into(), QueryMode::Semantic)
    }

    /// Pure lexical search.
    #[must_use]
    pub fn text(text: impl Into<String>) -> Self {
        Self::with_mode(text.into(), QueryMode::Text)
    }

    /// Hybrid (RRF) search; default `alpha = 0.6`.
    #[must_use]
    pub fn hybrid(text: impl Into<String>) -> Self {
        let mode = QueryMode::Hybrid {
            alpha: Self::DEFAULT_HYBRID_ALPHA,
        };
        Self::with_mode(text.into(), mode)
    }

    /// Plan 10: opt into hierarchical descent. The search starts at the
    /// configured scope (or the tenant root if unset) and descends through
    /// internal nodes, scoring child summaries at each level and pruning
    /// branches whose top scores fall below [`Query::prune_threshold`].
    /// Leaves are searched only after the parent's index has identified
    /// them as relevant. Defaults to flat (whole-tree) search when off.
    #[must_use]
    pub fn hierarchical(mut self) -> Self {
        self.hierarchical = true;
        self
    }

    /// Plan 10: prune children whose mid-level summary score is below `t`.
    /// Default is no pruning — every child whose score made the top-K of the
    /// parent's index is descended into. Clamped to `[-1, 1]` (cosine range).
    ///
    /// A NaN `t` is ignored and the current setting is kept: `f32::clamp`
    /// would hand NaN straight back, and a NaN threshold cannot be ordered
    /// against any score.
    #[must_use]
    pub fn prune_threshold(mut self, t: f32) -> Self {
        if !t.is_nan() {
            self.prune_threshold = Some(t.clamp(-1.0, 1.0));
        }
        self
    }

    /// Plan 10: at each internal level, descend into the top
    /// `k * descend_factor` children. Higher = wider beam = slower but more
    /// recall; lower = narrower beam = faster but more risk of pruning the
    /// right answer. Default 4. Clamped to `>= 1`.
    #[must_use]
    pub fn descend_factor(mut self, n: u32) -> Self {
        self.descend_factor = n.max(1);
        self
    }

    /// Whether this query is in hierarchical mode.
    #[must_use]
    pub fn is_hierarchical(&self) -> bool {
        self.hierarchical
    }

    /// Override hybrid alpha, clamped to `[0, 1]`. No effect on non-hybrid
    /// queries.
    ///
    /// A NaN `alpha` is ignored and the current weight is kept: `f32::clamp`
    /// would hand NaN straight back, and NaN is not a weight in `[0, 1]`.
    #[must_use]
    pub fn alpha(mut self, alpha: f32) -> Self {
        if alpha.is_nan() {
            return self;
        }
        if let QueryMode::Hybrid { alpha: weight } = &mut self.mode {
            *weight = alpha.clamp(0.0, 1.0);
        }
        self
    }

    /// Restrict the search to a partition (and its descendants).
    #[must_use]
    pub fn within(mut self, path: PartitionPath) -> Self {
        self.within = Some(path);
        self
    }

    /// Caller hands the engine a pre-computed query vector. Used by
    /// caller-owned-models pathways (Apple Foundation Models, OpenAI proxies,
    /// Swift FFI consumers) where the model runs outside the library on the
    /// query side.
    ///
    /// When set, the engine bypasses its `Embedder` (if any) for the query
    /// step. The vector's length must match `schema_meta.embedder_dims`;
    /// mismatch surfaces from the underlying vector index. Has no effect on
    /// `QueryMode::Text` (lexical search ignores the vector).
    ///
    /// The vector is stored as given; this method performs no length or
    /// value validation of its own.
    ///
    /// See spec § 12 (caller-owned models) and § 12.13.
    #[must_use]
    pub fn with_embedding(mut self, vector: Vec<f32>) -> Self {
        self.precomputed_embedding = Some(vector);
        self
    }

    /// Borrow the precomputed query vector, if any. `None` means the engine
    /// must invoke its configured `Embedder` to derive the query vector.
    #[must_use]
    pub fn precomputed_embedding(&self) -> Option<&[f32]> {
        self.precomputed_embedding.as_deref()
    }

    /// Borrow the query text.
    #[must_use]
    pub fn text_str(&self) -> &str {
        &self.text
    }

    /// Borrow the mode.
    #[must_use]
    pub fn mode(&self) -> &QueryMode {
        &self.mode
    }

    /// Borrow the partition restriction (if any).
    #[must_use]
    pub fn scope(&self) -> Option<&PartitionPath> {
        self.within.as_ref()
    }
}

/// One hit in a search result: a memory reference plus its score.
///
/// Score is mode-specific:
/// - semantic: cosine similarity in `[-1, 1]` (mock embedder is unit-norm so usually `[0, 1]`).
/// - text: Tantivy BM25-ish raw score.
/// - hybrid: RRF score (sum of `alpha / (60 + rank)`).
///
/// NOTE(review): `QueryMode::Hybrid` documents the text side as weighted by
/// `1 - alpha`; the hybrid formula above mentions only `alpha` — confirm
/// against the fusion code.
///
/// Because the scale differs per mode, scores are presumably only comparable
/// between hits of the same query — TODO confirm.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
#[non_exhaustive]
pub struct SearchHit {
    /// Reference to the matched memory (its id and partition).
    pub r#ref: MemoryRef,
    /// Score (higher = better).
    pub score: f32,
}

impl SearchHit {
    /// Build a hit from the memory's `id`, the `partition` it lives in, and
    /// its `score`. The id and partition are bundled into a [`MemoryRef`].
    #[must_use]
    pub fn new(id: MemoryId, partition: PartitionPath, score: f32) -> Self {
        let target = MemoryRef { id, partition };
        Self {
            r#ref: target,
            score,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the semantic weight out of a hybrid query; any other mode fails
    /// the calling test.
    fn hybrid_weight(query: &Query) -> f32 {
        if let QueryMode::Hybrid { alpha } = query.mode() {
            *alpha
        } else {
            panic!("expected hybrid")
        }
    }

    /// Out-of-range weights are pulled back to the nearest bound of `[0, 1]`.
    #[test]
    fn hybrid_clamps_alpha() {
        let too_high = Query::hybrid("x").alpha(2.0);
        assert!((hybrid_weight(&too_high) - 1.0).abs() < f32::EPSILON);

        let too_low = Query::hybrid("x").alpha(-0.5);
        assert!(hybrid_weight(&too_low).abs() < f32::EPSILON);
    }

    /// Setting a weight on a semantic query leaves its mode untouched.
    #[test]
    fn alpha_on_non_hybrid_is_noop() {
        let query = Query::semantic("x").alpha(0.1);
        let still_semantic = matches!(query.mode(), QueryMode::Semantic);
        assert!(still_semantic);
    }
}