// kiromi-ai-memory 0.2.2

// SPDX-License-Identifier: Apache-2.0 OR MIT
//! `Memory::search` and `Memory::related`.
//!
//! Plan 18 dispatch 2 — every search path now flows through the
//! [`crate::index::VectorIndex`] + [`crate::index::LexicalIndex`] traits,
//! which by default talk straight to the `vec0` / `fts5` virtual tables. The
//! legacy `IndexHandle` cache is no longer consulted for reads.

use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use crate::error::{Error, Result};
use crate::handle::{Memory, MemoryInner};
use crate::index::vector_trait::VectorScope;
use crate::memory::{MemoryId, MemoryRef};
use crate::partition::PartitionPath;
use crate::partition::tenant_root_path;
use crate::query::{Query, QueryMode, SearchHit};

/// Reciprocal-rank-fusion (RRF) anchor constant. `rrf_fuse` scores each
/// ranked entry as `weight / (RRF_K + rank)` with a 1-based `rank`, so a
/// larger value flattens the difference between neighbouring ranks.
/// Matches the literature's standard 60.
const RRF_K: f32 = 60.0;
/// Hybrid oversample factor: hybrid search pulls `k * HYBRID_OVERSAMPLE`
/// candidates (saturating at `u32::MAX`) per mode before fusing, so RRF has
/// a meaningful tail to merge.
const HYBRID_OVERSAMPLE: u32 = 4;

/// Turn a `vec0` cosine distance into a similarity score.
///
/// Distances rank "lower is better"; similarities rank "higher is better".
/// For the cosine metric `vec0` reports `1 - cos(a, b)`, so subtracting the
/// distance from one recovers the cosine itself. A `NaN` distance yields a
/// `NaN` similarity.
fn distance_to_similarity(d: f32) -> f32 {
    // Similarity of two identical directions (distance zero).
    const PERFECT_MATCH: f32 = 1.0;
    PERFECT_MATCH - d
}

/// Derive the [`VectorScope`] to search from a [`Query`].
///
/// A query that carries a scope is restricted to that partition prefix
/// (the scope's string form is copied into the variant); an unscoped query
/// searches the whole tenant.
fn vector_scope_for(q: &Query) -> VectorScope {
    q.scope().map_or(VectorScope::Tenant, |prefix| {
        VectorScope::PartitionPrefix(String::from(prefix.as_str()))
    })
}

/// Filter `hits` down to memories that are still live in the catalog.
///
/// Each id is looked up with `inner.metadata.get_memory`. A hit survives only
/// when a row exists and its `tombstoned` flag is not set; the surviving
/// tuple carries the row's `partition_path` alongside the original score.
/// Hits with no catalog row are dropped as well. Input order is preserved.
///
/// **Errors:** the first failing catalog lookup aborts the whole call.
async fn drop_tombstoned(
    inner: &Arc<MemoryInner>,
    hits: Vec<(MemoryId, f32)>,
) -> Result<Vec<(MemoryId, PartitionPath, f32)>> {
    let mut live = Vec::with_capacity(hits.len());
    for (id, score) in hits {
        // One catalog round-trip per hit, issued sequentially.
        match inner.metadata.get_memory(&id).await? {
            Some(row) if !row.tombstoned => live.push((id, row.partition_path, score)),
            // Missing or tombstoned: not part of the result.
            _ => {}
        }
    }
    Ok(live)
}

/// Like [`drop_tombstoned`], but for hits that already carry a partition.
///
/// Hierarchical search records the leaf path it was searching when it found
/// each hit, so the caller-supplied partition is passed through untouched
/// instead of being replaced by the catalog row's path. A hit is kept only
/// when its catalog row exists and is not tombstoned. Input order is
/// preserved.
///
/// **Errors:** the first failing catalog lookup aborts the whole call.
async fn drop_tombstoned_keep_partition(
    inner: &Arc<MemoryInner>,
    hits: Vec<(MemoryId, PartitionPath, f32)>,
) -> Result<Vec<(MemoryId, PartitionPath, f32)>> {
    let mut live = Vec::with_capacity(hits.len());
    for (id, partition, score) in hits {
        // One catalog round-trip per hit, issued sequentially.
        match inner.metadata.get_memory(&id).await? {
            Some(row) if !row.tombstoned => live.push((id, partition, score)),
            // Missing or tombstoned: not part of the result.
            _ => {}
        }
    }
    Ok(live)
}

/// Sort `v` by score (the third tuple field), best first, and keep at most
/// `k` entries.
///
/// The sort is stable, so entries with equal scores keep their incoming
/// relative order. `NaN` scores are ranked after every real score (and tie
/// with each other). Treating `NaN` as "equal to everything" — which is what
/// `partial_cmp(..).unwrap_or(Equal)` does — is not a total order, and
/// `slice::sort_by` is allowed to produce an unspecified order or panic when
/// handed one.
fn top_k(
    mut v: Vec<(MemoryId, PartitionPath, f32)>,
    k: usize,
) -> Vec<(MemoryId, PartitionPath, f32)> {
    use std::cmp::Ordering;

    v.sort_by(|a, b| match (a.2.is_nan(), b.2.is_nan()) {
        // Both scores are real numbers: plain descending comparison.
        // `partial_cmp` cannot fail here, the fallback is only for form.
        (false, false) => b.2.partial_cmp(&a.2).unwrap_or(Ordering::Equal),
        // NaN carries no ranking information; push it behind real scores.
        (true, false) => Ordering::Greater,
        (false, true) => Ordering::Less,
        (true, true) => Ordering::Equal,
    });
    v.truncate(k);
    v
}

/// RRF-fuse two ranked lists. Higher fused score wins.
///
/// Only the *position* of an id in each list matters; the incoming scores
/// are ignored. An entry at 1-based rank `r` contributes
/// `alpha / (RRF_K + r)` when it comes from `semantic` and
/// `(1 - alpha) / (RRF_K + r)` when it comes from `lexical`; contributions
/// for the same id are summed. `alpha` is used as given (no clamping).
///
/// The result is **not** sorted by score. Ids are returned in order of first
/// appearance (all of `semantic`, then ids seen only in `lexical`), which
/// makes the output — and therefore any later stable sort's tie-breaking —
/// deterministic. Collecting straight from a `HashMap` would instead yield
/// an arbitrary order that differs between runs.
fn rrf_fuse(
    semantic: &[(MemoryId, f32)],
    lexical: &[(MemoryId, f32)],
    alpha: f32,
) -> Vec<(MemoryId, f32)> {
    let capacity = semantic.len() + lexical.len();
    // `slot_of` maps an id to its position in `fused`, so `fused` keeps
    // first-seen order while updates stay O(1).
    let mut slot_of: HashMap<MemoryId, usize> = HashMap::with_capacity(capacity);
    let mut fused: Vec<(MemoryId, f32)> = Vec::with_capacity(capacity);

    for (ranked, weight) in [(semantic, alpha), (lexical, 1.0 - alpha)] {
        for (rank, (id, _)) in ranked.iter().enumerate() {
            let contribution = weight / (RRF_K + (rank as f32 + 1.0));
            let slot = *slot_of.entry(*id).or_insert_with(|| {
                fused.push((*id, 0.0));
                fused.len() - 1
            });
            fused[slot].1 += contribution;
        }
    }
    fused
}

impl Memory {
    /// Run a search and return the top `k` hits.
    ///
    /// Semantic hits flow through [`crate::index::VectorIndex::knn_memory`];
    /// lexical hits flow through [`crate::index::LexicalIndex::search_memory`].
    /// Hybrid mode RRF-fuses the two ranked lists in Rust. Hierarchical mode
    /// walks the partition tree level-by-level, scoring each node's
    /// `summary_vec` rows and pruning branches whose top hit falls below
    /// [`Query::prune_threshold`]. When a node has no summary embeddings
    /// (cold tree) the hierarchical descent falls back to walking its SQL
    /// children directly — summary freshness affects recall, not correctness.
    ///
    /// **Errors:** [`Error::Embedder`] when a configured embedder fails on
    /// the query vector; [`Error::Storage`] / [`Error::Metadata`] for
    /// backend failures.
    pub async fn search(&self, query: Query, k: usize) -> Result<Vec<SearchHit>> {
        let inner = Arc::clone(&self.inner);
        if k == 0 {
            return Ok(Vec::new());
        }

        // Hierarchical descent: only meaningful with a query vector.
        if query.is_hierarchical() {
            if matches!(query.mode(), QueryMode::Text) {
                tracing::warn!(
                    target: "kiromi-ai-memory.search",
                    "hierarchical=true with QueryMode::Text falls back to flat search; \
                     hierarchical pruning requires a query vector"
                );
            } else {
                let qvec = resolve_query_vector(&inner, &query).await?;
                let raw = hierarchical_search(&inner, &query, &qvec, k).await?;
                let live = drop_tombstoned_keep_partition(&inner, raw).await?;
                return Ok(top_k(live, k)
                    .into_iter()
                    .map(|(id, p, s)| SearchHit::new(id, p, s))
                    .collect());
            }
        }

        let scope = vector_scope_for(&query);
        let k_u32 = u32::try_from(k).unwrap_or(u32::MAX);

        let raw: Vec<(MemoryId, f32)> = match query.mode().clone() {
            QueryMode::Semantic => {
                let qvec = resolve_query_vector(&inner, &query).await?;
                inner
                    .vector_index
                    .knn_memory(&qvec, k_u32, scope, None)
                    .await?
                    .into_iter()
                    .map(|(id, d)| (id, distance_to_similarity(d)))
                    .collect()
            }
            QueryMode::Text => {
                inner
                    .lexical_index
                    .search_memory(query.text_str(), k_u32, scope)
                    .await?
            }
            QueryMode::Hybrid { alpha } => {
                let qvec = resolve_query_vector(&inner, &query).await?;
                let oversample = k_u32.saturating_mul(HYBRID_OVERSAMPLE);
                let sem_raw = inner
                    .vector_index
                    .knn_memory(&qvec, oversample, scope.clone(), None)
                    .await?;
                let sem: Vec<(MemoryId, f32)> = sem_raw
                    .into_iter()
                    .map(|(id, d)| (id, distance_to_similarity(d)))
                    .collect();
                let lex = inner
                    .lexical_index
                    .search_memory(query.text_str(), oversample, scope)
                    .await?;
                rrf_fuse(&sem, &lex, alpha)
            }
        };

        let live = drop_tombstoned(&inner, raw).await?;
        let topk = top_k(live, k);
        Ok(topk
            .into_iter()
            .map(|(id, p, s)| SearchHit::new(id, p, s))
            .collect())
    }

    /// Same-partition top-K cosine, excluding the source memory.
    ///
    /// ```no_run
    /// # async fn _ex(mem: kiromi_ai_memory::Memory, r: kiromi_ai_memory::MemoryRef) -> kiromi_ai_memory::Result<()> {
    /// let nbrs = mem.related(&r, 10).await?;
    /// # let _ = nbrs; Ok(()) }
    /// ```
    pub async fn related(&self, r: &MemoryRef, k: usize) -> Result<Vec<SearchHit>> {
        let inner = Arc::clone(&self.inner);
        if k == 0 {
            return Ok(Vec::new());
        }

        // Plan 18 dispatch 4 dropped the cached `index_cfg.dims`; pull
        // straight from `schema_meta`. `0` falls through to a no-match
        // result, same as before.
        let dims = inner
            .metadata
            .read_schema_meta()
            .await?
            .and_then(|m| m.embedder_dims)
            .and_then(|n| usize::try_from(n).ok())
            .unwrap_or(0);
        let qvec = inner
            .metadata
            .get_memory_embedding(&r.id, dims)
            .await?
            .ok_or_else(|| {
                Error::Config(format!(
                    "related({id}): source memory has no stored embedding (tombstoned or missing)",
                    id = r.id
                ))
            })?;

        let k_u32 = u32::try_from(k.saturating_add(1)).unwrap_or(u32::MAX);
        let raw = inner
            .vector_index
            .knn_memory(
                &qvec,
                k_u32,
                VectorScope::Partition(r.partition.clone()),
                None,
            )
            .await?
            .into_iter()
            .filter(|(id, _)| id != &r.id)
            .map(|(id, d)| (id, distance_to_similarity(d)))
            .collect::<Vec<_>>();
        let live = drop_tombstoned(&inner, raw).await?;
        Ok(live
            .into_iter()
            .take(k)
            .map(|(id, p, s)| SearchHit::new(id, p, s))
            .collect())
    }
}

/// Hierarchical search. Descends the partition tree via
/// `score_children_for_descent`, pruning children whose best summary
/// similarity falls below `query.prune_threshold` (no pruning when the
/// threshold is unset). At leaves, runs `knn_memory` restricted to that
/// partition and records each hit together with the leaf path.
///
/// The walk starts at the query's scope, or at the synthetic tenant root
/// when the query is unscoped. The tenant root is never treated as a leaf.
/// Nodes are taken from a LIFO frontier and each node is visited at most
/// once.
///
/// Cold-tree fallback: if scoring a node's children yields nothing, the
/// descent enumerates the SQL children directly (no pruning). The plan calls
/// this "summary freshness affects recall, not correctness."
///
/// Returns every leaf hit as `(id, leaf_path, similarity)`; the list is
/// neither sorted nor truncated to `k`, and tombstones are not filtered
/// here.
///
/// **Errors:** any metadata or vector-index failure aborts the walk.
async fn hierarchical_search(
    inner: &Arc<MemoryInner>,
    query: &Query,
    qvec: &[f32],
    k: usize,
) -> Result<Vec<(MemoryId, PartitionPath, f32)>> {
    let descend = (query.descend_factor as usize).max(1);
    let prune = query.prune_threshold.unwrap_or(f32::NEG_INFINITY);
    // Saturate instead of multiplying unchecked: a very large `k` must not
    // overflow (panic in debug builds, wrap in release). Since
    // `descend >= 1` the product is never below `k`.
    let beam = u32::try_from(k.saturating_mul(descend)).unwrap_or(u32::MAX);
    let k_u32 = u32::try_from(k).unwrap_or(u32::MAX);

    // Loop-invariant: build the synthetic root path once.
    let root = tenant_root_path();
    let start_path: PartitionPath = match query.scope() {
        Some(s) => s.clone(),
        None => root.clone(),
    };

    let mut visited: HashSet<PartitionPath> = HashSet::new();
    let mut frontier: Vec<PartitionPath> = vec![start_path];
    let mut leaf_hits: Vec<(MemoryId, PartitionPath, f32)> = Vec::new();

    while let Some(node) = frontier.pop() {
        // `insert` returns false when the node was already visited.
        if !visited.insert(node.clone()) {
            continue;
        }

        let is_tenant_root = node == root;
        let is_leaf = if is_tenant_root {
            false
        } else {
            inner.metadata.partition_is_leaf(&node).await?
        };

        if is_leaf {
            let raw = inner
                .vector_index
                .knn_memory(qvec, k_u32, VectorScope::Partition(node.clone()), None)
                .await?;
            for (id, dist) in raw {
                leaf_hits.push((id, node.clone(), distance_to_similarity(dist)));
            }
            continue;
        }

        // Internal node — score child summaries and pick survivors.
        let scored = score_children_for_descent(inner, &node, qvec, beam).await?;
        if scored.is_empty() {
            // Cold-tree fallback — enumerate SQL children, descend everything.
            let children = if is_tenant_root {
                inner.metadata.top_level_partitions().await?
            } else {
                inner.metadata.children_of(&node).await?
            };
            for child in children {
                if !visited.contains(&child) {
                    frontier.push(child);
                }
            }
        } else {
            for (child_path, score) in scored {
                if score >= prune && !visited.contains(&child_path) {
                    frontier.push(child_path);
                }
            }
        }
    }

    Ok(leaf_hits)
}

/// Score the partitions below `node` by their best summary embedding.
///
/// For a regular node, asks `knn_summary` for up to `beam` summaries scoped
/// by `node`'s path, loads each summary row, skips rows that are missing,
/// tombstoned, or have no `subject_path`, and maps every remaining subject
/// to the *direct child of `node`* that contains it. Subjects that are not
/// strictly below `node` are ignored.
///
/// For the synthetic tenant root, each top-level partition is probed with
/// its own `knn_summary` call (the root has no usable prefix of its own).
/// A summary is bucketed under the direct child *of that top-level
/// partition* on the way to its subject, or under the top-level partition
/// itself when the subject is exactly that partition.
/// NOTE(review): root-level results can therefore name second-level
/// partitions rather than top-level ones — confirm this is intended.
///
/// Returns `(partition, best_similarity)` pairs, one per bucket, in no
/// particular order. An empty result means no usable summary was found.
///
/// **Errors:** metadata and vector-index failures are propagated.
async fn score_children_for_descent(
    inner: &Arc<MemoryInner>,
    node: &PartitionPath,
    qvec: &[f32],
    beam: u32,
) -> Result<Vec<(PartitionPath, f32)>> {
    // Remember `sim` for `child` unless a strictly larger value is stored.
    fn keep_best(best: &mut HashMap<PartitionPath, f32>, child: PartitionPath, sim: f32) {
        match best.get_mut(&child) {
            Some(current) => {
                if sim > *current {
                    *current = sim;
                }
            }
            None => {
                best.insert(child, sim);
            }
        }
    }

    let mut best: HashMap<PartitionPath, f32> = HashMap::new();

    if node == &tenant_root_path() {
        // Synthetic root: probe each top-level partition's subtree summaries.
        let tops = inner.metadata.top_level_partitions().await?;
        for top in tops {
            let summaries = inner
                .vector_index
                .knn_summary(qvec, beam, top.as_str())
                .await?;
            for (sid, dist) in summaries {
                let Some(row) = inner.metadata.get_summary(&sid).await? else {
                    continue;
                };
                if row.tombstoned {
                    continue;
                }
                let Some(subject_path) = row.subject_path.clone() else {
                    continue;
                };
                let bucket = match direct_child_under(&top, &subject_path) {
                    Some(child) => Some(child),
                    // The summary describes the top-level partition itself.
                    None if subject_path == top => Some(top.clone()),
                    None => None,
                };
                if let Some(child) = bucket {
                    keep_best(&mut best, child, distance_to_similarity(dist));
                }
            }
        }
    } else {
        let summaries = inner
            .vector_index
            .knn_summary(qvec, beam, node.as_str())
            .await?;
        for (sid, dist) in summaries {
            let Some(row) = inner.metadata.get_summary(&sid).await? else {
                continue;
            };
            if row.tombstoned {
                continue;
            }
            let Some(subject_path) = row.subject_path.clone() else {
                continue;
            };
            // Map subject_path to the direct child of `node` that contains it.
            if let Some(child) = direct_child_under(node, &subject_path) {
                keep_best(&mut best, child, distance_to_similarity(dist));
            }
        }
    }

    Ok(best.into_iter().collect())
}

/// Return the direct child of `n` that lies on the path to `d`.
///
/// `d` must be strictly below `n`, i.e. its string form must be `n`'s string
/// form followed by `/` and at least one non-empty segment. The result is
/// `n` joined with the first such segment, parsed back into a
/// [`PartitionPath`].
///
/// Returns `None` when `d` equals `n`, when `d` is not under `n` (a shared
/// textual prefix without the `/` boundary does not count), when the first
/// segment is empty, or when the joined string fails to parse.
fn direct_child_under(n: &PartitionPath, d: &PartitionPath) -> Option<PartitionPath> {
    let parent = n.as_str();
    // Requiring the separator right after the prefix keeps "ab/c" from
    // being treated as a descendant of "a".
    let below = d.as_str().strip_prefix(parent)?.strip_prefix('/')?;
    // First path segment under the parent; the whole remainder if there is
    // no further separator.
    let head = below.split_once('/').map_or(below, |(first, _rest)| first);
    if head.is_empty() {
        None
    } else {
        format!("{parent}/{head}").parse::<PartitionPath>().ok()
    }
}

/// Resolve the query-side vector for `Semantic` and `Hybrid` modes.
///
/// Order of precedence: caller-supplied [`Query::with_embedding`] wins;
/// otherwise fall back to the configured [`crate::Embedder`] under the
/// `Query` role. If neither is present, returns [`Error::Config`].
async fn resolve_query_vector(inner: &Arc<MemoryInner>, q: &Query) -> Result<Vec<f32>> {
    if let Some(v) = q.precomputed_embedding() {
        return Ok(v.to_vec());
    }
    let embedder = inner.embedder.as_ref().ok_or_else(|| {
        Error::Config(
            "search() with semantic/hybrid mode requires either a configured \
             Embedder on the engine or a caller-supplied query vector via \
             Query::with_embedding(...)"
                .into(),
        )
    })?;
    let v = embedder
        .embed(crate::embedder::EmbedRole::Query, &[q.text_str()])
        .await?;
    v.into_iter().next().ok_or_else(|| {
        Error::embedder(
            "empty embed",
            std::io::Error::from(std::io::ErrorKind::InvalidData),
        )
    })
}