tinycortex 0.1.1

Rust core for the TinyCortex memory system
Documentation
//! Maximal Marginal Relevance (MMR) selection.
//!
//! Given a set of candidate vectors, each carrying a precomputed relevance to
//! the query, select a diverse subset that balances that relevance against
//! redundancy within the selected set. Ported from OpenHuman's
//! `memory_search::vector::mmr`. Cosine similarity is reused from
//! [`crate::memory::store::vectors`].

use crate::memory::store::vectors::cosine_similarity;

/// A candidate for MMR selection.
///
/// The embedding is borrowed rather than owned, so a candidate is a cheap
/// `Copy` view over data held by the caller.
#[derive(Debug, Clone, Copy)]
pub struct MmrCandidate<'a> {
    /// Caller-side index, echoed back on the result so the candidate can be
    /// resolved to its original record.
    pub index: usize,
    /// Embedding vector, compared by cosine similarity against the embeddings
    /// of candidates that have already been selected.
    pub embedding: &'a [f32],
    /// Caller-supplied relevance score; weighted by `lambda` in the MMR score.
    pub relevance: f64,
}

/// Result of MMR selection: the original index and its MMR score.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct MmrResult {
    /// The `index` of the selected [`MmrCandidate`], echoed back unchanged.
    pub index: usize,
    /// The MMR score the candidate had at the moment it was selected.
    pub score: f64,
}

/// Select up to `limit` items from `candidates` using MMR.
///
/// `lambda` controls the relevance-diversity tradeoff and is clamped to
/// `[0.0, 1.0]`:
/// - `1.0` = pure relevance (no diversity)
/// - `0.0` = pure diversity (ignores relevance)
/// - `0.7` = recommended default
///
/// For each selection step:
/// `mmr(c) = lambda · relevance(c) − (1 − lambda) · max_similarity(c, selected)`.
///
/// `max_similarity` is the largest cosine similarity between `c` and any
/// already-selected embedding, floored at `0.0` (so it is `0.0` for the first
/// pick, and negative similarities carry no bonus).
///
/// `query_vec` is accepted for API compatibility but is not consulted:
/// relevance is taken from each candidate's `relevance` field.
///
/// Results are returned in selection order, each carrying the candidate's
/// caller-side `index` and the MMR score it had when it was picked. Ties are
/// resolved in favour of the candidate that appears first in `candidates`.
/// A candidate whose score is NaN or negative infinity is never picked, so
/// fewer than `limit` results may be returned.
pub fn mmr_select(
    query_vec: &[f32],
    candidates: &[MmrCandidate<'_>],
    limit: usize,
    lambda: f64,
) -> Vec<MmrResult> {
    // Relevance is precomputed per candidate, so the query vector is unused;
    // the parameter is kept so existing callers continue to compile.
    let _ = query_vec;

    if candidates.is_empty() || limit == 0 {
        return Vec::new();
    }

    let lambda = lambda.clamp(0.0, 1.0);
    let limit = limit.min(candidates.len());

    // Per-candidate running maximum of the cosine similarity to everything
    // selected so far (seeded with 0.0). `None` marks a candidate that has
    // already been selected. Keeping the maximum incrementally means each
    // round only compares against the newest pick instead of re-scanning the
    // whole selected set for every remaining candidate.
    let mut redundancy: Vec<Option<f64>> = vec![Some(0.0); candidates.len()];
    let mut results: Vec<MmrResult> = Vec::with_capacity(limit);

    while results.len() < limit {
        let mut best_idx: Option<usize> = None;
        let mut best_mmr = f64::NEG_INFINITY;

        for (i, (candidate, slot)) in candidates.iter().zip(&redundancy).enumerate() {
            let Some(max_sim_to_selected) = *slot else {
                continue;
            };
            let mmr_score = lambda * candidate.relevance - (1.0 - lambda) * max_sim_to_selected;
            // Strict comparison: the earliest candidate wins ties, and NaN or
            // negative-infinity scores never displace the initial sentinel.
            if mmr_score > best_mmr {
                best_mmr = mmr_score;
                best_idx = Some(i);
            }
        }

        // No remaining candidate has a selectable score.
        let Some(idx) = best_idx else { break };
        redundancy[idx] = None;
        results.push(MmrResult {
            index: candidates[idx].index,
            score: best_mmr,
        });

        // The final pick needs no follow-up similarity pass.
        if results.len() == limit {
            break;
        }

        // Fold the newly selected embedding into every remaining candidate's
        // running maximum.
        let chosen = candidates[idx].embedding;
        for (candidate, slot) in candidates.iter().zip(redundancy.iter_mut()) {
            if let Some(max_sim) = slot {
                *max_sim = f64::max(*max_sim, cosine_similarity(candidate.embedding, chosen));
            }
        }
    }

    results
}

// Unit tests are kept out of line in `mmr_tests.rs` and are compiled only
// under `cfg(test)`.
#[cfg(test)]
#[path = "mmr_tests.rs"]
mod tests;