mempal 0.5.3

Project memory for coding agents. Single binary, hybrid search, knowledge graph.
Documentation
#![warn(clippy::all)]

use std::collections::BTreeMap;
use std::path::PathBuf;

use serde::Serialize;
use thiserror::Error;

use crate::core::{
    anchor,
    db::{Database, DbError},
    types::{
        AnchorKind, KnowledgeCard, KnowledgeEvidenceLink, KnowledgeEvidenceRole, KnowledgeStatus,
        MemoryDomain, MemoryKind, RouteDecision,
    },
};
use crate::embed::{EmbedError, Embedder};
use crate::search::{SearchError, SearchFilters, SearchOptions, search_with_vector_options};

/// Result alias used by this module; the error type is always
/// [`KnowledgeCardRetrievalError`].
pub type Result<T> = std::result::Result<T, KnowledgeCardRetrievalError>;

/// Errors that can occur while retrieving knowledge cards through their linked evidence.
///
/// Each variant wraps the underlying error as its source, so callers can walk the
/// error chain for details.
#[derive(Debug, Error)]
pub enum KnowledgeCardRetrievalError {
    /// Deriving the retrieval anchors failed (converted automatically via `From`).
    #[error("failed to derive retrieval anchors")]
    DeriveAnchor(#[from] anchor::AnchorError),
    /// The embedder returned an error while embedding the query text.
    #[error("failed to embed card retrieval query")]
    EmbedQuery(#[source] EmbedError),
    /// The embedder succeeded but produced no vector for the query.
    #[error("embedder returned no card retrieval query vector")]
    MissingQueryVector,
    /// The evidence search failed.
    #[error("failed to search linked evidence")]
    SearchEvidence(#[source] SearchError),
    /// Loading evidence links or knowledge cards from the database failed.
    #[error("failed to load card retrieval metadata")]
    LoadMetadata(#[source] DbError),
}

/// Parameters for a knowledge-card retrieval.
#[derive(Debug, Clone)]
pub struct KnowledgeCardRetrievalRequest {
    /// Query text; it is embedded and also passed to the evidence search.
    pub query: String,
    /// Domain used for the worktree and repo anchor candidates. The global anchor
    /// candidate always uses `MemoryDomain::Global` regardless of this value.
    pub domain: MemoryDomain,
    /// Field used as an evidence search filter; a card's `field` must equal it
    /// for the card to be returned.
    pub field: String,
    /// Directory from which the worktree/repo anchors are derived.
    pub cwd: PathBuf,
    /// Maximum number of cards returned; `0` yields an empty result.
    pub top_k: usize,
    /// Evidence search limit per anchor; the search uses the larger of this
    /// value and `top_k`.
    pub evidence_top_k: usize,
}

/// A knowledge card reached through linked evidence, together with that evidence.
#[derive(Debug, Clone, Serialize)]
pub struct RetrievedKnowledgeCard {
    /// The retrieved card.
    pub card: KnowledgeCard,
    /// One citation per evidence link that led to this card.
    pub evidence_citations: Vec<RetrievedEvidenceCitation>,
    /// Ranking score: the best score among `evidence_citations`.
    pub score: f32,
}

/// A single piece of evidence that supports a retrieved knowledge card.
#[derive(Debug, Clone, Serialize)]
pub struct RetrievedEvidenceCitation {
    /// Identifier of the evidence drawer, copied from the evidence link.
    pub evidence_drawer_id: String,
    /// Role of the evidence, copied from the evidence link.
    pub role: KnowledgeEvidenceRole,
    /// Source file reported by the evidence search hit.
    pub source_file: String,
    /// Similarity reported by the evidence search hit.
    pub score: f32,
}

/// One anchor scope to search for evidence (worktree, repo, or global).
#[derive(Debug, Clone)]
struct AnchorCandidate {
    /// Kind of anchor; used as a search filter and matched against the card.
    anchor_kind: AnchorKind,
    /// Anchor identifier; evidence and cards must carry the same id.
    anchor_id: String,
    /// Domain searched under this anchor; cards must carry the same domain.
    domain: MemoryDomain,
}

/// Embeds `request.query` and retrieves the knowledge cards linked to matching evidence.
///
/// Returns an empty list without calling the embedder when `request.top_k` is zero.
///
/// # Errors
///
/// * [`KnowledgeCardRetrievalError::EmbedQuery`] when the embedder fails.
/// * [`KnowledgeCardRetrievalError::MissingQueryVector`] when the embedder returns
///   no vector for the query.
/// * Any error produced by [`retrieve_knowledge_cards_with_vector`].
pub async fn retrieve_knowledge_cards<E: Embedder + ?Sized>(
    db: &Database,
    embedder: &E,
    request: KnowledgeCardRetrievalRequest,
) -> Result<Vec<RetrievedKnowledgeCard>> {
    if request.top_k == 0 {
        return Ok(Vec::new());
    }

    let embeddings = embedder
        .embed(&[request.query.as_str()])
        .await
        .map_err(KnowledgeCardRetrievalError::EmbedQuery)?;

    // Only one text was submitted, so only the first vector is relevant.
    let Some(query_vector) = embeddings.into_iter().next() else {
        return Err(KnowledgeCardRetrievalError::MissingQueryVector);
    };

    retrieve_knowledge_cards_with_vector(db, request, &query_vector)
}

/// Retrieves knowledge cards through their linked evidence, using a precomputed
/// query vector.
///
/// For every anchor candidate produced by `retrieval_anchors`, evidence is searched
/// with filters for the evidence memory kind, the anchor's domain and kind, and
/// `request.field`. Search hits whose `anchor_id` differs from the candidate are
/// skipped. Each remaining hit is expanded through its evidence links to the linked
/// cards, and cards that pass `card_is_retrievable` are collected. A card reached
/// through several links keeps every citation and is scored by its best citation.
///
/// The result is sorted by descending score, ties broken by ascending card id, and
/// truncated to `request.top_k`. Cards whose score is NaN are ranked after every
/// card with a real score so the ordering is always well defined.
///
/// Returns an empty list when `request.top_k` is zero.
///
/// # Errors
///
/// * [`KnowledgeCardRetrievalError::DeriveAnchor`] when the anchors cannot be derived.
/// * [`KnowledgeCardRetrievalError::SearchEvidence`] when the evidence search fails.
/// * [`KnowledgeCardRetrievalError::LoadMetadata`] when links or cards cannot be loaded.
pub fn retrieve_knowledge_cards_with_vector(
    db: &Database,
    request: KnowledgeCardRetrievalRequest,
    query_vector: &[f32],
) -> Result<Vec<RetrievedKnowledgeCard>> {
    if request.top_k == 0 {
        return Ok(Vec::new());
    }

    let mut by_card = BTreeMap::<String, RetrievedKnowledgeCard>::new();
    let route = RouteDecision {
        wing: None,
        room: None,
        confidence: 0.0,
        reason: "knowledge card linked-evidence retrieval".to_string(),
    };
    // Loop-invariant: never fetch fewer evidence hits than cards requested.
    let evidence_limit = request.evidence_top_k.max(request.top_k);

    for anchor in retrieval_anchors(&request)? {
        let filters = SearchFilters {
            memory_kind: Some(memory_kind_slug(&MemoryKind::Evidence).to_string()),
            domain: Some(domain_slug(&anchor.domain).to_string()),
            field: Some(request.field.clone()),
            tier: None,
            status: None,
            anchor_kind: Some(anchor_kind_slug(&anchor.anchor_kind).to_string()),
        };
        let evidence_results = search_with_vector_options(
            db,
            &request.query,
            query_vector,
            route.clone(),
            SearchOptions {
                filters,
                with_neighbors: false,
            },
            evidence_limit,
        )
        .map_err(KnowledgeCardRetrievalError::SearchEvidence)?;

        for evidence in evidence_results {
            // The search filters on anchor kind only; the exact anchor id is
            // enforced here.
            if evidence.anchor_id != anchor.anchor_id {
                continue;
            }
            let links = db
                .knowledge_evidence_links_for_drawer(&evidence.drawer_id)
                .map_err(KnowledgeCardRetrievalError::LoadMetadata)?;
            for link in links {
                // A link whose card no longer exists is skipped rather than
                // treated as an error.
                let Some(card) = db
                    .get_knowledge_card(&link.card_id)
                    .map_err(KnowledgeCardRetrievalError::LoadMetadata)?
                else {
                    continue;
                };
                if !card_is_retrievable(&card, &request, &anchor) {
                    continue;
                }
                let citation =
                    citation_from_link(&link, &evidence.source_file, evidence.similarity);
                match by_card.get_mut(&card.id) {
                    Some(existing) => {
                        // `f32::max` ignores a NaN operand, so a NaN score from one
                        // citation can never mask a real score from another.
                        existing.score = existing.score.max(citation.score);
                        existing.evidence_citations.push(citation);
                    }
                    None => {
                        by_card.insert(
                            card.id.clone(),
                            RetrievedKnowledgeCard {
                                card,
                                score: citation.score,
                                evidence_citations: vec![citation],
                            },
                        );
                    }
                }
            }
        }
    }

    let mut results = by_card.into_values().collect::<Vec<_>>();
    results.sort_by(|left, right| {
        // Comparing the NaN flags first (false < true) puts NaN scores last and
        // keeps the comparator a total order, which `sort_by` requires.
        left.score
            .is_nan()
            .cmp(&right.score.is_nan())
            .then_with(|| {
                right
                    .score
                    .partial_cmp(&left.score)
                    .unwrap_or(std::cmp::Ordering::Equal)
            })
            .then_with(|| left.card.id.cmp(&right.card.id))
    });
    results.truncate(request.top_k);
    Ok(results)
}

/// Builds the ordered list of anchor scopes to search for `request`.
///
/// Candidates are produced in this order: the worktree anchor derived from
/// `request.cwd`, its parent repo anchor (falling back to the legacy repo anchor
/// when there is no parent), the legacy repo anchor, and the default global anchor.
/// Candidates that repeat an earlier (anchor kind, anchor id) pair are dropped, so
/// the first occurrence wins.
///
/// # Errors
///
/// Returns [`KnowledgeCardRetrievalError::DeriveAnchor`] when the anchor cannot be
/// derived from the working directory.
fn retrieval_anchors(request: &KnowledgeCardRetrievalRequest) -> Result<Vec<AnchorCandidate>> {
    let derived = anchor::derive_anchor_from_cwd(Some(&request.cwd))?;
    let worktree_anchor_id = derived.anchor_id;
    let repo_anchor_id = derived
        .parent_anchor_id
        .unwrap_or_else(|| anchor::LEGACY_REPO_ANCHOR_ID.to_string());

    let candidates = [
        AnchorCandidate {
            anchor_kind: AnchorKind::Worktree,
            anchor_id: worktree_anchor_id,
            domain: request.domain.clone(),
        },
        AnchorCandidate {
            anchor_kind: AnchorKind::Repo,
            anchor_id: repo_anchor_id,
            domain: request.domain.clone(),
        },
        AnchorCandidate {
            anchor_kind: AnchorKind::Repo,
            anchor_id: anchor::LEGACY_REPO_ANCHOR_ID.to_string(),
            domain: request.domain.clone(),
        },
        AnchorCandidate {
            anchor_kind: AnchorKind::Global,
            anchor_id: "global://default".to_string(),
            domain: MemoryDomain::Global,
        },
    ];

    // Keep the first candidate for each (kind, id) pair, preserving order.
    let mut unique: Vec<AnchorCandidate> = Vec::with_capacity(candidates.len());
    for candidate in candidates {
        let already_present = unique.iter().any(|kept| {
            anchor_kind_slug(&kept.anchor_kind) == anchor_kind_slug(&candidate.anchor_kind)
                && kept.anchor_id == candidate.anchor_id
        });
        if !already_present {
            unique.push(candidate);
        }
    }
    Ok(unique)
}

/// Reports whether `card` may be returned for `request` under the given `anchor`.
///
/// A card qualifies when its status is canonical or promoted, its domain and anchor
/// (kind and id) equal those of `anchor`, and its field equals `request.field`.
fn card_is_retrievable(
    card: &KnowledgeCard,
    request: &KnowledgeCardRetrievalRequest,
    anchor: &AnchorCandidate,
) -> bool {
    let status_allowed = matches!(
        card.status,
        KnowledgeStatus::Canonical | KnowledgeStatus::Promoted
    );
    if !status_allowed {
        return false;
    }
    if card.domain != anchor.domain {
        return false;
    }
    if card.field != request.field {
        return false;
    }
    if card.anchor_kind != anchor.anchor_kind {
        return false;
    }
    card.anchor_id == anchor.anchor_id
}

/// Builds the citation for one evidence link.
///
/// The drawer id and role are cloned from `link`; `source_file` and `score` are
/// supplied by the caller.
fn citation_from_link(
    link: &KnowledgeEvidenceLink,
    source_file: &str,
    score: f32,
) -> RetrievedEvidenceCitation {
    let evidence_drawer_id = link.evidence_drawer_id.clone();
    let role = link.role.clone();
    let source_file = source_file.to_owned();

    RetrievedEvidenceCitation {
        evidence_drawer_id,
        role,
        source_file,
        score,
    }
}

/// Returns the lowercase slug used in search filters for a memory kind.
fn memory_kind_slug(value: &MemoryKind) -> &'static str {
    // Exhaustive on purpose: a new variant must be given a slug here.
    let slug = match *value {
        MemoryKind::Evidence => "evidence",
        MemoryKind::Knowledge => "knowledge",
    };
    slug
}

/// Returns the lowercase slug used in search filters for a memory domain.
fn domain_slug(value: &MemoryDomain) -> &'static str {
    // Exhaustive on purpose: a new variant must be given a slug here.
    let slug = match *value {
        MemoryDomain::Project => "project",
        MemoryDomain::Agent => "agent",
        MemoryDomain::Skill => "skill",
        MemoryDomain::Global => "global",
    };
    slug
}

/// Returns the lowercase slug used in search filters and anchor de-duplication
/// for an anchor kind.
fn anchor_kind_slug(value: &AnchorKind) -> &'static str {
    // Exhaustive on purpose: a new variant must be given a slug here.
    let slug = match *value {
        AnchorKind::Global => "global",
        AnchorKind::Repo => "repo",
        AnchorKind::Worktree => "worktree",
    };
    slug
}