spool-memory 0.2.3

Local-first developer memory system — persistent, structured knowledge for AI coding tools
Documentation
pub mod project_matcher;
pub mod scenario_matcher;
pub mod scorer;
pub mod selector;

#[cfg(feature = "bm25")]
pub mod bm25;

#[cfg(feature = "embedding")]
pub mod embedding;

use crate::config::{AppConfig, ProjectConfig};
use crate::domain::{
    ContextBundle, DebugTrace, LifecycleCandidate, MemoryRecord, Note, RouteInput, RouteResult,
};
use std::collections::HashMap;

pub fn build_context(
    config: &AppConfig,
    notes: &[Note],
    input: RouteInput,
    debug: DebugTrace,
) -> ContextBundle {
    build_context_with_lifecycle(config, notes, &[], input, debug)
}

/// Builds a [`ContextBundle`] from notes and lifecycle records, without a
/// reference map.
///
/// Forwards to [`build_context_with_lifecycle_and_refs`], passing `None` for
/// the reference map.
pub fn build_context_with_lifecycle(
    config: &AppConfig,
    notes: &[Note],
    lifecycle_records: &[(String, MemoryRecord)],
    input: RouteInput,
    debug: DebugTrace,
) -> ContextBundle {
    // This entry point has no reference data to offer.
    let reference_map = None;
    build_context_with_lifecycle_and_refs(
        config,
        notes,
        lifecycle_records,
        input,
        debug,
        reference_map,
    )
}

/// Routes `input` against the configuration and assembles the full
/// [`ContextBundle`].
///
/// Steps, in order:
/// 1. Match a project from `input.cwd` and look up its entry in
///    `config.projects`.
/// 2. Match modules (only when a project entry was found) and scenes.
/// 3. Select scored notes, capped at `config.output.max_notes`.
/// 4. Collect the record ids to exclude from lifecycle selection.
/// 5. Select lifecycle candidates and derive an optional crystallize hint.
///
/// `debug` is stored in the resulting route unchanged, and `reference_map`
/// is handed through to lifecycle selection.
pub fn build_context_with_lifecycle_and_refs(
    config: &AppConfig,
    notes: &[Note],
    lifecycle_records: &[(String, MemoryRecord)],
    input: RouteInput,
    debug: DebugTrace,
    reference_map: Option<&crate::reference_tracker::ReferenceMap>,
) -> ContextBundle {
    // Resolve the project for the working directory, then its config entry.
    let project = project_matcher::match_project(config, &input.cwd);
    let project_config = match project.as_ref() {
        Some(matched) => config.projects.iter().find(|entry| entry.id == matched.id),
        None => None,
    };

    // Modules need a project entry; scenes are matched from the global config.
    let modules = match project_config {
        Some(entry) => scenario_matcher::match_modules(entry, &input),
        None => Default::default(),
    };
    let scenes = scenario_matcher::match_scenes(config, &input);

    let scored_notes = selector::select_scored_notes(
        project_config,
        project.as_ref(),
        &modules,
        &scenes,
        notes,
        &input,
        config.output.max_notes,
    );

    // Record ids excluded from lifecycle selection: those derived from the
    // scored notes, plus (P5) source fragments already covered by a knowledge
    // synthesis page and records named in an explicit supersedes relation.
    let mut excluded_ids = selector::excluded_record_ids_from_scored(&scored_notes);
    excluded_ids.extend(selector::superseded_record_ids(lifecycle_records));

    let candidates: Vec<crate::domain::CandidateNote> = scored_notes
        .iter()
        .map(|scored| crate::domain::CandidateNote::from(scored))
        .collect();

    let lifecycle_candidates = select_lifecycle_with_available_signals(
        config,
        project.as_ref(),
        lifecycle_records,
        &input,
        &excluded_ids,
        reference_map,
    );
    let crystallize_hint = detect_crystallize_hint(&lifecycle_candidates, lifecycle_records);

    // One source path per selected candidate note, in candidate order.
    let sources = candidates
        .iter()
        .map(|note| note.relative_path.clone())
        .collect();

    let route = RouteResult {
        project,
        modules,
        scenes,
        candidates,
        lifecycle_candidates,
        sources,
        debug,
        crystallize_hint,
    };
    ContextBundle { input, route }
}

/// Selects lifecycle candidates using the strongest signal available.
///
/// When the `embedding` feature is compiled in and the embedding search
/// returns at least one hit, selection goes through
/// `selector::select_lifecycle_candidates_fused` with those hits. In every
/// other case (feature disabled, or no hits) it falls back to
/// `selector::select_lifecycle_candidates`.
///
/// The result size limit is `config.output.max_lifecycle`.
fn select_lifecycle_with_available_signals(
    config: &AppConfig,
    project: Option<&crate::domain::MatchedProject>,
    lifecycle_records: &[(String, MemoryRecord)],
    input: &RouteInput,
    excluded_record_ids: &std::collections::HashSet<String>,
    reference_map: Option<&crate::reference_tracker::ReferenceMap>,
) -> Vec<crate::domain::LifecycleCandidate> {
    let limit = config.output.max_lifecycle;

    #[cfg(feature = "embedding")]
    {
        // Ask the index for twice the final limit. The multiplication
        // saturates so that a very large configured limit cannot overflow
        // (which would panic in debug builds and wrap in release builds).
        let embedding_limit = limit.saturating_mul(2);
        let embedding_results = try_embedding_search(config, &input.task, embedding_limit);
        if !embedding_results.is_empty() {
            return selector::select_lifecycle_candidates_fused(
                project,
                lifecycle_records,
                input,
                limit,
                excluded_record_ids,
                reference_map,
                #[cfg(feature = "bm25")]
                None, // BM25 path handled inside fused fn
                &embedding_results,
            );
        }
    }

    selector::select_lifecycle_candidates(
        project,
        lifecycle_records,
        input,
        limit,
        excluded_record_ids,
        reference_map,
    )
}

/// Best-effort embedding search for `query`, returning at most what
/// `EmbeddingIndex::search` yields for `limit`.
///
/// Returns an empty vector — never an error — when any prerequisite is
/// missing: embeddings are disabled in the config, the index file does not
/// exist, the index fails to load, no cached model is available, or the
/// query cannot be embedded.
#[cfg(feature = "embedding")]
fn try_embedding_search(config: &AppConfig, query: &str, limit: usize) -> Vec<(String, f32)> {
    let settings = &config.embedding;
    if !settings.enabled {
        return Vec::new();
    }

    let index_path = settings.resolved_index_path();
    if !index_path.exists() {
        return Vec::new();
    }

    // Load failures are treated the same as "no index": fall back silently.
    let Ok(index) = embedding::EmbeddingIndex::load(&index_path) else {
        return Vec::new();
    };
    let Some(model) = embedding::cached_model_for(settings.model_id.as_deref()) else {
        return Vec::new();
    };

    embedding::EmbeddingIndex::embed_query(model, query)
        .map(|query_embedding| index.search(&query_embedding, limit))
        .unwrap_or_default()
}

/// Looks up the configuration entry of the project matched for `cwd`.
///
/// Returns `None` when `project_matcher::match_project` finds no project, or
/// when no entry in `config.projects` carries the matched project's id.
pub fn project_config_for_input<'a>(
    config: &'a AppConfig,
    cwd: &std::path::Path,
) -> Option<&'a ProjectConfig> {
    let matched = project_matcher::match_project(config, cwd)?;
    config
        .projects
        .iter()
        .find(|candidate| candidate.id == matched.id)
}

/// Minimum number of lifecycle candidates sharing a topic before emitting a hint.
///
/// `detect_crystallize_hint` applies this value twice: as the minimum number
/// of selected candidates required to look for a cluster at all, and as the
/// minimum frequency an entity or tag must reach to be reported.
const CRYSTALLIZE_THRESHOLD: usize = 3;

/// Detect whether enough selected lifecycle candidates share an entity or
/// tag, indicating they could be merged into a structured knowledge page.
///
/// Each candidate is resolved to its record in `records` by id; candidates
/// without a matching record are ignored. Entities and tags are compared
/// case-insensitively (lowercased), and each topic is counted at most once
/// per record so the reported count is a number of fragments rather than a
/// number of occurrences.
///
/// Returns a human-readable hint naming the most widely shared topic when its
/// count reaches [`CRYSTALLIZE_THRESHOLD`], otherwise `None`. Selection is
/// deterministic: the highest count wins, an entity is preferred over a tag
/// on equal counts, and within one kind ties go to the lexicographically
/// smallest name.
fn detect_crystallize_hint(
    candidates: &[LifecycleCandidate],
    records: &[(String, MemoryRecord)],
) -> Option<String> {
    /// Adds one to `freq` for every distinct lowercased value in `values`.
    fn tally_distinct<I>(freq: &mut HashMap<String, usize>, values: I)
    where
        I: IntoIterator,
        I::Item: AsRef<str>,
    {
        let distinct: std::collections::HashSet<String> = values
            .into_iter()
            .map(|value| value.as_ref().to_lowercase())
            .collect();
        for key in distinct {
            *freq.entry(key).or_default() += 1;
        }
    }

    /// Picks the topic with the highest count at or above the threshold.
    fn most_shared(freq: &HashMap<String, usize>) -> Option<(&str, usize)> {
        freq.iter()
            .filter(|(_, count)| **count >= CRYSTALLIZE_THRESHOLD)
            .map(|(topic, count)| (topic.as_str(), *count))
            // HashMap iteration order is arbitrary, so break count ties by
            // name (smallest wins) to keep the hint stable across runs.
            .max_by(|a, b| a.1.cmp(&b.1).then_with(|| b.0.cmp(a.0)))
    }

    // Fewer candidates than the threshold can never form a cluster.
    if candidates.len() < CRYSTALLIZE_THRESHOLD {
        return None;
    }

    // Count, per topic, how many selected records mention it.
    let mut entity_freq: HashMap<String, usize> = HashMap::new();
    let mut tag_freq: HashMap<String, usize> = HashMap::new();

    for candidate in candidates {
        let Some((_, record)) = records.iter().find(|(id, _)| id == &candidate.record_id) else {
            continue;
        };
        tally_distinct(&mut entity_freq, &record.entities);
        tally_distinct(&mut tag_freq, &record.tags);
    }

    // Pick whichever kind has the higher frequency; prefer entity on tie.
    let (topic, count) = match (most_shared(&entity_freq), most_shared(&tag_freq)) {
        (Some(entity), Some(tag)) => {
            if entity.1 >= tag.1 {
                entity
            } else {
                tag
            }
        }
        (Some(best), None) | (None, Some(best)) => best,
        (None, None) => return None,
    };

    Some(format!(
        "{count} fragments share topic \"{topic}\" — consider running `memory consolidate` to crystallize them into a knowledge page"
    ))
}