spool-memory 0.1.0

Local-first developer memory system — persistent, structured knowledge for AI coding tools
Documentation
//! Wiki lint — periodic consistency checks for the knowledge base.
//!
//! spool's concrete take on the Lint stage of Karpathy's LLM Wiki: it combines the
//! existing `consolidation::detect_prune_candidates` (staleness / expired / superseded)
//! with the new orphan / broken cross-ref detection into a single `LintReport` that
//! CLI / MCP / wakeup all consume.
//!
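//! ## Design principles
//! - Read-only: never modifies the ledger and never writes to the vault. Consumers
//!   decide what to do with the findings.
//! - Reuses `consolidation::detect_prune_candidates` rather than building a second
//!   staleness implementation.
//! - Orphan / cross-ref detection is new in this module: orphan detection finds a
//!   canonical note in the vault that has no matching ledger record; broken cross-ref
//!   detection finds `related_records` / `supersedes` entries that point at records
//!   which do not exist.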

use crate::domain::MemoryLifecycleState;
use crate::knowledge::cluster::{self as consolidation, PruneSuggestion};
use crate::lifecycle_store::{LedgerEntry, LifecycleStore, latest_state_entries};
use crate::vault_writer::{MEMORY_LEDGER_COMPILED_DIR, MEMORY_LEDGER_DIR};
use anyhow::{Context, Result};
use serde::Serialize;
use std::collections::HashSet;
use std::fs;
use std::path::Path;
use ts_rs::TS;

/// A reference held by a ledger record whose target id is not present in the ledger.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct BrokenCrossRef {
    /// Id of the record that holds the dangling reference.
    pub record_id: String,
    /// Title of the record that holds the dangling reference.
    pub title: String,
    /// The referenced target record_id (not present in the ledger).
    pub missing_target: String,
    /// Name of the record field the reference was found in
    /// (`"related_records"` or `"supersedes"`).
    #[ts(type = "string")]
    pub field: &'static str,
}

/// A `.md` note found in the vault's memory-ledger directories with no matching ledger record.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct OrphanNote {
    /// Path relative to vault_root (e.g. "50-Memory-Ledger/Extracted/abc.md").
    pub relative_path: String,
    /// The record_id derived from the file name (its stem, without the `.md` extension).
    pub record_id: String,
}

/// Combined result of one lint pass over the ledger entries and the vault.
#[derive(Debug, Clone, Serialize, TS)]
#[ts(export, export_to = "../frontend/src/lib/types/generated/")]
pub struct LintReport {
    /// Number of entries whose state is `Accepted` or `Canonical`.
    pub total_active_records: usize,
    /// Prune candidates as returned by `consolidation::detect_prune_candidates`.
    pub prune_suggestions: Vec<PruneSuggestion>,
    /// `related_records` / `supersedes` references that point at ids absent from the ledger.
    pub broken_cross_refs: Vec<BrokenCrossRef>,
    /// Vault notes whose file stem matches no ledger record id.
    pub orphan_notes: Vec<OrphanNote>,
}

impl LintReport {
    /// Returns `true` when none of the three finding categories contains an item.
    pub fn is_clean(&self) -> bool {
        let has_findings = !self.prune_suggestions.is_empty()
            || !self.broken_cross_refs.is_empty()
            || !self.orphan_notes.is_empty();
        !has_findings
    }

    /// Builds the one-line summary: the active record count followed by the number of
    /// findings in each category.
    pub fn summary_line(&self) -> String {
        let active = self.total_active_records;
        let prune = self.prune_suggestions.len();
        let broken = self.broken_cross_refs.len();
        let orphans = self.orphan_notes.len();

        format!(
            "lint: {} active records, {} prune, {} broken cross-refs, {} orphan notes",
            active, prune, broken, orphans,
        )
    }
}

/// Loads the configuration at `config_path` and runs a full lint pass.
///
/// The vault root is resolved from the loaded config; the lifecycle root is derived from
/// the directory containing `config_path` (falling back to `"."` when the path has no
/// parent).
///
/// # Errors
/// Returns an `anyhow` error, with context attached, when the config cannot be loaded,
/// the vault root cannot be resolved, or the ledger entries cannot be read. Handling is
/// left to the caller.
pub fn run_lint_from_config(config_path: &Path) -> Result<LintReport> {
    let loaded = crate::app::load(config_path)
        .with_context(|| format!("failed to load config {}", config_path.display()))?;
    let vault = crate::app::resolve_override_path(&loaded.vault.root, config_path)
        .context("failed to resolve vault root")?;

    let ledger_root = crate::lifecycle_store::lifecycle_root_from_config(
        config_path.parent().unwrap_or_else(|| Path::new(".")),
    );
    let ledger_store = LifecycleStore::new(&ledger_root);
    let latest = latest_state_entries(&ledger_store).context("failed to read ledger entries")?;

    Ok(run_lint(&latest, &ledger_root, &vault))
}

/// Core lint logic, operating on already-loaded ledger entries so it is easy to test.
///
/// Counts the entries in the `Accepted` or `Canonical` state, then assembles the three
/// sub-reports: prune candidates, broken cross-refs and orphan notes. Orphan detection
/// scans directories under `vault_root`; if it returns an error the orphan list is left
/// empty instead of failing the whole pass.
pub fn run_lint(entries: &[LedgerEntry], lifecycle_root: &Path, vault_root: &Path) -> LintReport {
    let mut active = 0usize;
    for item in entries {
        let counts = matches!(
            item.record.state,
            MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
        );
        if counts {
            active += 1;
        }
    }

    LintReport {
        total_active_records: active,
        prune_suggestions: consolidation::detect_prune_candidates(entries, lifecycle_root),
        broken_cross_refs: detect_broken_cross_refs(entries),
        orphan_notes: detect_orphan_notes(entries, vault_root).unwrap_or_default(),
    }
}

/// 扫 accepted / canonical 记录的 `related_records` / `supersedes` 字段,
/// 找出指向不存在 record_id 的引用。
fn detect_broken_cross_refs(entries: &[LedgerEntry]) -> Vec<BrokenCrossRef> {
    let known_ids: HashSet<&str> = entries.iter().map(|e| e.record_id.as_str()).collect();
    let mut broken = Vec::new();

    for entry in entries {
        if !matches!(
            entry.record.state,
            MemoryLifecycleState::Accepted | MemoryLifecycleState::Canonical
        ) {
            continue;
        }
        for target in &entry.record.related_records {
            if !known_ids.contains(target.as_str()) {
                broken.push(BrokenCrossRef {
                    record_id: entry.record_id.clone(),
                    title: entry.record.title.clone(),
                    missing_target: target.clone(),
                    field: "related_records",
                });
            }
        }
        if let Some(ref target) = entry.record.supersedes
            && !known_ids.contains(target.as_str())
        {
            broken.push(BrokenCrossRef {
                record_id: entry.record_id.clone(),
                title: entry.record.title.clone(),
                missing_target: target.clone(),
                field: "supersedes",
            });
        }
    }

    broken
}

/// Scans the vault directories `MEMORY_LEDGER_DIR` and `MEMORY_LEDGER_COMPILED_DIR`
/// (in that order) for `.md` files that have no matching ledger record; the file stem is
/// taken as the record_id.
///
/// The scan is best-effort: a directory that is missing or unreadable, a directory entry
/// that cannot be read, and a file name that is not valid UTF-8 are all skipped silently,
/// so this function currently always returns `Ok`.
///
/// Within each directory the orphans are sorted by relative path. `fs::read_dir` yields
/// entries in a platform- and filesystem-dependent order, so without the sort the same
/// vault could produce differently ordered reports.
fn detect_orphan_notes(entries: &[LedgerEntry], vault_root: &Path) -> Result<Vec<OrphanNote>> {
    let known_ids: HashSet<&str> = entries.iter().map(|e| e.record_id.as_str()).collect();
    let mut orphans = Vec::new();

    for rel_dir in [MEMORY_LEDGER_DIR, MEMORY_LEDGER_COMPILED_DIR] {
        let dir = vault_root.join(rel_dir);
        // `read_dir` fails for a missing path or a non-directory alike, so no separate
        // existence check is needed.
        let Ok(reader) = fs::read_dir(&dir) else {
            continue;
        };

        let mut found: Vec<OrphanNote> = reader
            .flatten()
            .filter_map(|dir_entry| {
                let path = dir_entry.path();
                if path.extension().and_then(|s| s.to_str()) != Some("md") {
                    return None;
                }
                let record_id = path.file_stem().and_then(|s| s.to_str())?;
                if known_ids.contains(record_id) {
                    return None;
                }
                Some(OrphanNote {
                    relative_path: relative_path(vault_root, &path),
                    record_id: record_id.to_string(),
                })
            })
            .collect();

        // Paths are unique within a directory, so an unstable sort is sufficient.
        found.sort_unstable_by(|a, b| a.relative_path.cmp(&b.relative_path));
        orphans.extend(found);
    }

    Ok(orphans)
}

/// Renders `absolute` relative to `base` as a `/`-separated string.
///
/// When `absolute` does not start with `base`, the full `absolute` path is rendered
/// instead. The platform's main path separator is replaced with `/` in either case.
fn relative_path(base: &Path, absolute: &Path) -> String {
    let shown = match absolute.strip_prefix(base) {
        Ok(tail) => tail,
        Err(_) => absolute,
    };
    let rendered = shown.display().to_string();
    rendered.replace(std::path::MAIN_SEPARATOR, "/")
}

/// Renders a [`LintReport`] as a markdown summary for wakeup / CLI display.
///
/// The output always starts with a heading and the one-line summary. A clean report gets
/// a single confirmation line and nothing else; otherwise each non-empty category is
/// rendered as its own bulleted section, in the order prune candidates, broken
/// cross-refs, orphan notes.
pub fn render_lint_markdown(report: &LintReport) -> String {
    let mut out = String::new();
    out.push_str("# Wiki Lint Report\n\n");
    out.push_str(&format!("{}\n\n", report.summary_line()));

    if report.is_clean() {
        out.push_str("✓ 知识库干净,无需清理。\n");
        return out;
    }

    if !report.prune_suggestions.is_empty() {
        out.push_str("## 可归档\n\n");
        for s in &report.prune_suggestions {
            // The dash keeps the title and the Debug-rendered reason from running
            // together into a single unreadable token.
            out.push_str(&format!(
                "- `{}` {} — {:?}\n",
                s.record_id, s.title, s.reason
            ));
        }
        out.push('\n');
    }

    if !report.broken_cross_refs.is_empty() {
        out.push_str("## 断链\n\n");
        for b in &report.broken_cross_refs {
            out.push_str(&format!(
                "- `{}` {} → `{}` 缺失 (字段 {})\n",
                b.record_id, b.title, b.missing_target, b.field
            ));
        }
        out.push('\n');
    }

    if !report.orphan_notes.is_empty() {
        out.push_str("## 孤儿 note\n\n");
        for o in &report.orphan_notes {
            out.push_str(&format!(
                "- `{}` (record_id `{}`)\n",
                o.relative_path, o.record_id
            ));
        }
        out.push('\n');
    }

    out
}

// Unit tests for the lint pass: cross-ref detection, orphan detection, report
// composition and markdown rendering.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::{
        MemoryLedgerAction, MemoryLifecycleState, MemoryOrigin, MemoryRecord, MemoryScope,
        MemorySourceKind,
    };
    use crate::lifecycle_store::TransitionMetadata;
    use std::fs;
    use tempfile::tempdir;

    /// Builds a ledger entry with the given id and lifecycle state. Every other field is
    /// a fixed placeholder; the cross-reference fields (`related_records`, `supersedes`)
    /// start out empty so each test can fill in only what it needs.
    fn entry_with(record_id: &str, state: MemoryLifecycleState) -> LedgerEntry {
        LedgerEntry {
            schema_version: "memory-ledger.v1".to_string(),
            recorded_at: "unix:1".to_string(),
            record_id: record_id.to_string(),
            scope_key: "user:long".to_string(),
            action: MemoryLedgerAction::RecordManual,
            source_kind: MemorySourceKind::Manual,
            metadata: TransitionMetadata::default(),
            record: MemoryRecord {
                title: format!("title-{record_id}"),
                summary: "s".to_string(),
                memory_type: "preference".to_string(),
                scope: MemoryScope::User,
                state,
                origin: MemoryOrigin {
                    source_kind: MemorySourceKind::Manual,
                    source_ref: "m".to_string(),
                },
                project_id: None,
                user_id: None,
                sensitivity: None,
                entities: Vec::new(),
                tags: Vec::new(),
                triggers: Vec::new(),
                related_files: Vec::new(),
                related_records: Vec::new(),
                supersedes: None,
                applies_to: Vec::new(),
                valid_until: None,
            },
        }
    }

    /// Missing targets in both `related_records` and `supersedes` of an accepted record
    /// are reported; existing targets and references held by candidate records are not.
    #[test]
    fn detect_broken_cross_refs_should_flag_missing_related_records_and_supersedes() {
        let mut a = entry_with("rec-a", MemoryLifecycleState::Accepted);
        a.record.related_records = vec!["rec-b".to_string(), "rec-missing".to_string()];
        a.record.supersedes = Some("rec-also-missing".to_string());

        let b = entry_with("rec-b", MemoryLifecycleState::Accepted);

        // Candidate entries are not scanned.
        let mut c = entry_with("rec-c", MemoryLifecycleState::Candidate);
        c.record.related_records = vec!["rec-never".to_string()];

        let entries = vec![a, b, c];
        let broken = detect_broken_cross_refs(&entries);
        let missing: HashSet<_> = broken.iter().map(|b| b.missing_target.clone()).collect();
        assert!(missing.contains("rec-missing"));
        assert!(missing.contains("rec-also-missing"));
        assert!(!missing.contains("rec-never"));
        assert!(!missing.contains("rec-b"));
    }

    /// `.md` files in either ledger directory without a matching record are orphans;
    /// files with a matching record and non-markdown files are ignored.
    #[test]
    fn detect_orphan_notes_should_find_md_files_without_matching_record() {
        let temp = tempdir().unwrap();
        let extracted = temp.path().join(MEMORY_LEDGER_DIR);
        let compiled = temp.path().join(MEMORY_LEDGER_COMPILED_DIR);
        fs::create_dir_all(&extracted).unwrap();
        fs::create_dir_all(&compiled).unwrap();

        fs::write(extracted.join("rec-known.md"), "# known").unwrap();
        fs::write(extracted.join("rec-orphan-a.md"), "# orphan").unwrap();
        fs::write(compiled.join("wiki-orphan.md"), "# compiled orphan").unwrap();
        fs::write(extracted.join("not-markdown.txt"), "skip").unwrap();

        let entries = vec![entry_with("rec-known", MemoryLifecycleState::Accepted)];
        let orphans = detect_orphan_notes(&entries, temp.path()).unwrap();
        // Compare as a set: the test does not depend on the order of the findings.
        let ids: HashSet<_> = orphans.iter().map(|o| o.record_id.clone()).collect();
        assert!(ids.contains("rec-orphan-a"));
        assert!(ids.contains("wiki-orphan"));
        assert!(!ids.contains("rec-known"));
        assert!(!ids.contains("not-markdown"));
    }

    /// Only accepted / canonical entries count as active, and a ledger without issues
    /// (and a vault without ledger directories) produces a clean report.
    #[test]
    fn run_lint_should_count_active_and_compose_sub_reports() {
        let temp = tempdir().unwrap();
        let lifecycle_root = temp.path().join(".spool");
        fs::create_dir_all(&lifecycle_root).unwrap();

        let a = entry_with("rec-a", MemoryLifecycleState::Accepted);
        let b = entry_with("rec-b", MemoryLifecycleState::Canonical);
        let c = entry_with("rec-c", MemoryLifecycleState::Candidate);

        let report = run_lint(&[a, b, c], &lifecycle_root, temp.path());
        assert_eq!(report.total_active_records, 2);
        assert!(report.prune_suggestions.is_empty());
        assert!(report.broken_cross_refs.is_empty());
        assert!(report.orphan_notes.is_empty());
        assert!(report.is_clean());
    }

    /// A report containing a broken cross-ref renders the broken-link section heading
    /// and names the missing target.
    #[test]
    fn render_lint_markdown_should_highlight_issues() {
        let temp = tempdir().unwrap();
        let mut a = entry_with("rec-a", MemoryLifecycleState::Accepted);
        a.record.related_records = vec!["rec-gone".to_string()];

        let report = run_lint(&[a], temp.path(), temp.path());
        let md = render_lint_markdown(&report);
        assert!(md.contains("断链"));
        assert!(md.contains("rec-gone"));
    }
}