spool-memory 0.1.0

Local-first developer memory system — persistent, structured knowledge for AI coding tools
Documentation
use crate::config::VaultLimits;
use crate::domain::Note;
use crate::support::Result;
use crate::vault::{frontmatter, markdown, wikilink};
use anyhow::Context;
use once_cell::sync::Lazy;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use walkdir::WalkDir;

/// Result of one vault scan: the parsed notes plus the roots that were walked.
#[derive(Debug, Clone)]
pub struct ScanSnapshot {
    /// Notes parsed during the scan; `scan_notes_with_debug` returns them sorted by
    /// `relative_path`.
    pub notes: Vec<Note>,
    /// Directories that were walked, rendered relative to the canonical vault root where
    /// possible and with `\` normalised to `/`.
    pub scan_roots: Vec<String>,
}

/// A shared scan snapshot bundled with a project id and a list of note roots.
///
/// NOTE(review): this type is not constructed anywhere in the code visible here; the field
/// descriptions below are inferred from names and should be confirmed against the callers.
#[derive(Debug, Clone)]
pub struct RoutedSnapshot {
    /// Identifier of the project this snapshot belongs to (presumably the routing result).
    pub project_id: String,
    /// Note roots associated with the snapshot (presumably those passed to the scan).
    pub note_roots: Vec<String>,
    /// The scan result, reference-counted so it can be shared without copying the notes.
    pub snapshot: Arc<ScanSnapshot>,
}

/// A shared scan snapshot bundled with an optional project id and a list of note roots.
///
/// NOTE(review): this type is not constructed anywhere in the code visible here; the field
/// descriptions below are inferred from names and should be confirmed against the callers.
#[derive(Debug, Clone)]
pub struct WakeupSnapshot {
    /// Identifier of the associated project, or `None` (presumably when no project matched).
    pub project_id: Option<String>,
    /// Note roots associated with the snapshot (presumably those passed to the scan).
    pub note_roots: Vec<String>,
    /// The scan result, reference-counted so it can be shared without copying the notes.
    pub snapshot: Arc<ScanSnapshot>,
}

/// Lookup key for `SCAN_CACHE`: every input that `cached_scan_notes_with_debug` feeds into a
/// scan, so two calls share a cached snapshot only when all of them match.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ScanCacheKey {
    /// Canonicalized vault root, so different spellings of the same directory share an entry.
    root: PathBuf,
    /// Configured note roots, exactly as passed by the caller (order-sensitive).
    note_roots: Vec<String>,
    /// Copy of `VaultLimits::max_files`.
    max_files: usize,
    /// Copy of `VaultLimits::max_file_bytes`.
    max_file_bytes: u64,
    /// Copy of `VaultLimits::max_total_bytes`.
    max_total_bytes: u64,
    /// Copy of `VaultLimits::max_depth`.
    max_depth: usize,
}

/// Process-wide cache of scan snapshots keyed by `ScanCacheKey`, created lazily on first use
/// and guarded by a mutex.
///
/// Within this file entries are only ever added (by `cached_scan_notes_with_debug`) or wiped
/// wholesale by the test-only `clear_scan_cache`; nothing here evicts individual entries.
static SCAN_CACHE: Lazy<Mutex<std::collections::HashMap<ScanCacheKey, Arc<ScanSnapshot>>>> =
    Lazy::new(|| Mutex::new(std::collections::HashMap::new()));

/// Scans the vault and returns only the parsed notes.
///
/// Thin convenience wrapper over [`scan_notes_with_debug`] that discards the list of scan
/// roots; any error from that function is returned unchanged.
pub fn scan_notes(root: &Path, note_roots: &[String], limits: &VaultLimits) -> Result<Vec<Note>> {
    scan_notes_with_debug(root, note_roots, limits).map(|(notes, _scan_roots)| notes)
}

/// Returns a shared snapshot of the vault scan, reusing a cached one when available.
///
/// The cache key is the canonicalized `root`, the `note_roots` exactly as given, and the four
/// `limits` values. On a miss the vault is scanned via [`scan_notes_with_debug`] and the
/// result is stored in `SCAN_CACHE`.
///
/// Nothing in this function invalidates an entry when files change on disk; a cached
/// snapshot is returned for as long as the entry stays in the cache.
///
/// # Errors
///
/// Fails when `root` cannot be canonicalized or when the underlying scan fails.
pub fn cached_scan_notes_with_debug(
    root: &Path,
    note_roots: &[String],
    limits: &VaultLimits,
) -> Result<Arc<ScanSnapshot>> {
    let canonical_root = root
        .canonicalize()
        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
    let key = ScanCacheKey {
        root: canonical_root,
        note_roots: note_roots.to_vec(),
        max_files: limits.max_files,
        max_file_bytes: limits.max_file_bytes,
        max_total_bytes: limits.max_total_bytes,
        max_depth: limits.max_depth,
    };

    // The guard is a temporary of this statement, so the lock is released before the
    // (potentially slow) filesystem scan below. A poisoned lock is recovered rather than
    // propagated as a panic: the map holds only `Arc`s and is still structurally valid.
    let cached = SCAN_CACHE
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .get(&key)
        .cloned();
    if let Some(snapshot) = cached {
        return Ok(snapshot);
    }

    // Scan through the canonical root (the same path the key was built from) so that the
    // scanner's prefix stripping works even when `root` is relative or a symlink.
    let (notes, scan_roots) = scan_notes_with_debug(&key.root, note_roots, limits)?;
    let snapshot = Arc::new(ScanSnapshot { notes, scan_roots });

    let mut cache = SCAN_CACHE
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    // Another thread may have populated the entry while this one was scanning; in that case
    // its snapshot wins so every caller observes the same `Arc`.
    Ok(Arc::clone(cache.entry(key).or_insert(snapshot)))
}

/// Scans the vault for Markdown notes and reports which roots were walked.
///
/// Every regular file with the exact extension `md` found under the scan roots (see
/// `build_scan_roots`), at most `limits.max_depth` levels deep, is read and parsed into a
/// [`Note`]. Notes are returned sorted by their vault-relative path.
///
/// The second tuple element lists the scan roots relative to the canonical vault root, with
/// `\` normalised to `/`.
///
/// # Errors
///
/// Fails when:
/// - `root` cannot be canonicalized or a configured note root is invalid;
/// - walking a directory fails, or a file cannot be stat'ed or read as UTF-8;
/// - more than `limits.max_files` Markdown files are found;
/// - a single file is larger than `limits.max_file_bytes`;
/// - the cumulative size exceeds `limits.max_total_bytes`;
/// - front matter cannot be split from a note.
pub fn scan_notes_with_debug(
    root: &Path,
    note_roots: &[String],
    limits: &VaultLimits,
) -> Result<(Vec<Note>, Vec<String>)> {
    let canonical_root = root
        .canonicalize()
        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
    // Resolve scan roots against the canonical root so every walked path carries the prefix
    // that is stripped below, even when `root` is relative or reached through a symlink and
    // no `note_roots` are configured.
    let scan_roots = build_scan_roots(&canonical_root, note_roots)?;
    let scan_root_strings = scan_roots
        .iter()
        .map(|path| {
            path.strip_prefix(&canonical_root)
                .unwrap_or(path)
                .to_string_lossy()
                .replace('\\', "/")
        })
        .collect::<Vec<_>>();

    let mut notes = Vec::new();
    let mut total_bytes = 0u64;

    for scan_root in scan_roots {
        for entry in WalkDir::new(&scan_root).max_depth(limits.max_depth) {
            let entry = entry
                .with_context(|| format!("failed to walk scan root {}", scan_root.display()))?;
            let path = entry.path();
            if !entry.file_type().is_file()
                || path.extension().and_then(|ext| ext.to_str()) != Some("md")
            {
                continue;
            }

            // Checked before reading, so exactly `max_files` notes are still accepted.
            if notes.len() >= limits.max_files {
                anyhow::bail!("vault scan exceeded max_files limit: {}", limits.max_files);
            }

            let metadata = fs::metadata(path)
                .with_context(|| format!("failed to stat markdown file {}", path.display()))?;
            if metadata.len() > limits.max_file_bytes {
                anyhow::bail!(
                    "markdown file exceeds max_file_bytes limit: {} ({} bytes)",
                    path.display(),
                    metadata.len()
                );
            }
            // Saturate instead of overflowing when the limits are configured near `u64::MAX`.
            total_bytes = total_bytes.saturating_add(metadata.len());
            if total_bytes > limits.max_total_bytes {
                anyhow::bail!(
                    "vault scan exceeded max_total_bytes limit: {}",
                    limits.max_total_bytes
                );
            }

            let raw = fs::read_to_string(path)
                .with_context(|| format!("failed to read markdown file {}", path.display()))?;
            let relative_path = path
                .strip_prefix(&canonical_root)
                .with_context(|| {
                    format!(
                        "markdown file {} is not inside vault root {}",
                        path.display(),
                        canonical_root.display()
                    )
                })?
                .to_string_lossy()
                .replace('\\', "/");
            let (frontmatter, body) = frontmatter::split_frontmatter(&raw)?;
            let sections = markdown::extract_sections(&body);
            let title = markdown::extract_title(&relative_path, &body);
            let wikilinks = wikilink::extract_wikilinks(&body);

            notes.push(Note::new(
                path.to_path_buf(),
                relative_path,
                title,
                frontmatter,
                sections,
                wikilinks,
                body,
            ));
        }
    }

    // Directory iteration order is platform-dependent; sort for a stable result.
    notes.sort_by(|left, right| left.relative_path.cmp(&right.relative_path));
    Ok((notes, scan_root_strings))
}

/// Test-only helper that removes every entry from `SCAN_CACHE`.
///
/// Panics if the cache mutex is poisoned.
#[cfg(test)]
pub(crate) fn clear_scan_cache() {
    let mut cache = SCAN_CACHE.lock().unwrap();
    cache.clear();
}

/// Resolves the directories to walk for a vault scan, always as canonical paths.
///
/// With no `note_roots` the canonical vault root itself is the only scan root. Otherwise each
/// entry is joined onto `root`, must exist, must be a directory, and must canonicalize to a
/// path inside the canonical vault root. Duplicates and roots nested inside another
/// configured root are dropped so no file is visited twice.
///
/// # Errors
///
/// Fails when `root` or a note root cannot be canonicalized, or when a note root is missing,
/// is not a directory, or resolves outside the vault root.
fn build_scan_roots(root: &Path, note_roots: &[String]) -> Result<Vec<PathBuf>> {
    let canonical_root = root
        .canonicalize()
        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;

    if note_roots.is_empty() {
        // Return the canonical form (not `root` as given) so callers can strip the canonical
        // vault prefix from every walked path regardless of how `root` was spelled.
        return Ok(vec![canonical_root]);
    }

    let mut scan_roots = Vec::with_capacity(note_roots.len());
    for note_root in note_roots {
        let path = root.join(note_root);
        if !path.exists() {
            anyhow::bail!("configured note_root does not exist: {}", path.display());
        }
        if !path.is_dir() {
            anyhow::bail!(
                "configured note_root is not a directory: {}",
                path.display()
            );
        }
        let canonical_path = path
            .canonicalize()
            .with_context(|| format!("failed to canonicalize note_root {}", path.display()))?;
        // Canonicalization resolves `..` and symlinks, so this catches escapes via either.
        if !canonical_path.starts_with(&canonical_root) {
            anyhow::bail!(
                "configured note_root escapes vault root: {}",
                canonical_path.display()
            );
        }
        scan_roots.push(canonical_path);
    }

    scan_roots.sort();
    scan_roots.dedup();

    // Paths order component-wise, so an ancestor always sorts before its descendants. It is
    // therefore enough to skip any path that lies under an already-kept root; a kept root can
    // never turn out to be nested inside a later one.
    let mut filtered_roots: Vec<PathBuf> = Vec::with_capacity(scan_roots.len());
    for path in scan_roots {
        let covered = filtered_roots
            .iter()
            .any(|existing| path.starts_with(existing));
        if !covered {
            filtered_roots.push(path);
        }
    }

    Ok(filtered_roots)
}

#[cfg(test)]
mod tests {
    use super::{cached_scan_notes_with_debug, clear_scan_cache};
    use crate::config::VaultLimits;
    use std::fs;
    use std::sync::Arc;

    /// Two scans with identical inputs must hand back the very same cached snapshot.
    #[test]
    fn cached_scan_should_reuse_snapshot_for_same_inputs() {
        // Start cold so an entry left behind by another test cannot satisfy the lookup.
        clear_scan_cache();

        let workspace = tempfile::tempdir().unwrap();
        let vault_root = workspace.path().join("vault");
        let projects_dir = vault_root.join("10-Projects");
        fs::create_dir_all(projects_dir).unwrap();

        let note_path = vault_root.join("10-Projects/context.md");
        fs::write(note_path, "# Context\n\nrepo_path and routing\n").unwrap();

        let limits = VaultLimits::default();
        let note_roots = vec!["10-Projects".to_string()];
        let scan = || cached_scan_notes_with_debug(&vault_root, &note_roots, &limits).unwrap();

        let first = scan();
        let second = scan();

        // Pointer equality proves the second call was served from the cache.
        assert!(Arc::ptr_eq(&first, &second));
        assert_eq!(first.notes.len(), 1);
    }
}