use crate::config::VaultLimits;
use crate::domain::Note;
use crate::support::Result;
use crate::vault::{frontmatter, markdown, wikilink};
use anyhow::Context;
use once_cell::sync::Lazy;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use walkdir::WalkDir;
/// Result of one vault scan: the parsed notes plus the roots that were walked.
#[derive(Clone, Debug)]
pub struct ScanSnapshot {
    /// Notes produced by the scan.
    pub notes: Vec<Note>,
    /// String form of each directory the scan walked, as reported by the scanner.
    pub scan_roots: Vec<String>,
}
/// A shared scan snapshot paired with a project id and a list of note roots.
///
/// NOTE(review): nothing in this module constructs this type; presumably
/// `note_roots` are the roots the snapshot was scanned with — confirm against callers.
#[derive(Clone, Debug)]
pub struct RoutedSnapshot {
    /// Identifier of the project this snapshot is associated with.
    pub project_id: String,
    /// Note roots carried alongside the snapshot.
    pub note_roots: Vec<String>,
    /// Reference-counted scan result, cheap to clone and share.
    pub snapshot: Arc<ScanSnapshot>,
}
/// A shared scan snapshot paired with an optional project id and a list of note roots.
///
/// NOTE(review): nothing in this module constructs this type; the meaning of a
/// missing `project_id` is decided by callers — confirm there.
#[derive(Clone, Debug)]
pub struct WakeupSnapshot {
    /// Identifier of the associated project, when there is one.
    pub project_id: Option<String>,
    /// Note roots carried alongside the snapshot.
    pub note_roots: Vec<String>,
    /// Reference-counted scan result, cheap to clone and share.
    pub snapshot: Arc<ScanSnapshot>,
}
/// Lookup key for the scan cache.
///
/// Two scans share a cache entry only when every field below is equal, so a
/// change to the root, the note roots (including their order), or any limit
/// produces a distinct entry.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct ScanCacheKey {
    /// Vault root the scan was requested for.
    root: PathBuf,
    /// Note roots exactly as supplied by the caller.
    note_roots: Vec<String>,
    /// Limit on the number of files.
    max_files: usize,
    /// Limit on the size of a single file, in bytes.
    max_file_bytes: u64,
    /// Limit on the combined size of all files, in bytes.
    max_total_bytes: u64,
    /// Limit on directory traversal depth.
    max_depth: usize,
}
/// Map type backing the scan cache: one shared snapshot per distinct key.
type ScanCacheMap = std::collections::HashMap<ScanCacheKey, Arc<ScanSnapshot>>;

/// Process-wide, lazily created cache of completed scans, guarded by a mutex.
static SCAN_CACHE: Lazy<Mutex<ScanCacheMap>> = Lazy::new(|| Mutex::new(ScanCacheMap::new()));
/// Scans the vault and returns only the parsed notes.
///
/// Delegates to [`scan_notes_with_debug`] and discards the list of scan roots.
///
/// # Errors
///
/// Propagates any error returned by [`scan_notes_with_debug`].
pub fn scan_notes(root: &Path, note_roots: &[String], limits: &VaultLimits) -> Result<Vec<Note>> {
    scan_notes_with_debug(root, note_roots, limits).map(|(notes, _scan_roots)| notes)
}
/// Returns a shared snapshot of the vault scan, reusing a cached one when available.
///
/// The cache key is the canonicalized `root`, the `note_roots` as given, and the
/// four limit values. On a miss the vault is scanned and the result stored.
/// The cache lock is released while scanning, so two callers racing on the same
/// key may both scan; whichever snapshot is inserted first is the one returned
/// to both. Nothing in this function evicts or refreshes an entry.
///
/// # Errors
///
/// Fails if `root` cannot be canonicalized or if the underlying scan fails.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
pub fn cached_scan_notes_with_debug(
    root: &Path,
    note_roots: &[String],
    limits: &VaultLimits,
) -> Result<Arc<ScanSnapshot>> {
    let cache_key = ScanCacheKey {
        root: root
            .canonicalize()
            .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?,
        note_roots: note_roots.to_vec(),
        max_files: limits.max_files,
        max_file_bytes: limits.max_file_bytes,
        max_total_bytes: limits.max_total_bytes,
        max_depth: limits.max_depth,
    };

    // Fast path: the guard is a temporary, so the lock is released before scanning.
    let cached = SCAN_CACHE.lock().unwrap().get(&cache_key).map(Arc::clone);
    if let Some(hit) = cached {
        return Ok(hit);
    }

    let (notes, scan_roots) = scan_notes_with_debug(root, note_roots, limits)?;
    let fresh = Arc::new(ScanSnapshot { notes, scan_roots });

    // Another caller may have filled the slot meanwhile; keep whatever is already there.
    let mut cache = SCAN_CACHE.lock().unwrap();
    let winner = cache
        .entry(cache_key)
        .or_insert_with(|| Arc::clone(&fresh));
    Ok(Arc::clone(winner))
}
/// Walks the vault, parses every `.md` file found, and reports which roots were walked.
///
/// Returns the parsed notes sorted by `relative_path`, together with the scan
/// roots rendered as `/`-separated strings (relative to the canonical vault root
/// when the root lies under it, otherwise the full path).
///
/// Each note's relative path is obtained by stripping the canonical vault root
/// from the walked path, so walked paths must already live under that canonical
/// root or the scan fails.
///
/// # Errors
///
/// Fails when `root` cannot be canonicalized, when the scan roots cannot be
/// built, when directory traversal, `stat`, or reading a file fails, when
/// frontmatter splitting fails, or when any of the `max_files`,
/// `max_file_bytes`, or `max_total_bytes` limits is exceeded.
pub fn scan_notes_with_debug(
    root: &Path,
    note_roots: &[String],
    limits: &VaultLimits,
) -> Result<(Vec<Note>, Vec<String>)> {
    let vault_root = root
        .canonicalize()
        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
    let walk_roots = build_scan_roots(root, note_roots)?;

    // Human-readable form of each root, with Windows separators normalised.
    let mut reported_roots = Vec::with_capacity(walk_roots.len());
    for walk_root in &walk_roots {
        let shown = walk_root
            .strip_prefix(&vault_root)
            .unwrap_or(walk_root.as_path());
        reported_roots.push(shown.to_string_lossy().replace('\\', "/"));
    }

    let mut collected: Vec<Note> = Vec::new();
    let mut bytes_seen = 0u64;

    for walk_root in walk_roots {
        for visited in WalkDir::new(&walk_root).max_depth(limits.max_depth) {
            let visited = visited?;
            let file_path = visited.path();

            let is_markdown_file = visited.file_type().is_file()
                && file_path.extension().and_then(|ext| ext.to_str()) == Some("md");
            if !is_markdown_file {
                continue;
            }

            // Checked before reading, so at most `max_files` notes are ever parsed.
            if collected.len() >= limits.max_files {
                anyhow::bail!("vault scan exceeded max_files limit: {}", limits.max_files);
            }

            let size = fs::metadata(file_path)
                .with_context(|| format!("failed to stat markdown file {}", file_path.display()))?
                .len();
            if size > limits.max_file_bytes {
                anyhow::bail!(
                    "markdown file exceeds max_file_bytes limit: {} ({} bytes)",
                    file_path.display(),
                    size
                );
            }

            bytes_seen += size;
            if bytes_seen > limits.max_total_bytes {
                anyhow::bail!(
                    "vault scan exceeded max_total_bytes limit: {}",
                    limits.max_total_bytes
                );
            }

            let contents = fs::read_to_string(file_path)
                .with_context(|| format!("failed to read markdown file {}", file_path.display()))?;
            let relative_path = file_path
                .strip_prefix(&vault_root)?
                .to_string_lossy()
                .replace('\\', "/");

            let (front_matter, body) = frontmatter::split_frontmatter(&contents)?;
            let sections = markdown::extract_sections(&body);
            let title = markdown::extract_title(&relative_path, &body);
            let wikilinks = wikilink::extract_wikilinks(&body);

            collected.push(Note::new(
                file_path.to_path_buf(),
                relative_path,
                title,
                front_matter,
                sections,
                wikilinks,
                body,
            ));
        }
    }

    // Deterministic output order regardless of traversal order.
    collected.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
    Ok((collected, reported_roots))
}
/// Empties the process-wide scan cache. Compiled for tests only.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
#[cfg(test)]
pub(crate) fn clear_scan_cache() {
    let mut cache = SCAN_CACHE.lock().unwrap();
    cache.clear();
}
/// Resolves the directories a scan should walk, always as canonical paths.
///
/// With no `note_roots` the canonical vault root itself is the single scan root.
/// Otherwise each entry is joined onto `root`, required to be an existing
/// directory inside the vault, canonicalized, and then de-duplicated so that no
/// returned root is nested inside another.
///
/// Returning canonical paths in every case matters to the caller, which derives
/// note-relative paths by stripping the canonical vault root from walked paths;
/// handing back a relative or symlinked `root` unchanged would make that fail.
///
/// # Errors
///
/// Fails when `root` or a note root cannot be canonicalized, when a note root
/// does not exist or is not a directory, or when a note root resolves to a
/// location outside the vault root.
fn build_scan_roots(root: &Path, note_roots: &[String]) -> Result<Vec<PathBuf>> {
    let canonical_root = root
        .canonicalize()
        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;

    if note_roots.is_empty() {
        return Ok(vec![canonical_root]);
    }

    let mut scan_roots = Vec::with_capacity(note_roots.len());
    for note_root in note_roots {
        let path = root.join(note_root);
        if !path.exists() {
            anyhow::bail!("configured note_root does not exist: {}", path.display());
        }
        if !path.is_dir() {
            anyhow::bail!(
                "configured note_root is not a directory: {}",
                path.display()
            );
        }
        let canonical_path = path
            .canonicalize()
            .with_context(|| format!("failed to canonicalize note_root {}", path.display()))?;
        // Rejects `..` segments, absolute entries, and symlinks that leave the vault.
        if !canonical_path.starts_with(&canonical_root) {
            anyhow::bail!(
                "configured note_root escapes vault root: {}",
                canonical_path.display()
            );
        }
        scan_roots.push(canonical_path);
    }

    scan_roots.sort();
    scan_roots.dedup();

    // Keep only outermost roots so no file is visited twice.
    let mut filtered_roots: Vec<PathBuf> = Vec::with_capacity(scan_roots.len());
    for path in scan_roots {
        if filtered_roots
            .iter()
            .any(|existing| path.starts_with(existing))
        {
            continue;
        }
        // Defensive: drop any previously kept root that the new one contains.
        filtered_roots.retain(|existing| !existing.starts_with(&path));
        filtered_roots.push(path);
    }
    Ok(filtered_roots)
}
#[cfg(test)]
mod tests {
    use super::{cached_scan_notes_with_debug, clear_scan_cache};
    use crate::config::VaultLimits;
    use std::fs;
    use std::sync::Arc;

    /// Two cached scans with identical inputs must hand back the very same
    /// `Arc`, proving the second call was served from the cache.
    #[test]
    fn cached_scan_should_reuse_snapshot_for_same_inputs() {
        clear_scan_cache();
        let temp = tempfile::tempdir().unwrap();
        let vault_root = temp.path().join("vault");
        fs::create_dir_all(vault_root.join("10-Projects")).unwrap();
        fs::write(
            vault_root.join("10-Projects/context.md"),
            "# Context\n\nrepo_path and routing\n",
        )
        .unwrap();
        let note_roots = vec!["10-Projects".to_string()];
        let limits = VaultLimits::default();

        let first = cached_scan_notes_with_debug(&vault_root, &note_roots, &limits).unwrap();
        let second = cached_scan_notes_with_debug(&vault_root, &note_roots, &limits).unwrap();

        assert!(Arc::ptr_eq(&first, &second));
        assert_eq!(first.notes.len(), 1);
    }
}