Skip to main content

spool/vault/
scanner.rs

1use crate::config::VaultLimits;
2use crate::domain::Note;
3use crate::support::Result;
4use crate::vault::{frontmatter, markdown, wikilink};
5use anyhow::Context;
6use once_cell::sync::Lazy;
7use std::fs;
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, Mutex};
10use walkdir::WalkDir;
11
/// Outcome of a single vault scan; this is the value stored in the scan cache.
#[derive(Debug, Clone)]
pub struct ScanSnapshot {
    /// Notes returned by the scan (`scan_notes_with_debug` sorts them by `relative_path`).
    pub notes: Vec<Note>,
    /// Scan roots reported by `scan_notes_with_debug`: each root with the canonical vault
    /// root prefix stripped where possible and `\` replaced by `/`.
    pub scan_roots: Vec<String>,
}
17
/// A shared scan snapshot paired with a project id and a list of note roots.
///
/// NOTE(review): nothing in this file constructs this type; the field notes below are
/// assumptions based on the names and should be confirmed against the callers.
#[derive(Debug, Clone)]
pub struct RoutedSnapshot {
    /// Presumably the id of the project the snapshot was routed to — TODO confirm.
    pub project_id: String,
    /// Presumably the note roots used for the scan — TODO confirm.
    pub note_roots: Vec<String>,
    /// Reference-counted scan result; cloning this struct shares the same snapshot.
    pub snapshot: Arc<ScanSnapshot>,
}
24
/// A shared scan snapshot paired with an optional project id and a list of note roots.
///
/// NOTE(review): nothing in this file constructs this type; the field notes below are
/// assumptions based on the names and should be confirmed against the callers.
#[derive(Debug, Clone)]
pub struct WakeupSnapshot {
    /// Optional project id; presumably `None` when no project was resolved — TODO confirm.
    pub project_id: Option<String>,
    /// Presumably the note roots used for the scan — TODO confirm.
    pub note_roots: Vec<String>,
    /// Reference-counted scan result; cloning this struct shares the same snapshot.
    pub snapshot: Arc<ScanSnapshot>,
}
31
/// Key identifying one cached scan: the vault root, the requested note roots, and the
/// limit values the scan ran with. Two requests share a cache entry only when every field
/// compares equal.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ScanCacheKey {
    /// Canonicalized vault root (filled from `root.canonicalize()` by the cached scan).
    root: PathBuf,
    /// Note roots exactly as passed by the caller; order is significant for equality.
    note_roots: Vec<String>,
    /// Copy of `VaultLimits::max_files`.
    max_files: usize,
    /// Copy of `VaultLimits::max_file_bytes`.
    max_file_bytes: u64,
    /// Copy of `VaultLimits::max_total_bytes`.
    max_total_bytes: u64,
    /// Copy of `VaultLimits::max_depth`.
    max_depth: usize,
}
41
42static SCAN_CACHE: Lazy<Mutex<std::collections::HashMap<ScanCacheKey, Arc<ScanSnapshot>>>> =
43    Lazy::new(|| Mutex::new(std::collections::HashMap::new()));
44
45pub fn scan_notes(root: &Path, note_roots: &[String], limits: &VaultLimits) -> Result<Vec<Note>> {
46    let (notes, _) = scan_notes_with_debug(root, note_roots, limits)?;
47    Ok(notes)
48}
49
50pub fn cached_scan_notes_with_debug(
51    root: &Path,
52    note_roots: &[String],
53    limits: &VaultLimits,
54) -> Result<Arc<ScanSnapshot>> {
55    let canonical_root = root
56        .canonicalize()
57        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
58    let key = ScanCacheKey {
59        root: canonical_root,
60        note_roots: note_roots.to_vec(),
61        max_files: limits.max_files,
62        max_file_bytes: limits.max_file_bytes,
63        max_total_bytes: limits.max_total_bytes,
64        max_depth: limits.max_depth,
65    };
66
67    if let Some(snapshot) = SCAN_CACHE.lock().unwrap().get(&key).cloned() {
68        return Ok(snapshot);
69    }
70
71    let (notes, scan_roots) = scan_notes_with_debug(root, note_roots, limits)?;
72    let snapshot = Arc::new(ScanSnapshot { notes, scan_roots });
73    let mut cache = SCAN_CACHE.lock().unwrap();
74    Ok(cache.entry(key).or_insert_with(|| snapshot.clone()).clone())
75}
76
77pub fn scan_notes_with_debug(
78    root: &Path,
79    note_roots: &[String],
80    limits: &VaultLimits,
81) -> Result<(Vec<Note>, Vec<String>)> {
82    let mut notes = Vec::new();
83    let mut total_bytes = 0u64;
84    let canonical_root = root
85        .canonicalize()
86        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
87    let scan_roots = build_scan_roots(root, note_roots)?;
88    let scan_root_strings = scan_roots
89        .iter()
90        .map(|path| {
91            path.strip_prefix(&canonical_root)
92                .unwrap_or(path)
93                .to_string_lossy()
94                .replace('\\', "/")
95        })
96        .collect::<Vec<_>>();
97
98    for scan_root in scan_roots {
99        for entry in WalkDir::new(&scan_root).max_depth(limits.max_depth) {
100            let entry = entry?;
101            let path = entry.path();
102            if !entry.file_type().is_file()
103                || path.extension().and_then(|ext| ext.to_str()) != Some("md")
104            {
105                continue;
106            }
107
108            if notes.len() >= limits.max_files {
109                anyhow::bail!("vault scan exceeded max_files limit: {}", limits.max_files);
110            }
111
112            let metadata = fs::metadata(path)
113                .with_context(|| format!("failed to stat markdown file {}", path.display()))?;
114            if metadata.len() > limits.max_file_bytes {
115                anyhow::bail!(
116                    "markdown file exceeds max_file_bytes limit: {} ({} bytes)",
117                    path.display(),
118                    metadata.len()
119                );
120            }
121            total_bytes += metadata.len();
122            if total_bytes > limits.max_total_bytes {
123                anyhow::bail!(
124                    "vault scan exceeded max_total_bytes limit: {}",
125                    limits.max_total_bytes
126                );
127            }
128
129            let raw = fs::read_to_string(path)
130                .with_context(|| format!("failed to read markdown file {}", path.display()))?;
131            let relative_path = path
132                .strip_prefix(&canonical_root)?
133                .to_string_lossy()
134                .replace('\\', "/");
135            let (frontmatter, body) = frontmatter::split_frontmatter(&raw)?;
136            let sections = markdown::extract_sections(&body);
137            let title = markdown::extract_title(&relative_path, &body);
138            let wikilinks = wikilink::extract_wikilinks(&body);
139
140            notes.push(Note::new(
141                path.to_path_buf(),
142                relative_path,
143                title,
144                frontmatter,
145                sections,
146                wikilinks,
147                body,
148            ));
149        }
150    }
151
152    notes.sort_by(|left, right| left.relative_path.cmp(&right.relative_path));
153    Ok((notes, scan_root_strings))
154}
155
/// Test-only helper that empties the global scan cache.
///
/// Panics if the cache mutex is poisoned.
#[cfg(test)]
pub(crate) fn clear_scan_cache() {
    let mut cache = SCAN_CACHE.lock().unwrap();
    cache.clear();
}
160
161fn build_scan_roots(root: &Path, note_roots: &[String]) -> Result<Vec<PathBuf>> {
162    if note_roots.is_empty() {
163        return Ok(vec![root.to_path_buf()]);
164    }
165
166    let canonical_root = root
167        .canonicalize()
168        .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
169
170    let mut scan_roots = Vec::new();
171    for note_root in note_roots {
172        let path = root.join(note_root);
173        if !path.exists() {
174            anyhow::bail!("configured note_root does not exist: {}", path.display());
175        }
176        if !path.is_dir() {
177            anyhow::bail!(
178                "configured note_root is not a directory: {}",
179                path.display()
180            );
181        }
182        let canonical_path = path
183            .canonicalize()
184            .with_context(|| format!("failed to canonicalize note_root {}", path.display()))?;
185        if !canonical_path.starts_with(&canonical_root) {
186            anyhow::bail!(
187                "configured note_root escapes vault root: {}",
188                canonical_path.display()
189            );
190        }
191        scan_roots.push(canonical_path);
192    }
193
194    scan_roots.sort();
195    scan_roots.dedup();
196
197    let mut filtered_roots: Vec<PathBuf> = Vec::new();
198    for path in scan_roots {
199        if filtered_roots
200            .iter()
201            .any(|existing| path.starts_with(existing))
202        {
203            continue;
204        }
205        filtered_roots.retain(|existing| !existing.starts_with(&path));
206        filtered_roots.push(path);
207    }
208
209    Ok(filtered_roots)
210}
211
#[cfg(test)]
mod tests {
    use super::{cached_scan_notes_with_debug, clear_scan_cache};
    use crate::config::VaultLimits;
    use std::fs;
    use std::sync::Arc;

    /// Two cached scans with identical inputs must hand back the very same `Arc`.
    #[test]
    fn cached_scan_should_reuse_snapshot_for_same_inputs() {
        // Start from an empty cache so an earlier entry cannot satisfy the first lookup.
        clear_scan_cache();

        // Fixture: <tmp>/vault/10-Projects/context.md
        let workspace = tempfile::tempdir().unwrap();
        let vault_root = workspace.path().join("vault");
        let project_dir = vault_root.join("10-Projects");
        fs::create_dir_all(&project_dir).unwrap();
        fs::write(
            project_dir.join("context.md"),
            "# Context\n\nrepo_path and routing\n",
        )
        .unwrap();

        let note_roots = ["10-Projects".to_string()];
        let limits = VaultLimits::default();
        let scan = || cached_scan_notes_with_debug(&vault_root, &note_roots, &limits).unwrap();

        let first = scan();
        let second = scan();

        assert!(Arc::ptr_eq(&first, &second));
        assert_eq!(first.notes.len(), 1);
    }
}