1use crate::config::VaultLimits;
2use crate::domain::Note;
3use crate::support::Result;
4use crate::vault::{frontmatter, markdown, wikilink};
5use anyhow::Context;
6use once_cell::sync::Lazy;
7use std::fs;
8use std::path::{Path, PathBuf};
9use std::sync::{Arc, Mutex};
10use walkdir::WalkDir;
11
12#[derive(Debug, Clone)]
13pub struct ScanSnapshot {
14 pub notes: Vec<Note>,
15 pub scan_roots: Vec<String>,
16}
17
18#[derive(Debug, Clone)]
19pub struct RoutedSnapshot {
20 pub project_id: String,
21 pub note_roots: Vec<String>,
22 pub snapshot: Arc<ScanSnapshot>,
23}
24
25#[derive(Debug, Clone)]
26pub struct WakeupSnapshot {
27 pub project_id: Option<String>,
28 pub note_roots: Vec<String>,
29 pub snapshot: Arc<ScanSnapshot>,
30}
31
/// Key of the process-wide scan cache.
///
/// Two scans share a cache entry only when the canonical vault root, the configured
/// note roots (order-sensitive) and every scan limit are equal.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct ScanCacheKey {
    /// Canonicalized vault root.
    root: PathBuf,
    /// Note roots exactly as passed by the caller.
    note_roots: Vec<String>,
    /// Copy of `VaultLimits::max_files`.
    max_files: usize,
    /// Copy of `VaultLimits::max_file_bytes`.
    max_file_bytes: u64,
    /// Copy of `VaultLimits::max_total_bytes`.
    max_total_bytes: u64,
    /// Copy of `VaultLimits::max_depth`.
    max_depth: usize,
}
41
42static SCAN_CACHE: Lazy<Mutex<std::collections::HashMap<ScanCacheKey, Arc<ScanSnapshot>>>> =
43 Lazy::new(|| Mutex::new(std::collections::HashMap::new()));
44
45pub fn scan_notes(root: &Path, note_roots: &[String], limits: &VaultLimits) -> Result<Vec<Note>> {
46 let (notes, _) = scan_notes_with_debug(root, note_roots, limits)?;
47 Ok(notes)
48}
49
50pub fn cached_scan_notes_with_debug(
51 root: &Path,
52 note_roots: &[String],
53 limits: &VaultLimits,
54) -> Result<Arc<ScanSnapshot>> {
55 let canonical_root = root
56 .canonicalize()
57 .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
58 let key = ScanCacheKey {
59 root: canonical_root,
60 note_roots: note_roots.to_vec(),
61 max_files: limits.max_files,
62 max_file_bytes: limits.max_file_bytes,
63 max_total_bytes: limits.max_total_bytes,
64 max_depth: limits.max_depth,
65 };
66
67 if let Some(snapshot) = SCAN_CACHE.lock().unwrap().get(&key).cloned() {
68 return Ok(snapshot);
69 }
70
71 let (notes, scan_roots) = scan_notes_with_debug(root, note_roots, limits)?;
72 let snapshot = Arc::new(ScanSnapshot { notes, scan_roots });
73 let mut cache = SCAN_CACHE.lock().unwrap();
74 Ok(cache.entry(key).or_insert_with(|| snapshot.clone()).clone())
75}
76
77pub fn scan_notes_with_debug(
78 root: &Path,
79 note_roots: &[String],
80 limits: &VaultLimits,
81) -> Result<(Vec<Note>, Vec<String>)> {
82 let mut notes = Vec::new();
83 let mut total_bytes = 0u64;
84 let canonical_root = root
85 .canonicalize()
86 .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
87 let scan_roots = build_scan_roots(root, note_roots)?;
88 let scan_root_strings = scan_roots
89 .iter()
90 .map(|path| {
91 path.strip_prefix(&canonical_root)
92 .unwrap_or(path)
93 .to_string_lossy()
94 .replace('\\', "/")
95 })
96 .collect::<Vec<_>>();
97
98 for scan_root in scan_roots {
99 for entry in WalkDir::new(&scan_root).max_depth(limits.max_depth) {
100 let entry = entry?;
101 let path = entry.path();
102 if !entry.file_type().is_file()
103 || path.extension().and_then(|ext| ext.to_str()) != Some("md")
104 {
105 continue;
106 }
107
108 if notes.len() >= limits.max_files {
109 anyhow::bail!("vault scan exceeded max_files limit: {}", limits.max_files);
110 }
111
112 let metadata = fs::metadata(path)
113 .with_context(|| format!("failed to stat markdown file {}", path.display()))?;
114 if metadata.len() > limits.max_file_bytes {
115 anyhow::bail!(
116 "markdown file exceeds max_file_bytes limit: {} ({} bytes)",
117 path.display(),
118 metadata.len()
119 );
120 }
121 total_bytes += metadata.len();
122 if total_bytes > limits.max_total_bytes {
123 anyhow::bail!(
124 "vault scan exceeded max_total_bytes limit: {}",
125 limits.max_total_bytes
126 );
127 }
128
129 let raw = fs::read_to_string(path)
130 .with_context(|| format!("failed to read markdown file {}", path.display()))?;
131 let relative_path = path
132 .strip_prefix(&canonical_root)?
133 .to_string_lossy()
134 .replace('\\', "/");
135 let (frontmatter, body) = frontmatter::split_frontmatter(&raw)?;
136 let sections = markdown::extract_sections(&body);
137 let title = markdown::extract_title(&relative_path, &body);
138 let wikilinks = wikilink::extract_wikilinks(&body);
139
140 notes.push(Note::new(
141 path.to_path_buf(),
142 relative_path,
143 title,
144 frontmatter,
145 sections,
146 wikilinks,
147 body,
148 ));
149 }
150 }
151
152 notes.sort_by(|left, right| left.relative_path.cmp(&right.relative_path));
153 Ok((notes, scan_root_strings))
154}
155
/// Empties the process-wide scan cache so a test starts from a clean slate.
///
/// # Panics
///
/// Panics if the cache mutex is poisoned.
#[cfg(test)]
pub(crate) fn clear_scan_cache() {
    let mut cache = SCAN_CACHE.lock().unwrap();
    cache.clear();
}
160
161fn build_scan_roots(root: &Path, note_roots: &[String]) -> Result<Vec<PathBuf>> {
162 if note_roots.is_empty() {
163 return Ok(vec![root.to_path_buf()]);
164 }
165
166 let canonical_root = root
167 .canonicalize()
168 .with_context(|| format!("failed to canonicalize vault root {}", root.display()))?;
169
170 let mut scan_roots = Vec::new();
171 for note_root in note_roots {
172 let path = root.join(note_root);
173 if !path.exists() {
174 anyhow::bail!("configured note_root does not exist: {}", path.display());
175 }
176 if !path.is_dir() {
177 anyhow::bail!(
178 "configured note_root is not a directory: {}",
179 path.display()
180 );
181 }
182 let canonical_path = path
183 .canonicalize()
184 .with_context(|| format!("failed to canonicalize note_root {}", path.display()))?;
185 if !canonical_path.starts_with(&canonical_root) {
186 anyhow::bail!(
187 "configured note_root escapes vault root: {}",
188 canonical_path.display()
189 );
190 }
191 scan_roots.push(canonical_path);
192 }
193
194 scan_roots.sort();
195 scan_roots.dedup();
196
197 let mut filtered_roots: Vec<PathBuf> = Vec::new();
198 for path in scan_roots {
199 if filtered_roots
200 .iter()
201 .any(|existing| path.starts_with(existing))
202 {
203 continue;
204 }
205 filtered_roots.retain(|existing| !existing.starts_with(&path));
206 filtered_roots.push(path);
207 }
208
209 Ok(filtered_roots)
210}
211
#[cfg(test)]
mod tests {
    use super::{cached_scan_notes_with_debug, clear_scan_cache};
    use crate::config::VaultLimits;
    use std::fs;
    use std::sync::Arc;

    /// Two cached scans with identical inputs must hand back the very same `Arc`.
    #[test]
    fn cached_scan_should_reuse_snapshot_for_same_inputs() {
        // The cache is process-wide; start from a known-empty state.
        clear_scan_cache();

        let temp = tempfile::tempdir().unwrap();
        let vault_root = temp.path().join("vault");
        fs::create_dir_all(vault_root.join("10-Projects")).unwrap();
        fs::write(
            vault_root.join("10-Projects/context.md"),
            "# Context\n\nrepo_path and routing\n",
        )
        .unwrap();

        let note_roots = vec!["10-Projects".to_string()];
        let limits = VaultLimits::default();

        let first = cached_scan_notes_with_debug(&vault_root, &note_roots, &limits).unwrap();
        let second = cached_scan_notes_with_debug(&vault_root, &note_roots, &limits).unwrap();

        // Pointer equality proves the second call was served from the cache.
        assert!(Arc::ptr_eq(&first, &second));
        assert_eq!(first.notes.len(), 1);
    }
}