mdvault_core/vault/
walker.rs1use std::path::{Path, PathBuf};
4use std::time::SystemTime;
5use thiserror::Error;
6use walkdir::WalkDir;
7
8#[derive(Debug, Error)]
9pub enum VaultWalkerError {
10 #[error("vault root does not exist: {0}")]
11 MissingRoot(String),
12
13 #[error("failed to walk vault directory {0}: {1}")]
14 WalkError(String, #[source] walkdir::Error),
15
16 #[error("failed to read file metadata {0}: {1}")]
17 MetadataError(String, #[source] std::io::Error),
18}
19
/// A single markdown file discovered by [`VaultWalker::walk`].
#[derive(Debug, Clone)]
pub struct WalkedFile {
    /// Path of the file as yielded by the directory walk, which starts at the
    /// walker's root.
    pub absolute_path: PathBuf,

    /// Path of the file with the walker's root prefix stripped.
    pub relative_path: PathBuf,

    /// Last-modification time taken from the file's metadata; the Unix epoch
    /// when the platform cannot report one.
    pub modified: SystemTime,

    /// File length in bytes, as reported by the file's metadata.
    pub size: u64,
}
32
/// Walks a vault directory and collects the markdown files inside it.
#[derive(Debug)]
pub struct VaultWalker {
    /// Canonicalized vault root; every walk starts here.
    root: PathBuf,

    /// Folders to skip. Each walked entry's path relative to `root` is tested
    /// against these prefixes.
    excluded_folders: Vec<PathBuf>,
}
40
41impl VaultWalker {
42 pub fn new(root: &Path) -> Result<Self, VaultWalkerError> {
44 Self::with_exclusions(root, Vec::new())
45 }
46
47 pub fn with_exclusions(
53 root: &Path,
54 excluded_folders: Vec<PathBuf>,
55 ) -> Result<Self, VaultWalkerError> {
56 let root = root
57 .canonicalize()
58 .map_err(|_| VaultWalkerError::MissingRoot(root.display().to_string()))?;
59
60 if !root.exists() {
61 return Err(VaultWalkerError::MissingRoot(root.display().to_string()));
62 }
63
64 let excluded_folders = excluded_folders
66 .into_iter()
67 .map(|p| {
68 if p.is_absolute() {
69 p.strip_prefix(&root).unwrap_or(&p).to_path_buf()
70 } else {
71 p
72 }
73 })
74 .collect();
75
76 Ok(Self { root, excluded_folders })
77 }
78
79 pub fn walk(&self) -> Result<Vec<WalkedFile>, VaultWalkerError> {
82 let mut files = Vec::new();
83
84 for entry in WalkDir::new(&self.root)
85 .follow_links(false)
86 .into_iter()
87 .filter_entry(|e| !self.is_excluded(e))
88 {
89 let entry = entry.map_err(|e| {
90 VaultWalkerError::WalkError(self.root.display().to_string(), e)
91 })?;
92
93 let path = entry.path();
94 if !path.is_file() || !is_markdown_file(path) {
95 continue;
96 }
97
98 let metadata = path.metadata().map_err(|e| {
99 VaultWalkerError::MetadataError(path.display().to_string(), e)
100 })?;
101
102 let relative_path =
103 path.strip_prefix(&self.root).unwrap_or(path).to_path_buf();
104
105 files.push(WalkedFile {
106 absolute_path: path.to_path_buf(),
107 relative_path,
108 modified: metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
109 size: metadata.len(),
110 });
111 }
112
113 files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
114 Ok(files)
115 }
116
117 fn is_excluded(&self, entry: &walkdir::DirEntry) -> bool {
119 if entry.depth() == 0 {
121 return false;
122 }
123
124 let name = entry.file_name().to_string_lossy();
125
126 if name.starts_with('.') {
128 return true;
129 }
130
131 if matches!(name.as_ref(), "node_modules" | "target" | "__pycache__" | "venv") {
133 return true;
134 }
135
136 if !self.excluded_folders.is_empty()
138 && let Ok(relative) = entry.path().strip_prefix(&self.root)
139 {
140 for excluded in &self.excluded_folders {
141 if relative.starts_with(excluded) {
143 return true;
144 }
145 }
146 }
147
148 false
149 }
150
151 pub fn root(&self) -> &Path {
153 &self.root
154 }
155}
156
/// Reports whether `path` has the exact extension `md`.
///
/// The comparison is case-sensitive, and a path with no extension or with a
/// non-UTF-8 extension is not considered markdown.
fn is_markdown_file(path: &Path) -> bool {
    let extension = path.extension().and_then(|ext| ext.to_str());
    matches!(extension, Some("md"))
}
160
#[cfg(test)]
mod tests {
    use super::*;
    use std::ffi::OsStr;
    use std::fs;
    use tempfile::TempDir;

    /// Builds a throwaway vault containing two top-level notes, one nested
    /// note, one note inside a hidden directory and one non-markdown file.
    fn create_test_vault() -> TempDir {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        fs::write(root.join("note1.md"), "# Note 1").unwrap();
        fs::write(root.join("note2.md"), "# Note 2").unwrap();

        fs::create_dir(root.join("subdir")).unwrap();
        fs::write(root.join("subdir/note3.md"), "# Note 3").unwrap();

        fs::create_dir(root.join(".hidden")).unwrap();
        fs::write(root.join(".hidden/secret.md"), "# Secret").unwrap();

        fs::write(root.join("readme.txt"), "Not markdown").unwrap();

        dir
    }

    /// Collects the vault-relative path of every walked file.
    ///
    /// Paths are compared as `PathBuf`s rather than strings so that the
    /// assertions do not depend on the platform's path separator.
    fn relative_paths(files: &[WalkedFile]) -> Vec<PathBuf> {
        files.iter().map(|file| file.relative_path.clone()).collect()
    }

    /// Returns `true` when any component of `path` is exactly `name`.
    fn has_component(path: &Path, name: &str) -> bool {
        path.iter().any(|part| part == OsStr::new(name))
    }

    #[test]
    fn test_walk_finds_markdown_files() {
        let vault = create_test_vault();
        let walker = VaultWalker::new(vault.path()).unwrap();
        let files = walker.walk().unwrap();

        assert_eq!(files.len(), 3);

        let paths = relative_paths(&files);
        assert!(paths.contains(&PathBuf::from("note1.md")));
        assert!(paths.contains(&PathBuf::from("note2.md")));
        assert!(paths.contains(&PathBuf::from("subdir/note3.md")));
    }

    #[test]
    fn test_walk_skips_hidden_directories() {
        let vault = create_test_vault();
        let walker = VaultWalker::new(vault.path()).unwrap();
        let files = walker.walk().unwrap();

        let paths = relative_paths(&files);
        assert!(!paths.iter().any(|p| has_component(p, ".hidden")));
    }

    #[test]
    fn test_walk_skips_non_markdown() {
        let vault = create_test_vault();
        let walker = VaultWalker::new(vault.path()).unwrap();
        let files = walker.walk().unwrap();

        let paths = relative_paths(&files);
        assert!(!paths.iter().any(|p| has_component(p, "readme.txt")));
    }

    #[test]
    fn test_walk_results_sorted() {
        let vault = create_test_vault();
        let walker = VaultWalker::new(vault.path()).unwrap();
        let files = walker.walk().unwrap();

        let paths = relative_paths(&files);
        let mut sorted = paths.clone();
        sorted.sort();

        assert_eq!(paths, sorted);
    }

    #[test]
    fn test_walk_reports_metadata() {
        let vault = create_test_vault();
        let walker = VaultWalker::new(vault.path()).unwrap();
        let files = walker.walk().unwrap();

        let note = files
            .iter()
            .find(|file| file.relative_path == PathBuf::from("note1.md"))
            .expect("note1.md should be part of the walk");

        // "# Note 1" is eight bytes long.
        assert_eq!(note.size, 8);
        assert_eq!(note.absolute_path, walker.root().join(&note.relative_path));
    }

    #[test]
    fn test_missing_root() {
        let result = VaultWalker::new(Path::new("/nonexistent/path"));
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), VaultWalkerError::MissingRoot(_)));
    }

    #[test]
    fn test_walk_with_exclusions() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        fs::write(root.join("note1.md"), "# Note 1").unwrap();

        fs::create_dir_all(root.join("templates")).unwrap();
        fs::write(root.join("templates/task.md"), "# Task Template").unwrap();

        fs::create_dir_all(root.join("automations/templates")).unwrap();
        fs::write(root.join("automations/templates/meeting.md"), "# Meeting").unwrap();

        fs::create_dir_all(root.join("projects")).unwrap();
        fs::write(root.join("projects/proj.md"), "# Project").unwrap();

        // Without exclusions every note is found.
        let walker = VaultWalker::new(root).unwrap();
        let files = walker.walk().unwrap();
        assert_eq!(files.len(), 4);

        let excluded = vec![PathBuf::from("templates"), PathBuf::from("automations")];
        let walker = VaultWalker::with_exclusions(root, excluded).unwrap();
        let files = walker.walk().unwrap();

        assert_eq!(files.len(), 2);

        let paths = relative_paths(&files);
        assert!(paths.contains(&PathBuf::from("note1.md")));
        assert!(paths.contains(&PathBuf::from("projects/proj.md")));
        assert!(!paths.iter().any(|p| has_component(p, "templates")));
        assert!(!paths.iter().any(|p| has_component(p, "automations")));
    }

    #[test]
    fn test_walk_with_nested_exclusion() {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        fs::create_dir_all(root.join("docs/internal")).unwrap();
        fs::write(root.join("docs/readme.md"), "# Docs").unwrap();
        fs::write(root.join("docs/internal/secret.md"), "# Secret").unwrap();

        fs::write(root.join("note.md"), "# Note").unwrap();

        let excluded = vec![PathBuf::from("docs/internal")];
        let walker = VaultWalker::with_exclusions(root, excluded).unwrap();
        let files = walker.walk().unwrap();

        assert_eq!(files.len(), 2);

        let paths = relative_paths(&files);
        assert!(paths.contains(&PathBuf::from("note.md")));
        assert!(paths.contains(&PathBuf::from("docs/readme.md")));
        assert!(!paths.iter().any(|p| has_component(p, "internal")));
    }

    #[test]
    fn test_walk_with_absolute_exclusion() {
        let dir = TempDir::new().unwrap();
        // Build the exclusion from the canonical root so that it shares the
        // exact prefix the walker stores internally.
        let root = dir.path().canonicalize().unwrap();

        fs::write(root.join("note.md"), "# Note").unwrap();
        fs::create_dir_all(root.join("templates")).unwrap();
        fs::write(root.join("templates/task.md"), "# Task Template").unwrap();

        let excluded = vec![root.join("templates")];
        let walker = VaultWalker::with_exclusions(&root, excluded).unwrap();
        let files = walker.walk().unwrap();

        assert_eq!(relative_paths(&files), vec![PathBuf::from("note.md")]);
    }

    #[test]
    fn test_is_markdown_file() {
        assert!(is_markdown_file(Path::new("note.md")));
        assert!(is_markdown_file(Path::new("subdir/note.md")));
        assert!(!is_markdown_file(Path::new("readme.txt")));
        assert!(!is_markdown_file(Path::new("no_extension")));
    }
}