Skip to main content

mdvault_core/vault/
walker.rs

1//! Recursive vault directory walker.
2
3use std::path::{Path, PathBuf};
4use std::time::SystemTime;
5use thiserror::Error;
6use walkdir::WalkDir;
7
8#[derive(Debug, Error)]
9pub enum VaultWalkerError {
10    #[error("vault root does not exist: {0}")]
11    MissingRoot(String),
12
13    #[error("failed to walk vault directory {0}: {1}")]
14    WalkError(String, #[source] walkdir::Error),
15
16    #[error("failed to read file metadata {0}: {1}")]
17    MetadataError(String, #[source] std::io::Error),
18}
19
/// A single markdown file found by [`VaultWalker::walk`].
#[derive(Clone, Debug)]
pub struct WalkedFile {
    /// Full path to the file (the canonical vault root joined with the
    /// file's location inside the vault).
    pub absolute_path: PathBuf,
    /// The same location expressed relative to the vault root.
    pub relative_path: PathBuf,
    /// Last-modified timestamp taken from the file's metadata.
    pub modified: SystemTime,
    /// Length of the file in bytes, taken from the file's metadata.
    pub size: u64,
}
32
/// Discovers the markdown files stored under a vault root directory.
#[derive(Debug)]
pub struct VaultWalker {
    /// Vault root directory, stored in canonical form by the constructors.
    root: PathBuf,
    /// Folders skipped during the walk, expressed relative to `root`.
    excluded_folders: Vec<PathBuf>,
}
40
41impl VaultWalker {
42    /// Create a new walker for the given vault root.
43    pub fn new(root: &Path) -> Result<Self, VaultWalkerError> {
44        Self::with_exclusions(root, Vec::new())
45    }
46
47    /// Create a new walker with folder exclusions.
48    ///
49    /// Excluded folders can be specified as:
50    /// - Relative paths from vault root (e.g., "automations/templates")
51    /// - Absolute paths (will be converted to relative)
52    pub fn with_exclusions(
53        root: &Path,
54        excluded_folders: Vec<PathBuf>,
55    ) -> Result<Self, VaultWalkerError> {
56        let root = root
57            .canonicalize()
58            .map_err(|_| VaultWalkerError::MissingRoot(root.display().to_string()))?;
59
60        if !root.exists() {
61            return Err(VaultWalkerError::MissingRoot(root.display().to_string()));
62        }
63
64        // Normalize exclusions to be relative to root
65        let excluded_folders = excluded_folders
66            .into_iter()
67            .map(|p| {
68                if p.is_absolute() {
69                    p.strip_prefix(&root).unwrap_or(&p).to_path_buf()
70                } else {
71                    p
72                }
73            })
74            .collect();
75
76        Ok(Self { root, excluded_folders })
77    }
78
79    /// Walk the vault and return all markdown files.
80    /// Excludes hidden directories, common non-vault directories, and configured exclusions.
81    pub fn walk(&self) -> Result<Vec<WalkedFile>, VaultWalkerError> {
82        let mut files = Vec::new();
83
84        for entry in WalkDir::new(&self.root)
85            .follow_links(false)
86            .into_iter()
87            .filter_entry(|e| !self.is_excluded(e))
88        {
89            let entry = entry.map_err(|e| {
90                VaultWalkerError::WalkError(self.root.display().to_string(), e)
91            })?;
92
93            let path = entry.path();
94            if !path.is_file() || !is_markdown_file(path) {
95                continue;
96            }
97
98            let metadata = path.metadata().map_err(|e| {
99                VaultWalkerError::MetadataError(path.display().to_string(), e)
100            })?;
101
102            let relative_path =
103                path.strip_prefix(&self.root).unwrap_or(path).to_path_buf();
104
105            files.push(WalkedFile {
106                absolute_path: path.to_path_buf(),
107                relative_path,
108                modified: metadata.modified().unwrap_or(std::time::UNIX_EPOCH),
109                size: metadata.len(),
110            });
111        }
112
113        files.sort_by(|a, b| a.relative_path.cmp(&b.relative_path));
114        Ok(files)
115    }
116
117    /// Check if an entry should be excluded from walking.
118    fn is_excluded(&self, entry: &walkdir::DirEntry) -> bool {
119        // Never filter the root directory (depth 0)
120        if entry.depth() == 0 {
121            return false;
122        }
123
124        let name = entry.file_name().to_string_lossy();
125
126        // Skip hidden files and directories
127        if name.starts_with('.') {
128            return true;
129        }
130
131        // Skip common non-vault directories
132        if matches!(name.as_ref(), "node_modules" | "target" | "__pycache__" | "venv") {
133            return true;
134        }
135
136        // Check against configured exclusions
137        if !self.excluded_folders.is_empty()
138            && let Ok(relative) = entry.path().strip_prefix(&self.root)
139        {
140            for excluded in &self.excluded_folders {
141                // Check if the entry's path starts with the excluded folder
142                if relative.starts_with(excluded) {
143                    return true;
144                }
145            }
146        }
147
148        false
149    }
150
151    /// Get the vault root path.
152    pub fn root(&self) -> &Path {
153        &self.root
154    }
155}
156
/// Return `true` when `path` has the extension `md`, compared without regard
/// to ASCII case (so `note.md`, `NOTE.MD` and `Readme.Md` all qualify).
///
/// Paths with no extension, or whose extension is not valid UTF-8, yield
/// `false`.
fn is_markdown_file(path: &Path) -> bool {
    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| ext.eq_ignore_ascii_case("md"))
}
160
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Build a throwaway vault containing three visible notes, one note
    /// inside a hidden directory, and one non-markdown file.
    fn create_test_vault() -> TempDir {
        let vault = TempDir::new().unwrap();
        let base = vault.path();

        // `subdir` holds a regular note; `.hidden` must be skipped by the walker.
        for folder in ["subdir", ".hidden"] {
            fs::create_dir(base.join(folder)).unwrap();
        }

        let fixtures = [
            ("note1.md", "# Note 1"),
            ("note2.md", "# Note 2"),
            ("subdir/note3.md", "# Note 3"),
            (".hidden/secret.md", "# Secret"),
            // Not markdown, so it must not be reported.
            ("readme.txt", "Not markdown"),
        ];
        for (relative, body) in fixtures {
            fs::write(base.join(relative), body).unwrap();
        }

        vault
    }

    /// Write `body` to `relative` under `base`, creating parent folders first.
    fn write_note(base: &Path, relative: &str, body: &str) {
        let target = base.join(relative);
        if let Some(parent) = target.parent() {
            fs::create_dir_all(parent).unwrap();
        }
        fs::write(target, body).unwrap();
    }

    /// Render the relative path of every walked file as an owned string.
    fn relative_strings(files: &[WalkedFile]) -> Vec<String> {
        files
            .iter()
            .map(|file| file.relative_path.to_string_lossy().to_string())
            .collect()
    }

    #[test]
    fn test_walk_finds_markdown_files() {
        let vault = create_test_vault();
        let found = VaultWalker::new(vault.path()).unwrap().walk().unwrap();

        assert_eq!(found.len(), 3);

        for expected in ["note1.md", "note2.md", "subdir/note3.md"] {
            let expected = PathBuf::from(expected);
            assert!(found.iter().any(|file| file.relative_path == expected));
        }
    }

    #[test]
    fn test_walk_skips_hidden_directories() {
        let vault = create_test_vault();
        let found = VaultWalker::new(vault.path()).unwrap().walk().unwrap();

        let hidden_hits =
            relative_strings(&found).into_iter().filter(|p| p.contains(".hidden")).count();
        assert_eq!(hidden_hits, 0);
    }

    #[test]
    fn test_walk_skips_non_markdown() {
        let vault = create_test_vault();
        let found = VaultWalker::new(vault.path()).unwrap().walk().unwrap();

        let text_hits =
            relative_strings(&found).into_iter().filter(|p| p.contains("readme.txt")).count();
        assert_eq!(text_hits, 0);
    }

    #[test]
    fn test_walk_results_sorted() {
        let vault = create_test_vault();
        let found = VaultWalker::new(vault.path()).unwrap().walk().unwrap();

        // A sequence is sorted exactly when every adjacent pair is in order.
        let in_order =
            found.windows(2).all(|pair| pair[0].relative_path <= pair[1].relative_path);
        assert!(in_order);
    }

    #[test]
    fn test_missing_root() {
        match VaultWalker::new(Path::new("/nonexistent/path")) {
            Err(VaultWalkerError::MissingRoot(_)) => {}
            other => panic!("expected MissingRoot, got {other:?}"),
        }
    }

    #[test]
    fn test_walk_with_exclusions() {
        let vault = TempDir::new().unwrap();
        let base = vault.path();

        // One note at the top level and one in each of three folders.
        write_note(base, "note1.md", "# Note 1");
        write_note(base, "templates/task.md", "# Task Template");
        write_note(base, "automations/templates/meeting.md", "# Meeting");
        write_note(base, "projects/proj.md", "# Project");

        // With no exclusions every note is reported.
        let everything = VaultWalker::new(base).unwrap().walk().unwrap();
        assert_eq!(everything.len(), 4);

        // Excluding two top-level folders leaves only the other two notes.
        let skip = vec![PathBuf::from("templates"), PathBuf::from("automations")];
        let remaining = VaultWalker::with_exclusions(base, skip).unwrap().walk().unwrap();
        assert_eq!(remaining.len(), 2);

        let names = relative_strings(&remaining);
        assert!(names.iter().any(|p| p == "note1.md"));
        assert!(names.iter().any(|p| p == "projects/proj.md"));
        assert!(names.iter().all(|p| !p.contains("templates")));
        assert!(names.iter().all(|p| !p.contains("automations")));
    }

    #[test]
    fn test_walk_with_nested_exclusion() {
        let vault = TempDir::new().unwrap();
        let base = vault.path();

        // `docs` contains a note of its own plus an `internal` subfolder.
        write_note(base, "docs/readme.md", "# Docs");
        write_note(base, "docs/internal/secret.md", "# Secret");
        write_note(base, "note.md", "# Note");

        // Only the nested folder is excluded; the rest of `docs` stays visible.
        let skip = vec![PathBuf::from("docs/internal")];
        let remaining = VaultWalker::with_exclusions(base, skip).unwrap().walk().unwrap();
        assert_eq!(remaining.len(), 2);

        let names = relative_strings(&remaining);
        assert!(names.iter().any(|p| p == "note.md"));
        assert!(names.iter().any(|p| p == "docs/readme.md"));
        assert!(names.iter().all(|p| !p.contains("internal")));
    }
}