Skip to main content

mdvault_core/index/
builder.rs

1//! Index building orchestration.
2
3use std::path::Path;
4
5use chrono::{DateTime, Utc};
6use thiserror::Error;
7
8use super::db::{IndexDb, IndexError};
9use super::types::{IndexedLink, IndexedNote};
10use crate::vault::{
11    VaultWalker, VaultWalkerError, WalkedFile, content_hash, extract_note,
12};
13
/// Errors that can be returned while building or updating the index.
#[derive(Debug, Error)]
pub enum BuilderError {
    /// The vault walker failed; converted automatically from `VaultWalkerError`.
    #[error("Vault walker error: {0}")]
    Walker(#[from] VaultWalkerError),

    /// An index database operation failed; converted automatically from `IndexError`.
    #[error("Index database error: {0}")]
    Index(#[from] IndexError),

    /// A note file could not be read (either for its content or for hashing).
    #[error("Failed to read file {path}: {source}")]
    FileRead {
        /// Display form of the path that could not be read.
        path: String,
        /// The underlying I/O error.
        #[source]
        source: std::io::Error,
    },
}
29
/// Statistics from an indexing operation.
///
/// All counters start at zero (`Default`) and are filled in by
/// `IndexBuilder::full_reindex` or `IndexBuilder::incremental_reindex`.
#[derive(Debug, Clone, Default)]
pub struct IndexStats {
    /// Number of files returned by the vault walk.
    pub files_found: usize,
    /// Number of notes written to the index during this run (new or updated).
    pub notes_indexed: usize,
    /// Number of notes skipped due to errors (the error is logged as a warning).
    pub notes_skipped: usize,
    /// Number of links inserted for the notes indexed during this run.
    pub links_indexed: usize,
    /// Broken-link count reported by the database after link targets are resolved.
    pub broken_links: usize,
    /// Wall-clock duration of the run in milliseconds.
    pub duration_ms: u64,
    /// Number of files unchanged (hash match) - incremental mode only.
    pub files_unchanged: usize,
    /// Number of files added (new to index) - incremental mode only.
    pub files_added: usize,
    /// Number of files updated (content changed) - incremental mode only.
    pub files_updated: usize,
    /// Number of files deleted (removed from vault) - incremental mode only.
    pub files_deleted: usize,
}
54
/// File change classification for incremental updates.
///
/// Produced by comparing the content hash stored in the index with the hash
/// of the file currently on disk.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileChange {
    /// File is new: the index holds no content hash for its path.
    Added,
    /// File content has changed: the stored hash differs from the current one.
    Modified,
    /// File content unchanged: the stored hash equals the current one.
    Unchanged,
}
65
/// Progress callback for indexing operations.
///
/// Parameters: (current, total, current_path)
///
/// The reindex methods invoke it once per walked file, before that file is
/// processed. `current` is 1-based, `total` is the number of walked files and
/// `current_path` is the file's vault-relative path (lossily converted to UTF-8).
pub type ProgressCallback = Box<dyn Fn(usize, usize, &str)>;
69
/// Builder for populating the vault index.
///
/// Borrows the database and the vault root for its whole lifetime `'a`.
pub struct IndexBuilder<'a> {
    /// Index database that notes and links are written to.
    db: &'a IndexDb,
    /// Root directory of the vault to walk.
    vault_root: &'a Path,
    /// Folders passed to the vault walker as exclusions (empty means no exclusions).
    excluded_folders: Vec<std::path::PathBuf>,
}
76
77impl<'a> IndexBuilder<'a> {
78    /// Create a new index builder.
79    pub fn new(db: &'a IndexDb, vault_root: &'a Path) -> Self {
80        Self { db, vault_root, excluded_folders: Vec::new() }
81    }
82
83    /// Create a new index builder with folder exclusions.
84    pub fn with_exclusions(
85        db: &'a IndexDb,
86        vault_root: &'a Path,
87        excluded_folders: Vec<std::path::PathBuf>,
88    ) -> Self {
89        Self { db, vault_root, excluded_folders }
90    }
91
92    /// Perform a full reindex of the vault.
93    /// Clears existing data and rebuilds from scratch.
94    pub fn full_reindex(
95        &self,
96        progress: Option<ProgressCallback>,
97    ) -> Result<IndexStats, BuilderError> {
98        let start = std::time::Instant::now();
99        let mut stats = IndexStats::default();
100
101        // Walk the vault with exclusions
102        let walker =
103            VaultWalker::with_exclusions(self.vault_root, self.excluded_folders.clone())?;
104        let files = walker.walk()?;
105        stats.files_found = files.len();
106
107        // Clear existing index
108        self.db.clear_all()?;
109
110        // Phase 1: Index all notes
111        for (i, file) in files.iter().enumerate() {
112            if let Some(ref cb) = progress {
113                cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
114            }
115
116            match self.index_note(file) {
117                Ok(link_count) => {
118                    stats.notes_indexed += 1;
119                    stats.links_indexed += link_count;
120                }
121                Err(e) => {
122                    // Log error but continue indexing
123                    tracing::warn!(
124                        "Failed to index {}: {}",
125                        file.relative_path.display(),
126                        e
127                    );
128                    stats.notes_skipped += 1;
129                }
130            }
131        }
132
133        // Phase 2: Resolve link targets
134        self.db.resolve_link_targets()?;
135        stats.broken_links = self.db.count_broken_links()? as usize;
136
137        stats.duration_ms = start.elapsed().as_millis() as u64;
138        Ok(stats)
139    }
140
141    /// Perform an incremental reindex of the vault.
142    /// Only processes files that have changed since last index.
143    pub fn incremental_reindex(
144        &self,
145        progress: Option<ProgressCallback>,
146    ) -> Result<IndexStats, BuilderError> {
147        let start = std::time::Instant::now();
148        let mut stats = IndexStats::default();
149
150        // Phase 1: Walk the vault and collect all current files (with exclusions)
151        let walker =
152            VaultWalker::with_exclusions(self.vault_root, self.excluded_folders.clone())?;
153        let files = walker.walk()?;
154        stats.files_found = files.len();
155
156        // Phase 2: Get all currently indexed paths for deletion detection
157        let indexed_paths: std::collections::HashSet<std::path::PathBuf> =
158            self.db.get_all_paths()?.into_iter().collect();
159
160        // Track which paths we've seen in the vault
161        let mut seen_paths: std::collections::HashSet<std::path::PathBuf> =
162            std::collections::HashSet::with_capacity(files.len());
163
164        // Phase 3: Classify and process each file
165        for (i, file) in files.iter().enumerate() {
166            if let Some(ref cb) = progress {
167                cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
168            }
169
170            seen_paths.insert(file.relative_path.clone());
171
172            // Classify the change
173            let change = self.classify_change(file)?;
174
175            match change {
176                FileChange::Unchanged => {
177                    stats.files_unchanged += 1;
178                }
179                FileChange::Added | FileChange::Modified => match self.index_note(file) {
180                    Ok(link_count) => {
181                        stats.notes_indexed += 1;
182                        stats.links_indexed += link_count;
183                        if change == FileChange::Added {
184                            stats.files_added += 1;
185                        } else {
186                            stats.files_updated += 1;
187                        }
188                    }
189                    Err(e) => {
190                        tracing::warn!(
191                            "Failed to index {}: {}",
192                            file.relative_path.display(),
193                            e
194                        );
195                        stats.notes_skipped += 1;
196                    }
197                },
198            }
199        }
200
201        // Phase 4: Detect and delete removed files
202        for indexed_path in &indexed_paths {
203            if !seen_paths.contains(indexed_path) && self.db.delete_note(indexed_path)? {
204                stats.files_deleted += 1;
205                tracing::debug!("Deleted from index: {}", indexed_path.display());
206            }
207        }
208
209        // Phase 5: Resolve link targets (handles newly valid links)
210        self.db.resolve_link_targets()?;
211        stats.broken_links = self.db.count_broken_links()? as usize;
212
213        stats.duration_ms = start.elapsed().as_millis() as u64;
214        Ok(stats)
215    }
216
217    /// Classify a file's change status by comparing content hashes.
218    fn classify_change(&self, file: &WalkedFile) -> Result<FileChange, BuilderError> {
219        // Get stored hash (if any)
220        let stored_hash = self.db.get_content_hash(&file.relative_path)?;
221
222        match stored_hash {
223            None => Ok(FileChange::Added),
224            Some(stored) => {
225                // Compute current hash
226                let current = content_hash(&file.absolute_path).map_err(|e| {
227                    BuilderError::FileRead {
228                        path: file.absolute_path.display().to_string(),
229                        source: e,
230                    }
231                })?;
232
233                if current == stored {
234                    Ok(FileChange::Unchanged)
235                } else {
236                    Ok(FileChange::Modified)
237                }
238            }
239        }
240    }
241
242    /// Index a single note file.
243    /// Returns the number of links indexed.
244    fn index_note(&self, file: &WalkedFile) -> Result<usize, BuilderError> {
245        // Read file content
246        let content = std::fs::read_to_string(&file.absolute_path).map_err(|e| {
247            BuilderError::FileRead {
248                path: file.absolute_path.display().to_string(),
249                source: e,
250            }
251        })?;
252
253        // Compute content hash
254        let hash =
255            content_hash(&file.absolute_path).map_err(|e| BuilderError::FileRead {
256                path: file.absolute_path.display().to_string(),
257                source: e,
258            })?;
259
260        // Extract note metadata
261        let extracted = extract_note(&content, &file.relative_path);
262
263        // Convert modified time to DateTime<Utc>
264        let modified: DateTime<Utc> = file.modified.into();
265
266        // Create indexed note
267        let note = IndexedNote {
268            id: None,
269            path: file.relative_path.clone(),
270            note_type: extracted.note_type,
271            title: extracted.title,
272            created: None, // Could extract from frontmatter if present
273            modified,
274            frontmatter_json: extracted.frontmatter_json,
275            content_hash: hash,
276        };
277
278        // Insert note and get ID
279        let note_id = self.db.upsert_note(&note)?;
280
281        // Delete existing links for this note (in case of update)
282        self.db.delete_links_from(note_id)?;
283
284        // Insert links
285        let link_count = extracted.links.len();
286        for link in extracted.links {
287            let indexed_link = IndexedLink {
288                id: None,
289                source_id: note_id,
290                target_id: None, // Resolved in phase 2
291                target_path: link.target,
292                link_text: link.text,
293                link_type: link.link_type,
294                context: link.context,
295                line_number: Some(link.line_number),
296            };
297            self.db.insert_link(&indexed_link)?;
298        }
299
300        Ok(link_count)
301    }
302}
303
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Open a fresh in-memory index database for a single test.
    fn memory_db() -> IndexDb {
        IndexDb::open_in_memory().unwrap()
    }

    /// Build a temporary vault holding three notes:
    /// `note1.md` and `note2.md` (wikilinks to each other, plus one dangling
    /// wikilink from note1) and `subdir/note3.md` (markdown link to note1).
    fn create_test_vault() -> TempDir {
        let vault_dir = TempDir::new().unwrap();
        let root = vault_dir.path();

        fs::create_dir(root.join("subdir")).unwrap();

        // (vault-relative path, file content)
        let fixtures = [
            (
                "note1.md",
                r#"---
title: Note One
type: zettel
---
# Note One

This links to [[note2]] and [[missing-note]].
"#,
            ),
            (
                "note2.md",
                r#"---
title: Note Two
type: task
project: note1
---
# Note Two

Back to [[note1]].
"#,
            ),
            (
                "subdir/note3.md",
                r#"# Note Three

Links to [Note One](../note1.md).
"#,
            ),
        ];

        for &(relative, body) in &fixtures {
            fs::write(root.join(relative), body).unwrap();
        }

        vault_dir
    }

    #[test]
    fn test_full_reindex() {
        let vault_dir = create_test_vault();
        let index = memory_db();

        let report = IndexBuilder::new(&index, vault_dir.path()).full_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.notes_indexed, 3);
        assert_eq!(report.notes_skipped, 0);
        // The three fixture notes contain at least four links in total.
        assert!(report.links_indexed >= 4);
    }

    #[test]
    fn test_notes_are_indexed_correctly() {
        let vault_dir = create_test_vault();
        let index = memory_db();

        IndexBuilder::new(&index, vault_dir.path()).full_reindex(None).unwrap();

        // Frontmatter of note1 must be reflected in the index.
        let first = index
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");
        assert_eq!(first.title, "Note One");
        assert_eq!(first.note_type, crate::index::types::NoteType::Zettel);

        // Same for note2.
        let second = index
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");
        assert_eq!(second.title, "Note Two");
        assert_eq!(second.note_type, crate::index::types::NoteType::Task);
    }

    #[test]
    fn test_links_are_indexed() {
        let vault_dir = create_test_vault();
        let index = memory_db();

        IndexBuilder::new(&index, vault_dir.path()).full_reindex(None).unwrap();

        let first = index
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");

        // note1 contains [[note2]] and [[missing-note]].
        let outgoing = index.get_outgoing_links(first.id.unwrap()).unwrap();
        assert_eq!(outgoing.len(), 2);
    }

    #[test]
    fn test_link_targets_resolved() {
        let vault_dir = create_test_vault();
        let index = memory_db();

        let report = IndexBuilder::new(&index, vault_dir.path()).full_reindex(None).unwrap();

        // [[missing-note]] has no target, so at least one link is broken.
        assert!(report.broken_links >= 1);

        // Links to existing notes must have been resolved: note1 -> note2
        // shows up as a backlink of note2.
        let second = index
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");

        let backlinks = index.get_backlinks(second.id.unwrap()).unwrap();
        assert!(!backlinks.is_empty());
    }

    #[test]
    fn test_reindex_clears_old_data() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        // Run the full reindex twice in a row.
        indexer.full_reindex(None).unwrap();
        let report = indexer.full_reindex(None).unwrap();

        // The second run replaces the first; nothing is duplicated.
        assert_eq!(report.notes_indexed, 3);
        assert_eq!(index.count_notes().unwrap(), 3);
    }

    // ---- Incremental reindex ------------------------------------------------

    #[test]
    fn test_incremental_first_run() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_added, 3);
        assert_eq!(report.files_unchanged, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 3);
    }

    #[test]
    fn test_incremental_no_changes() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        indexer.incremental_reindex(None).unwrap();
        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 0);
    }

    #[test]
    fn test_incremental_file_modified() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        indexer.incremental_reindex(None).unwrap();

        // Rewrite one note with different content.
        fs::write(vault_dir.path().join("note1.md"), "# Note 1 Modified\n\nNew content.")
            .unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_unchanged, 2);
        assert_eq!(report.files_updated, 1);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    #[test]
    fn test_incremental_file_added() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        indexer.incremental_reindex(None).unwrap();

        // Drop a fourth note into the vault.
        fs::write(vault_dir.path().join("note4.md"), "# Note 4\n\nBrand new note.").unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 4);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 1);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    #[test]
    fn test_incremental_file_deleted() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        indexer.incremental_reindex(None).unwrap();

        // Remove one note from disk.
        fs::remove_file(vault_dir.path().join("note2.md")).unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 2);
        assert_eq!(report.files_deleted, 1);
        assert_eq!(report.files_unchanged, 2);

        // The index must no longer know about the removed note.
        assert!(index.get_note_by_path(Path::new("note2.md")).unwrap().is_none());
        assert_eq!(index.count_notes().unwrap(), 2);
    }

    #[test]
    fn test_incremental_links_updated_on_change() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        indexer.incremental_reindex(None).unwrap();

        // Initially note1 has [[note2]] and [[missing-note]].
        let first = index.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
        let links_before = index.get_outgoing_links(first.id.unwrap()).unwrap();
        assert_eq!(links_before.len(), 2);

        // Replace its content so that it carries a single, different link.
        fs::write(vault_dir.path().join("note1.md"), "# Note 1\n\n[[note3]] only now.")
            .unwrap();
        indexer.incremental_reindex(None).unwrap();

        let first = index.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
        let links_after = index.get_outgoing_links(first.id.unwrap()).unwrap();
        assert_eq!(links_after.len(), 1);
        assert_eq!(links_after[0].target_path, "note3");
    }

    #[test]
    fn test_incremental_broken_links_resolved() {
        let vault_dir = create_test_vault();
        let index = memory_db();
        let indexer = IndexBuilder::new(&index, vault_dir.path());

        // First pass: [[missing-note]] in note1 has no target yet.
        let first_pass = indexer.incremental_reindex(None).unwrap();
        assert!(first_pass.broken_links > 0);

        // Create the note that the dangling link points at.
        fs::write(vault_dir.path().join("missing-note.md"), "# Missing Note\n\nNow exists!")
            .unwrap();

        let second_pass = indexer.incremental_reindex(None).unwrap();
        assert_eq!(second_pass.files_added, 1);

        // The previously dangling link must now resolve to the new note.
        let target = index.get_note_by_path(Path::new("missing-note.md")).unwrap().unwrap();
        let backlinks = index.get_backlinks(target.id.unwrap()).unwrap();
        assert!(!backlinks.is_empty());
    }
}