mdvault_core/index/
builder.rs

1//! Index building orchestration.
2
3use std::path::Path;
4
5use chrono::{DateTime, Utc};
6use thiserror::Error;
7
8use super::db::{IndexDb, IndexError};
9use super::types::{IndexedLink, IndexedNote};
10use crate::vault::{
11    VaultWalker, VaultWalkerError, WalkedFile, content_hash, extract_note,
12};
13
/// Errors produced while building the index.
#[derive(Debug, Error)]
pub enum BuilderError {
    /// Failure reported by the vault walker; converted automatically via `#[from]`.
    #[error("Vault walker error: {0}")]
    Walker(#[from] VaultWalkerError),

    /// Failure reported by the index database; converted automatically via `#[from]`.
    #[error("Index database error: {0}")]
    Index(#[from] IndexError),

    /// A file could not be read from disk.
    #[error("Failed to read file {path}: {source}")]
    FileRead {
        /// Path of the file that failed, already rendered as a string for display.
        path: String,
        /// Underlying I/O error.
        #[source]
        source: std::io::Error,
    },
}
29
/// Counters describing the outcome of a single indexing run.
///
/// Every counter starts at zero (`Default`). The four `files_*` change counters
/// (`files_unchanged`, `files_added`, `files_updated`, `files_deleted`) are only
/// populated by incremental runs.
#[derive(Clone, Debug, Default)]
pub struct IndexStats {
    /// How many files the vault walk discovered.
    pub files_found: usize,
    /// How many notes were written to the index (newly added or updated).
    pub notes_indexed: usize,
    /// How many notes were skipped because processing them failed.
    pub notes_skipped: usize,
    /// How many links were written to the index.
    pub links_indexed: usize,
    /// How many links remain broken after target resolution.
    pub broken_links: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
    /// Incremental mode only: files whose content hash matched the stored hash.
    pub files_unchanged: usize,
    /// Incremental mode only: files that were not previously in the index.
    pub files_added: usize,
    /// Incremental mode only: files whose content changed since the last run.
    pub files_updated: usize,
    /// Incremental mode only: indexed files that no longer exist in the vault.
    pub files_deleted: usize,
}
54
/// How a file on disk relates to what the index currently holds.
///
/// Used by incremental updates to decide whether a file must be re-indexed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileChange {
    /// The file has no entry in the index yet.
    Added,
    /// The file is indexed, but its content differs from the indexed version.
    Modified,
    /// The file is indexed and its content is the same as before.
    Unchanged,
}
65
/// Progress callback for indexing operations.
///
/// The reindex methods in this file invoke it once per discovered file, before
/// that file is processed, with `(current, total, current_path)`:
/// - `current`: 1-based position of the file in the walk result,
/// - `total`: number of files discovered by the walk,
/// - `current_path`: the file's `relative_path`, lossily converted to a string.
pub type ProgressCallback = Box<dyn Fn(usize, usize, &str)>;
69
/// Builder for populating the vault index.
///
/// Borrows both the database and the vault root for `'a`; it owns no state of
/// its own, so the same builder can be used for several reindex runs.
pub struct IndexBuilder<'a> {
    /// Index database that receives notes and links.
    db: &'a IndexDb,
    /// Root directory handed to the vault walker.
    vault_root: &'a Path,
}
75
76impl<'a> IndexBuilder<'a> {
77    /// Create a new index builder.
78    pub fn new(db: &'a IndexDb, vault_root: &'a Path) -> Self {
79        Self { db, vault_root }
80    }
81
82    /// Perform a full reindex of the vault.
83    /// Clears existing data and rebuilds from scratch.
84    pub fn full_reindex(
85        &self,
86        progress: Option<ProgressCallback>,
87    ) -> Result<IndexStats, BuilderError> {
88        let start = std::time::Instant::now();
89        let mut stats = IndexStats::default();
90
91        // Walk the vault
92        let walker = VaultWalker::new(self.vault_root)?;
93        let files = walker.walk()?;
94        stats.files_found = files.len();
95
96        // Clear existing index
97        self.db.clear_all()?;
98
99        // Phase 1: Index all notes
100        for (i, file) in files.iter().enumerate() {
101            if let Some(ref cb) = progress {
102                cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
103            }
104
105            match self.index_note(file) {
106                Ok(link_count) => {
107                    stats.notes_indexed += 1;
108                    stats.links_indexed += link_count;
109                }
110                Err(e) => {
111                    // Log error but continue indexing
112                    tracing::warn!(
113                        "Failed to index {}: {}",
114                        file.relative_path.display(),
115                        e
116                    );
117                    stats.notes_skipped += 1;
118                }
119            }
120        }
121
122        // Phase 2: Resolve link targets
123        self.db.resolve_link_targets()?;
124        stats.broken_links = self.db.count_broken_links()? as usize;
125
126        stats.duration_ms = start.elapsed().as_millis() as u64;
127        Ok(stats)
128    }
129
130    /// Perform an incremental reindex of the vault.
131    /// Only processes files that have changed since last index.
132    pub fn incremental_reindex(
133        &self,
134        progress: Option<ProgressCallback>,
135    ) -> Result<IndexStats, BuilderError> {
136        let start = std::time::Instant::now();
137        let mut stats = IndexStats::default();
138
139        // Phase 1: Walk the vault and collect all current files
140        let walker = VaultWalker::new(self.vault_root)?;
141        let files = walker.walk()?;
142        stats.files_found = files.len();
143
144        // Phase 2: Get all currently indexed paths for deletion detection
145        let indexed_paths: std::collections::HashSet<std::path::PathBuf> =
146            self.db.get_all_paths()?.into_iter().collect();
147
148        // Track which paths we've seen in the vault
149        let mut seen_paths: std::collections::HashSet<std::path::PathBuf> =
150            std::collections::HashSet::with_capacity(files.len());
151
152        // Phase 3: Classify and process each file
153        for (i, file) in files.iter().enumerate() {
154            if let Some(ref cb) = progress {
155                cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
156            }
157
158            seen_paths.insert(file.relative_path.clone());
159
160            // Classify the change
161            let change = self.classify_change(file)?;
162
163            match change {
164                FileChange::Unchanged => {
165                    stats.files_unchanged += 1;
166                }
167                FileChange::Added | FileChange::Modified => match self.index_note(file) {
168                    Ok(link_count) => {
169                        stats.notes_indexed += 1;
170                        stats.links_indexed += link_count;
171                        if change == FileChange::Added {
172                            stats.files_added += 1;
173                        } else {
174                            stats.files_updated += 1;
175                        }
176                    }
177                    Err(e) => {
178                        tracing::warn!(
179                            "Failed to index {}: {}",
180                            file.relative_path.display(),
181                            e
182                        );
183                        stats.notes_skipped += 1;
184                    }
185                },
186            }
187        }
188
189        // Phase 4: Detect and delete removed files
190        for indexed_path in &indexed_paths {
191            if !seen_paths.contains(indexed_path) && self.db.delete_note(indexed_path)? {
192                stats.files_deleted += 1;
193                tracing::debug!("Deleted from index: {}", indexed_path.display());
194            }
195        }
196
197        // Phase 5: Resolve link targets (handles newly valid links)
198        self.db.resolve_link_targets()?;
199        stats.broken_links = self.db.count_broken_links()? as usize;
200
201        stats.duration_ms = start.elapsed().as_millis() as u64;
202        Ok(stats)
203    }
204
205    /// Classify a file's change status by comparing content hashes.
206    fn classify_change(&self, file: &WalkedFile) -> Result<FileChange, BuilderError> {
207        // Get stored hash (if any)
208        let stored_hash = self.db.get_content_hash(&file.relative_path)?;
209
210        match stored_hash {
211            None => Ok(FileChange::Added),
212            Some(stored) => {
213                // Compute current hash
214                let current = content_hash(&file.absolute_path).map_err(|e| {
215                    BuilderError::FileRead {
216                        path: file.absolute_path.display().to_string(),
217                        source: e,
218                    }
219                })?;
220
221                if current == stored {
222                    Ok(FileChange::Unchanged)
223                } else {
224                    Ok(FileChange::Modified)
225                }
226            }
227        }
228    }
229
230    /// Index a single note file.
231    /// Returns the number of links indexed.
232    fn index_note(&self, file: &WalkedFile) -> Result<usize, BuilderError> {
233        // Read file content
234        let content = std::fs::read_to_string(&file.absolute_path).map_err(|e| {
235            BuilderError::FileRead {
236                path: file.absolute_path.display().to_string(),
237                source: e,
238            }
239        })?;
240
241        // Compute content hash
242        let hash =
243            content_hash(&file.absolute_path).map_err(|e| BuilderError::FileRead {
244                path: file.absolute_path.display().to_string(),
245                source: e,
246            })?;
247
248        // Extract note metadata
249        let extracted = extract_note(&content, &file.relative_path);
250
251        // Convert modified time to DateTime<Utc>
252        let modified: DateTime<Utc> = file.modified.into();
253
254        // Create indexed note
255        let note = IndexedNote {
256            id: None,
257            path: file.relative_path.clone(),
258            note_type: extracted.note_type,
259            title: extracted.title,
260            created: None, // Could extract from frontmatter if present
261            modified,
262            frontmatter_json: extracted.frontmatter_json,
263            content_hash: hash,
264        };
265
266        // Insert note and get ID
267        let note_id = self.db.upsert_note(&note)?;
268
269        // Delete existing links for this note (in case of update)
270        self.db.delete_links_from(note_id)?;
271
272        // Insert links
273        let link_count = extracted.links.len();
274        for link in extracted.links {
275            let indexed_link = IndexedLink {
276                id: None,
277                source_id: note_id,
278                target_id: None, // Resolved in phase 2
279                target_path: link.target,
280                link_text: link.text,
281                link_type: link.link_type,
282                context: link.context,
283                line_number: Some(link.line_number),
284            };
285            self.db.insert_link(&indexed_link)?;
286        }
287
288        Ok(link_count)
289    }
290}
291
292#[cfg(test)]
293mod tests {
294    use super::*;
295    use std::fs;
296    use tempfile::TempDir;
297
298    fn create_test_vault() -> TempDir {
299        let dir = TempDir::new().unwrap();
300        let root = dir.path();
301
302        // Create some markdown files with links
303        fs::write(
304            root.join("note1.md"),
305            r#"---
306title: Note One
307type: zettel
308---
309# Note One
310
311This links to [[note2]] and [[missing-note]].
312"#,
313        )
314        .unwrap();
315
316        fs::write(
317            root.join("note2.md"),
318            r#"---
319title: Note Two
320type: task
321project: note1
322---
323# Note Two
324
325Back to [[note1]].
326"#,
327        )
328        .unwrap();
329
330        fs::create_dir(root.join("subdir")).unwrap();
331        fs::write(
332            root.join("subdir/note3.md"),
333            r#"# Note Three
334
335Links to [Note One](../note1.md).
336"#,
337        )
338        .unwrap();
339
340        dir
341    }
342
343    #[test]
344    fn test_full_reindex() {
345        let vault = create_test_vault();
346        let db = IndexDb::open_in_memory().unwrap();
347
348        let builder = IndexBuilder::new(&db, vault.path());
349        let stats = builder.full_reindex(None).unwrap();
350
351        assert_eq!(stats.files_found, 3);
352        assert_eq!(stats.notes_indexed, 3);
353        assert_eq!(stats.notes_skipped, 0);
354        assert!(stats.links_indexed >= 4); // At least 4 links across all notes
355    }
356
357    #[test]
358    fn test_notes_are_indexed_correctly() {
359        let vault = create_test_vault();
360        let db = IndexDb::open_in_memory().unwrap();
361
362        let builder = IndexBuilder::new(&db, vault.path());
363        builder.full_reindex(None).unwrap();
364
365        // Check note1 is indexed
366        let note1 = db
367            .get_note_by_path(Path::new("note1.md"))
368            .unwrap()
369            .expect("note1 should exist");
370        assert_eq!(note1.title, "Note One");
371        assert_eq!(note1.note_type, crate::index::types::NoteType::Zettel);
372
373        // Check note2 is indexed
374        let note2 = db
375            .get_note_by_path(Path::new("note2.md"))
376            .unwrap()
377            .expect("note2 should exist");
378        assert_eq!(note2.title, "Note Two");
379        assert_eq!(note2.note_type, crate::index::types::NoteType::Task);
380    }
381
382    #[test]
383    fn test_links_are_indexed() {
384        let vault = create_test_vault();
385        let db = IndexDb::open_in_memory().unwrap();
386
387        let builder = IndexBuilder::new(&db, vault.path());
388        builder.full_reindex(None).unwrap();
389
390        let note1 = db
391            .get_note_by_path(Path::new("note1.md"))
392            .unwrap()
393            .expect("note1 should exist");
394
395        let outgoing = db.get_outgoing_links(note1.id.unwrap()).unwrap();
396        assert_eq!(outgoing.len(), 2); // [[note2]] and [[missing-note]]
397    }
398
399    #[test]
400    fn test_link_targets_resolved() {
401        let vault = create_test_vault();
402        let db = IndexDb::open_in_memory().unwrap();
403
404        let builder = IndexBuilder::new(&db, vault.path());
405        let stats = builder.full_reindex(None).unwrap();
406
407        // At least one broken link (missing-note)
408        assert!(stats.broken_links >= 1);
409
410        // Check that existing links have target_id resolved
411        let note2 = db
412            .get_note_by_path(Path::new("note2.md"))
413            .unwrap()
414            .expect("note2 should exist");
415
416        let backlinks = db.get_backlinks(note2.id.unwrap()).unwrap();
417        // note1 links to note2
418        assert!(!backlinks.is_empty());
419    }
420
421    #[test]
422    fn test_reindex_clears_old_data() {
423        let vault = create_test_vault();
424        let db = IndexDb::open_in_memory().unwrap();
425
426        let builder = IndexBuilder::new(&db, vault.path());
427
428        // Index twice
429        builder.full_reindex(None).unwrap();
430        let stats = builder.full_reindex(None).unwrap();
431
432        // Should still have same counts (not doubled)
433        assert_eq!(stats.notes_indexed, 3);
434        assert_eq!(db.count_notes().unwrap(), 3);
435    }
436
437    // ─────────────────────────────────────────────────────────────────────────
438    // Incremental reindex tests
439    // ─────────────────────────────────────────────────────────────────────────
440
441    #[test]
442    fn test_incremental_first_run() {
443        let vault = create_test_vault();
444        let db = IndexDb::open_in_memory().unwrap();
445        let builder = IndexBuilder::new(&db, vault.path());
446
447        let stats = builder.incremental_reindex(None).unwrap();
448
449        assert_eq!(stats.files_found, 3);
450        assert_eq!(stats.files_added, 3);
451        assert_eq!(stats.files_unchanged, 0);
452        assert_eq!(stats.files_updated, 0);
453        assert_eq!(stats.files_deleted, 0);
454        assert_eq!(stats.notes_indexed, 3);
455    }
456
457    #[test]
458    fn test_incremental_no_changes() {
459        let vault = create_test_vault();
460        let db = IndexDb::open_in_memory().unwrap();
461        let builder = IndexBuilder::new(&db, vault.path());
462
463        builder.incremental_reindex(None).unwrap();
464        let stats = builder.incremental_reindex(None).unwrap();
465
466        assert_eq!(stats.files_found, 3);
467        assert_eq!(stats.files_unchanged, 3);
468        assert_eq!(stats.files_added, 0);
469        assert_eq!(stats.files_updated, 0);
470        assert_eq!(stats.files_deleted, 0);
471        assert_eq!(stats.notes_indexed, 0);
472    }
473
474    #[test]
475    fn test_incremental_file_modified() {
476        let vault = create_test_vault();
477        let db = IndexDb::open_in_memory().unwrap();
478        let builder = IndexBuilder::new(&db, vault.path());
479
480        builder.incremental_reindex(None).unwrap();
481
482        // Modify a file
483        fs::write(vault.path().join("note1.md"), "# Note 1 Modified\n\nNew content.")
484            .unwrap();
485
486        let stats = builder.incremental_reindex(None).unwrap();
487
488        assert_eq!(stats.files_unchanged, 2);
489        assert_eq!(stats.files_updated, 1);
490        assert_eq!(stats.files_added, 0);
491        assert_eq!(stats.notes_indexed, 1);
492    }
493
494    #[test]
495    fn test_incremental_file_added() {
496        let vault = create_test_vault();
497        let db = IndexDb::open_in_memory().unwrap();
498        let builder = IndexBuilder::new(&db, vault.path());
499
500        builder.incremental_reindex(None).unwrap();
501
502        // Add a new file
503        fs::write(vault.path().join("note4.md"), "# Note 4\n\nBrand new note.").unwrap();
504
505        let stats = builder.incremental_reindex(None).unwrap();
506
507        assert_eq!(stats.files_found, 4);
508        assert_eq!(stats.files_unchanged, 3);
509        assert_eq!(stats.files_added, 1);
510        assert_eq!(stats.files_updated, 0);
511        assert_eq!(stats.notes_indexed, 1);
512    }
513
514    #[test]
515    fn test_incremental_file_deleted() {
516        let vault = create_test_vault();
517        let db = IndexDb::open_in_memory().unwrap();
518        let builder = IndexBuilder::new(&db, vault.path());
519
520        builder.incremental_reindex(None).unwrap();
521
522        // Delete a file
523        fs::remove_file(vault.path().join("note2.md")).unwrap();
524
525        let stats = builder.incremental_reindex(None).unwrap();
526
527        assert_eq!(stats.files_found, 2);
528        assert_eq!(stats.files_deleted, 1);
529        assert_eq!(stats.files_unchanged, 2);
530
531        // Verify it's gone from the index
532        assert!(db.get_note_by_path(Path::new("note2.md")).unwrap().is_none());
533        assert_eq!(db.count_notes().unwrap(), 2);
534    }
535
536    #[test]
537    fn test_incremental_links_updated_on_change() {
538        let vault = create_test_vault();
539        let db = IndexDb::open_in_memory().unwrap();
540        let builder = IndexBuilder::new(&db, vault.path());
541
542        builder.incremental_reindex(None).unwrap();
543
544        let note1 = db.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
545        let links_before = db.get_outgoing_links(note1.id.unwrap()).unwrap();
546        assert_eq!(links_before.len(), 2); // [[note2]] and [[missing-note]]
547
548        // Modify to have different links
549        fs::write(vault.path().join("note1.md"), "# Note 1\n\n[[note3]] only now.")
550            .unwrap();
551        builder.incremental_reindex(None).unwrap();
552
553        let note1 = db.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
554        let links_after = db.get_outgoing_links(note1.id.unwrap()).unwrap();
555        assert_eq!(links_after.len(), 1);
556        assert_eq!(links_after[0].target_path, "note3");
557    }
558
559    #[test]
560    fn test_incremental_broken_links_resolved() {
561        let vault = create_test_vault();
562        let db = IndexDb::open_in_memory().unwrap();
563        let builder = IndexBuilder::new(&db, vault.path());
564
565        // note1 links to note2 and missing-note
566        let stats1 = builder.incremental_reindex(None).unwrap();
567        assert!(stats1.broken_links > 0); // missing-note is broken
568
569        // Now create the missing note
570        fs::write(vault.path().join("missing-note.md"), "# Missing Note\n\nNow exists!")
571            .unwrap();
572
573        let stats2 = builder.incremental_reindex(None).unwrap();
574        assert_eq!(stats2.files_added, 1);
575
576        // The link to missing-note should now be resolved
577        let missing = db.get_note_by_path(Path::new("missing-note.md")).unwrap().unwrap();
578        let backlinks = db.get_backlinks(missing.id.unwrap()).unwrap();
579        assert!(!backlinks.is_empty());
580    }
581}