1use std::path::Path;
4
5use chrono::{DateTime, Utc};
6use thiserror::Error;
7
8use super::db::{IndexDb, IndexError};
9use super::types::{IndexedLink, IndexedNote};
10use crate::vault::{
11 VaultWalker, VaultWalkerError, WalkedFile, content_hash, extract_note,
12};
13
14#[derive(Debug, Error)]
15pub enum BuilderError {
16 #[error("Vault walker error: {0}")]
17 Walker(#[from] VaultWalkerError),
18
19 #[error("Index database error: {0}")]
20 Index(#[from] IndexError),
21
22 #[error("Failed to read file {path}: {source}")]
23 FileRead {
24 path: String,
25 #[source]
26 source: std::io::Error,
27 },
28}
29
/// Counters describing the outcome of one indexing run.
///
/// All counters start at zero (`Default`). Two values compare equal when
/// every counter matches, which makes whole-struct assertions possible.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IndexStats {
    /// Number of files returned by the vault walk.
    pub files_found: usize,
    /// Number of notes that were (re)indexed successfully during the run.
    pub notes_indexed: usize,
    /// Number of files that were skipped because processing them failed.
    pub notes_skipped: usize,
    /// Total number of links extracted from the notes indexed in this run.
    pub links_indexed: usize,
    /// Number of broken links reported by the database after link resolution.
    pub broken_links: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
    /// Files whose stored content hash matched the file on disk.
    pub files_unchanged: usize,
    /// Files that had no stored content hash and were indexed for the first time.
    pub files_added: usize,
    /// Files whose content hash differed from the stored one and were re-indexed.
    pub files_updated: usize,
    /// Index entries removed because their file was no longer found by the walk.
    pub files_deleted: usize,
}
54
/// How a file on disk relates to what the index currently stores for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileChange {
    /// The index holds no content hash for this path.
    Added,
    /// The index holds a content hash that differs from the file's current hash.
    Modified,
    /// The stored content hash equals the file's current hash.
    Unchanged,
}
65
/// Callback invoked once per file while an index run is in progress.
///
/// Arguments, in order: the 1-based position of the file being processed,
/// the total number of files in the run, and the file's vault-relative path
/// as a (lossily converted) string.
pub type ProgressCallback = Box<dyn Fn(usize, usize, &str)>;
69
70pub struct IndexBuilder<'a> {
72 db: &'a IndexDb,
73 vault_root: &'a Path,
74 excluded_folders: Vec<std::path::PathBuf>,
75}
76
77impl<'a> IndexBuilder<'a> {
78 pub fn new(db: &'a IndexDb, vault_root: &'a Path) -> Self {
80 Self { db, vault_root, excluded_folders: Vec::new() }
81 }
82
83 pub fn with_exclusions(
85 db: &'a IndexDb,
86 vault_root: &'a Path,
87 excluded_folders: Vec<std::path::PathBuf>,
88 ) -> Self {
89 Self { db, vault_root, excluded_folders }
90 }
91
92 pub fn full_reindex(
95 &self,
96 progress: Option<ProgressCallback>,
97 ) -> Result<IndexStats, BuilderError> {
98 let start = std::time::Instant::now();
99 let mut stats = IndexStats::default();
100
101 let walker =
103 VaultWalker::with_exclusions(self.vault_root, self.excluded_folders.clone())?;
104 let files = walker.walk()?;
105 stats.files_found = files.len();
106
107 self.db.clear_all()?;
109
110 for (i, file) in files.iter().enumerate() {
112 if let Some(ref cb) = progress {
113 cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
114 }
115
116 match self.index_note(file) {
117 Ok(link_count) => {
118 stats.notes_indexed += 1;
119 stats.links_indexed += link_count;
120 }
121 Err(e) => {
122 tracing::warn!(
124 "Failed to index {}: {}",
125 file.relative_path.display(),
126 e
127 );
128 stats.notes_skipped += 1;
129 }
130 }
131 }
132
133 self.db.resolve_link_targets()?;
135 stats.broken_links = self.db.count_broken_links()? as usize;
136
137 stats.duration_ms = start.elapsed().as_millis() as u64;
138 Ok(stats)
139 }
140
141 pub fn incremental_reindex(
144 &self,
145 progress: Option<ProgressCallback>,
146 ) -> Result<IndexStats, BuilderError> {
147 let start = std::time::Instant::now();
148 let mut stats = IndexStats::default();
149
150 let walker =
152 VaultWalker::with_exclusions(self.vault_root, self.excluded_folders.clone())?;
153 let files = walker.walk()?;
154 stats.files_found = files.len();
155
156 let indexed_paths: std::collections::HashSet<std::path::PathBuf> =
158 self.db.get_all_paths()?.into_iter().collect();
159
160 let mut seen_paths: std::collections::HashSet<std::path::PathBuf> =
162 std::collections::HashSet::with_capacity(files.len());
163
164 for (i, file) in files.iter().enumerate() {
166 if let Some(ref cb) = progress {
167 cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
168 }
169
170 seen_paths.insert(file.relative_path.clone());
171
172 let change = self.classify_change(file)?;
174
175 match change {
176 FileChange::Unchanged => {
177 stats.files_unchanged += 1;
178 }
179 FileChange::Added | FileChange::Modified => match self.index_note(file) {
180 Ok(link_count) => {
181 stats.notes_indexed += 1;
182 stats.links_indexed += link_count;
183 if change == FileChange::Added {
184 stats.files_added += 1;
185 } else {
186 stats.files_updated += 1;
187 }
188 }
189 Err(e) => {
190 tracing::warn!(
191 "Failed to index {}: {}",
192 file.relative_path.display(),
193 e
194 );
195 stats.notes_skipped += 1;
196 }
197 },
198 }
199 }
200
201 for indexed_path in &indexed_paths {
203 if !seen_paths.contains(indexed_path) && self.db.delete_note(indexed_path)? {
204 stats.files_deleted += 1;
205 tracing::debug!("Deleted from index: {}", indexed_path.display());
206 }
207 }
208
209 self.db.resolve_link_targets()?;
211 stats.broken_links = self.db.count_broken_links()? as usize;
212
213 stats.duration_ms = start.elapsed().as_millis() as u64;
214 Ok(stats)
215 }
216
217 fn classify_change(&self, file: &WalkedFile) -> Result<FileChange, BuilderError> {
219 let stored_hash = self.db.get_content_hash(&file.relative_path)?;
221
222 match stored_hash {
223 None => Ok(FileChange::Added),
224 Some(stored) => {
225 let current = content_hash(&file.absolute_path).map_err(|e| {
227 BuilderError::FileRead {
228 path: file.absolute_path.display().to_string(),
229 source: e,
230 }
231 })?;
232
233 if current == stored {
234 Ok(FileChange::Unchanged)
235 } else {
236 Ok(FileChange::Modified)
237 }
238 }
239 }
240 }
241
242 fn index_note(&self, file: &WalkedFile) -> Result<usize, BuilderError> {
245 let content = std::fs::read_to_string(&file.absolute_path).map_err(|e| {
247 BuilderError::FileRead {
248 path: file.absolute_path.display().to_string(),
249 source: e,
250 }
251 })?;
252
253 let hash =
255 content_hash(&file.absolute_path).map_err(|e| BuilderError::FileRead {
256 path: file.absolute_path.display().to_string(),
257 source: e,
258 })?;
259
260 let extracted = extract_note(&content, &file.relative_path);
262
263 let modified: DateTime<Utc> = file.modified.into();
265
266 let note = IndexedNote {
268 id: None,
269 path: file.relative_path.clone(),
270 note_type: extracted.note_type,
271 title: extracted.title,
272 created: None, modified,
274 frontmatter_json: extracted.frontmatter_json,
275 content_hash: hash,
276 };
277
278 let note_id = self.db.upsert_note(¬e)?;
280
281 self.db.delete_links_from(note_id)?;
283
284 let link_count = extracted.links.len();
286 for link in extracted.links {
287 let indexed_link = IndexedLink {
288 id: None,
289 source_id: note_id,
290 target_id: None, target_path: link.target,
292 link_text: link.text,
293 link_type: link.link_type,
294 context: link.context,
295 line_number: Some(link.line_number),
296 };
297 self.db.insert_link(&indexed_link)?;
298 }
299
300 Ok(link_count)
301 }
302
303 pub fn reindex_file(&self, relative_path: &Path) -> Result<(), BuilderError> {
305 let absolute_path = self.vault_root.join(relative_path);
306 let metadata =
307 std::fs::metadata(&absolute_path).map_err(|e| BuilderError::FileRead {
308 path: absolute_path.display().to_string(),
309 source: e,
310 })?;
311 let file = WalkedFile {
312 absolute_path,
313 relative_path: relative_path.to_path_buf(),
314 modified: metadata.modified().unwrap_or(std::time::SystemTime::now()),
315 size: metadata.len(),
316 };
317 self.index_note(&file)?;
318 Ok(())
319 }
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// `note1.md`: zettel with front matter, linking to `note2` and to a
    /// note that does not exist.
    const NOTE_ONE: &str = r#"---
title: Note One
type: zettel
---
# Note One

This links to [[note2]] and [[missing-note]].
"#;

    /// `note2.md`: task with front matter, linking back to `note1`.
    const NOTE_TWO: &str = r#"---
title: Note Two
type: task
project: note1
---
# Note Two

Back to [[note1]].
"#;

    /// `subdir/note3.md`: no front matter, one markdown-style link.
    const NOTE_THREE: &str = r#"# Note Three

Links to [Note One](../note1.md).
"#;

    /// Creates a temporary vault holding the three fixture notes above.
    fn create_test_vault() -> TempDir {
        let vault = TempDir::new().unwrap();

        fs::write(vault.path().join("note1.md"), NOTE_ONE).unwrap();
        fs::write(vault.path().join("note2.md"), NOTE_TWO).unwrap();

        fs::create_dir(vault.path().join("subdir")).unwrap();
        fs::write(vault.path().join("subdir/note3.md"), NOTE_THREE).unwrap();

        vault
    }

    /// A full reindex finds and indexes all three notes and their links.
    #[test]
    fn test_full_reindex() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();

        let report = IndexBuilder::new(&db, vault.path())
            .full_reindex(None)
            .unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.notes_indexed, 3);
        assert_eq!(report.notes_skipped, 0);
        assert!(report.links_indexed >= 4);
    }

    /// Titles and note types come from the front matter.
    #[test]
    fn test_notes_are_indexed_correctly() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();

        IndexBuilder::new(&db, vault.path())
            .full_reindex(None)
            .unwrap();

        let first = db
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");
        assert_eq!(first.title, "Note One");
        assert_eq!(first.note_type, crate::index::types::NoteType::Zettel);

        let second = db
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");
        assert_eq!(second.title, "Note Two");
        assert_eq!(second.note_type, crate::index::types::NoteType::Task);
    }

    /// `note1` has exactly its two outgoing links recorded.
    #[test]
    fn test_links_are_indexed() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();

        IndexBuilder::new(&db, vault.path())
            .full_reindex(None)
            .unwrap();

        let source = db
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");

        let links = db.get_outgoing_links(source.id.unwrap()).unwrap();
        assert_eq!(links.len(), 2);
    }

    /// The link to the missing note is broken; the link to `note2` resolves.
    #[test]
    fn test_link_targets_resolved() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();

        let report = IndexBuilder::new(&db, vault.path())
            .full_reindex(None)
            .unwrap();

        assert!(report.broken_links >= 1);

        let target = db
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");

        let incoming = db.get_backlinks(target.id.unwrap()).unwrap();
        assert!(!incoming.is_empty());
    }

    /// Running a full reindex twice does not duplicate notes.
    #[test]
    fn test_reindex_clears_old_data() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        indexer.full_reindex(None).unwrap();
        let second_run = indexer.full_reindex(None).unwrap();

        assert_eq!(second_run.notes_indexed, 3);
        assert_eq!(db.count_notes().unwrap(), 3);
    }

    /// On an empty index every file counts as added.
    #[test]
    fn test_incremental_first_run() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_added, 3);
        assert_eq!(report.files_unchanged, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 3);
    }

    /// A second run over an untouched vault re-indexes nothing.
    #[test]
    fn test_incremental_no_changes() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        indexer.incremental_reindex(None).unwrap();
        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 0);
    }

    /// Rewriting one file makes exactly that file count as updated.
    #[test]
    fn test_incremental_file_modified() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        indexer.incremental_reindex(None).unwrap();

        let edited = vault.path().join("note1.md");
        fs::write(edited, "# Note 1 Modified\n\nNew content.").unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_unchanged, 2);
        assert_eq!(report.files_updated, 1);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    /// A new file is picked up as added; the rest stay unchanged.
    #[test]
    fn test_incremental_file_added() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        indexer.incremental_reindex(None).unwrap();

        let created = vault.path().join("note4.md");
        fs::write(created, "# Note 4\n\nBrand new note.").unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 4);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 1);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    /// Removing a file removes its note from the index.
    #[test]
    fn test_incremental_file_deleted() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        indexer.incremental_reindex(None).unwrap();

        fs::remove_file(vault.path().join("note2.md")).unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 2);
        assert_eq!(report.files_deleted, 1);
        assert_eq!(report.files_unchanged, 2);

        let removed = db.get_note_by_path(Path::new("note2.md")).unwrap();
        assert!(removed.is_none());
        assert_eq!(db.count_notes().unwrap(), 2);
    }

    /// Re-indexing a modified note replaces its old links with the new ones.
    #[test]
    fn test_incremental_links_updated_on_change() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        // Looks the note up afresh on every call, as the original test did.
        let outgoing_from_note1 = || {
            let note = db.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
            db.get_outgoing_links(note.id.unwrap()).unwrap()
        };

        indexer.incremental_reindex(None).unwrap();
        assert_eq!(outgoing_from_note1().len(), 2);

        fs::write(vault.path().join("note1.md"), "# Note 1\n\n[[note3]] only now.")
            .unwrap();
        indexer.incremental_reindex(None).unwrap();

        let links_after = outgoing_from_note1();
        assert_eq!(links_after.len(), 1);
        assert_eq!(links_after[0].target_path, "note3");
    }

    /// Creating the previously missing note gives it backlinks on the next run.
    #[test]
    fn test_incremental_broken_links_resolved() {
        let vault = create_test_vault();
        let db = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&db, vault.path());

        let before = indexer.incremental_reindex(None).unwrap();
        assert!(before.broken_links > 0);

        fs::write(
            vault.path().join("missing-note.md"),
            "# Missing Note\n\nNow exists!",
        )
        .unwrap();

        let after = indexer.incremental_reindex(None).unwrap();
        assert_eq!(after.files_added, 1);

        let created = db
            .get_note_by_path(Path::new("missing-note.md"))
            .unwrap()
            .unwrap();
        let incoming = db.get_backlinks(created.id.unwrap()).unwrap();
        assert!(!incoming.is_empty());
    }
}