1use std::path::Path;
4
5use chrono::{DateTime, Utc};
6use thiserror::Error;
7
8use super::db::{IndexDb, IndexError};
9use super::types::{IndexedLink, IndexedNote};
10use crate::vault::{
11 VaultWalker, VaultWalkerError, WalkedFile, content_hash, extract_note,
12};
13
14#[derive(Debug, Error)]
15pub enum BuilderError {
16 #[error("Vault walker error: {0}")]
17 Walker(#[from] VaultWalkerError),
18
19 #[error("Index database error: {0}")]
20 Index(#[from] IndexError),
21
22 #[error("Failed to read file {path}: {source}")]
23 FileRead {
24 path: String,
25 #[source]
26 source: std::io::Error,
27 },
28}
29
/// Counters describing the outcome of one reindex run.
///
/// Every field starts at zero (`Default`). The `files_*` change counters are
/// only written by the incremental reindex; a full reindex leaves them at zero.
#[derive(Clone, Debug, Default)]
pub struct IndexStats {
    /// Number of files returned by the vault walk.
    pub files_found: usize,
    /// Number of notes successfully written to the index during this run.
    pub notes_indexed: usize,
    /// Number of files that were skipped because indexing them failed.
    pub notes_skipped: usize,
    /// Total number of links stored for the notes indexed during this run.
    pub links_indexed: usize,
    /// Broken-link count reported by the database once the run has finished.
    pub broken_links: usize,
    /// Wall-clock duration of the run in milliseconds.
    pub duration_ms: u64,
    /// Files whose stored content hash matched the file on disk.
    pub files_unchanged: usize,
    /// Files that had no stored content hash and were indexed for the first time.
    pub files_added: usize,
    /// Files whose content hash differed from the stored one and were re-indexed.
    pub files_updated: usize,
    /// Previously indexed paths that were not found by the walk and were deleted.
    pub files_deleted: usize,
}
54
/// How a file on disk compares with what the index already holds for it.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FileChange {
    /// The index has no content hash stored for this path.
    Added,
    /// A content hash is stored, but it differs from the file's current hash.
    Modified,
    /// The stored content hash equals the file's current hash.
    Unchanged,
}
65
/// Callback invoked once per walked file while a reindex is running.
///
/// The arguments are, in order: the 1-based position of the file being
/// processed, the total number of walked files, and the file's path relative
/// to the vault root (lossily converted to UTF-8).
pub type ProgressCallback = Box<dyn Fn(usize, usize, &str)>;
69
70pub struct IndexBuilder<'a> {
72 db: &'a IndexDb,
73 vault_root: &'a Path,
74 excluded_folders: Vec<std::path::PathBuf>,
75}
76
77impl<'a> IndexBuilder<'a> {
78 pub fn new(db: &'a IndexDb, vault_root: &'a Path) -> Self {
80 Self { db, vault_root, excluded_folders: Vec::new() }
81 }
82
83 pub fn with_exclusions(
85 db: &'a IndexDb,
86 vault_root: &'a Path,
87 excluded_folders: Vec<std::path::PathBuf>,
88 ) -> Self {
89 Self { db, vault_root, excluded_folders }
90 }
91
92 pub fn full_reindex(
95 &self,
96 progress: Option<ProgressCallback>,
97 ) -> Result<IndexStats, BuilderError> {
98 let start = std::time::Instant::now();
99 let mut stats = IndexStats::default();
100
101 let walker =
103 VaultWalker::with_exclusions(self.vault_root, self.excluded_folders.clone())?;
104 let files = walker.walk()?;
105 stats.files_found = files.len();
106
107 self.db.clear_all()?;
109
110 for (i, file) in files.iter().enumerate() {
112 if let Some(ref cb) = progress {
113 cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
114 }
115
116 match self.index_note(file) {
117 Ok(link_count) => {
118 stats.notes_indexed += 1;
119 stats.links_indexed += link_count;
120 }
121 Err(e) => {
122 tracing::warn!(
124 "Failed to index {}: {}",
125 file.relative_path.display(),
126 e
127 );
128 stats.notes_skipped += 1;
129 }
130 }
131 }
132
133 self.db.resolve_link_targets()?;
135 stats.broken_links = self.db.count_broken_links()? as usize;
136
137 stats.duration_ms = start.elapsed().as_millis() as u64;
138 Ok(stats)
139 }
140
141 pub fn incremental_reindex(
144 &self,
145 progress: Option<ProgressCallback>,
146 ) -> Result<IndexStats, BuilderError> {
147 let start = std::time::Instant::now();
148 let mut stats = IndexStats::default();
149
150 let walker =
152 VaultWalker::with_exclusions(self.vault_root, self.excluded_folders.clone())?;
153 let files = walker.walk()?;
154 stats.files_found = files.len();
155
156 let indexed_paths: std::collections::HashSet<std::path::PathBuf> =
158 self.db.get_all_paths()?.into_iter().collect();
159
160 let mut seen_paths: std::collections::HashSet<std::path::PathBuf> =
162 std::collections::HashSet::with_capacity(files.len());
163
164 for (i, file) in files.iter().enumerate() {
166 if let Some(ref cb) = progress {
167 cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
168 }
169
170 seen_paths.insert(file.relative_path.clone());
171
172 let change = self.classify_change(file)?;
174
175 match change {
176 FileChange::Unchanged => {
177 stats.files_unchanged += 1;
178 }
179 FileChange::Added | FileChange::Modified => match self.index_note(file) {
180 Ok(link_count) => {
181 stats.notes_indexed += 1;
182 stats.links_indexed += link_count;
183 if change == FileChange::Added {
184 stats.files_added += 1;
185 } else {
186 stats.files_updated += 1;
187 }
188 }
189 Err(e) => {
190 tracing::warn!(
191 "Failed to index {}: {}",
192 file.relative_path.display(),
193 e
194 );
195 stats.notes_skipped += 1;
196 }
197 },
198 }
199 }
200
201 for indexed_path in &indexed_paths {
203 if !seen_paths.contains(indexed_path) && self.db.delete_note(indexed_path)? {
204 stats.files_deleted += 1;
205 tracing::debug!("Deleted from index: {}", indexed_path.display());
206 }
207 }
208
209 self.db.resolve_link_targets()?;
211 stats.broken_links = self.db.count_broken_links()? as usize;
212
213 stats.duration_ms = start.elapsed().as_millis() as u64;
214 Ok(stats)
215 }
216
217 fn classify_change(&self, file: &WalkedFile) -> Result<FileChange, BuilderError> {
219 let stored_hash = self.db.get_content_hash(&file.relative_path)?;
221
222 match stored_hash {
223 None => Ok(FileChange::Added),
224 Some(stored) => {
225 let current = content_hash(&file.absolute_path).map_err(|e| {
227 BuilderError::FileRead {
228 path: file.absolute_path.display().to_string(),
229 source: e,
230 }
231 })?;
232
233 if current == stored {
234 Ok(FileChange::Unchanged)
235 } else {
236 Ok(FileChange::Modified)
237 }
238 }
239 }
240 }
241
242 fn index_note(&self, file: &WalkedFile) -> Result<usize, BuilderError> {
245 let content = std::fs::read_to_string(&file.absolute_path).map_err(|e| {
247 BuilderError::FileRead {
248 path: file.absolute_path.display().to_string(),
249 source: e,
250 }
251 })?;
252
253 let hash =
255 content_hash(&file.absolute_path).map_err(|e| BuilderError::FileRead {
256 path: file.absolute_path.display().to_string(),
257 source: e,
258 })?;
259
260 let extracted = extract_note(&content, &file.relative_path);
262
263 let modified: DateTime<Utc> = file.modified.into();
265
266 let note = IndexedNote {
268 id: None,
269 path: file.relative_path.clone(),
270 note_type: extracted.note_type,
271 title: extracted.title,
272 created: None, modified,
274 frontmatter_json: extracted.frontmatter_json,
275 content_hash: hash,
276 };
277
278 let note_id = self.db.upsert_note(¬e)?;
280
281 self.db.delete_links_from(note_id)?;
283
284 let link_count = extracted.links.len();
286 for link in extracted.links {
287 let indexed_link = IndexedLink {
288 id: None,
289 source_id: note_id,
290 target_id: None, target_path: link.target,
292 link_text: link.text,
293 link_type: link.link_type,
294 context: link.context,
295 line_number: Some(link.line_number),
296 };
297 self.db.insert_link(&indexed_link)?;
298 }
299
300 Ok(link_count)
301 }
302}
303
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// `note1.md`: frontmatter plus two wiki links, one of them to a note
    /// that does not exist in the fixture vault.
    const NOTE_ONE: &str = r#"---
title: Note One
type: zettel
---
# Note One

This links to [[note2]] and [[missing-note]].
"#;

    /// `note2.md`: frontmatter (including a `project` key) plus one wiki link
    /// back to note1.
    const NOTE_TWO: &str = r#"---
title: Note Two
type: task
project: note1
---
# Note Two

Back to [[note1]].
"#;

    /// `subdir/note3.md`: no frontmatter, one markdown-style link to note1.
    const NOTE_THREE: &str = r#"# Note Three

Links to [Note One](../note1.md).
"#;

    /// Writes `body` to `relative` below `root`, panicking if the write fails.
    fn write_note(root: &Path, relative: &str, body: &str) {
        fs::write(root.join(relative), body).unwrap();
    }

    /// Builds a temporary vault containing `note1.md`, `note2.md` and
    /// `subdir/note3.md`.
    fn create_test_vault() -> TempDir {
        let dir = TempDir::new().unwrap();
        let root = dir.path();

        write_note(root, "note1.md", NOTE_ONE);
        write_note(root, "note2.md", NOTE_TWO);

        fs::create_dir(root.join("subdir")).unwrap();
        write_note(root, "subdir/note3.md", NOTE_THREE);

        dir
    }

    /// Returns the fixture vault together with an empty in-memory index.
    fn fixture() -> (TempDir, IndexDb) {
        (create_test_vault(), IndexDb::open_in_memory().unwrap())
    }

    #[test]
    fn test_full_reindex() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        let report = builder.full_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.notes_indexed, 3);
        assert_eq!(report.notes_skipped, 0);
        // The note bodies contain four links in total (2 + 1 + 1); `>=` leaves
        // room for any additional links the extractor may report.
        assert!(report.links_indexed >= 4);
    }

    #[test]
    fn test_notes_are_indexed_correctly() {
        let (vault, db) = fixture();
        IndexBuilder::new(&db, vault.path())
            .full_reindex(None)
            .unwrap();

        let first = db
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");
        assert_eq!(first.title, "Note One");
        assert_eq!(first.note_type, crate::index::types::NoteType::Zettel);

        let second = db
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");
        assert_eq!(second.title, "Note Two");
        assert_eq!(second.note_type, crate::index::types::NoteType::Task);
    }

    #[test]
    fn test_links_are_indexed() {
        let (vault, db) = fixture();
        IndexBuilder::new(&db, vault.path())
            .full_reindex(None)
            .unwrap();

        let first = db
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");

        // note1 links to [[note2]] and [[missing-note]].
        let outgoing = db.get_outgoing_links(first.id.unwrap()).unwrap();
        assert_eq!(outgoing.len(), 2);
    }

    #[test]
    fn test_link_targets_resolved() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        let report = builder.full_reindex(None).unwrap();

        // [[missing-note]] has no matching file in the fixture vault.
        assert!(report.broken_links >= 1);

        let second = db
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");

        // note1 links to note2, so note2 must have at least one backlink.
        let backlinks = db.get_backlinks(second.id.unwrap()).unwrap();
        assert!(!backlinks.is_empty());
    }

    #[test]
    fn test_reindex_clears_old_data() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        builder.full_reindex(None).unwrap();
        let second_run = builder.full_reindex(None).unwrap();

        // Running twice must not leave duplicate notes behind.
        assert_eq!(second_run.notes_indexed, 3);
        assert_eq!(db.count_notes().unwrap(), 3);
    }

    #[test]
    fn test_incremental_first_run() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        let report = builder.incremental_reindex(None).unwrap();

        // Against an empty index every file counts as added.
        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_added, 3);
        assert_eq!(report.files_unchanged, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 3);
    }

    #[test]
    fn test_incremental_no_changes() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        builder.incremental_reindex(None).unwrap();
        let report = builder.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 0);
    }

    #[test]
    fn test_incremental_file_modified() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        builder.incremental_reindex(None).unwrap();

        write_note(
            vault.path(),
            "note1.md",
            "# Note 1 Modified\n\nNew content.",
        );

        let report = builder.incremental_reindex(None).unwrap();

        assert_eq!(report.files_unchanged, 2);
        assert_eq!(report.files_updated, 1);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    #[test]
    fn test_incremental_file_added() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        builder.incremental_reindex(None).unwrap();

        write_note(vault.path(), "note4.md", "# Note 4\n\nBrand new note.");

        let report = builder.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 4);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 1);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    #[test]
    fn test_incremental_file_deleted() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        builder.incremental_reindex(None).unwrap();

        fs::remove_file(vault.path().join("note2.md")).unwrap();

        let report = builder.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 2);
        assert_eq!(report.files_deleted, 1);
        assert_eq!(report.files_unchanged, 2);

        // The deleted file must be gone from the index as well.
        assert!(db
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .is_none());
        assert_eq!(db.count_notes().unwrap(), 2);
    }

    #[test]
    fn test_incremental_links_updated_on_change() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        builder.incremental_reindex(None).unwrap();

        let before = db.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
        let links_before = db.get_outgoing_links(before.id.unwrap()).unwrap();
        assert_eq!(links_before.len(), 2);

        // Replace both original links with a single new one.
        write_note(vault.path(), "note1.md", "# Note 1\n\n[[note3]] only now.");
        builder.incremental_reindex(None).unwrap();

        let after = db.get_note_by_path(Path::new("note1.md")).unwrap().unwrap();
        let links_after = db.get_outgoing_links(after.id.unwrap()).unwrap();
        assert_eq!(links_after.len(), 1);
        assert_eq!(links_after[0].target_path, "note3");
    }

    #[test]
    fn test_incremental_broken_links_resolved() {
        let (vault, db) = fixture();
        let builder = IndexBuilder::new(&db, vault.path());

        let first_run = builder.incremental_reindex(None).unwrap();
        assert!(first_run.broken_links > 0);

        // Create the note that note1's [[missing-note]] link points at.
        write_note(
            vault.path(),
            "missing-note.md",
            "# Missing Note\n\nNow exists!",
        );

        let second_run = builder.incremental_reindex(None).unwrap();
        assert_eq!(second_run.files_added, 1);

        let target = db
            .get_note_by_path(Path::new("missing-note.md"))
            .unwrap()
            .unwrap();
        let backlinks = db.get_backlinks(target.id.unwrap()).unwrap();
        assert!(!backlinks.is_empty());
    }
}