1use std::path::Path;
4
5use chrono::{DateTime, Utc};
6use thiserror::Error;
7
8use super::db::{IndexDb, IndexError};
9use super::types::{IndexedLink, IndexedNote};
10use crate::vault::{
11 VaultWalker, VaultWalkerError, WalkedFile, content_hash, extract_note,
12};
13
/// Errors that can occur while building or updating the index.
#[derive(Debug, Error)]
pub enum BuilderError {
    /// Creating the vault walker or walking the vault failed.
    #[error("Vault walker error: {0}")]
    Walker(#[from] VaultWalkerError),

    /// An operation on the index database failed.
    #[error("Index database error: {0}")]
    Index(#[from] IndexError),

    /// A file could not be read, either for indexing or for hashing.
    #[error("Failed to read file {path}: {source}")]
    FileRead {
        /// Display form of the absolute path that could not be read.
        path: String,
        /// The underlying I/O error.
        #[source]
        source: std::io::Error,
    },
}
29
/// Counters describing the outcome of a single reindex run.
///
/// The `files_unchanged`, `files_added`, `files_updated` and `files_deleted`
/// counters are only populated by `IndexBuilder::incremental_reindex`; a full
/// reindex leaves them at their default of zero.
#[derive(Debug, Clone, Default)]
pub struct IndexStats {
    /// Number of files returned by the vault walker.
    pub files_found: usize,
    /// Number of notes that were successfully (re)indexed in this run.
    pub notes_indexed: usize,
    /// Number of files whose indexing failed and was skipped with a warning.
    pub notes_skipped: usize,
    /// Total number of links inserted for the notes indexed in this run.
    pub links_indexed: usize,
    /// Broken-link count reported by the database after link resolution.
    pub broken_links: usize,
    /// Wall-clock duration of the run, in milliseconds.
    pub duration_ms: u64,
    /// Files whose stored content hash matched the current one (incremental only).
    pub files_unchanged: usize,
    /// Files with no stored hash that were indexed successfully (incremental only).
    pub files_added: usize,
    /// Files whose hash changed and that were re-indexed successfully (incremental only).
    pub files_updated: usize,
    /// Indexed paths no longer found in the vault and removed from the index
    /// (incremental only).
    pub files_deleted: usize,
}
54
/// How a walked file compares with what the index currently stores for it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FileChange {
    /// The index holds no content hash for the file's path.
    Added,
    /// The index holds a content hash that differs from the file's current hash.
    Modified,
    /// The stored content hash equals the file's current hash.
    Unchanged,
}
65
/// Callback invoked once per walked file during a reindex.
///
/// Arguments are, in order: the 1-based position of the file being processed,
/// the total number of walked files, and the file's vault-relative path
/// (lossily converted to UTF-8).
pub type ProgressCallback = Box<dyn Fn(usize, usize, &str)>;
69
/// Builds and updates the note index for a vault.
///
/// Borrows both the index database and the vault root for its lifetime.
pub struct IndexBuilder<'a> {
    /// Index database that notes and links are written to.
    db: &'a IndexDb,
    /// Root directory of the vault that is walked for files.
    vault_root: &'a Path,
}
75
76impl<'a> IndexBuilder<'a> {
77 pub fn new(db: &'a IndexDb, vault_root: &'a Path) -> Self {
79 Self { db, vault_root }
80 }
81
82 pub fn full_reindex(
85 &self,
86 progress: Option<ProgressCallback>,
87 ) -> Result<IndexStats, BuilderError> {
88 let start = std::time::Instant::now();
89 let mut stats = IndexStats::default();
90
91 let walker = VaultWalker::new(self.vault_root)?;
93 let files = walker.walk()?;
94 stats.files_found = files.len();
95
96 self.db.clear_all()?;
98
99 for (i, file) in files.iter().enumerate() {
101 if let Some(ref cb) = progress {
102 cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
103 }
104
105 match self.index_note(file) {
106 Ok(link_count) => {
107 stats.notes_indexed += 1;
108 stats.links_indexed += link_count;
109 }
110 Err(e) => {
111 tracing::warn!(
113 "Failed to index {}: {}",
114 file.relative_path.display(),
115 e
116 );
117 stats.notes_skipped += 1;
118 }
119 }
120 }
121
122 self.db.resolve_link_targets()?;
124 stats.broken_links = self.db.count_broken_links()? as usize;
125
126 stats.duration_ms = start.elapsed().as_millis() as u64;
127 Ok(stats)
128 }
129
130 pub fn incremental_reindex(
133 &self,
134 progress: Option<ProgressCallback>,
135 ) -> Result<IndexStats, BuilderError> {
136 let start = std::time::Instant::now();
137 let mut stats = IndexStats::default();
138
139 let walker = VaultWalker::new(self.vault_root)?;
141 let files = walker.walk()?;
142 stats.files_found = files.len();
143
144 let indexed_paths: std::collections::HashSet<std::path::PathBuf> =
146 self.db.get_all_paths()?.into_iter().collect();
147
148 let mut seen_paths: std::collections::HashSet<std::path::PathBuf> =
150 std::collections::HashSet::with_capacity(files.len());
151
152 for (i, file) in files.iter().enumerate() {
154 if let Some(ref cb) = progress {
155 cb(i + 1, files.len(), &file.relative_path.to_string_lossy());
156 }
157
158 seen_paths.insert(file.relative_path.clone());
159
160 let change = self.classify_change(file)?;
162
163 match change {
164 FileChange::Unchanged => {
165 stats.files_unchanged += 1;
166 }
167 FileChange::Added | FileChange::Modified => match self.index_note(file) {
168 Ok(link_count) => {
169 stats.notes_indexed += 1;
170 stats.links_indexed += link_count;
171 if change == FileChange::Added {
172 stats.files_added += 1;
173 } else {
174 stats.files_updated += 1;
175 }
176 }
177 Err(e) => {
178 tracing::warn!(
179 "Failed to index {}: {}",
180 file.relative_path.display(),
181 e
182 );
183 stats.notes_skipped += 1;
184 }
185 },
186 }
187 }
188
189 for indexed_path in &indexed_paths {
191 if !seen_paths.contains(indexed_path) && self.db.delete_note(indexed_path)? {
192 stats.files_deleted += 1;
193 tracing::debug!("Deleted from index: {}", indexed_path.display());
194 }
195 }
196
197 self.db.resolve_link_targets()?;
199 stats.broken_links = self.db.count_broken_links()? as usize;
200
201 stats.duration_ms = start.elapsed().as_millis() as u64;
202 Ok(stats)
203 }
204
205 fn classify_change(&self, file: &WalkedFile) -> Result<FileChange, BuilderError> {
207 let stored_hash = self.db.get_content_hash(&file.relative_path)?;
209
210 match stored_hash {
211 None => Ok(FileChange::Added),
212 Some(stored) => {
213 let current = content_hash(&file.absolute_path).map_err(|e| {
215 BuilderError::FileRead {
216 path: file.absolute_path.display().to_string(),
217 source: e,
218 }
219 })?;
220
221 if current == stored {
222 Ok(FileChange::Unchanged)
223 } else {
224 Ok(FileChange::Modified)
225 }
226 }
227 }
228 }
229
230 fn index_note(&self, file: &WalkedFile) -> Result<usize, BuilderError> {
233 let content = std::fs::read_to_string(&file.absolute_path).map_err(|e| {
235 BuilderError::FileRead {
236 path: file.absolute_path.display().to_string(),
237 source: e,
238 }
239 })?;
240
241 let hash =
243 content_hash(&file.absolute_path).map_err(|e| BuilderError::FileRead {
244 path: file.absolute_path.display().to_string(),
245 source: e,
246 })?;
247
248 let extracted = extract_note(&content, &file.relative_path);
250
251 let modified: DateTime<Utc> = file.modified.into();
253
254 let note = IndexedNote {
256 id: None,
257 path: file.relative_path.clone(),
258 note_type: extracted.note_type,
259 title: extracted.title,
260 created: None, modified,
262 frontmatter_json: extracted.frontmatter_json,
263 content_hash: hash,
264 };
265
266 let note_id = self.db.upsert_note(¬e)?;
268
269 self.db.delete_links_from(note_id)?;
271
272 let link_count = extracted.links.len();
274 for link in extracted.links {
275 let indexed_link = IndexedLink {
276 id: None,
277 source_id: note_id,
278 target_id: None, target_path: link.target,
280 link_text: link.text,
281 link_type: link.link_type,
282 context: link.context,
283 line_number: Some(link.line_number),
284 };
285 self.db.insert_link(&indexed_link)?;
286 }
287
288 Ok(link_count)
289 }
290}
291
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// `note1.md`: a zettel linking to `note2` and to a note that does not exist.
    const NOTE_ONE: &str = r#"---
title: Note One
type: zettel
---
# Note One

This links to [[note2]] and [[missing-note]].
"#;

    /// `note2.md`: a task linking back to `note1`.
    const NOTE_TWO: &str = r#"---
title: Note Two
type: task
project: note1
---
# Note Two

Back to [[note1]].
"#;

    /// `subdir/note3.md`: no frontmatter, one markdown-style link to `note1`.
    const NOTE_THREE: &str = r#"# Note Three

Links to [Note One](../note1.md).
"#;

    /// Writes `body` to `relative` underneath `root`, panicking on failure.
    fn write_note(root: &Path, relative: &str, body: &str) {
        fs::write(root.join(relative), body).unwrap();
    }

    /// Creates a temporary vault holding three notes, one of them in a subdirectory.
    fn create_test_vault() -> TempDir {
        let vault = TempDir::new().unwrap();

        write_note(vault.path(), "note1.md", NOTE_ONE);
        write_note(vault.path(), "note2.md", NOTE_TWO);

        fs::create_dir(vault.path().join("subdir")).unwrap();
        write_note(vault.path(), "subdir/note3.md", NOTE_THREE);

        vault
    }

    /// A full reindex finds and indexes all three notes and their links.
    #[test]
    fn test_full_reindex() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();

        let report = IndexBuilder::new(&index, fixture.path())
            .full_reindex(None)
            .unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.notes_indexed, 3);
        assert_eq!(report.notes_skipped, 0);
        // At least the four links written in the fixture notes.
        assert!(report.links_indexed >= 4);
    }

    /// Title and note type from the frontmatter end up in the index.
    #[test]
    fn test_notes_are_indexed_correctly() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();

        IndexBuilder::new(&index, fixture.path())
            .full_reindex(None)
            .unwrap();

        let first = index
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");
        assert_eq!(first.title, "Note One");
        assert_eq!(first.note_type, crate::index::types::NoteType::Zettel);

        let second = index
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");
        assert_eq!(second.title, "Note Two");
        assert_eq!(second.note_type, crate::index::types::NoteType::Task);
    }

    /// Outgoing links of a note are stored, including the one to a missing note.
    #[test]
    fn test_links_are_indexed() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();

        IndexBuilder::new(&index, fixture.path())
            .full_reindex(None)
            .unwrap();

        let first = index
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .expect("note1 should exist");

        let outgoing = index.get_outgoing_links(first.id.unwrap()).unwrap();
        // [[note2]] and [[missing-note]].
        assert_eq!(outgoing.len(), 2);
    }

    /// After a full reindex, resolvable links yield backlinks and the
    /// unresolvable one is reported as broken.
    #[test]
    fn test_link_targets_resolved() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();

        let report = IndexBuilder::new(&index, fixture.path())
            .full_reindex(None)
            .unwrap();

        // [[missing-note]] has no target.
        assert!(report.broken_links >= 1);

        let second = index
            .get_note_by_path(Path::new("note2.md"))
            .unwrap()
            .expect("note2 should exist");

        let incoming = index.get_backlinks(second.id.unwrap()).unwrap();
        assert!(!incoming.is_empty());
    }

    /// Running a full reindex twice does not duplicate notes.
    #[test]
    fn test_reindex_clears_old_data() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        indexer.full_reindex(None).unwrap();
        let second_run = indexer.full_reindex(None).unwrap();

        assert_eq!(second_run.notes_indexed, 3);
        assert_eq!(index.count_notes().unwrap(), 3);
    }

    /// On an empty index, an incremental run treats every file as added.
    #[test]
    fn test_incremental_first_run() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_added, 3);
        assert_eq!(report.files_unchanged, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 3);
    }

    /// A second incremental run over an untouched vault indexes nothing.
    #[test]
    fn test_incremental_no_changes() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        indexer.incremental_reindex(None).unwrap();
        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 3);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.files_deleted, 0);
        assert_eq!(report.notes_indexed, 0);
    }

    /// Rewriting one file makes exactly that file count as updated.
    #[test]
    fn test_incremental_file_modified() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        indexer.incremental_reindex(None).unwrap();

        write_note(
            fixture.path(),
            "note1.md",
            "# Note 1 Modified\n\nNew content.",
        );

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_unchanged, 2);
        assert_eq!(report.files_updated, 1);
        assert_eq!(report.files_added, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    /// A newly created file is the only thing indexed on the next run.
    #[test]
    fn test_incremental_file_added() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        indexer.incremental_reindex(None).unwrap();

        write_note(fixture.path(), "note4.md", "# Note 4\n\nBrand new note.");

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 4);
        assert_eq!(report.files_unchanged, 3);
        assert_eq!(report.files_added, 1);
        assert_eq!(report.files_updated, 0);
        assert_eq!(report.notes_indexed, 1);
    }

    /// A file removed from the vault is removed from the index as well.
    #[test]
    fn test_incremental_file_deleted() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        indexer.incremental_reindex(None).unwrap();

        fs::remove_file(fixture.path().join("note2.md")).unwrap();

        let report = indexer.incremental_reindex(None).unwrap();

        assert_eq!(report.files_found, 2);
        assert_eq!(report.files_deleted, 1);
        assert_eq!(report.files_unchanged, 2);

        let lookup = index.get_note_by_path(Path::new("note2.md")).unwrap();
        assert!(lookup.is_none());
        assert_eq!(index.count_notes().unwrap(), 2);
    }

    /// Re-indexing a modified note replaces its old outgoing links.
    #[test]
    fn test_incremental_links_updated_on_change() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        indexer.incremental_reindex(None).unwrap();

        let before = index
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .unwrap();
        let links_before = index.get_outgoing_links(before.id.unwrap()).unwrap();
        assert_eq!(links_before.len(), 2);

        // Replace both original links with a single different one.
        write_note(
            fixture.path(),
            "note1.md",
            "# Note 1\n\n[[note3]] only now.",
        );
        indexer.incremental_reindex(None).unwrap();

        let after = index
            .get_note_by_path(Path::new("note1.md"))
            .unwrap()
            .unwrap();
        let links_after = index.get_outgoing_links(after.id.unwrap()).unwrap();
        assert_eq!(links_after.len(), 1);
        assert_eq!(links_after[0].target_path, "note3");
    }

    /// A previously broken link resolves once its target note is created.
    #[test]
    fn test_incremental_broken_links_resolved() {
        let fixture = create_test_vault();
        let index = IndexDb::open_in_memory().unwrap();
        let indexer = IndexBuilder::new(&index, fixture.path());

        let first_run = indexer.incremental_reindex(None).unwrap();
        assert!(first_run.broken_links > 0);

        // Create the note that `note1.md` was pointing at all along.
        write_note(
            fixture.path(),
            "missing-note.md",
            "# Missing Note\n\nNow exists!",
        );

        let second_run = indexer.incremental_reindex(None).unwrap();
        assert_eq!(second_run.files_added, 1);

        let created = index
            .get_note_by_path(Path::new("missing-note.md"))
            .unwrap()
            .unwrap();
        let incoming = index.get_backlinks(created.id.unwrap()).unwrap();
        assert!(!incoming.is_empty());
    }
}