1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::parser::FolderMeta;
62use crate::store::{Layer, Store};
63
/// Maximum number of records listed in a type-folder `index.md`. Folders with
/// more records than this get a "More" footer instead of the remaining entries.
const MD_CAP: usize = 500;

/// Summary text stored for a file whose frontmatter has no usable `summary`.
const MISSING_SUMMARY: &str = "(no summary)";

/// Heading text of the root-level `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
74
/// The level of the store that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store root.
    Root,
    /// A single layer directory.
    Layer(Layer),
    /// A type folder, identified by its path. The builders in this file store
    /// the normalized path they resolve against the store root.
    TypeFolder(PathBuf),
}
85
/// One catalog entry describing a single file; each record is serialized as
/// one line of an `index.jsonl` file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Path of the described file, (de)serialized through `path_serde`.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// Frontmatter `type` value, serialized under the key `type`.
    /// Empty when the frontmatter has no usable `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// Frontmatter `summary`, or the missing-summary placeholder.
    pub summary: String,
    /// Frontmatter `tags`; defaults to empty when absent from the JSON.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Frontmatter `links`; defaults to empty when absent from the JSON.
    #[serde(default)]
    pub links: Vec<String>,
    /// Frontmatter `created` timestamp, when present and parseable as RFC 3339.
    pub created: Option<DateTime<FixedOffset>>,
    /// Frontmatter `updated` timestamp, when present and parseable as RFC 3339.
    /// Drives the recency ordering of records.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other frontmatter key, flattened into the same JSON object.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
119
/// An in-memory index for one level of the store.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// The level this index was built for.
    pub level: IndexLevel,
    /// Per-file records. Filled for type-folder and layer indexes; the root
    /// builder leaves it empty.
    pub records: Vec<IndexRecord>,
    /// Number of indexed files per type folder. Filled for layer and root
    /// indexes; the type-folder builder leaves it empty.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
132
133impl Index {
134 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
140 let rel = normalize_rel(type_folder);
141 let abs = store.root.join(&rel);
142 let mut records = Vec::new();
143 for file_abs in walk_type_folder_files(&abs) {
144 let rel_path =
145 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
146 records.push(record_from_file(&file_abs, rel_path)?);
158 }
159 sort_records(&mut records);
160 Ok(Index {
161 level: IndexLevel::TypeFolder(rel),
162 records,
163 child_counts: BTreeMap::new(),
164 })
165 }
166
167 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
176 let mut child_counts = BTreeMap::new();
177 for tf in type_folders_in_layer(store, layer) {
178 let abs = store.root.join(&tf);
179 let n = walk_type_folder_files(&abs).len();
180 if n > 0 {
181 child_counts.insert(tf, n);
182 }
183 }
184 let mut records = Vec::new();
185 for file_abs in loose_files_in_layer(store, layer) {
186 let rel_path =
187 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
188 records.push(record_from_file(&file_abs, rel_path)?);
193 }
194 sort_records(&mut records);
195 Ok(Index {
196 level: IndexLevel::Layer(layer),
197 records,
198 child_counts,
199 })
200 }
201
202 pub fn build_root(store: &Store) -> crate::Result<Index> {
205 let mut child_counts = BTreeMap::new();
206 for layer in Layer::all() {
207 for tf in type_folders_in_layer(store, layer) {
208 let abs = store.root.join(&tf);
209 let n = walk_type_folder_files(&abs).len();
210 if n > 0 {
211 child_counts.insert(tf, n);
212 }
213 }
214 }
215 Ok(Index {
216 level: IndexLevel::Root,
217 records: Vec::new(),
218 child_counts,
219 })
220 }
221
222 pub fn to_markdown(&self) -> String {
224 match &self.level {
225 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
226 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
227 IndexLevel::Root => self.render_root_md(),
228 }
229 }
230
231 pub fn to_jsonl(&self) -> String {
237 let mut out = String::new();
238 for rec in &self.records {
239 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
242 out.push_str(&line);
243 out.push('\n');
244 }
245 out
246 }
247
248 fn render_type_folder_md(&self, folder: &Path) -> String {
251 let folder_disp = path_to_unix(folder);
252 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
253 let mut s = String::new();
254 s.push_str("---\n");
255 s.push_str("type: index\n");
256 s.push_str("scope: type-folder\n");
257 s.push_str(&format!("folder: {folder_disp}\n"));
258 if let Some(ts) = updated {
259 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
260 }
261 s.push_str("---\n\n");
262 s.push_str(&format!("# {folder_disp}\n\n"));
263
264 let shown = self.records.len().min(MD_CAP);
265 for rec in self.records.iter().take(shown) {
266 s.push_str(&format_md_entry(rec));
267 s.push('\n');
268 }
269
270 if self.records.len() > MD_CAP {
271 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
272 let layer = folder
273 .components()
274 .next()
275 .and_then(|c| c.as_os_str().to_str())
276 .unwrap_or("");
277 s.push('\n');
278 s.push_str(&more_footer(self.records.len(), type_, layer));
279 }
280 s
281 }
282
283 fn render_layer_md(&self, layer: Layer) -> String {
288 let layer_dir = layer_dir_name(layer);
289 let mut s = String::new();
290 s.push_str("---\n");
291 s.push_str("type: index\n");
292 s.push_str("scope: layer\n");
293 s.push_str(&format!("folder: {layer_dir}\n"));
294 s.push_str("---\n\n");
295 s.push_str(&format!("# {layer_dir}\n\n"));
296 for (tf, n) in &self.child_counts {
297 let tf_unix = path_to_unix(tf);
298 let display = capitalize(folder_basename(tf));
299 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
300 }
301 s
302 }
303
304 fn render_root_md(&self) -> String {
307 let mut s = String::new();
308 s.push_str("---\n");
309 s.push_str("type: index\n");
310 s.push_str("scope: root\n");
311 s.push_str("---\n\n");
312 s.push_str(&format!("# {ROOT_TITLE}\n"));
313 for layer in Layer::all() {
314 let layer_dir = layer_dir_name(layer);
315 let prefix = format!("{layer_dir}/");
316 let children: Vec<(&PathBuf, &usize)> = self
317 .child_counts
318 .iter()
319 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
320 .collect();
321 if children.is_empty() {
322 continue;
323 }
324 let total: usize = children.iter().map(|(_, n)| **n).sum();
325 s.push('\n');
326 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
327 for (tf, n) in children {
328 let tf_unix = path_to_unix(tf);
329 let display = capitalize(folder_basename(tf));
330 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
331 }
332 }
333 s
334 }
335}
336
337impl Index {
342 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
349 let file_rel = normalize_rel(file);
350 if is_index_artifact(&file_rel) {
357 return Ok(());
358 }
359 if let Some(layer) = loose_layer_of(&file_rel) {
363 return apply_loose_change(store, layer, &file_rel, false);
364 }
365 let file_abs = store.root.join(&file_rel);
366 let folder = type_folder_of(&file_rel)
367 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
368 let record = record_from_file(&file_abs, file_rel.clone())?;
369
370 let _lock = FolderLock::acquire(&store.root.join(&folder));
373 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
374 records.retain(|r| r.path != record.path);
375 records.push(record);
376 sort_records(&mut records);
377
378 write_type_folder_artifacts(store, &folder, &records)?;
379 update_parents(store, &folder)?;
380 Ok(())
381 }
382
    /// Updates the indexes after a file was renamed from `old` to `new`.
    ///
    /// Renames touching an index artifact are ignored. When either side is a
    /// loose layer file, the rename is handled as a remove of `old` followed
    /// by a write of `new`. Otherwise both paths must sit in a
    /// layer/type-folder; the old record is dropped from its folder index and
    /// a record built from the new file is added to the target folder index.
    ///
    /// # Errors
    /// Fails when either path is not inside a layer/type-folder, when the new
    /// file's record cannot be built, or when reading or writing an index fails.
    pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
        let old_rel = normalize_rel(old);
        let new_rel = normalize_rel(new);
        if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
            return Ok(());
        }
        if loose_layer_of(&old_rel).is_some() || loose_layer_of(&new_rel).is_some() {
            Self::on_remove(store, &old_rel)?;
            Self::on_write(store, &new_rel)?;
            return Ok(());
        }
        let old_folder = type_folder_of(&old_rel)
            .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
        let new_folder = type_folder_of(&new_rel)
            .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;

        // Held until the function returns.
        let _locks = lock_folders(store, &old_folder, &new_folder);

        let mut old_records =
            read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
        old_records.retain(|r| r.path != old_rel);

        // Same-folder rename: one index is rewritten, with the old record
        // dropped and the new one added.
        if old_folder == new_folder {
            let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
            old_records.retain(|r| r.path != record.path);
            old_records.push(record);
            sort_records(&mut old_records);
            write_type_folder_artifacts(store, &old_folder, &old_records)?;
            update_parents(store, &old_folder)?;
            return Ok(());
        }

        // Cross-folder rename: the source folder's artifacts are written first.
        // NOTE(review): if building the new record fails below, the source
        // index has already been rewritten and the parents have not been
        // refreshed — confirm this ordering is intended.
        sort_records(&mut old_records);
        write_type_folder_artifacts(store, &old_folder, &old_records)?;

        let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
        let mut new_records =
            read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
        new_records.retain(|r| r.path != record.path);
        new_records.push(record);
        sort_records(&mut new_records);
        write_type_folder_artifacts(store, &new_folder, &new_records)?;

        update_parents(store, &old_folder)?;
        update_parents(store, &new_folder)?;
        Ok(())
    }
448
449 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
454 let file_rel = normalize_rel(file);
455 if is_index_artifact(&file_rel) {
458 return Ok(());
459 }
460 if let Some(layer) = loose_layer_of(&file_rel) {
462 return apply_loose_change(store, layer, &file_rel, true);
463 }
464 let folder = type_folder_of(&file_rel)
465 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
466 let _lock = FolderLock::acquire(&store.root.join(&folder));
468 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
469 let before = records.len();
470 records.retain(|r| r.path != file_rel);
471 if records.len() == before {
472 }
475 sort_records(&mut records);
476 write_type_folder_artifacts(store, &folder, &records)?;
477 update_parents(store, &folder)?;
478 Ok(())
479 }
480
481 pub fn rebuild_all(store: &Store) -> crate::Result<()> {
485 Index::cleanup(store)?;
486 for layer in Layer::all() {
487 for tf in type_folders_in_layer(store, layer) {
488 let idx = Index::build_type_folder(store, &tf)?;
489 if idx.records.is_empty() {
490 continue;
491 }
492 write_type_folder_artifacts(store, &tf, &idx.records)?;
493 }
494 let layer_idx = Index::build_layer(store, layer)?;
495 let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
496 if layer_idx.child_counts.is_empty() {
497 remove_if_exists(&layer_index_md)?;
498 } else {
499 write_atomic(
500 &layer_index_md,
501 render_layer_md_with_store(store, &layer_idx),
502 )?;
503 }
504 write_layer_jsonl(store, layer, &layer_idx.records)?;
508 }
509 let root_idx = Index::build_root(store)?;
510 let root_index_md = store.root.join("index.md");
511 if root_idx.child_counts.is_empty() {
512 remove_if_exists(&root_index_md)?;
513 } else {
514 write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
515 }
516 Ok(())
517 }
518
519 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
526 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
527 update_parents(store, folder)
528 }
529
530 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
532 match level {
533 IndexLevel::TypeFolder(folder) => {
534 let idx = Index::build_type_folder(store, folder)?;
535 if idx.records.is_empty() {
536 remove_if_exists(&store.root.join(folder).join("index.md"))?;
537 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
538 } else {
539 write_type_folder_artifacts(store, folder, &idx.records)?;
540 }
541 }
542 IndexLevel::Layer(layer) => {
543 let idx = Index::build_layer(store, *layer)?;
544 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
545 if idx.child_counts.is_empty() {
546 remove_if_exists(&p)?;
547 } else {
548 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
549 }
550 write_layer_jsonl(store, *layer, &idx.records)?;
551 }
552 IndexLevel::Root => {
553 let idx = Index::build_root(store)?;
554 let p = store.root.join("index.md");
555 if idx.child_counts.is_empty() {
556 remove_if_exists(&p)?;
557 } else {
558 write_atomic(&p, render_root_md_with_store(store, &idx))?;
559 }
560 }
561 }
562 Ok(())
563 }
564
565 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
568 let mut out = String::new();
569 match level {
570 IndexLevel::TypeFolder(folder) => {
571 let idx = Index::build_type_folder(store, folder)?;
572 let md_path = path_to_unix(&folder.join("index.md"));
573 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
574 out.push_str(&format!("--- {md_path} ---\n"));
575 out.push_str(&idx.to_markdown());
576 out.push_str(&format!("--- {jsonl_path} ---\n"));
577 out.push_str(&idx.to_jsonl());
578 }
579 IndexLevel::Layer(layer) => {
580 let idx = Index::build_layer(store, *layer)?;
581 let md_path = format!("{}/index.md", layer_dir_name(*layer));
582 out.push_str(&format!("--- {md_path} ---\n"));
583 out.push_str(&render_layer_md_with_store(store, &idx));
584 }
585 IndexLevel::Root => {
586 let idx = Index::build_root(store)?;
587 out.push_str("--- index.md ---\n");
588 out.push_str(&render_root_md_with_store(store, &idx));
589 }
590 }
591 Ok(out)
592 }
593
594 pub fn cleanup(store: &Store) -> crate::Result<()> {
612 for layer in Layer::all() {
613 let layer_dir = store.root.join(layer_dir_name(layer));
614 if !layer_dir.is_dir() {
615 continue;
616 }
617 for tf in type_folders_in_layer(store, layer) {
618 let tf_abs = store.root.join(&tf);
619 for entry in walkdir::WalkDir::new(&tf_abs)
623 .min_depth(2)
624 .into_iter()
625 .filter_map(|e| e.ok())
626 {
627 let p = entry.path();
628 if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
629 remove_if_exists(p)?;
630 }
631 }
632 if walk_type_folder_files(&tf_abs).is_empty() {
636 let md = tf_abs.join("index.md");
637 if is_deletable_catalog_artifact(&md) {
638 remove_if_exists(&md)?;
639 }
640 remove_if_exists(&tf_abs.join("index.jsonl"))?;
641 }
642 }
643 }
644 Ok(())
645 }
646}
647
648fn write_type_folder_artifacts(
656 store: &Store,
657 folder: &Path,
658 records: &[IndexRecord],
659) -> crate::Result<()> {
660 let folder_abs = store.root.join(folder);
661 let md_path = folder_abs.join("index.md");
662 let jsonl_path = folder_abs.join("index.jsonl");
663 if records.is_empty() {
664 remove_if_exists(&md_path)?;
665 remove_if_exists(&jsonl_path)?;
666 return Ok(());
667 }
668 let idx = Index {
669 level: IndexLevel::TypeFolder(folder.to_path_buf()),
670 records: records.to_vec(),
671 child_counts: BTreeMap::new(),
672 };
673 write_atomic(&md_path, idx.to_markdown())?;
674 write_atomic(&jsonl_path, idx.to_jsonl())?;
675 Ok(())
676}
677
678fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
691 let _root_lock = FolderLock::acquire(&store.root);
724 let stats = collect_child_stats(store, &Layer::all())?;
725
726 let layer = folder
727 .components()
728 .next()
729 .and_then(|c| c.as_os_str().to_str())
730 .and_then(layer_from_dir_name);
731 if let Some(layer) = layer {
732 let p = store.root.join(layer_dir_name(layer)).join("index.md");
733 if layer_has_children(&stats, layer) {
734 write_atomic(
735 &p,
736 render_layer_md_from_stats(layer, &stats, &store.config.folders),
737 )?;
738 } else {
739 remove_if_exists(&p)?;
740 }
741 }
742 let rp = store.root.join("index.md");
743 if stats.values().any(|s| s.count > 0) {
744 write_atomic(
745 &rp,
746 render_root_md_from_stats(&stats, &store.config.folders),
747 )?;
748 } else {
749 remove_if_exists(&rp)?;
750 }
751 Ok(())
752}
753
754fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
756 let prefix = format!("{}/", layer_dir_name(layer));
757 stats
758 .iter()
759 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
760}
761
762fn render_layer_md_from_stats(
767 layer: Layer,
768 stats: &BTreeMap<PathBuf, FolderStat>,
769 folders: &BTreeMap<String, FolderMeta>,
770) -> String {
771 let layer_dir = layer_dir_name(layer);
772 let prefix = format!("{layer_dir}/");
773 let mut max_upd: Option<DateTime<FixedOffset>> = None;
774 let mut entries = String::new();
775 for (tf, stat) in stats {
776 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
777 continue;
778 }
779 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
780 max_upd = Some(match max_upd {
781 Some(cur) if cur >= u => cur,
782 _ => u,
783 });
784 }
785 let tf_unix = path_to_unix(tf);
786 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
787 entries.push_str(&folder_entry(&tf_unix, &display, stat.count, description));
788 }
789 let mut s = String::new();
790 s.push_str("---\n");
791 s.push_str("type: index\n");
792 s.push_str("scope: layer\n");
793 s.push_str(&format!("folder: {layer_dir}\n"));
794 if let Some(ts) = max_upd {
795 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
796 }
797 s.push_str("---\n\n");
798 s.push_str(&format!("# {layer_dir}\n\n"));
799 s.push_str(&entries);
800 s
801}
802
803fn render_root_md_from_stats(
805 stats: &BTreeMap<PathBuf, FolderStat>,
806 folders: &BTreeMap<String, FolderMeta>,
807) -> String {
808 let mut max_upd: Option<DateTime<FixedOffset>> = None;
809 for stat in stats.values() {
810 if stat.count == 0 {
811 continue;
812 }
813 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
814 max_upd = Some(match max_upd {
815 Some(cur) if cur >= u => cur,
816 _ => u,
817 });
818 }
819 }
820 let mut s = String::new();
821 s.push_str("---\n");
822 s.push_str("type: index\n");
823 s.push_str("scope: root\n");
824 if let Some(ts) = max_upd {
825 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
826 }
827 s.push_str("---\n\n");
828 s.push_str(&format!("# {ROOT_TITLE}\n"));
829 for layer in Layer::all() {
830 let layer_dir = layer_dir_name(layer);
831 let prefix = format!("{layer_dir}/");
832 let children: Vec<(&PathBuf, usize)> = stats
833 .iter()
834 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
835 .map(|(tf, s)| (tf, s.count))
836 .collect();
837 if children.is_empty() {
838 continue;
839 }
840 let total: usize = children.iter().map(|(_, n)| *n).sum();
841 s.push('\n');
842 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
843 for (tf, n) in children {
844 let tf_unix = path_to_unix(tf);
845 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
846 s.push_str(&folder_entry(&tf_unix, &display, n, description));
847 }
848 }
849 s
850}
851
852fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
859 let layer = match idx.level {
860 IndexLevel::Layer(l) => l,
861 _ => unreachable!("render_layer_md_with_store called on non-layer"),
862 };
863 let layer_dir = layer_dir_name(layer);
864 let mut max_upd: Option<DateTime<FixedOffset>> = None;
865 let mut entries = String::new();
866 for (tf, n) in &idx.child_counts {
867 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
868 let newest = recs.first();
869 if let Some(u) = newest.and_then(|r| r.updated) {
870 max_upd = Some(match max_upd {
871 Some(cur) if cur >= u => cur,
872 _ => u,
873 });
874 }
875 let tf_unix = path_to_unix(tf);
876 let (display, description) =
877 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
878 entries.push_str(&folder_entry(&tf_unix, &display, *n, description));
879 }
880 let mut s = String::new();
881 s.push_str("---\n");
882 s.push_str("type: index\n");
883 s.push_str("scope: layer\n");
884 s.push_str(&format!("folder: {layer_dir}\n"));
885 if let Some(ts) = max_upd {
886 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
887 }
888 s.push_str("---\n\n");
889 s.push_str(&format!("# {layer_dir}\n\n"));
890 s.push_str(&entries);
891 s
892}
893
894fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
898 let mut max_upd: Option<DateTime<FixedOffset>> = None;
899 for tf in idx.child_counts.keys() {
900 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
901 if let Some(u) = recs.first().and_then(|r| r.updated) {
902 max_upd = Some(match max_upd {
903 Some(cur) if cur >= u => cur,
904 _ => u,
905 });
906 }
907 }
908 let mut s = String::new();
909 s.push_str("---\n");
910 s.push_str("type: index\n");
911 s.push_str("scope: root\n");
912 if let Some(ts) = max_upd {
913 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
914 }
915 s.push_str("---\n\n");
916 s.push_str(&format!("# {ROOT_TITLE}\n"));
917 for layer in Layer::all() {
918 let layer_dir = layer_dir_name(layer);
919 let prefix = format!("{layer_dir}/");
920 let children: Vec<(&PathBuf, &usize)> = idx
921 .child_counts
922 .iter()
923 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
924 .collect();
925 if children.is_empty() {
926 continue;
927 }
928 let total: usize = children.iter().map(|(_, n)| **n).sum();
929 s.push('\n');
930 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
931 for (tf, n) in children {
932 let tf_unix = path_to_unix(tf);
933 let (display, description) =
934 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
935 s.push_str(&folder_entry(&tf_unix, &display, *n, description));
936 }
937 }
938 s
939}
940
941fn format_md_entry(rec: &IndexRecord) -> String {
947 let path = wiki_target(&rec.path);
948 let summary = collapse_whitespace(&rec.summary);
957 let mut line = format!("- [[{path}]] — {summary}");
958 if !rec.tags.is_empty() {
959 let tags = rec
960 .tags
961 .iter()
962 .map(|t| format!("#{t}"))
963 .collect::<Vec<_>>()
964 .join(" ");
965 line.push_str(&format!(" · {tags}"));
966 }
967 line
968}
969
970fn more_footer(total: usize, type_: &str, layer: &str) -> String {
972 format!(
973 "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd query --type {type_} --in {layer}` for the complete catalog.\n"
974 )
975}
976
977fn sort_records(records: &mut [IndexRecord]) {
981 records.sort_by(record_recency_cmp);
982}
983
984impl IndexRecord {
985 pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
997 record_from_file(abs, rel)
998 }
999}
1000
1001fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
1004 let mut meta = read_frontmatter(abs)?;
1005 if rel.starts_with("records") {
1010 meta.fields
1011 .entry("meta-type".to_string())
1012 .or_insert_with(|| Value::String("fact".to_string()));
1013 }
1014 Ok(IndexRecord {
1015 path: rel,
1016 type_: meta.type_.unwrap_or_default(),
1017 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
1018 tags: meta.tags,
1019 links: meta.links,
1020 created: meta.created,
1021 updated: meta.updated,
1022 fields: meta.fields,
1023 })
1024}
1025
/// Frontmatter values extracted from a single file by `read_frontmatter`.
struct FileMeta {
    /// The `type` key, when it holds a string, number or boolean.
    type_: Option<String>,
    /// The `summary` key, when it holds a string, number or boolean.
    summary: Option<String>,
    /// The `tags` key as a list of strings; empty when absent.
    tags: Vec<String>,
    /// The `links` key as a list of strings; empty when absent.
    links: Vec<String>,
    /// The `created` key, when it is a string that parses as RFC 3339.
    created: Option<DateTime<FixedOffset>>,
    /// The `updated` key, when it is a string that parses as RFC 3339.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining string-keyed entries (except `path`), converted to JSON.
    fields: BTreeMap<String, Value>,
}
1036
1037fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
1051 let bytes = fs::read(abs)?;
1052 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
1053 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
1054 serde_norway::Mapping::new()
1055 } else {
1056 serde_norway::from_str(&yaml).map_err(|e| {
1057 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1058 path: abs.to_path_buf(),
1059 message: format!("frontmatter YAML: {e}"),
1060 })
1061 })?
1062 };
1063
1064 let mut type_ = None;
1065 let mut summary = None;
1066 let mut tags = Vec::new();
1067 let mut links = Vec::new();
1068 let mut created = None;
1069 let mut updated = None;
1070 let mut fields = BTreeMap::new();
1071
1072 for (k, v) in map {
1073 let key = match k.as_str() {
1074 Some(s) => s.to_string(),
1075 None => continue,
1076 };
1077 match key.as_str() {
1078 "type" => type_ = scalar_string(&v),
1088 "summary" => summary = scalar_string(&v),
1089 "tags" => tags = yaml_string_list(&v),
1090 "links" => links = yaml_string_list(&v),
1091 "created" => created = v.as_str().and_then(parse_ts),
1092 "updated" => updated = v.as_str().and_then(parse_ts),
1093 "path" => {}
1097 _ => {
1098 fields.insert(key, yaml_to_json_value(&v));
1099 }
1100 }
1101 }
1102
1103 Ok(FileMeta {
1104 type_,
1105 summary,
1106 tags,
1107 links,
1108 created,
1109 updated,
1110 fields,
1111 })
1112}
1113
1114fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1120 match v {
1121 serde_norway::Value::String(s) => Some(s.clone()),
1122 serde_norway::Value::Number(n) => Some(n.to_string()),
1123 serde_norway::Value::Bool(b) => Some(b.to_string()),
1124 _ => None,
1125 }
1126}
1127
1128fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1134 let text = String::from_utf8_lossy(bytes);
1139 extract_frontmatter_block(&text)
1140}
1141
/// Returns the text between the opening and closing `---` fences of a
/// frontmatter block, one `\n`-terminated line per source line.
///
/// A leading byte-order mark is ignored and trailing whitespace on the fence
/// lines is tolerated. Returns `None` when the first line is not a fence or
/// when no closing fence follows.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = body.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    let mut block = String::new();
    loop {
        // Running out of lines means the block was never closed.
        let line = lines.next()?;
        if line.trim_end() == "---" {
            return Some(block);
        }
        block.push_str(line);
        block.push('\n');
    }
}
1161
1162fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1165 match v {
1166 serde_norway::Value::String(s) => vec![s.clone()],
1167 serde_norway::Value::Sequence(seq) => seq
1168 .iter()
1169 .filter_map(yaml_string_or_wiki_link_literal)
1170 .collect(),
1171 _ => Vec::new(),
1172 }
1173}
1174
1175fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1176 v.as_str()
1177 .map(str::to_string)
1178 .or_else(|| unquoted_wiki_link_literal(v))
1179}
1180
1181fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1182 if let Some(link) = unquoted_wiki_link_literal(v) {
1183 return Value::String(link);
1184 }
1185 match v {
1186 serde_norway::Value::String(s) => Value::String(s.clone()),
1187 serde_norway::Value::Bool(b) => Value::Bool(*b),
1188 serde_norway::Value::Number(n) => {
1189 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1190 }
1191 serde_norway::Value::Sequence(seq) => {
1192 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1193 }
1194 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1195 serde_json::to_value(v).unwrap_or(Value::Null)
1196 }
1197 serde_norway::Value::Null => Value::Null,
1198 }
1199}
1200
1201fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1202 let serde_norway::Value::Sequence(outer) = v else {
1203 return None;
1204 };
1205 if outer.len() != 1 {
1206 return None;
1207 }
1208 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1209 return None;
1210 };
1211 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1212 return None;
1213 };
1214 Some(format!("[[{target}]]"))
1215}
1216
1217fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
1219 DateTime::parse_from_rfc3339(s.trim()).ok()
1220}
1221
1222fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
1226 ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
1227}
1228
1229fn max_updated<'a>(
1231 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1232) -> Option<DateTime<FixedOffset>> {
1233 let mut best: Option<DateTime<FixedOffset>> = None;
1234 for ts in it.flatten() {
1235 best = Some(match best {
1236 Some(cur) if cur >= *ts => cur,
1237 _ => *ts,
1238 });
1239 }
1240 best
1241}
1242
1243fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1247 let text = match fs::read_to_string(jsonl) {
1248 Ok(t) => t,
1249 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1250 Err(e) => return Err(e.into()),
1251 };
1252 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1254 for (i, line) in text.lines().enumerate() {
1255 if line.trim().is_empty() {
1256 continue;
1257 }
1258 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1259 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1260 path: jsonl.to_path_buf(),
1261 message: format!("line {}: {e}", i + 1),
1262 })
1263 })?;
1264 by_path.insert(rec.path.clone(), rec);
1265 }
1266 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1267 sort_records(&mut records);
1268 Ok(records)
1269}
1270
/// Summary of one type folder's `index.jsonl`, as computed by `read_folder_stat`.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the folder's index.
    count: usize,
    /// The record that sorts first under `record_recency_cmp`, if any.
    newest: Option<IndexRecord>,
}
1282
1283fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1293 let text = match fs::read_to_string(jsonl) {
1294 Ok(t) => t,
1295 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1296 Err(e) => return Err(e.into()),
1297 };
1298 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1301 for (i, line) in text.lines().enumerate() {
1302 if line.trim().is_empty() {
1303 continue;
1304 }
1305 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1306 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1307 path: jsonl.to_path_buf(),
1308 message: format!("line {}: {e}", i + 1),
1309 })
1310 })?;
1311 by_path.insert(rec.path.clone(), rec);
1312 }
1313 let count = by_path.len();
1314 let newest = by_path.into_values().min_by(record_recency_cmp);
1318 Ok(FolderStat { count, newest })
1319}
1320
1321fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
1326 match (b.updated, a.updated) {
1327 (Some(bu), Some(au)) => bu.cmp(&au),
1328 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
1331 }
1332 .then_with(|| a.path.cmp(&b.path))
1333}
1334
1335fn collect_child_stats(
1348 store: &Store,
1349 layers: &[Layer],
1350) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1351 let mut stats = BTreeMap::new();
1352 for &layer in layers {
1353 for tf in type_folders_in_layer(store, layer) {
1354 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1355 if stat.count > 0 {
1356 stats.insert(tf, stat);
1357 }
1358 }
1359 }
1360 Ok(stats)
1361}
1362
1363fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1366 let mut out = Vec::new();
1367 if !folder_abs.is_dir() {
1368 return out;
1369 }
1370 for entry in walkdir::WalkDir::new(folder_abs)
1371 .into_iter()
1372 .filter_entry(|e| !is_hidden(e.file_name()))
1373 .filter_map(|e| e.ok())
1374 {
1375 if !entry.file_type().is_file() {
1376 continue;
1377 }
1378 let p = entry.path();
1379 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1380 continue;
1381 }
1382 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1383 continue;
1384 }
1385 out.push(p.to_path_buf());
1386 }
1387 out
1388}
1389
1390fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1393 let layer_dir = store.root.join(layer_dir_name(layer));
1394 let mut out = Vec::new();
1395 let rd = match fs::read_dir(&layer_dir) {
1396 Ok(rd) => rd,
1397 Err(_) => return out,
1398 };
1399 for entry in rd.flatten() {
1400 if !entry.path().is_dir() {
1401 continue;
1402 }
1403 let name = entry.file_name();
1404 let name = match name.to_str() {
1405 Some(n) => n,
1406 None => continue,
1407 };
1408 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1409 continue;
1410 }
1411 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1412 }
1413 out.sort();
1414 out
1415}
1416
1417fn loose_layer_of(file_rel: &Path) -> Option<Layer> {
1423 let mut comps = file_rel.components();
1424 let layer = layer_from_dir_name(comps.next()?.as_os_str().to_str()?)?;
1425 comps.next()?; if comps.next().is_some() {
1427 return None; }
1429 Some(layer)
1430}
1431
1432fn loose_files_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1436 let layer_dir = store.root.join(layer_dir_name(layer));
1437 let mut out = Vec::new();
1438 let rd = match fs::read_dir(&layer_dir) {
1439 Ok(rd) => rd,
1440 Err(_) => return out,
1441 };
1442 for entry in rd.flatten() {
1443 let p = entry.path();
1444 if !p.is_file() {
1445 continue;
1446 }
1447 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1448 continue;
1449 }
1450 if is_index_artifact(&p) || is_hidden(entry.file_name().as_os_str()) {
1451 continue;
1452 }
1453 out.push(p);
1454 }
1455 out
1456}
1457
1458fn write_layer_jsonl(store: &Store, layer: Layer, records: &[IndexRecord]) -> crate::Result<()> {
1463 let path = store.root.join(layer_dir_name(layer)).join("index.jsonl");
1464 if records.is_empty() {
1465 remove_if_exists(&path)?;
1466 return Ok(());
1467 }
1468 let idx = Index {
1469 level: IndexLevel::Layer(layer),
1470 records: records.to_vec(),
1471 child_counts: BTreeMap::new(),
1472 };
1473 write_atomic(&path, idx.to_jsonl())
1474}
1475
1476fn apply_loose_change(
1481 store: &Store,
1482 layer: Layer,
1483 file_rel: &Path,
1484 removing: bool,
1485) -> crate::Result<()> {
1486 let layer_dir = store.root.join(layer_dir_name(layer));
1487 let _lock = FolderLock::acquire(&layer_dir);
1488 let jsonl = layer_dir.join("index.jsonl");
1489 let mut records = read_jsonl_records(&jsonl)?;
1490 records.retain(|r| r.path != file_rel);
1491 if !removing {
1492 records.push(record_from_file(
1493 &store.root.join(file_rel),
1494 file_rel.to_path_buf(),
1495 )?);
1496 }
1497 sort_records(&mut records);
1498 write_layer_jsonl(store, layer, &records)
1499}
1500
1501fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1505 let mut comps = file_rel.components();
1506 let layer = comps.next()?.as_os_str().to_str()?;
1507 layer_from_dir_name(layer)?;
1508 let type_seg = comps.next()?.as_os_str().to_str()?;
1509 Some(PathBuf::from(layer).join(type_seg))
1510}
1511
/// Strips the store root from an absolute path, yielding the store-relative
/// remainder, or `None` when `abs` does not lie under `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(relative) => Some(relative.to_path_buf()),
        Err(_) => None,
    }
}
1516
1517fn normalize_rel(p: &Path) -> PathBuf {
1520 let s = path_to_unix(p);
1521 let s = s.strip_prefix("./").unwrap_or(&s);
1522 PathBuf::from(s)
1523}
1524
/// True when the path's final component is exactly `index.md` or
/// `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
1531
1532fn is_deletable_catalog_artifact(p: &Path) -> bool {
1546 match p.file_name().and_then(|n| n.to_str()) {
1547 Some("index.jsonl") => true,
1548 Some("index.md") => match read_frontmatter(p) {
1549 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1551 Err(_) => true,
1553 },
1554 _ => false,
1555 }
1556}
1557
/// True when `name` is valid UTF-8 and begins with a dot. Names that are not
/// valid UTF-8 are reported as not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    matches!(name.to_str(), Some(text) if text.starts_with('.'))
}
1561
1562fn layer_dir_name(layer: Layer) -> &'static str {
1563 match layer {
1564 Layer::Sources => "sources",
1565 Layer::Records => "records",
1566 }
1567}
1568
1569fn layer_from_dir_name(name: &str) -> Option<Layer> {
1572 match name {
1573 "sources" => Some(Layer::Sources),
1574 "records" => Some(Layer::Records),
1575 _ => None,
1576 }
1577}
1578
/// Returns the final path component as `&str`, or `""` when the path has no
/// final component or it is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1583
1584fn wiki_target(p: &Path) -> String {
1588 let unix = path_to_unix(p);
1589 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1590}
1591
/// Renders a path as its components joined with `/`.
///
/// Each component is converted lossily, so non-UTF-8 bytes become the
/// Unicode replacement character. An empty path renders as an empty string.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    for (position, part) in p.components().enumerate() {
        if position > 0 {
            unix.push('/');
        }
        unix.push_str(&part.as_os_str().to_string_lossy());
    }
    unix
}
1609
1610mod path_serde {
1616 use super::path_to_unix;
1617 use serde::{Deserialize, Deserializer, Serializer};
1618 use std::path::{Path, PathBuf};
1619
1620 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1621 s.serialize_str(&path_to_unix(p))
1622 }
1623
1624 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1625 Ok(PathBuf::from(String::deserialize(d)?))
1626 }
1627}
1628
/// Upper-cases the first character of `s` and leaves the rest untouched.
///
/// Uses the full Unicode uppercase mapping, so one character may expand to
/// several (e.g. `ß` becomes `SS`). An empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let Some(head) = rest.next() else {
        return String::new();
    };
    let mut out: String = head.to_uppercase().collect();
    out.push_str(rest.as_str());
    out
}
1637
/// Replaces every run of whitespace with a single space and trims both ends.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
1645
1646fn default_display(basename: &str) -> String {
1652 let spaced: String = basename
1653 .chars()
1654 .map(|c| if c == '-' || c == '_' { ' ' } else { c })
1655 .collect();
1656 capitalize(&spaced)
1657}
1658
1659fn folder_label<'a>(
1666 tf_unix: &str,
1667 basename: &str,
1668 folders: &'a BTreeMap<String, FolderMeta>,
1669) -> (String, Option<&'a str>) {
1670 let meta = folders.get(tf_unix);
1671 let display = meta
1672 .and_then(|m| m.display.as_deref())
1673 .map(str::to_string)
1674 .unwrap_or_else(|| default_display(basename));
1675 (display, meta.and_then(|m| m.description.as_deref()))
1676}
1677
/// Renders one bullet line for a type folder in a rollup index:
/// `- [[<folder>/index|<display>]] (<count>)`, followed by ` — <description>`
/// when a description is given, and a trailing newline.
fn folder_entry(tf_unix: &str, display: &str, count: usize, description: Option<&str>) -> String {
    let mut line = format!("- [[{tf_unix}/index|{display}]] ({count})");
    if let Some(text) = description {
        line.push_str(" — ");
        line.push_str(text);
    }
    line.push('\n');
    line
}
1686
1687fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1694 if let Some(parent) = path.parent() {
1695 fs::create_dir_all(parent)?;
1696 }
1697 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1698 let mut tmp = tempfile_in(dir)?;
1699 tmp.write_all(contents.as_bytes())?;
1700 tmp.flush()?;
1701 tmp.persist(path)?;
1702 Ok(())
1703}
1704
1705fn remove_if_exists(path: &Path) -> crate::Result<()> {
1706 match fs::remove_file(path) {
1707 Ok(()) => Ok(()),
1708 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1709 Err(e) => Err(e.into()),
1710 }
1711}
1712
1713fn bad_index(path: &Path, msg: &str) -> crate::Error {
1714 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1715 path: path.to_path_buf(),
1716 message: msg.to_string(),
1717 })
1718}
1719
/// Advisory per-folder lock backed by a `.index.lock` file inside the folder.
///
/// `held` records whether this handle created the lock file. Only a held
/// lock removes the file when dropped.
struct FolderLock {
    path: PathBuf,
    held: bool,
}

impl FolderLock {
    /// Blocks until the lock file for `folder_abs` can be created.
    ///
    /// The folder is created on a best-effort basis first. Acquisition then
    /// loops on an exclusive create (`create_new`) of `.index.lock`:
    ///
    /// * success — returns a held lock;
    /// * the file already exists — if its modification time is more than 30
    ///   seconds old it is deleted and the create is retried immediately,
    ///   otherwise the thread sleeps 10 ms and retries;
    /// * any other error — returns an un-held lock so the caller proceeds
    ///   without mutual exclusion instead of failing.
    ///
    /// A lock whose age cannot be determined (metadata unreadable, or a
    /// modification time in the future) is treated as not stale.
    fn acquire(folder_abs: &Path) -> Self {
        use std::time::{Duration, SystemTime};
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let lock_is_stale = |lock_path: &Path| -> bool {
            let Ok(modified) = fs::metadata(lock_path).and_then(|meta| meta.modified()) else {
                return false;
            };
            match SystemTime::now().duration_since(modified) {
                Ok(age) => age > STALE_AFTER,
                Err(_) => false,
            }
        };

        let path = folder_abs.join(".index.lock");
        let _ = fs::create_dir_all(folder_abs);
        loop {
            let attempt = fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path);
            let err = match attempt {
                Ok(_) => return FolderLock { path, held: true },
                Err(err) => err,
            };
            if err.kind() != std::io::ErrorKind::AlreadyExists {
                return FolderLock { path, held: false };
            }
            if lock_is_stale(&path) {
                // Break the stale lock and retry without sleeping.
                let _ = fs::remove_file(&path);
            } else {
                std::thread::sleep(SPIN);
            }
        }
    }
}

impl Drop for FolderLock {
    /// Releases the lock by deleting the lock file, but only if this handle
    /// created it. Deletion errors are ignored.
    fn drop(&mut self) {
        if !self.held {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1818
1819fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1825 if a == b {
1826 return vec![FolderLock::acquire(&store.root.join(a))];
1827 }
1828 let (first, second) = if a < b { (a, b) } else { (b, a) };
1829 vec![
1830 FolderLock::acquire(&store.root.join(first)),
1831 FolderLock::acquire(&store.root.join(second)),
1832 ]
1833}
1834
/// A temp file that is either renamed into place with [`AtomicTemp::persist`]
/// or deleted when dropped.
///
/// `file` is `Some` until `persist` takes the handle; `persisted` flips to
/// `true` only after a successful rename.
struct AtomicTemp {
    file: Option<fs::File>,
    path: PathBuf,
    persisted: bool,
}

impl AtomicTemp {
    /// Writes all of `bytes` to the temp file.
    ///
    /// Panics if the file handle has already been taken.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.write_all(bytes)
    }

    /// Flushes the temp file's writer.
    ///
    /// Panics if the file handle has already been taken.
    fn flush(&mut self) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.flush()
    }

    /// Renames the temp file to `dest`.
    ///
    /// The handle is synced (errors ignored) and closed before the rename.
    /// If the rename fails the error is returned and dropping `self` removes
    /// the temp file.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            let _ = handle.sync_all();
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temp file unless it was persisted. Errors are ignored.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1872
1873fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1874 use std::time::{SystemTime, UNIX_EPOCH};
1875 let nanos = SystemTime::now()
1876 .duration_since(UNIX_EPOCH)
1877 .map(|d| d.as_nanos())
1878 .unwrap_or(0);
1879 let pid = std::process::id();
1880 let counter = next_temp_counter();
1883 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1884 let path = dir.join(name);
1885 let file = fs::OpenOptions::new()
1886 .write(true)
1887 .create_new(true)
1888 .open(&path)?;
1889 Ok(AtomicTemp {
1890 file: Some(file),
1891 path,
1892 persisted: false,
1893 })
1894}
1895
/// Returns the next value of a process-wide counter that starts at 0 and
/// increases by one per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};
    static NEXT: AtomicU64 = AtomicU64::new(0);
    let ticket = NEXT.fetch_add(1, Ordering::Relaxed);
    ticket
}
1901
1902#[cfg(test)]
1903mod tests {
1904 use super::*;
1905 use std::collections::BTreeSet;
1906 use std::fs;
1907 use tempfile::TempDir;
1908
1909 fn mk_store() -> (TempDir, Store) {
1914 let dir = TempDir::new().unwrap();
1915 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1916 let store = Store {
1917 root: dir.path().to_path_buf(),
1918 config: crate::parser::Config::default(),
1919 };
1920 (dir, store)
1921 }
1922
1923 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1926 let abs = store.root.join(rel);
1927 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1928 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1929 }
1930
1931 fn write_doc(
1933 store: &Store,
1934 rel: &str,
1935 type_: &str,
1936 summary: Option<&str>,
1937 updated: Option<&str>,
1938 extra_yaml: &str,
1939 ) {
1940 let mut fm = format!("type: {type_}\n");
1941 if let Some(s) = summary {
1942 fm.push_str(&format!("summary: {s}\n"));
1943 }
1944 if let Some(u) = updated {
1945 fm.push_str(&format!("updated: {u}\n"));
1946 }
1947 fm.push_str(extra_yaml);
1948 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1949 }
1950
1951 fn read(store: &Store, rel: &str) -> String {
1952 fs::read_to_string(store.root.join(rel)).unwrap()
1953 }
1954
1955 fn exists(store: &Store, rel: &str) -> bool {
1956 store.root.join(rel).exists()
1957 }
1958
1959 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1962 let mut out = BTreeMap::new();
1963 for entry in walkdir::WalkDir::new(&store.root)
1964 .into_iter()
1965 .filter_map(|e| e.ok())
1966 {
1967 let p = entry.path();
1968 if is_index_artifact(p) {
1969 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1970 out.insert(rel, fs::read_to_string(p).unwrap());
1971 }
1972 }
1973 out
1974 }
1975
    /// A type-folder build must gather files from every date shard beneath
    /// the folder and order them by `updated`, newest first.
    #[test]
    fn type_folder_aggregates_across_shards_in_recency_order() {
        let (_d, store) = mk_store();
        // Two files share the 2026/05 shard and one lives in 2026/06. They
        // are written out of date order on purpose.
        write_doc(
            &store,
            "sources/emails/2026/05/b-old.md",
            "email",
            Some("Older mail"),
            Some("2026-05-01T09:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/06/c-new.md",
            "email",
            Some("Newest mail"),
            Some("2026-06-15T12:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/05/a-mid.md",
            "email",
            Some("Middle mail"),
            Some("2026-05-20T08:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(
            paths,
            vec![
                "sources/emails/2026/06/c-new.md",
                "sources/emails/2026/05/a-mid.md",
                "sources/emails/2026/05/b-old.md",
            ],
            "records must aggregate across shards, newest `updated` first"
        );
    }
2020
    /// Pins the exact Markdown rendering of a type-folder index: the
    /// frontmatter and heading, a tagged entry, an untagged entry, and the
    /// absence of the "More" footer when under the cap.
    #[test]
    fn type_folder_md_format_entries_tags_and_derived_updated() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/sarah-chen.md",
            "contact",
            Some("Renewal champion at Acme"),
            Some("2026-05-27T10:00:00Z"),
            "tags:\n - renewal\n - acme\n",
        );
        write_doc(
            &store,
            "records/contacts/no-tags.md",
            "contact",
            Some("Plain contact"),
            Some("2026-05-26T10:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
        let md = idx.to_markdown();

        // The index's own `updated` is expected to equal the newest member's
        // `updated` (2026-05-27).
        assert!(md.starts_with(
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
        ), "frontmatter/heading wrong:\n{md}");

        // Tags render after a ` · ` separator as `#tag` tokens.
        assert!(
            md.contains(
                "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
            ),
            "tagged entry wrong:\n{md}"
        );
        // An entry without tags ends right after the summary.
        assert!(
            md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
            "untagged entry wrong:\n{md}"
        );
        assert!(
            !md.contains("Plain contact ·"),
            "untagged entry must not emit a tag separator"
        );
        assert!(!md.contains("## More"), "no footer expected under the cap");
    }
2069
    /// A document with no `summary` must be indexed with the
    /// `MISSING_SUMMARY` placeholder, both in the record and in the
    /// rendered Markdown.
    #[test]
    fn missing_summary_becomes_placeholder_not_invented() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/notes/x.md",
            "note",
            None,
            Some("2026-05-27T10:00:00Z"),
            "",
        );
        let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
        assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
        let md = idx.to_markdown();
        assert!(
            md.contains("- [[records/notes/x]] — (no summary)\n"),
            "missing summary must render the placeholder, not invent text:\n{md}"
        );
    }
2089
    /// The JSONL rendering must contain one object per file, deserialize
    /// back to the same `IndexRecord`, keep reserved keys out of `fields`,
    /// and emit keys in a stable order.
    #[test]
    fn jsonl_is_complete_structured_and_round_trips() {
        let (_d, store) = mk_store();
        // e1 carries every kind of frontmatter value the test checks:
        // scalars, a wiki-link scalar, a list, tags, and links in both plain
        // and `[[...]]` form.
        write_doc(
            &store,
            "records/expenses/2026/05/e1.md",
            "expense",
            Some("Lunch with vendor"),
            Some("2026-05-10T10:00:00Z"),
            "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[records/concepts/spend]]\ntags:\n - food\nlinks:\n - records/concepts/spend\n - [[records/concepts/renewal]]\n",
        );
        write_doc(
            &store,
            "records/expenses/2026/06/e2.md",
            "expense",
            Some("Cloud bill"),
            Some("2026-06-01T10:00:00Z"),
            "amount: 100\n",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
        let jsonl = idx.to_jsonl();
        let lines: Vec<&str> = jsonl.lines().collect();
        assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");

        // Line 0 is e2, the file with the newer `updated`.
        let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
        assert_eq!(
            r0, idx.records[0],
            "jsonl line must round-trip to the record"
        );

        // Line 1 is e1: check each typed field, then the flattened extras.
        let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
        assert_eq!(r1.type_, "expense");
        assert_eq!(r1.summary, "Lunch with vendor");
        assert_eq!(r1.tags, vec!["food".to_string()]);
        assert_eq!(
            r1.links,
            vec![
                "records/concepts/spend".to_string(),
                "[[records/concepts/renewal]]".to_string()
            ]
        );
        assert_eq!(
            r1.created,
            Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
        );
        assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
        assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
        assert_eq!(
            r1.fields.get("company"),
            Some(&Value::from("[[records/companies/acme]]"))
        );
        assert_eq!(
            r1.fields.get("related"),
            Some(&serde_json::json!(["[[records/concepts/spend]]"]))
        );
        // Keys that have dedicated struct fields must not be duplicated into
        // the flattened `fields` map.
        for reserved in [
            "path", "type", "summary", "tags", "links", "created", "updated",
        ] {
            assert!(
                !r1.fields.contains_key(reserved),
                "reserved key {reserved} must not appear in fields"
            );
        }

        // Typed keys come first, in struct declaration order.
        assert!(
            lines[1].starts_with(
                r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["records/concepts/spend","[[records/concepts/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
            ),
            "jsonl key order not stable:\n{}",
            lines[1]
        );
        // Extras follow in sorted key order.
        // NOTE(review): `meta-type` is not in the frontmatter written above;
        // presumably the record builder derives it — confirm.
        assert!(
            lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[records/concepts/spend]]"],"status":"paid"}"#),
            "extras must be sorted:\n{}",
            lines[1]
        );
    }
2180
    /// With more than `MD_CAP` files, the Markdown view lists exactly
    /// `MD_CAP` entries plus a "More" footer, while records and JSONL keep
    /// every file.
    #[test]
    fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
        let (_d, store) = mk_store();
        let total = MD_CAP + 7;
        for i in 0..total {
            // Spread `updated` over days 1..=27 and seconds 0..=59 so the
            // timestamps are valid and varied.
            let day = 1 + (i % 27);
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
            write_doc(
                &store,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
        }
        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

        let md = idx.to_markdown();
        // Entry lines are the ones that start with a wiki-link bullet.
        let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
        assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");

        assert!(
            md.contains("## More\n\n"),
            "over-cap md needs a More footer"
        );
        assert!(
            md.contains(&format!(
                "This folder has {total} files. The 500 most recent are listed above.\n"
            )),
            "footer count wrong:\n{md}"
        );
        assert!(
            md.contains("Use `dbmd query --type email --in sources` for the complete catalog.\n"),
            "footer must infer type=email layer=sources:\n{md}"
        );

        let jsonl = idx.to_jsonl();
        assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
    }
2226
    /// `sort_records` orders by `updated` descending, breaks ties by path
    /// ascending, and places records without `updated` last.
    #[test]
    fn sort_breaks_ties_by_path_and_puts_undated_last() {
        let mut recs = vec![
            rec("z/a.md", Some("2026-05-01T00:00:00Z")),
            // Same timestamp as z/a.md: the path decides, so a/b.md sorts first.
            rec("a/b.md", Some("2026-05-01T00:00:00Z")),
            // No `updated`: expected at the end.
            rec("m/c.md", None),
            // Newest timestamp: expected at the front.
            rec("b/d.md", Some("2026-06-01T00:00:00Z")),
        ];
        sort_records(&mut recs);
        let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
    }
2241
2242 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2243 IndexRecord {
2244 path: PathBuf::from(path),
2245 type_: "t".into(),
2246 summary: "s".into(),
2247 tags: vec![],
2248 links: vec![],
2249 created: None,
2250 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2251 fields: BTreeMap::new(),
2252 }
2253 }
2254
    /// A layer index lists its type folders alphabetically with file counts
    /// only, never member summaries, and its `updated` is the newest
    /// `updated` across its children.
    #[test]
    fn layer_index_lists_type_folders_with_counts() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A older"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/b.md",
            "contact",
            Some("Contact B newest"),
            Some("2026-05-09T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        // Write the type-folder indexes first, then the layer index.
        Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
        Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();

        Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
        let md = read(&store, "records/index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
            "layer fm:\n{md}"
        );
        // Compare byte offsets to check ordering within the document.
        let companies_at = md.find("companies/index").unwrap();
        let contacts_at = md.find("contacts/index").unwrap();
        assert!(
            companies_at < contacts_at,
            "type folders must be alphabetical"
        );
        // Labels are the capitalized folder basenames, followed by the count.
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (2)\n"),
            "contacts entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/companies/index|Companies]] (1)\n"),
            "companies entry:\n{md}"
        );
        assert!(
            !md.contains("Contact B newest") && !md.contains("Acme Inc"),
            "layer rollup must not quote a member summary:\n{md}"
        );
        // 2026-05-09 is the newest `updated` among the three documents.
        assert!(
            md.contains("updated: 2026-05-09T00:00:00Z\n"),
            "layer updated must be max child:\n{md}"
        );
    }
2323
    /// Folder metadata from the store config must flow into the layer and
    /// root rollups: an authored description is appended to the entry, an
    /// authored display overrides the default label, and folders without
    /// metadata stay counts-only.
    #[test]
    fn folders_section_supplies_authored_display_and_description() {
        let (_d, mut store) = mk_store();
        // Description only: the label should fall back to the default.
        store.config.folders.insert(
            "records/contacts".into(),
            crate::parser::FolderMeta {
                display: None,
                description: Some("people across customer + prospect accounts".into()),
            },
        );
        // Both display and description authored.
        store.config.folders.insert(
            "sources/hubspot-exports".into(),
            crate::parser::FolderMeta {
                display: Some("HubSpot exports".into()),
                description: Some("deal + pipeline exports".into()),
            },
        );
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        // records/companies has no entry in `config.folders`.
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/hubspot-exports/d.md",
            "hubspot-export",
            Some("a single deal export"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );

        Index::rebuild_all(&store).unwrap();

        let records_layer = read(&store, "records/index.md");
        assert!(
            records_layer.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
            "authored description must surface:\n{records_layer}"
        );
        assert!(
            records_layer.contains("- [[records/companies/index|Companies]] (1)\n")
                && !records_layer.contains("Acme Inc"),
            "un-described folder is counts-only:\n{records_layer}"
        );

        let sources_layer = read(&store, "sources/index.md");
        assert!(
            sources_layer.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
            "display override + description must surface:\n{sources_layer}"
        );

        // The root index must carry the same authored label and description.
        let root = read(&store, "index.md");
        assert!(
            root.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
            "root surfaces authored description:\n{root}"
        );
        assert!(
            root.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
            "root surfaces display override:\n{root}"
        );
    }
2403
    /// `default_display` replaces `-` and `_` with spaces and upper-cases
    /// only the first character (so "hubspot" is not re-cased to "HubSpot").
    #[test]
    fn default_display_turns_separators_to_spaces_and_caps() {
        assert_eq!(default_display("contacts"), "Contacts");
        assert_eq!(default_display("hubspot-exports"), "Hubspot exports");
        assert_eq!(default_display("usage_exports"), "Usage exports");
    }
2410
    /// The root index groups type folders under one heading per layer, with
    /// per-layer totals in the heading and per-folder counts in the entries,
    /// and carries no preview text.
    #[test]
    fn root_index_groups_layers_with_totals_and_per_type_counts() {
        let (_d, store) = mk_store();
        // Two sources folders with one file each, and one records folder.
        write_doc(
            &store,
            "sources/emails/2026/05/a.md",
            "email",
            Some("Mail"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/docs/d.md",
            "doc",
            Some("Doc"),
            Some("2026-05-02T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/c.md",
            "contact",
            Some("C"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );
        Index::rebuild_all(&store).unwrap();
        let md = read(&store, "index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: root\n"),
            "root fm:\n{md}"
        );
        assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
        // Byte offsets of the two layer headings, used to check their order.
        let sources_h = md
            .find("## Sources (2)")
            .expect("sources heading w/ total 2");
        let records_h = md
            .find("## Records (1)")
            .expect("records heading w/ total 1");
        assert!(sources_h < records_h, "Sources must precede Records");
        assert!(!md.contains("## Wiki"), "empty layer gets no section");
        assert!(
            md.contains("- [[sources/docs/index|Docs]] (1)\n"),
            "root docs entry:\n{md}"
        );
        assert!(
            md.contains("- [[sources/emails/index|Emails]] (1)\n"),
            "root emails entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root contacts entry:\n{md}"
        );
        // No folder has an authored description here, so no em-dash suffix
        // may appear anywhere in the document.
        assert!(!md.contains("— "), "root entries carry no preview text");
    }
2472
    /// Incremental indexing (`on_write` after every document) must produce
    /// exactly the same artifact set and bytes as a single full
    /// `rebuild_all` over the same documents.
    #[test]
    fn on_write_matches_rebuild_byte_for_byte() {
        // `wt` is indexed write-through; `rb` is indexed by one rebuild.
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        // (path, type, summary, updated, extra frontmatter)
        let docs: &[(&str, &str, &str, &str, &str)] = &[
            (
                "sources/emails/2026/05/e1.md",
                "email",
                "First mail",
                "2026-05-01T10:00:00Z",
                "tags:\n - inbox\n",
            ),
            (
                "sources/emails/2026/06/e2.md",
                "email",
                "Second mail",
                "2026-06-01T10:00:00Z",
                "",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T10:00:00Z",
                "links:\n - records/profiles/sarah\n",
            ),
            (
                "records/contacts/elena.md",
                "contact",
                "Elena",
                "2026-05-20T10:00:00Z",
                "status: active\n",
            ),
            (
                "records/profiles/sarah.md",
                "profile",
                "Sarah bio",
                "2026-05-21T10:00:00Z",
                "",
            ),
        ];

        for (rel, t, sum, upd, extra) in docs {
            write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
            write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
            // Only the write-through store is indexed incrementally.
            Index::on_write(&wt, Path::new(rel)).unwrap();
        }
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "same set of index artifacts must exist"
        );
        for (k, v) in &a {
            assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
        }
        // Guard against a vacuous pass: artifacts at all three levels exist.
        assert!(a.contains_key("index.md"));
        assert!(a.contains_key("sources/emails/index.jsonl"));
        assert!(a.contains_key("records/contacts/index.md"));
    }
2542
    /// `on_write` for one folder must not count a sibling type folder that
    /// has content on disk but no index of its own. Once the sibling is
    /// indexed too, the result must converge to a full rebuild.
    #[test]
    fn loop_op_does_not_walk_sibling_content_tree() {
        let (_d, store) = mk_store();

        // Two company files are written but never indexed.
        write_doc(
            &store,
            "records/companies/acme.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/globex.md",
            "company",
            Some("Globex"),
            Some("2026-05-06T00:00:00Z"),
            "",
        );
        assert!(
            !exists(&store, "records/companies/index.jsonl"),
            "precondition: companies must be un-indexed"
        );

        // Index a single file in the sibling folder records/contacts.
        write_doc(
            &store,
            "records/contacts/sarah.md",
            "contact",
            Some("Sarah"),
            Some("2026-05-15T00:00:00Z"),
            "",
        );
        Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();

        let layer_md = read(&store, "records/index.md");
        let root_md = read(&store, "index.md");
        assert!(
            layer_md.contains("- [[records/contacts/index|Contacts]] (1)\n")
                && !layer_md.contains("Sarah"),
            "layer must reflect the written folder, counts only:\n{layer_md}"
        );
        assert!(
            root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root must reflect the written folder:\n{root_md}"
        );

        // The un-indexed sibling must not appear in either rollup.
        assert!(
            !layer_md.contains("companies"),
            "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
        );
        assert!(
            !root_md.contains("companies"),
            "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
        );
        assert!(
            root_md.contains("## Records (1)"),
            "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
        );

        // Convergence: a second store gets the same three documents and a
        // full rebuild, while the first store indexes the companies files
        // incrementally.
        let (_d2, rb) = mk_store();
        for (rel, t, s, u) in [
            (
                "records/companies/acme.md",
                "company",
                "Acme Inc",
                "2026-05-05T00:00:00Z",
            ),
            (
                "records/companies/globex.md",
                "company",
                "Globex",
                "2026-05-06T00:00:00Z",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T00:00:00Z",
            ),
        ] {
            write_doc(&rb, rel, t, Some(s), Some(u), "");
        }
        Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
        Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
        Index::rebuild_all(&rb).unwrap();
        let a = snapshot_artifacts(&store);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<BTreeSet<_>>(),
            b.keys().collect::<BTreeSet<_>>(),
            "same artifact set after indexing both folders"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "after indexing the sibling too, loop result must equal rebuild for {k}"
            );
        }
        assert!(
            read(&store, "index.md").contains("## Records (3)"),
            "now that both folders are indexed, the root total is 3"
        );
    }
2677
    /// A path computed by `Store::shard_path_for` for the type `profile`
    /// must be accepted by the index: `type_folder_of` resolves it,
    /// `on_write` succeeds, and `on_write` and `rebuild_all` produce
    /// identical artifacts.
    #[test]
    fn custom_type_at_shard_path_for_is_indexable_end_to_end() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        // Ask the store where a `profile` document with this slug belongs.
        let rel = wt
            .shard_path_for(
                "profile",
                &crate::parser::Frontmatter::default(),
                "renewal-theme",
            )
            .unwrap();
        let rel_str = path_to_unix(&rel);
        assert!(
            type_folder_of(&rel).is_some(),
            "shard_path_for produced a path the index cannot file: {rel_str}"
        );

        write_doc(
            &wt,
            &rel_str,
            "profile",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );
        write_doc(
            &rb,
            &rel_str,
            "profile",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );

        Index::on_write(&wt, &rel)
            .expect("on_write must succeed for a toolkit-computed custom-type path");
        Index::rebuild_all(&rb).unwrap();

        // The assertions below hard-code `records/profile` as the type folder.
        // NOTE(review): that assumes `shard_path_for` files `profile` under
        // `records/profile/` — confirm against `Store::shard_path_for`.
        let page_link = wiki_target(&rel);
        let tf_md = read(&rb, "records/profile/index.md");
        assert!(
            tf_md.contains(&format!("[[{page_link}]]")),
            "type-folder index must list the page link, got:\n{tf_md}"
        );
        assert!(
            exists(&rb, "records/profile/index.jsonl"),
            "type-folder jsonl must exist"
        );
        assert!(
            read(&rb, "records/profile/index.jsonl").contains(&rel_str),
            "type-folder jsonl must contain the page row"
        );
        let layer_md = read(&rb, "records/index.md");
        assert!(
            layer_md.contains("records/profile/index"),
            "layer index must roll up the records/profile type-folder, got:\n{layer_md}"
        );

        // Incremental and full-rebuild results must match byte for byte.
        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "loop and sweep must produce the same artifact set"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "custom-type artifact {k} differs between on_write and rebuild"
            );
        }
    }
2776
    /// Removing the newest file from an over-cap folder via `on_remove`
    /// must match a full rebuild, keep the Markdown view at `MD_CAP`
    /// entries by pulling in the next-most-recent file, and drop the removed
    /// file from both Markdown and JSONL.
    #[test]
    fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();
        // Three files over the cap, so one file still lies beyond the cap
        // after the removal and the footer remains.
        let total = MD_CAP + 3;
        let mut all_rels = Vec::new();
        for i in 0..total {
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            // Strictly increasing timestamps: a higher `i` is more recent.
            let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
            write_doc(
                &wt,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            write_doc(
                &rb,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            all_rels.push(rel);
        }
        Index::rebuild_all(&wt).unwrap();
        // Incremental path: delete the newest file, then notify the index.
        let newest = &all_rels[total - 1];
        fs::remove_file(wt.root.join(newest)).unwrap();
        Index::on_remove(&wt, Path::new(newest)).unwrap();

        // Reference path: delete the same file, then rebuild from scratch.
        fs::remove_file(rb.root.join(newest)).unwrap();
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        for (k, v) in &a {
            assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
        }

        let md = read(&wt, "sources/emails/index.md");
        assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
        assert!(
            !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
            "removed file must not be listed in md"
        );
        // Before removal the listed window was indices 3..=502; afterwards
        // it is 2..=501, so index 2 is the newly visible entry.
        let pulled_in = &all_rels[2];
        assert!(
            md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
            "the 501st-most-recent must be pulled into the browse view after a removal"
        );
        assert!(
            md.contains(&format!("This folder has {} files.", total - 1)),
            "footer count must decrement:\n{}",
            md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
        );
        let jsonl = read(&wt, "sources/emails/index.jsonl");
        assert_eq!(
            jsonl.lines().count(),
            total - 1,
            "jsonl loses exactly the removed file"
        );
        assert!(
            !jsonl.contains(&path_to_unix(Path::new(newest))),
            "removed file must be gone from the jsonl too"
        );
    }
2854
#[test]
/// Moving a file between type folders through `Index::on_rename` must yield
/// the same artifacts as renaming on disk and rebuilding from scratch.
fn on_rename_cross_folder_matches_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // (path, type, summary, updated) written identically into both stores.
    let fixtures = [
        (
            "records/contacts/a.md",
            "contact",
            "A",
            "2026-05-01T00:00:00Z",
        ),
        (
            "records/contacts/b.md",
            "contact",
            "B",
            "2026-05-02T00:00:00Z",
        ),
        (
            "records/companies/x.md",
            "company",
            "X",
            "2026-05-03T00:00:00Z",
        ),
    ];
    for (path, kind, summary, updated) in fixtures {
        write_doc(&wt, path, kind, Some(summary), Some(updated), "");
        write_doc(&rb, path, kind, Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&wt).unwrap();

    let (from, to) = ("records/contacts/b.md", "records/companies/b.md");
    let target_dir = "records/companies";

    // Write-through side: rename on disk, then notify the index.
    fs::create_dir_all(wt.root.join(target_dir)).unwrap();
    fs::rename(wt.root.join(from), wt.root.join(to)).unwrap();
    Index::on_rename(&wt, Path::new(from), Path::new(to)).unwrap();

    // Reference side: same rename, followed by a full rebuild.
    fs::create_dir_all(rb.root.join(target_dir)).unwrap();
    fs::rename(rb.root.join(from), rb.root.join(to)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let hooked = snapshot_artifacts(&wt);
    let rebuilt = snapshot_artifacts(&rb);
    assert_eq!(
        hooked.keys().collect::<Vec<_>>(),
        rebuilt.keys().collect::<Vec<_>>()
    );
    for (k, v) in &hooked {
        assert_eq!(v, &rebuilt[k], "rename: artifact {k} drifted from rebuild");
    }

    // The moved file leaves its old folder's view and shows up in the new one.
    let contacts = read(&wt, "records/contacts/index.md");
    assert!(!contacts.contains("records/contacts/b]]"));
    let companies = read(&wt, "records/companies/index.md");
    assert!(companies.contains("[[records/companies/b]]"));
}
2913
#[test]
/// Indexing a rewritten file must replace its existing catalog row instead
/// of appending a second one, and the markdown view must show the revision.
fn on_write_updates_existing_entry_in_place() {
    let (_d, store) = mk_store();
    let rel = "records/contacts/a.md";
    // The same path is written and indexed twice; only the second revision
    // may remain in the catalog.
    let revisions = [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ];
    for (summary, updated) in revisions {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
        Index::on_write(&store, Path::new(rel)).unwrap();
    }

    let jsonl = read(&store, "records/contacts/index.jsonl");
    let row_count = jsonl.lines().count();
    assert_eq!(row_count, 1, "upsert must not duplicate the line");
    assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !jsonl.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let md = read(&store, "records/contacts/index.md");
    assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
2955
#[test]
/// `Index::render_dry_run` must return both artifacts, each introduced by a
/// `--- <path> ---` separator line, without creating any file in the store.
fn dry_run_emits_separators_and_writes_nothing() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder("sources/emails".into());
    let out = Index::render_dry_run(&store, &level).unwrap();

    // Rendered output carries both separators and the markdown entry.
    assert!(
        out.contains("--- sources/emails/index.md ---\n"),
        "md separator:\n{out}"
    );
    assert!(
        out.contains("--- sources/emails/index.jsonl ---\n"),
        "jsonl separator:\n{out}"
    );
    assert!(
        out.contains("- [[sources/emails/2026/05/a]] — Mail"),
        "md body present"
    );

    // Nothing may have been written to disk.
    for artifact in ["sources/emails/index.md", "sources/emails/index.jsonl"] {
        assert!(!exists(&store, artifact), "dry-run must not write");
    }
}
2993
#[test]
/// `Index::cleanup` must delete index files planted inside a shard directory
/// and inside a folder with no content, while leaving content files alone.
fn cleanup_removes_noncanonical_and_empty_indexes() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // Plant stale artifacts in the shard directory next to the content file.
    let shard = store.root.join("sources/emails/2026/05");
    for name in ["index.md", "index.jsonl"] {
        fs::write(shard.join(name), "stale\n").unwrap();
    }
    // Plant a stale index in a folder that holds no content at all.
    let empty_dir = store.root.join("records/empty");
    fs::create_dir_all(&empty_dir).unwrap();
    fs::write(empty_dir.join("index.md"), "stale\n").unwrap();

    Index::cleanup(&store).unwrap();

    let removed = [
        (
            "sources/emails/2026/05/index.md",
            "shard index must be deleted",
        ),
        (
            "sources/emails/2026/05/index.jsonl",
            "shard jsonl must be deleted",
        ),
        (
            "records/empty/index.md",
            "empty-folder index must be deleted",
        ),
    ];
    for (artifact, why) in removed {
        assert!(!exists(&store, artifact), "{why}");
    }
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
3037
#[test]
/// After the only content file is deleted, `Index::rebuild_all` must remove
/// the type-folder, layer and root indexes it created earlier.
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    let (_d, store) = mk_store();
    let only_doc = "records/contacts/a.md";
    write_doc(
        &store,
        only_doc,
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // First rebuild: an index exists at every level.
    Index::rebuild_all(&store).unwrap();
    assert!(exists(&store, "records/contacts/index.md"));
    assert!(exists(&store, "records/index.md"));
    assert!(exists(&store, "index.md"));

    // Second rebuild over an emptied store: every level's index disappears.
    fs::remove_file(store.root.join(only_doc)).unwrap();
    Index::rebuild_all(&store).unwrap();
    let gone = [
        ("records/contacts/index.md", "emptied type-folder index gone"),
        ("records/index.md", "now-empty layer index gone"),
        ("index.md", "now-empty root index gone"),
    ];
    for (artifact, why) in gone {
        assert!(!exists(&store, artifact), "{why}");
    }
}
3067
#[test]
/// Drives a fixed-seed pseudo-random sequence of writes, removes and renames
/// against two mirrored stores. `wt` is kept up to date through the
/// `on_write` / `on_remove` / `on_rename` hooks; `rb` only receives the file
/// operations and is indexed once at the end with `rebuild_all`. Both stores
/// must end up with identical artifacts.
fn property_writethrough_equals_rebuild_under_mixed_ops() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // Linear congruential generator with a fixed seed, so the operation
    // sequence is reproducible from run to run.
    let mut seed: u64 = 0x9E3779B97F4A7C15;
    let mut next = || {
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        (seed >> 33) as u32
    };

    let folders = ["sources/emails", "records/contacts", "records/profiles"];
    let types = ["email", "contact", "profile"];
    // Content paths currently present in both stores.
    let mut live: Vec<String> = Vec::new();
    for step in 0..120u32 {
        let r = next();
        let op = r % 10;
        if op < 6 || live.is_empty() {
            // Write (6 in 10, or forced while nothing is live): create or
            // overwrite a file with id 0..40 in a random folder.
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = next() % 40;
            let rel = if folder == "sources/emails" {
                // Email paths are placed in one of two month shards.
                let month = 5 + (id % 2);
                format!("{folder}/2026/{month:02}/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            let updated = format!(
                "2026-05-{:02}T{:02}:{:02}:00Z",
                1 + (step % 27),
                step % 24,
                id % 60
            );
            // Every third id also carries a tags list.
            let extra = if id % 3 == 0 {
                "tags:\n  - x\n  - y\n"
            } else {
                ""
            };
            let summary = format!("sum {step}");
            for store in [&wt, &rb] {
                write_doc(
                    store,
                    &rel,
                    types[fi],
                    Some(&summary),
                    Some(&updated),
                    extra,
                );
            }
            Index::on_write(&wt, Path::new(&rel)).unwrap();
            if !live.contains(&rel) {
                live.push(rel);
            }
        } else if op < 8 {
            // Remove (2 in 10): delete a random live file from both stores.
            let idx = (next() as usize) % live.len();
            let rel = live.remove(idx);
            fs::remove_file(wt.root.join(&rel)).unwrap();
            // The mirror receives every file operation the write-through
            // store does, so a failure here means the fixture itself has
            // diverged; fail loudly instead of ignoring it.
            fs::remove_file(rb.root.join(&rel))
                .expect("rebuild store must mirror the write-through store");
            Index::on_remove(&wt, Path::new(&rel)).unwrap();
        } else {
            // Rename (2 in 10): move a random live file to an id in 50..90,
            // a range the write branch never produces.
            let idx = (next() as usize) % live.len();
            let old = live[idx].clone();
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = 50 + (next() % 40);
            let new = if folder == "sources/emails" {
                format!("{folder}/2026/05/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            // Skip no-op renames and renames onto another live file.
            if new == old || live.contains(&new) {
                continue;
            }
            fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
            fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
            fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
            fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
            Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
            live[idx] = new;
        }
    }

    Index::rebuild_all(&rb).unwrap();
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild must produce the same set of artifacts"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
    // Guard against a vacuous pass where neither store produced anything.
    assert!(
        !a.is_empty(),
        "the run must have produced at least one artifact"
    );
}
3186
#[test]
/// A content document that happens to be named `index.md` inside a shard
/// directory must survive both `Index::cleanup` and `Index::rebuild_all`
/// with its content intact.
fn cleanup_preserves_user_content_named_index_md_in_shard() {
    let (_d, store) = mk_store();
    let user_doc = "sources/emails/2026/06/index.md";
    write_doc(
        &store,
        user_doc,
        "email",
        Some("Important imported mail"),
        Some("2026-06-11T04:23:25Z"),
        "",
    );

    Index::cleanup(&store).unwrap();
    assert!(
        exists(&store, user_doc),
        "cleanup must not delete a user content file named index.md"
    );

    Index::rebuild_all(&store).unwrap();
    assert!(
        exists(&store, user_doc),
        "rebuild_all must not delete a user content file named index.md"
    );

    let kept = read(&store, user_doc);
    assert!(
        kept.contains("Important imported mail"),
        "the user's record content must be intact"
    );
}
3221
#[test]
/// `Index::cleanup` must keep the `index.md` / `index.jsonl` pair at the
/// root of a type folder that still holds content.
fn cleanup_keeps_canonical_type_folder_root_sidecars() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let md_path = "records/contacts/index.md";
    let jsonl_path = "records/contacts/index.jsonl";

    // Materialise the type-folder artifacts and confirm they are on disk.
    let level = IndexLevel::TypeFolder("records/contacts".into());
    Index::write_level(&store, &level).unwrap();
    assert!(exists(&store, md_path));
    assert!(exists(&store, jsonl_path));

    Index::cleanup(&store).unwrap();
    assert!(
        exists(&store, md_path),
        "cleanup must keep the canonical type-folder index.md (non-empty folder)"
    );
    assert!(
        exists(&store, jsonl_path),
        "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)"
    );
}
3250
#[test]
/// Calling `Index::on_write` with the path of the generated `index.md` must
/// not add that artifact to the catalog or inflate the rollup count.
fn on_write_ignores_index_artifact_no_phantom_row() {
    let (_d, store) = mk_store();
    let doc = "records/contacts/alice.md";
    let catalog = "records/contacts/index.jsonl";
    write_doc(
        &store,
        doc,
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::on_write(&store, Path::new(doc)).unwrap();
    let rows_before = read(&store, catalog).lines().count();
    assert_eq!(rows_before, 1);

    // Feed the generated artifact itself back into the hook.
    Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();

    let jsonl_after = read(&store, catalog);
    let rows_after = jsonl_after.lines().count();
    assert_eq!(
        rows_after, 1,
        "on_write on index.md must not add a phantom self-row"
    );
    assert!(
        !jsonl_after.contains("\"type\":\"index\""),
        "the catalog artifact must never appear as a catalogued row"
    );

    let root = read(&store, "index.md");
    assert!(
        root.contains("[[records/contacts/index|Contacts]] (1)"),
        "count must not inflate:\n{root}"
    );
}
3291
#[test]
/// A block-scalar summary whose second line looks like an index entry must
/// be rendered as one markdown entry line, with the newline turned into a
/// space.
fn multiline_summary_is_single_lined_in_index_md() {
    let (_d, store) = mk_store();
    // The summary's second line imitates an entry (`- [[...]] — ...`).
    let frontmatter = "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n  legit first line\n  - [[records/secrets/fake|Click me]] — injected entry";
    write_raw(&store, "records/notes/evil.md", frontmatter, "\nbody\n");

    let md = Index::build_type_folder(&store, Path::new("records/notes"))
        .unwrap()
        .to_markdown();

    let mut entry_lines = 0;
    for line in md.lines() {
        if line.starts_with("- [[") {
            entry_lines += 1;
        }
    }
    assert_eq!(
        entry_lines, 1,
        "a multi-line summary must not produce extra entry lines:\n{md}"
    );
    let expected_entry =
        "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n";
    assert!(
        md.contains(expected_entry),
        "summary newlines must collapse to spaces inline:\n{md}"
    );
}
3322
#[test]
/// Frontmatter scalars that YAML reads as non-strings (`summary: 2026`,
/// `type: true`) must come out of `record_from_file` as their string form.
fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
    let (_d, store) = mk_store();

    // Case 1: integer-looking summary.
    let rel_a = "records/contacts/a.md";
    write_raw(
        &store,
        rel_a,
        "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
        "\nbody\n",
    );
    let abs_a = store.root.join(rel_a);
    let rec = record_from_file(&abs_a, PathBuf::from(rel_a)).unwrap();
    assert_eq!(rec.summary, "2026");
    assert_eq!(rec.type_, "contact");

    // The markdown entry carries the same coerced value.
    let md = Index::build_type_folder(&store, Path::new("records/contacts"))
        .unwrap()
        .to_markdown();
    assert!(
        md.contains("- [[records/contacts/a]] — 2026\n"),
        "index entry must hold the coerced scalar, not the placeholder:\n{md}"
    );

    // Case 2: boolean-looking type.
    let rel_b = "records/contacts/b.md";
    write_raw(
        &store,
        rel_b,
        "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
        "\nbody\n",
    );
    let abs_b = store.root.join(rel_b);
    let rec_b = record_from_file(&abs_b, PathBuf::from(rel_b)).unwrap();
    assert_eq!(rec_b.type_, "true");
}
3371
#[test]
/// A document whose body contains a byte that is not valid UTF-8 must still
/// yield a record from its frontmatter and be catalogued by `rebuild_all`.
fn non_utf8_body_does_not_abort_record_projection() {
    let (_d, store) = mk_store();
    let rel = "sources/emails/2026/06/x.md";
    let abs = store.root.join(rel);
    fs::create_dir_all(abs.parent().unwrap()).unwrap();

    // Valid frontmatter followed by a body holding a lone 0xE9 byte.
    let mut bytes: Vec<u8> = Vec::new();
    bytes.extend_from_slice(
        b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf",
    );
    bytes.push(0xE9);
    bytes.extend_from_slice(b" meeting notes\n");
    fs::write(&abs, bytes).unwrap();

    let rec = record_from_file(&abs, PathBuf::from(rel))
        .expect("non-UTF-8 body must not abort the frontmatter read");
    assert_eq!(rec.summary, "An imported email");
    assert_eq!(rec.type_, "email");

    Index::rebuild_all(&store).unwrap();
    let catalog = "sources/emails/index.jsonl";
    assert!(
        exists(&store, catalog),
        "rebuild must produce the catalog despite a non-UTF-8 body byte"
    );
    let rows = read(&store, catalog);
    assert!(
        rows.contains("An imported email"),
        "the record must be catalogued"
    );
}
3409
#[test]
/// When a file with malformed frontmatter appears, `Index::rebuild_all` must
/// return an error and leave the catalogs from the previous successful
/// rebuild on disk.
fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
    let (_d, store) = mk_store();
    // (path, type, summary, updated) for two well-formed documents.
    let well_formed = [
        (
            "records/contacts/alice.md",
            "contact",
            "Alice",
            "2026-05-01T00:00:00Z",
        ),
        (
            "records/companies/acme.md",
            "company",
            "Acme",
            "2026-05-02T00:00:00Z",
        ),
    ];
    for (rel, kind, summary, updated) in well_formed {
        write_doc(&store, rel, kind, Some(summary), Some(updated), "");
    }

    let contacts_catalog = "records/contacts/index.jsonl";
    let companies_catalog = "records/companies/index.jsonl";

    Index::rebuild_all(&store).expect("clean rebuild succeeds");
    assert!(exists(&store, contacts_catalog));
    assert!(exists(&store, companies_catalog));

    // Drop in a file whose quoted summary is never closed.
    let bad = store.root.join("records/contacts/broken.md");
    let broken_doc = "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n";
    fs::write(&bad, broken_doc).unwrap();

    Index::rebuild_all(&store)
        .expect_err("rebuild must abort, not silently skip, on a malformed file");

    assert!(
        exists(&store, companies_catalog),
        "an aborted rebuild must not destroy a clean sibling folder's catalog"
    );
    assert!(
        exists(&store, contacts_catalog),
        "an aborted rebuild must not destroy the affected folder's prior catalog"
    );
    let contacts_jsonl = read(&store, contacts_catalog);
    assert!(contacts_jsonl.contains("records/contacts/alice.md"));
}
3470
#[test]
/// The `(N)` counts in the layer and root rollups written by `rebuild_all`
/// must equal the number of jsonl rows, and a store indexed through
/// `on_write` must produce byte-identical artifacts.
fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
    // (path, summary, updated) seeded into both stores below.
    let people = [
        ("records/contacts/alice.md", "Alice", "2026-05-01T00:00:00Z"),
        ("records/contacts/bob.md", "Bob", "2026-05-02T00:00:00Z"),
    ];

    // Rebuild-driven store.
    let (_d, store) = mk_store();
    for (rel, summary, updated) in people {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&store).expect("clean rebuild succeeds");

    let contacts_jsonl = read(&store, "records/contacts/index.jsonl");
    let jsonl_lines = contacts_jsonl
        .lines()
        .filter(|line| !line.trim().is_empty())
        .count();
    assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");

    let layer_md = read(&store, "records/index.md");
    let root_md = read(&store, "index.md");
    assert!(
        layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
        "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
    );
    let root_lists_contacts = root_md.contains("- [[records/contacts/index|Contacts]] (2)\n");
    let root_totals_layer = root_md.contains("## Records (2)");
    assert!(
        root_lists_contacts && root_totals_layer,
        "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
    );

    // Write-through store: same documents, indexed one hook call at a time.
    let (_d2, wt) = mk_store();
    for (rel, summary, updated) in people {
        write_doc(&wt, rel, "contact", Some(summary), Some(updated), "");
    }
    for (rel, _, _) in people {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&store);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild_all must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "rollup bytes diverged between write-through and rebuild_all for {k} \
             (a skip-version inflates rebuild_all's (N) above the jsonl record \
             count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
}
3568
#[cfg(unix)]
#[test]
/// `path_to_unix` must keep a leaf whose bytes are not valid UTF-8 instead of
/// dropping it and returning only the parent directory.
fn non_utf8_path_component_is_kept_not_dropped() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // File name "caf<0xE9>.md": the middle byte is not valid UTF-8.
    let leaf: Vec<u8> = [&b"caf"[..], &[0xE9u8][..], &b".md"[..]].concat();
    let parent = Path::new("sources/emails");
    let p = parent.join(OsStr::from_bytes(&leaf));

    let unix = path_to_unix(&p);
    assert_ne!(
        unix, "sources/emails",
        "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
    );
    assert!(
        unix.starts_with("sources/emails/caf"),
        "the lossy leaf must remain under its folder: {unix}"
    );
}
3595
#[test]
/// A document placed directly under a layer (outside any type folder) must
/// be listed in the layer's `index.jsonl` only; the layer's `index.md` keeps
/// listing type folders and does not mention the loose file.
fn loose_file_is_catalogued_in_layer_jsonl_not_type_folder() {
    let (_d, store) = mk_store();
    // (path, summary, extra frontmatter): one type-folder file, one loose file.
    let docs = [
        ("records/contacts/alice.md", "Alice", "id: alice\n"),
        ("records/loose.md", "Loose", "id: loose\n"),
    ];
    for (rel, summary, extra) in docs {
        write_doc(
            &store,
            rel,
            "contact",
            Some(summary),
            Some("2026-06-01T08:00:00Z"),
            extra,
        );
    }
    Index::rebuild_all(&store).unwrap();

    // Layer catalog: holds the loose file only.
    assert!(
        exists(&store, "records/index.jsonl"),
        "layer jsonl must exist when loose files are present"
    );
    let layer_jsonl = read(&store, "records/index.jsonl");
    assert!(
        layer_jsonl.contains("records/loose.md"),
        "layer jsonl must list the loose file, got:\n{layer_jsonl}"
    );
    assert!(
        !layer_jsonl.contains("records/contacts/alice.md"),
        "layer jsonl must NOT list type-folder files"
    );

    // Type-folder catalog: holds the type-folder file only.
    let tf_jsonl = read(&store, "records/contacts/index.jsonl");
    assert!(tf_jsonl.contains("records/contacts/alice.md"));
    assert!(!tf_jsonl.contains("records/loose.md"));

    // Layer markdown: rolls up the type folder, omits the loose file.
    let layer_md = read(&store, "records/index.md");
    assert!(
        layer_md.contains("records/contacts/index"),
        "layer md must roll up the type-folder, got:\n{layer_md}"
    );
    assert!(
        !layer_md.contains("records/loose"),
        "layer md must stay a rollup, not list loose files, got:\n{layer_md}"
    );
}
3650
#[test]
/// With one type-folder file and one loose layer-level file, indexing via
/// `on_write` must produce the same artifacts as `rebuild_all`.
fn loose_file_write_through_equals_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // (path, summary, updated, extra frontmatter)
    let docs = [
        (
            "records/contacts/alice.md",
            "Alice",
            "2026-06-01T08:00:00Z",
            "id: alice\n",
        ),
        (
            "records/loose.md",
            "Loose",
            "2026-06-02T08:00:00Z",
            "id: loose\n",
        ),
    ];
    for s in [&wt, &rb] {
        for (rel, summary, updated, extra) in docs {
            write_doc(s, rel, "contact", Some(summary), Some(updated), extra);
        }
    }

    // Loop: one hook call per file. Sweep: a single full rebuild.
    for (rel, _, _, _) in docs {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let looped = snapshot_artifacts(&wt);
    let swept = snapshot_artifacts(&rb);
    assert_eq!(
        looped.keys().collect::<Vec<_>>(),
        swept.keys().collect::<Vec<_>>(),
        "loose-file loop and sweep must produce the same artifact set"
    );
    for (k, v) in &looped {
        assert_eq!(
            v, &swept[k],
            "loose-file artifact {k} differs between loop and sweep"
        );
    }
}
3692
#[test]
/// The layer-level `index.jsonl` appears when a loose file is indexed and
/// must be deleted again once that file is removed.
fn removing_last_loose_file_clears_layer_jsonl() {
    let (_d, store) = mk_store();
    let loose = "records/loose.md";
    let layer_catalog = "records/index.jsonl";

    write_doc(
        &store,
        loose,
        "contact",
        Some("Loose"),
        Some("2026-06-01T08:00:00Z"),
        "id: loose\n",
    );
    Index::on_write(&store, Path::new(loose)).unwrap();
    assert!(
        exists(&store, layer_catalog),
        "layer jsonl present after a loose write"
    );

    // Delete the only loose file and notify the index.
    fs::remove_file(store.root.join(loose)).unwrap();
    Index::on_remove(&store, Path::new(loose)).unwrap();
    assert!(
        !exists(&store, layer_catalog),
        "layer jsonl must be removed once the last loose file is gone"
    );
}
3716
#[test]
/// Two threads, each calling `on_write` for the files of its own type
/// folder, must leave the store with the same artifacts that a subsequent
/// `rebuild_all` produces.
fn concurrent_writes_to_different_type_folders_match_rebuild() {
    use std::sync::Arc;
    use std::thread;

    let (_d, store) = mk_store();
    let folders = ["records/contacts", "records/companies"];
    let n = 12usize;

    // Seed every content file up front; the threads only run the index hook.
    for (fi, folder) in folders.iter().enumerate() {
        for i in 0..n {
            let rel = format!("{folder}/f{fi}_{i}.md");
            let summary = format!("Summary {fi}-{i}");
            let updated = format!("2026-06-{:02}T08:00:00Z", i + 1);
            let extra = format!("id: f{fi}_{i}\n");
            write_doc(
                &store,
                &rel,
                "contact",
                Some(&summary),
                Some(&updated),
                &extra,
            );
        }
    }

    // One worker per folder; all are spawned before any is joined.
    let store = Arc::new(store);
    let mut workers = Vec::with_capacity(folders.len());
    for (fi, folder) in folders.iter().enumerate() {
        let shared = Arc::clone(&store);
        let folder = folder.to_string();
        workers.push(thread::spawn(move || {
            for i in 0..n {
                let rel = format!("{folder}/f{fi}_{i}.md");
                Index::on_write(&shared, Path::new(&rel)).unwrap();
            }
        }));
    }
    for worker in workers {
        worker.join().unwrap();
    }

    // Snapshot the concurrent result, then rebuild in place as the reference.
    let got = snapshot_artifacts(&store);
    Index::rebuild_all(&store).unwrap();
    let want = snapshot_artifacts(&store);

    assert_eq!(
        got.keys().collect::<Vec<_>>(),
        want.keys().collect::<Vec<_>>(),
        "artifact set after concurrent write-through must match rebuild"
    );
    for (k, v) in &want {
        assert_eq!(
            &got[k], v,
            "rollup artifact {k} diverged from rebuild after concurrent writes"
        );
    }
}
3788}