1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::parser::FolderMeta;
62use crate::store::{Layer, Store};
63
/// Maximum number of records listed in a type-folder `index.md`. When a
/// folder holds more, the rendered page ends with a "More" footer.
const MD_CAP: usize = 500;

/// Summary stored on a record whose frontmatter yields no `summary` value.
const MISSING_SUMMARY: &str = "(no summary)";

/// Text of the H1 heading in the root-level `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
74
/// The level of the store hierarchy that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store root; its markdown index lives at `index.md`.
    Root,
    /// One layer directory directly under the store root.
    Layer(Layer),
    /// A type folder, carried as a path relative to the store root.
    TypeFolder(PathBuf),
}
85
/// One catalogued file. Each record is serialized as a single JSON line of an
/// `index.jsonl` file.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Path of the file relative to the store root; (de)serialized through
    /// the `path_serde` module.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// Value of the frontmatter `type` key (empty when absent); appears as
    /// `type` in JSON.
    #[serde(rename = "type")]
    pub type_: String,
    /// Value of the frontmatter `summary` key, or a placeholder when absent.
    pub summary: String,
    /// Frontmatter `tags`; defaults to empty when missing from the JSON.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Frontmatter `links`; defaults to empty when missing from the JSON.
    #[serde(default)]
    pub links: Vec<String>,
    /// Frontmatter `created` timestamp, when present and RFC 3339-parseable.
    pub created: Option<DateTime<FixedOffset>>,
    /// Frontmatter `updated` timestamp; drives the newest-first ordering.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other frontmatter key, flattened into the top-level JSON object.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
119
/// An in-memory index for one level of the store, renderable as markdown
/// and/or JSON lines.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// Which level of the hierarchy this index describes.
    pub level: IndexLevel,
    /// Records listed at this level (always empty for the root index built by
    /// `build_root`).
    pub records: Vec<IndexRecord>,
    /// Number of content files per child type folder, keyed by the folder's
    /// store-relative path (empty for type-folder indexes).
    pub child_counts: BTreeMap<PathBuf, usize>,
}
132
133impl Index {
134 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
140 let rel = normalize_rel(type_folder);
141 let abs = store.root.join(&rel);
142 let mut records = Vec::new();
143 for file_abs in walk_type_folder_files(&abs) {
144 let rel_path =
145 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
146 records.push(record_from_file(&file_abs, rel_path)?);
158 }
159 sort_records(&mut records);
160 Ok(Index {
161 level: IndexLevel::TypeFolder(rel),
162 records,
163 child_counts: BTreeMap::new(),
164 })
165 }
166
167 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
176 let mut child_counts = BTreeMap::new();
177 for tf in type_folders_in_layer(store, layer) {
178 let abs = store.root.join(&tf);
179 let n = walk_type_folder_files(&abs).len();
180 if n > 0 {
181 child_counts.insert(tf, n);
182 }
183 }
184 let mut records = Vec::new();
185 for file_abs in loose_files_in_layer(store, layer) {
186 let rel_path =
187 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
188 records.push(record_from_file(&file_abs, rel_path)?);
193 }
194 sort_records(&mut records);
195 Ok(Index {
196 level: IndexLevel::Layer(layer),
197 records,
198 child_counts,
199 })
200 }
201
202 pub fn build_root(store: &Store) -> crate::Result<Index> {
205 let mut child_counts = BTreeMap::new();
206 for layer in Layer::all() {
207 for tf in type_folders_in_layer(store, layer) {
208 let abs = store.root.join(&tf);
209 let n = walk_type_folder_files(&abs).len();
210 if n > 0 {
211 child_counts.insert(tf, n);
212 }
213 }
214 }
215 Ok(Index {
216 level: IndexLevel::Root,
217 records: Vec::new(),
218 child_counts,
219 })
220 }
221
222 pub fn to_markdown(&self) -> String {
224 match &self.level {
225 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
226 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
227 IndexLevel::Root => self.render_root_md(),
228 }
229 }
230
231 pub fn to_jsonl(&self) -> String {
237 let mut out = String::new();
238 for rec in &self.records {
239 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
242 out.push_str(&line);
243 out.push('\n');
244 }
245 out
246 }
247
248 fn render_type_folder_md(&self, folder: &Path) -> String {
251 let folder_disp = path_to_unix(folder);
252 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
253 let mut s = String::new();
254 s.push_str("---\n");
255 s.push_str("type: index\n");
256 s.push_str("scope: type-folder\n");
257 s.push_str(&format!("folder: {folder_disp}\n"));
258 if let Some(ts) = updated {
259 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
260 }
261 s.push_str("---\n\n");
262 s.push_str(&format!("# {folder_disp}\n\n"));
263
264 let shown = self.records.len().min(MD_CAP);
265 for rec in self.records.iter().take(shown) {
266 s.push_str(&format_md_entry(rec));
267 s.push('\n');
268 }
269
270 if self.records.len() > MD_CAP {
271 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
272 let layer = folder
273 .components()
274 .next()
275 .and_then(|c| c.as_os_str().to_str())
276 .unwrap_or("");
277 s.push('\n');
278 s.push_str(&more_footer(self.records.len(), type_, layer));
279 }
280 s
281 }
282
283 fn render_layer_md(&self, layer: Layer) -> String {
288 let layer_dir = layer_dir_name(layer);
289 let mut s = String::new();
290 s.push_str("---\n");
291 s.push_str("type: index\n");
292 s.push_str("scope: layer\n");
293 s.push_str(&format!("folder: {layer_dir}\n"));
294 s.push_str("---\n\n");
295 s.push_str(&format!("# {layer_dir}\n\n"));
296 for (tf, n) in &self.child_counts {
297 let tf_unix = path_to_unix(tf);
298 let display = capitalize(folder_basename(tf));
299 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
300 }
301 s
302 }
303
304 fn render_root_md(&self) -> String {
307 let mut s = String::new();
308 s.push_str("---\n");
309 s.push_str("type: index\n");
310 s.push_str("scope: root\n");
311 s.push_str("---\n\n");
312 s.push_str(&format!("# {ROOT_TITLE}\n"));
313 for layer in Layer::all() {
314 let layer_dir = layer_dir_name(layer);
315 let prefix = format!("{layer_dir}/");
316 let children: Vec<(&PathBuf, &usize)> = self
317 .child_counts
318 .iter()
319 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
320 .collect();
321 if children.is_empty() {
322 continue;
323 }
324 let total: usize = children.iter().map(|(_, n)| **n).sum();
325 s.push('\n');
326 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
327 for (tf, n) in children {
328 let tf_unix = path_to_unix(tf);
329 let display = capitalize(folder_basename(tf));
330 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
331 }
332 }
333 s
334 }
335}
336
337impl Index {
    /// Incrementally updates the indexes after `file` was created or modified.
    ///
    /// `file` is a store-relative path. Paths recognized by
    /// `is_index_artifact` are ignored. A file sitting directly inside a layer
    /// directory is routed to that layer's `index.jsonl`. Any other file must
    /// live under `<layer>/<type-folder>/`; its record replaces any existing
    /// record with the same path in the folder's index, after which the
    /// folder artifacts and the parent (layer and root) pages are rewritten.
    ///
    /// # Errors
    /// Fails when the path is not inside a layer/type-folder, when the file
    /// or the existing `index.jsonl` cannot be read or parsed, or when
    /// writing an artifact fails.
    pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
        let file_rel = normalize_rel(file);
        if is_index_artifact(&file_rel) {
            return Ok(());
        }
        if let Some(layer) = loose_layer_of(&file_rel) {
            return apply_loose_change(store, layer, &file_rel, false);
        }
        let file_abs = store.root.join(&file_rel);
        let folder = type_folder_of(&file_rel)
            .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
        // The file is parsed before the folder lock is taken.
        let record = record_from_file(&file_abs, file_rel.clone())?;

        // Bound to a named variable so the guard lives until the function
        // returns. NOTE(review): presumably serializes writers per folder — confirm.
        let _lock = FolderLock::acquire(&store.root.join(&folder));
        let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
        // Upsert: drop any stale record for this path, then add the fresh one.
        records.retain(|r| r.path != record.path);
        records.push(record);
        sort_records(&mut records);

        write_type_folder_artifacts(store, &folder, &records)?;
        update_parents(store, &folder)?;
        Ok(())
    }
382
    /// Incrementally updates the indexes after a file moved from `old` to `new`.
    ///
    /// Both paths are store-relative. If either path is recognized by
    /// `is_index_artifact`, nothing happens. If either end is a loose file
    /// directly inside a layer directory, the rename is handled as a removal
    /// of `old` followed by a write of `new`. Otherwise both paths must be
    /// inside a layer/type-folder: the old record is dropped from its
    /// folder's index, a record read from the file at `new` is inserted into
    /// the target folder's index, and the parent pages of the affected
    /// folders are refreshed.
    ///
    /// # Errors
    /// Fails when a path is not inside a layer/type-folder, when the file at
    /// `new` or an existing `index.jsonl` cannot be read or parsed, or when
    /// writing an artifact fails.
    pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
        let old_rel = normalize_rel(old);
        let new_rel = normalize_rel(new);
        if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
            return Ok(());
        }
        if loose_layer_of(&old_rel).is_some() || loose_layer_of(&new_rel).is_some() {
            // At least one end is a loose layer file: decompose into the two
            // single-file operations, which already handle both kinds of path.
            Self::on_remove(store, &old_rel)?;
            Self::on_write(store, &new_rel)?;
            return Ok(());
        }
        let old_folder = type_folder_of(&old_rel)
            .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
        let new_folder = type_folder_of(&new_rel)
            .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;

        // Guard(s) for both folders, kept alive until the function returns.
        let _locks = lock_folders(store, &old_folder, &new_folder);

        let mut old_records =
            read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
        old_records.retain(|r| r.path != old_rel);

        if old_folder == new_folder {
            // Rename within one folder: a single index is rewritten.
            let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
            old_records.retain(|r| r.path != record.path);
            old_records.push(record);
            sort_records(&mut old_records);
            write_type_folder_artifacts(store, &old_folder, &old_records)?;
            update_parents(store, &old_folder)?;
            return Ok(());
        }

        // Cross-folder move: the source folder's artifacts are rewritten first.
        sort_records(&mut old_records);
        write_type_folder_artifacts(store, &old_folder, &old_records)?;

        let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
        let mut new_records =
            read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
        new_records.retain(|r| r.path != record.path);
        new_records.push(record);
        sort_records(&mut new_records);
        write_type_folder_artifacts(store, &new_folder, &new_records)?;

        update_parents(store, &old_folder)?;
        update_parents(store, &new_folder)?;
        Ok(())
    }
448
449 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
454 let file_rel = normalize_rel(file);
455 if is_index_artifact(&file_rel) {
458 return Ok(());
459 }
460 if let Some(layer) = loose_layer_of(&file_rel) {
462 return apply_loose_change(store, layer, &file_rel, true);
463 }
464 let folder = type_folder_of(&file_rel)
465 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
466 let _lock = FolderLock::acquire(&store.root.join(&folder));
468 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
469 let before = records.len();
470 records.retain(|r| r.path != file_rel);
471 if records.len() == before {
472 }
475 sort_records(&mut records);
476 write_type_folder_artifacts(store, &folder, &records)?;
477 update_parents(store, &folder)?;
478 Ok(())
479 }
480
    /// Rebuilds every index artifact in the store from the files on disk.
    ///
    /// Runs `cleanup` first. Then, per layer, it writes the artifacts of each
    /// non-empty type folder, the layer's `index.md` (removed when the layer
    /// has no non-empty type folder), and the layer's `index.jsonl` for loose
    /// files. Finally it writes or removes the root `index.md`.
    ///
    /// # Errors
    /// Returns the first error raised while reading content files or
    /// writing/removing artifacts.
    pub fn rebuild_all(store: &Store) -> crate::Result<()> {
        Index::cleanup(store)?;
        for layer in Layer::all() {
            for tf in type_folders_in_layer(store, layer) {
                let idx = Index::build_type_folder(store, &tf)?;
                if idx.records.is_empty() {
                    continue;
                }
                write_type_folder_artifacts(store, &tf, &idx.records)?;
            }
            // The layer page is rendered after the type-folder artifacts are
            // written because `render_layer_md_with_store` reads their
            // `index.jsonl` files.
            let layer_idx = Index::build_layer(store, layer)?;
            let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
            if layer_idx.child_counts.is_empty() {
                remove_if_exists(&layer_index_md)?;
            } else {
                write_atomic(
                    &layer_index_md,
                    render_layer_md_with_store(store, &layer_idx),
                )?;
            }
            write_layer_jsonl(store, layer, &layer_idx.records)?;
        }
        let root_idx = Index::build_root(store)?;
        let root_index_md = store.root.join("index.md");
        if root_idx.child_counts.is_empty() {
            remove_if_exists(&root_index_md)?;
        } else {
            write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
        }
        Ok(())
    }
518
519 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
526 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
527 update_parents(store, folder)
528 }
529
530 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
532 match level {
533 IndexLevel::TypeFolder(folder) => {
534 let idx = Index::build_type_folder(store, folder)?;
535 if idx.records.is_empty() {
536 remove_if_exists(&store.root.join(folder).join("index.md"))?;
537 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
538 } else {
539 write_type_folder_artifacts(store, folder, &idx.records)?;
540 }
541 }
542 IndexLevel::Layer(layer) => {
543 let idx = Index::build_layer(store, *layer)?;
544 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
545 if idx.child_counts.is_empty() {
546 remove_if_exists(&p)?;
547 } else {
548 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
549 }
550 write_layer_jsonl(store, *layer, &idx.records)?;
551 }
552 IndexLevel::Root => {
553 let idx = Index::build_root(store)?;
554 let p = store.root.join("index.md");
555 if idx.child_counts.is_empty() {
556 remove_if_exists(&p)?;
557 } else {
558 write_atomic(&p, render_root_md_with_store(store, &idx))?;
559 }
560 }
561 }
562 Ok(())
563 }
564
565 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
568 let mut out = String::new();
569 match level {
570 IndexLevel::TypeFolder(folder) => {
571 let idx = Index::build_type_folder(store, folder)?;
572 let md_path = path_to_unix(&folder.join("index.md"));
573 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
574 out.push_str(&format!("--- {md_path} ---\n"));
575 out.push_str(&idx.to_markdown());
576 out.push_str(&format!("--- {jsonl_path} ---\n"));
577 out.push_str(&idx.to_jsonl());
578 }
579 IndexLevel::Layer(layer) => {
580 let idx = Index::build_layer(store, *layer)?;
581 let md_path = format!("{}/index.md", layer_dir_name(*layer));
582 out.push_str(&format!("--- {md_path} ---\n"));
583 out.push_str(&render_layer_md_with_store(store, &idx));
584 }
585 IndexLevel::Root => {
586 let idx = Index::build_root(store)?;
587 out.push_str("--- index.md ---\n");
588 out.push_str(&render_root_md_with_store(store, &idx));
589 }
590 }
591 Ok(out)
592 }
593
    /// Removes stale index artifacts from the type folders of every layer.
    ///
    /// For each type folder of each existing layer directory:
    /// * entries nested at least one subdirectory below the type folder
    ///   (walk depth 2 or more) are removed when they are index artifacts and
    ///   `is_deletable_catalog_artifact` approves;
    /// * when the type folder contains no content files, its own `index.md`
    ///   (if deletable) and `index.jsonl` are removed.
    ///
    /// # Errors
    /// Returns the first error raised by `remove_if_exists`.
    pub fn cleanup(store: &Store) -> crate::Result<()> {
        for layer in Layer::all() {
            let layer_dir = store.root.join(layer_dir_name(layer));
            if !layer_dir.is_dir() {
                continue;
            }
            for tf in type_folders_in_layer(store, layer) {
                let tf_abs = store.root.join(&tf);
                // `min_depth(2)` skips the type folder itself (depth 0) and its
                // direct children (depth 1), so the folder's own artifacts are
                // untouched by this loop. Unreadable entries are skipped.
                for entry in walkdir::WalkDir::new(&tf_abs)
                    .min_depth(2)
                    .into_iter()
                    .filter_map(|e| e.ok())
                {
                    let p = entry.path();
                    if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
                        remove_if_exists(p)?;
                    }
                }
                if walk_type_folder_files(&tf_abs).is_empty() {
                    // `index.md` is guarded by the deletability check;
                    // `index.jsonl` is removed unconditionally.
                    let md = tf_abs.join("index.md");
                    if is_deletable_catalog_artifact(&md) {
                        remove_if_exists(&md)?;
                    }
                    remove_if_exists(&tf_abs.join("index.jsonl"))?;
                }
            }
        }
        Ok(())
    }
646}
647
648fn write_type_folder_artifacts(
656 store: &Store,
657 folder: &Path,
658 records: &[IndexRecord],
659) -> crate::Result<()> {
660 let folder_abs = store.root.join(folder);
661 let md_path = folder_abs.join("index.md");
662 let jsonl_path = folder_abs.join("index.jsonl");
663 if records.is_empty() {
664 remove_if_exists(&md_path)?;
665 remove_if_exists(&jsonl_path)?;
666 return Ok(());
667 }
668 let idx = Index {
669 level: IndexLevel::TypeFolder(folder.to_path_buf()),
670 records: records.to_vec(),
671 child_counts: BTreeMap::new(),
672 };
673 write_atomic(&md_path, idx.to_markdown())?;
674 write_atomic(&jsonl_path, idx.to_jsonl())?;
675 Ok(())
676}
677
/// Refreshes the layer `index.md` and the root `index.md` after the index of
/// the type folder `folder` (store-relative) changed.
///
/// Counts and newest records come from the `index.jsonl` of every type folder
/// in every layer, not from a scan of the content files. A page is removed
/// when nothing remains to list on it.
fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
    // Guard on the store root, held until the function returns.
    // NOTE(review): presumably serializes writers of the shared pages — confirm.
    let _root_lock = FolderLock::acquire(&store.root);
    let stats = collect_child_stats(store, &Layer::all())?;

    // The layer is derived from the first path component of `folder`; when
    // that component is not a known layer name only the root page is updated.
    let layer = folder
        .components()
        .next()
        .and_then(|c| c.as_os_str().to_str())
        .and_then(layer_from_dir_name);
    if let Some(layer) = layer {
        let p = store.root.join(layer_dir_name(layer)).join("index.md");
        if layer_has_children(&stats, layer) {
            write_atomic(
                &p,
                render_layer_md_from_stats(layer, &stats, &store.config.folders),
            )?;
        } else {
            remove_if_exists(&p)?;
        }
    }
    let rp = store.root.join("index.md");
    if stats.values().any(|s| s.count > 0) {
        write_atomic(
            &rp,
            render_root_md_from_stats(&stats, &store.config.folders),
        )?;
    } else {
        remove_if_exists(&rp)?;
    }
    Ok(())
}
753
754fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
756 let prefix = format!("{}/", layer_dir_name(layer));
757 stats
758 .iter()
759 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
760}
761
762fn render_layer_md_from_stats(
767 layer: Layer,
768 stats: &BTreeMap<PathBuf, FolderStat>,
769 folders: &BTreeMap<String, FolderMeta>,
770) -> String {
771 let layer_dir = layer_dir_name(layer);
772 let prefix = format!("{layer_dir}/");
773 let mut max_upd: Option<DateTime<FixedOffset>> = None;
774 let mut entries = String::new();
775 for (tf, stat) in stats {
776 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
777 continue;
778 }
779 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
780 max_upd = Some(match max_upd {
781 Some(cur) if cur >= u => cur,
782 _ => u,
783 });
784 }
785 let tf_unix = path_to_unix(tf);
786 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
787 entries.push_str(&folder_entry(&tf_unix, &display, stat.count, description));
788 }
789 let mut s = String::new();
790 s.push_str("---\n");
791 s.push_str("type: index\n");
792 s.push_str("scope: layer\n");
793 s.push_str(&format!("folder: {layer_dir}\n"));
794 if let Some(ts) = max_upd {
795 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
796 }
797 s.push_str("---\n\n");
798 s.push_str(&format!("# {layer_dir}\n\n"));
799 s.push_str(&entries);
800 s
801}
802
803fn render_root_md_from_stats(
805 stats: &BTreeMap<PathBuf, FolderStat>,
806 folders: &BTreeMap<String, FolderMeta>,
807) -> String {
808 let mut max_upd: Option<DateTime<FixedOffset>> = None;
809 for stat in stats.values() {
810 if stat.count == 0 {
811 continue;
812 }
813 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
814 max_upd = Some(match max_upd {
815 Some(cur) if cur >= u => cur,
816 _ => u,
817 });
818 }
819 }
820 let mut s = String::new();
821 s.push_str("---\n");
822 s.push_str("type: index\n");
823 s.push_str("scope: root\n");
824 if let Some(ts) = max_upd {
825 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
826 }
827 s.push_str("---\n\n");
828 s.push_str(&format!("# {ROOT_TITLE}\n"));
829 for layer in Layer::all() {
830 let layer_dir = layer_dir_name(layer);
831 let prefix = format!("{layer_dir}/");
832 let children: Vec<(&PathBuf, usize)> = stats
833 .iter()
834 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
835 .map(|(tf, s)| (tf, s.count))
836 .collect();
837 if children.is_empty() {
838 continue;
839 }
840 let total: usize = children.iter().map(|(_, n)| *n).sum();
841 s.push('\n');
842 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
843 for (tf, n) in children {
844 let tf_unix = path_to_unix(tf);
845 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
846 s.push_str(&folder_entry(&tf_unix, &display, n, description));
847 }
848 }
849 s
850}
851
852fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
859 let layer = match idx.level {
860 IndexLevel::Layer(l) => l,
861 _ => unreachable!("render_layer_md_with_store called on non-layer"),
862 };
863 let layer_dir = layer_dir_name(layer);
864 let mut max_upd: Option<DateTime<FixedOffset>> = None;
865 let mut entries = String::new();
866 for (tf, n) in &idx.child_counts {
867 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
868 let newest = recs.first();
869 if let Some(u) = newest.and_then(|r| r.updated) {
870 max_upd = Some(match max_upd {
871 Some(cur) if cur >= u => cur,
872 _ => u,
873 });
874 }
875 let tf_unix = path_to_unix(tf);
876 let (display, description) =
877 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
878 entries.push_str(&folder_entry(&tf_unix, &display, *n, description));
879 }
880 let mut s = String::new();
881 s.push_str("---\n");
882 s.push_str("type: index\n");
883 s.push_str("scope: layer\n");
884 s.push_str(&format!("folder: {layer_dir}\n"));
885 if let Some(ts) = max_upd {
886 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
887 }
888 s.push_str("---\n\n");
889 s.push_str(&format!("# {layer_dir}\n\n"));
890 s.push_str(&entries);
891 s
892}
893
894fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
898 let mut max_upd: Option<DateTime<FixedOffset>> = None;
899 for tf in idx.child_counts.keys() {
900 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
901 if let Some(u) = recs.first().and_then(|r| r.updated) {
902 max_upd = Some(match max_upd {
903 Some(cur) if cur >= u => cur,
904 _ => u,
905 });
906 }
907 }
908 let mut s = String::new();
909 s.push_str("---\n");
910 s.push_str("type: index\n");
911 s.push_str("scope: root\n");
912 if let Some(ts) = max_upd {
913 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
914 }
915 s.push_str("---\n\n");
916 s.push_str(&format!("# {ROOT_TITLE}\n"));
917 for layer in Layer::all() {
918 let layer_dir = layer_dir_name(layer);
919 let prefix = format!("{layer_dir}/");
920 let children: Vec<(&PathBuf, &usize)> = idx
921 .child_counts
922 .iter()
923 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
924 .collect();
925 if children.is_empty() {
926 continue;
927 }
928 let total: usize = children.iter().map(|(_, n)| **n).sum();
929 s.push('\n');
930 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
931 for (tf, n) in children {
932 let tf_unix = path_to_unix(tf);
933 let (display, description) =
934 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
935 s.push_str(&folder_entry(&tf_unix, &display, *n, description));
936 }
937 }
938 s
939}
940
941fn format_md_entry(rec: &IndexRecord) -> String {
947 let path = wiki_target(&rec.path);
948 let summary = collapse_whitespace(&rec.summary);
957 let mut line = format!("- [[{path}]] — {summary}");
958 if !rec.tags.is_empty() {
959 let tags = rec
960 .tags
961 .iter()
962 .map(|t| format!("#{t}"))
963 .collect::<Vec<_>>()
964 .join(" ");
965 line.push_str(&format!(" · {tags}"));
966 }
967 line
968}
969
/// Builds the "More" footer appended to a type-folder page whose record count
/// exceeds `MD_CAP`.
///
/// `total` is the full record count; `type_` and `layer` are interpolated
/// into the suggested `dbmd index query` command.
fn more_footer(total: usize, type_: &str, layer: &str) -> String {
    format!(
        "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
    )
}
976
/// Sorts `records` in place with `record_recency_cmp`: most recently updated
/// first, records without an `updated` timestamp last, ties broken by path.
fn sort_records(records: &mut [IndexRecord]) {
    records.sort_by(record_recency_cmp);
}
983
impl IndexRecord {
    /// Crate-visible entry point to `record_from_file`: builds the record
    /// for the file at `abs`, storing `rel` as its path.
    pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
        record_from_file(abs, rel)
    }
}
1000
1001fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
1004 let mut meta = read_frontmatter(abs)?;
1005 if rel.starts_with("records") {
1010 meta.fields
1011 .entry("meta-type".to_string())
1012 .or_insert_with(|| Value::String("fact".to_string()));
1013 }
1014 Ok(IndexRecord {
1015 path: rel,
1016 type_: meta.type_.unwrap_or_default(),
1017 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
1018 tags: meta.tags,
1019 links: meta.links,
1020 created: meta.created,
1021 updated: meta.updated,
1022 fields: meta.fields,
1023 })
1024}
1025
/// Frontmatter values extracted from one file by `read_frontmatter`, before
/// defaults are applied.
struct FileMeta {
    /// `type` key, when it is a string, number or boolean scalar.
    type_: Option<String>,
    /// `summary` key, when it is a string, number or boolean scalar.
    summary: Option<String>,
    /// `tags` key as a list of strings (empty when absent).
    tags: Vec<String>,
    /// `links` key as a list of strings (empty when absent).
    links: Vec<String>,
    /// `created` key, when it is a string that parses as RFC 3339.
    created: Option<DateTime<FixedOffset>>,
    /// `updated` key, when it is a string that parses as RFC 3339.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining string-keyed entries except `path`, converted to JSON.
    fields: BTreeMap<String, Value>,
}
1036
1037fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
1051 let bytes = fs::read(abs)?;
1052 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
1053 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
1054 serde_norway::Mapping::new()
1055 } else {
1056 serde_norway::from_str(&yaml).map_err(|e| {
1057 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1058 path: abs.to_path_buf(),
1059 message: format!("frontmatter YAML: {e}"),
1060 })
1061 })?
1062 };
1063
1064 let mut type_ = None;
1065 let mut summary = None;
1066 let mut tags = Vec::new();
1067 let mut links = Vec::new();
1068 let mut created = None;
1069 let mut updated = None;
1070 let mut fields = BTreeMap::new();
1071
1072 for (k, v) in map {
1073 let key = match k.as_str() {
1074 Some(s) => s.to_string(),
1075 None => continue,
1076 };
1077 match key.as_str() {
1078 "type" => type_ = scalar_string(&v),
1088 "summary" => summary = scalar_string(&v),
1089 "tags" => tags = yaml_string_list(&v),
1090 "links" => links = yaml_string_list(&v),
1091 "created" => created = v.as_str().and_then(parse_ts),
1092 "updated" => updated = v.as_str().and_then(parse_ts),
1093 "path" => {}
1097 _ => {
1098 fields.insert(key, yaml_to_json_value(&v));
1099 }
1100 }
1101 }
1102
1103 Ok(FileMeta {
1104 type_,
1105 summary,
1106 tags,
1107 links,
1108 created,
1109 updated,
1110 fields,
1111 })
1112}
1113
1114fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1120 match v {
1121 serde_norway::Value::String(s) => Some(s.clone()),
1122 serde_norway::Value::Number(n) => Some(n.to_string()),
1123 serde_norway::Value::Bool(b) => Some(b.to_string()),
1124 _ => None,
1125 }
1126}
1127
1128fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1134 let text = String::from_utf8_lossy(bytes);
1139 extract_frontmatter_block(&text)
1140}
1141
/// Returns the text between the opening and closing `---` fences at the top
/// of `text`, with each line terminated by `\n`.
///
/// A leading byte-order mark is ignored and trailing whitespace on fence
/// lines is tolerated. Returns `None` when the first line is not a fence or
/// when the block is never closed.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let body = match text.strip_prefix('\u{feff}') {
        Some(rest) => rest,
        None => text,
    };
    let mut rows = body.lines();
    if rows.next()?.trim_end() != "---" {
        return None;
    }

    let mut yaml = String::new();
    loop {
        // Running out of lines before the closing fence means the block is
        // unterminated.
        let row = rows.next()?;
        if row.trim_end() == "---" {
            return Some(yaml);
        }
        yaml.push_str(row);
        yaml.push('\n');
    }
}
1161
1162fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1165 match v {
1166 serde_norway::Value::String(s) => vec![s.clone()],
1167 serde_norway::Value::Sequence(seq) => seq
1168 .iter()
1169 .filter_map(yaml_string_or_wiki_link_literal)
1170 .collect(),
1171 _ => Vec::new(),
1172 }
1173}
1174
1175fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1176 v.as_str()
1177 .map(str::to_string)
1178 .or_else(|| unquoted_wiki_link_literal(v))
1179}
1180
1181fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1182 if let Some(link) = unquoted_wiki_link_literal(v) {
1183 return Value::String(link);
1184 }
1185 match v {
1186 serde_norway::Value::String(s) => Value::String(s.clone()),
1187 serde_norway::Value::Bool(b) => Value::Bool(*b),
1188 serde_norway::Value::Number(n) => {
1189 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1190 }
1191 serde_norway::Value::Sequence(seq) => {
1192 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1193 }
1194 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1195 serde_json::to_value(v).unwrap_or(Value::Null)
1196 }
1197 serde_norway::Value::Null => Value::Null,
1198 }
1199}
1200
1201fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1202 let serde_norway::Value::Sequence(outer) = v else {
1203 return None;
1204 };
1205 if outer.len() != 1 {
1206 return None;
1207 }
1208 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1209 return None;
1210 };
1211 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1212 return None;
1213 };
1214 Some(format!("[[{target}]]"))
1215}
1216
1217fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
1219 DateTime::parse_from_rfc3339(s.trim()).ok()
1220}
1221
/// Formats a timestamp as RFC 3339 for frontmatter output, using
/// `SecondsFormat::AutoSi` for the seconds and passing `true` for chrono's
/// `use_z` flag.
fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
    ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
}
1228
1229fn max_updated<'a>(
1231 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1232) -> Option<DateTime<FixedOffset>> {
1233 let mut best: Option<DateTime<FixedOffset>> = None;
1234 for ts in it.flatten() {
1235 best = Some(match best {
1236 Some(cur) if cur >= *ts => cur,
1237 _ => *ts,
1238 });
1239 }
1240 best
1241}
1242
1243fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1247 let text = match fs::read_to_string(jsonl) {
1248 Ok(t) => t,
1249 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1250 Err(e) => return Err(e.into()),
1251 };
1252 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1254 for (i, line) in text.lines().enumerate() {
1255 if line.trim().is_empty() {
1256 continue;
1257 }
1258 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1259 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1260 path: jsonl.to_path_buf(),
1261 message: format!("line {}: {e}", i + 1),
1262 })
1263 })?;
1264 by_path.insert(rec.path.clone(), rec);
1265 }
1266 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1267 sort_records(&mut records);
1268 Ok(records)
1269}
1270
/// Summary of one type folder's `index.jsonl`, as computed by
/// `read_folder_stat`.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the file.
    count: usize,
    /// The record that sorts first under `record_recency_cmp` (the most
    /// recently updated one), or `None` when the file has no records.
    newest: Option<IndexRecord>,
}
1282
1283fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1293 let text = match fs::read_to_string(jsonl) {
1294 Ok(t) => t,
1295 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1296 Err(e) => return Err(e.into()),
1297 };
1298 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1301 for (i, line) in text.lines().enumerate() {
1302 if line.trim().is_empty() {
1303 continue;
1304 }
1305 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1306 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1307 path: jsonl.to_path_buf(),
1308 message: format!("line {}: {e}", i + 1),
1309 })
1310 })?;
1311 by_path.insert(rec.path.clone(), rec);
1312 }
1313 let count = by_path.len();
1314 let newest = by_path.into_values().min_by(record_recency_cmp);
1318 Ok(FolderStat { count, newest })
1319}
1320
/// Orders records newest-first by `updated`; records without an `updated`
/// timestamp sort after all dated ones, and ties are broken by ascending path.
fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
    // Note the swapped tuple order: comparing `b` against `a` makes the later
    // timestamp sort first.
    match (b.updated, a.updated) {
        (Some(bu), Some(au)) => bu.cmp(&au),
        // `b` is dated and `a` is not: `a` goes after `b`.
        (Some(_), None) => std::cmp::Ordering::Greater,
        // `a` is dated and `b` is not: `a` goes before `b`.
        (None, Some(_)) => std::cmp::Ordering::Less,
        (None, None) => std::cmp::Ordering::Equal,
    }
    .then_with(|| a.path.cmp(&b.path))
}
1334
1335fn collect_child_stats(
1348 store: &Store,
1349 layers: &[Layer],
1350) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1351 let mut stats = BTreeMap::new();
1352 for &layer in layers {
1353 for tf in type_folders_in_layer(store, layer) {
1354 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1355 if stat.count > 0 {
1356 stats.insert(tf, stat);
1357 }
1358 }
1359 }
1360 Ok(stats)
1361}
1362
1363fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1366 let mut out = Vec::new();
1367 if !folder_abs.is_dir() {
1368 return out;
1369 }
1370 for entry in walkdir::WalkDir::new(folder_abs)
1371 .into_iter()
1372 .filter_entry(|e| !is_hidden(e.file_name()))
1373 .filter_map(|e| e.ok())
1374 {
1375 if !entry.file_type().is_file() {
1376 continue;
1377 }
1378 let p = entry.path();
1379 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1380 continue;
1381 }
1382 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1383 continue;
1384 }
1385 out.push(p.to_path_buf());
1386 }
1387 out
1388}
1389
1390fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1393 let layer_dir = store.root.join(layer_dir_name(layer));
1394 let mut out = Vec::new();
1395 let rd = match fs::read_dir(&layer_dir) {
1396 Ok(rd) => rd,
1397 Err(_) => return out,
1398 };
1399 for entry in rd.flatten() {
1400 if !entry.path().is_dir() {
1401 continue;
1402 }
1403 let name = entry.file_name();
1404 let name = match name.to_str() {
1405 Some(n) => n,
1406 None => continue,
1407 };
1408 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1409 continue;
1410 }
1411 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1412 }
1413 out.sort();
1414 out
1415}
1416
1417fn loose_layer_of(file_rel: &Path) -> Option<Layer> {
1423 let mut comps = file_rel.components();
1424 let layer = layer_from_dir_name(comps.next()?.as_os_str().to_str()?)?;
1425 comps.next()?; if comps.next().is_some() {
1427 return None; }
1429 Some(layer)
1430}
1431
1432fn loose_files_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1436 let layer_dir = store.root.join(layer_dir_name(layer));
1437 let mut out = Vec::new();
1438 let rd = match fs::read_dir(&layer_dir) {
1439 Ok(rd) => rd,
1440 Err(_) => return out,
1441 };
1442 for entry in rd.flatten() {
1443 let p = entry.path();
1444 if !p.is_file() {
1445 continue;
1446 }
1447 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1448 continue;
1449 }
1450 if is_index_artifact(&p) || is_hidden(entry.file_name().as_os_str()) {
1451 continue;
1452 }
1453 out.push(p);
1454 }
1455 out
1456}
1457
1458fn write_layer_jsonl(store: &Store, layer: Layer, records: &[IndexRecord]) -> crate::Result<()> {
1463 let path = store.root.join(layer_dir_name(layer)).join("index.jsonl");
1464 if records.is_empty() {
1465 remove_if_exists(&path)?;
1466 return Ok(());
1467 }
1468 let idx = Index {
1469 level: IndexLevel::Layer(layer),
1470 records: records.to_vec(),
1471 child_counts: BTreeMap::new(),
1472 };
1473 write_atomic(&path, idx.to_jsonl())
1474}
1475
1476fn apply_loose_change(
1481 store: &Store,
1482 layer: Layer,
1483 file_rel: &Path,
1484 removing: bool,
1485) -> crate::Result<()> {
1486 let layer_dir = store.root.join(layer_dir_name(layer));
1487 let _lock = FolderLock::acquire(&layer_dir);
1488 let jsonl = layer_dir.join("index.jsonl");
1489 let mut records = read_jsonl_records(&jsonl)?;
1490 records.retain(|r| r.path != file_rel);
1491 if !removing {
1492 records.push(record_from_file(
1493 &store.root.join(file_rel),
1494 file_rel.to_path_buf(),
1495 )?);
1496 }
1497 sort_records(&mut records);
1498 write_layer_jsonl(store, layer, &records)
1499}
1500
1501fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1505 let mut comps = file_rel.components();
1506 let layer = comps.next()?.as_os_str().to_str()?;
1507 layer_from_dir_name(layer)?;
1508 let type_seg = comps.next()?.as_os_str().to_str()?;
1509 Some(PathBuf::from(layer).join(type_seg))
1510}
1511
/// Strips `root` from the front of `abs`, returning the remainder as an owned path, or
/// `None` when `abs` does not start with `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(relative) => Some(relative.to_owned()),
        Err(_) => None,
    }
}
1516
1517fn normalize_rel(p: &Path) -> PathBuf {
1520 let s = path_to_unix(p);
1521 let s = s.strip_prefix("./").unwrap_or(&s);
1522 PathBuf::from(s)
1523}
1524
/// True when the final path component is exactly `index.md` or `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    p.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| name == "index.md" || name == "index.jsonl")
}
1531
1532fn is_deletable_catalog_artifact(p: &Path) -> bool {
1546 match p.file_name().and_then(|n| n.to_str()) {
1547 Some("index.jsonl") => true,
1548 Some("index.md") => match read_frontmatter(p) {
1549 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1551 Err(_) => true,
1553 },
1554 _ => false,
1555 }
1556}
1557
/// True when `name` is valid UTF-8 and begins with `.`; names that are not valid UTF-8
/// are reported as not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    matches!(name.to_str(), Some(text) if text.starts_with('.'))
}
1561
1562fn layer_dir_name(layer: Layer) -> &'static str {
1563 match layer {
1564 Layer::Sources => "sources",
1565 Layer::Records => "records",
1566 }
1567}
1568
1569fn layer_from_dir_name(name: &str) -> Option<Layer> {
1572 match name {
1573 "sources" => Some(Layer::Sources),
1574 "records" => Some(Layer::Records),
1575 _ => None,
1576 }
1577}
1578
/// Returns the final path component as `&str`, or `""` when there is none or it is not
/// valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1583
1584fn wiki_target(p: &Path) -> String {
1588 let unix = path_to_unix(p);
1589 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1590}
1591
/// Joins the components of `p` with `/`, converting each component lossily to UTF-8.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    for (position, component) in p.components().enumerate() {
        if position > 0 {
            unix.push('/');
        }
        unix.push_str(&component.as_os_str().to_string_lossy());
    }
    unix
}
1609
1610mod path_serde {
1616 use super::path_to_unix;
1617 use serde::{Deserialize, Deserializer, Serializer};
1618 use std::path::{Path, PathBuf};
1619
1620 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1621 s.serialize_str(&path_to_unix(p))
1622 }
1623
1624 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1625 Ok(PathBuf::from(String::deserialize(d)?))
1626 }
1627}
1628
/// Upper-cases the first character of `s` (using its full Unicode upper-case mapping,
/// which may be more than one character) and leaves the rest untouched.
fn capitalize(s: &str) -> String {
    let Some(initial) = s.chars().next() else {
        return String::new();
    };
    let mut out: String = initial.to_uppercase().collect();
    out.push_str(&s[initial.len_utf8()..]);
    out
}
1637
/// Trims `s` and replaces every run of whitespace inside it with a single space.
fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first one.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
1645
1646fn default_display(basename: &str) -> String {
1652 let spaced: String = basename
1653 .chars()
1654 .map(|c| if c == '-' || c == '_' { ' ' } else { c })
1655 .collect();
1656 capitalize(&spaced)
1657}
1658
1659fn folder_label<'a>(
1666 tf_unix: &str,
1667 basename: &str,
1668 folders: &'a BTreeMap<String, FolderMeta>,
1669) -> (String, Option<&'a str>) {
1670 let meta = folders.get(tf_unix);
1671 let display = meta
1672 .and_then(|m| m.display.as_deref())
1673 .map(str::to_string)
1674 .unwrap_or_else(|| default_display(basename));
1675 (display, meta.and_then(|m| m.description.as_deref()))
1676}
1677
/// Formats one rollup bullet for a type folder:
/// `- [[<tf_unix>/index|<display>]] (<count>)`, followed by ` — <description>` when a
/// description is supplied, and always terminated by a newline.
fn folder_entry(tf_unix: &str, display: &str, count: usize, description: Option<&str>) -> String {
    let mut line = format!("- [[{tf_unix}/index|{display}]] ({count})");
    if let Some(blurb) = description {
        line.push_str(" — ");
        line.push_str(blurb);
    }
    line.push('\n');
    line
}
1686
1687fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1694 if let Some(parent) = path.parent() {
1695 fs::create_dir_all(parent)?;
1696 }
1697 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1698 let mut tmp = tempfile_in(dir)?;
1699 tmp.write_all(contents.as_bytes())?;
1700 tmp.flush()?;
1701 tmp.persist(path)?;
1702 Ok(())
1703}
1704
1705fn remove_if_exists(path: &Path) -> crate::Result<()> {
1706 match fs::remove_file(path) {
1707 Ok(()) => Ok(()),
1708 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1709 Err(e) => Err(e.into()),
1710 }
1711}
1712
1713fn bad_index(path: &Path, msg: &str) -> crate::Error {
1714 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1715 path: path.to_path_buf(),
1716 message: msg.to_string(),
1717 })
1718}
1719
/// Best-effort advisory lock over one folder, backed by a `.index.lock` file inside it.
///
/// Ownership is claimed by creating the lock file with `create_new`. The guard deletes
/// the file again when dropped, but only when it was the one that created it (`held`).
struct FolderLock {
    /// Location of the `.index.lock` file this guard refers to.
    path: PathBuf,
    /// `true` when this guard created the lock file and must delete it on drop.
    held: bool,
}

impl FolderLock {
    /// Acquires the lock for `folder_abs`, waiting while another lock file exists.
    ///
    /// The folder is created if missing (failure is ignored here; it surfaces as a
    /// failed open below). The loop then tries to create `.index.lock` exclusively:
    ///
    /// * success — returns a guard with `held: true`;
    /// * the file already exists — if its modification time is more than 30 s old it is
    ///   considered stale and removed before retrying; otherwise the thread sleeps
    ///   10 ms and retries;
    /// * any other open error — returns a guard with `held: false`, i.e. the caller
    ///   proceeds without the lock rather than failing.
    ///
    /// A stale lock that cannot be removed (permission denied, or the path is a
    /// directory) also yields `held: false`. Retrying in that situation can never
    /// succeed and previously spun forever without sleeping.
    fn acquire(folder_abs: &Path) -> Self {
        use std::io::ErrorKind;
        use std::time::{Duration, SystemTime};
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let path = folder_abs.join(".index.lock");
        let _ = fs::create_dir_all(folder_abs);
        loop {
            match fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path)
            {
                Ok(_) => return FolderLock { path, held: true },
                Err(e) if e.kind() == ErrorKind::AlreadyExists => {
                    // Unknown age (metadata gone, or an mtime in the future) counts as
                    // "not stale": keep waiting for the holder.
                    let stale = fs::metadata(&path)
                        .and_then(|m| m.modified())
                        .ok()
                        .and_then(|t| SystemTime::now().duration_since(t).ok())
                        .map(|age| age > STALE_AFTER)
                        .unwrap_or(false);
                    if stale {
                        match fs::remove_file(&path) {
                            // Removed by us, or by someone else in the meantime:
                            // try to claim the lock straight away.
                            Ok(()) => continue,
                            Err(e) if e.kind() == ErrorKind::NotFound => continue,
                            // The stale lock cannot be cleared, so waiting is futile.
                            // Fall back to the same unlocked, best-effort path used
                            // for other I/O failures.
                            Err(_) => return FolderLock { path, held: false },
                        }
                    }
                    std::thread::sleep(SPIN);
                }
                Err(_) => return FolderLock { path, held: false },
            }
        }
    }
}

impl Drop for FolderLock {
    /// Removes the lock file if this guard created it; removal errors are ignored.
    fn drop(&mut self) {
        if self.held {
            let _ = fs::remove_file(&self.path);
        }
    }
}
1818
1819fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1825 if a == b {
1826 return vec![FolderLock::acquire(&store.root.join(a))];
1827 }
1828 let (first, second) = if a < b { (a, b) } else { (b, a) };
1829 vec![
1830 FolderLock::acquire(&store.root.join(first)),
1831 FolderLock::acquire(&store.root.join(second)),
1832 ]
1833}
1834
/// A temporary file that is either renamed into place with `persist` or deleted when
/// the value is dropped.
struct AtomicTemp {
    /// Open handle to the temp file; taken (and thereby closed) by `persist`.
    file: Option<fs::File>,
    /// Where the temp file lives on disk.
    path: PathBuf,
    /// Set once the rename succeeded, which disables the cleanup in `Drop`.
    persisted: bool,
}

impl AtomicTemp {
    /// Writes all of `bytes` to the temp file.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.write_all(bytes)
    }

    /// Flushes the temp file handle.
    fn flush(&mut self) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.flush()
    }

    /// Syncs (best effort) and closes the temp file, then renames it to `dest`.
    ///
    /// If the rename fails the error is returned and the temp file is removed when
    /// `self` is dropped.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            // A failed sync is deliberately ignored; the handle closes at the end of
            // this block, before the rename.
            let _ = handle.sync_all();
        }
        let renamed = fs::rename(&self.path, dest);
        self.persisted = renamed.is_ok();
        renamed
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temp file unless it was persisted; removal errors are ignored.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1872
1873fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1874 use std::time::{SystemTime, UNIX_EPOCH};
1875 let nanos = SystemTime::now()
1876 .duration_since(UNIX_EPOCH)
1877 .map(|d| d.as_nanos())
1878 .unwrap_or(0);
1879 let pid = std::process::id();
1880 let counter = next_temp_counter();
1883 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1884 let path = dir.join(name);
1885 let file = fs::OpenOptions::new()
1886 .write(true)
1887 .create_new(true)
1888 .open(&path)?;
1889 Ok(AtomicTemp {
1890 file: Some(file),
1891 path,
1892 persisted: false,
1893 })
1894}
1895
/// Returns the next value of a process-wide counter that starts at 0 and grows by one
/// per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};
    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
1901
1902#[cfg(test)]
1903mod tests {
1904 use super::*;
1905 use std::collections::BTreeSet;
1906 use std::fs;
1907 use tempfile::TempDir;
1908
1909 fn mk_store() -> (TempDir, Store) {
1914 let dir = TempDir::new().unwrap();
1915 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1916 let store = Store {
1917 root: dir.path().to_path_buf(),
1918 config: crate::parser::Config::default(),
1919 };
1920 (dir, store)
1921 }
1922
1923 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1926 let abs = store.root.join(rel);
1927 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1928 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1929 }
1930
1931 fn write_doc(
1933 store: &Store,
1934 rel: &str,
1935 type_: &str,
1936 summary: Option<&str>,
1937 updated: Option<&str>,
1938 extra_yaml: &str,
1939 ) {
1940 let mut fm = format!("type: {type_}\n");
1941 if let Some(s) = summary {
1942 fm.push_str(&format!("summary: {s}\n"));
1943 }
1944 if let Some(u) = updated {
1945 fm.push_str(&format!("updated: {u}\n"));
1946 }
1947 fm.push_str(extra_yaml);
1948 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1949 }
1950
1951 fn read(store: &Store, rel: &str) -> String {
1952 fs::read_to_string(store.root.join(rel)).unwrap()
1953 }
1954
1955 fn exists(store: &Store, rel: &str) -> bool {
1956 store.root.join(rel).exists()
1957 }
1958
1959 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1962 let mut out = BTreeMap::new();
1963 for entry in walkdir::WalkDir::new(&store.root)
1964 .into_iter()
1965 .filter_map(|e| e.ok())
1966 {
1967 let p = entry.path();
1968 if is_index_artifact(p) {
1969 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1970 out.insert(rel, fs::read_to_string(p).unwrap());
1971 }
1972 }
1973 out
1974 }
1975
    /// A type-folder index gathers files from every shard directory beneath the folder
    /// and orders them by `updated`, newest first.
    #[test]
    fn type_folder_aggregates_across_shards_in_recency_order() {
        let (_d, store) = mk_store();
        // Three mails across two month shards, written out of recency order.
        write_doc(
            &store,
            "sources/emails/2026/05/b-old.md",
            "email",
            Some("Older mail"),
            Some("2026-05-01T09:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/06/c-new.md",
            "email",
            Some("Newest mail"),
            Some("2026-06-15T12:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/05/a-mid.md",
            "email",
            Some("Middle mail"),
            Some("2026-05-20T08:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(
            paths,
            vec![
                "sources/emails/2026/06/c-new.md",
                "sources/emails/2026/05/a-mid.md",
                "sources/emails/2026/05/b-old.md",
            ],
            "records must aggregate across shards, newest `updated` first"
        );
    }
2020
    /// Pins the Markdown rendering of a type-folder index: frontmatter and heading,
    /// per-entry lines with and without tags, and no footer when under the cap.
    #[test]
    fn type_folder_md_format_entries_tags_and_derived_updated() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/sarah-chen.md",
            "contact",
            Some("Renewal champion at Acme"),
            Some("2026-05-27T10:00:00Z"),
            "tags:\n - renewal\n - acme\n",
        );
        write_doc(
            &store,
            "records/contacts/no-tags.md",
            "contact",
            Some("Plain contact"),
            Some("2026-05-26T10:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
        let md = idx.to_markdown();

        // The index's own `updated` equals the newest member's timestamp (05-27).
        assert!(md.starts_with(
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
        ), "frontmatter/heading wrong:\n{md}");

        // Tagged entries append ` · #tag #tag` after the summary.
        assert!(
            md.contains(
                "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
            ),
            "tagged entry wrong:\n{md}"
        );
        // Untagged entries end right after the summary.
        assert!(
            md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
            "untagged entry wrong:\n{md}"
        );
        assert!(
            !md.contains("Plain contact ·"),
            "untagged entry must not emit a tag separator"
        );
        assert!(!md.contains("## More"), "no footer expected under the cap");
    }
2069
    /// A document without a `summary` gets the `MISSING_SUMMARY` placeholder in both the
    /// record and the rendered Markdown.
    #[test]
    fn missing_summary_becomes_placeholder_not_invented() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/notes/x.md",
            "note",
            None,
            Some("2026-05-27T10:00:00Z"),
            "",
        );
        let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
        assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
        let md = idx.to_markdown();
        assert!(
            md.contains("- [[records/notes/x]] — (no summary)\n"),
            "missing summary must render the placeholder, not invent text:\n{md}"
        );
    }
2089
    /// The JSONL rendering has one object per file, round-trips through `IndexRecord`,
    /// keeps reserved keys out of `fields`, and emits keys in a stable order.
    #[test]
    fn jsonl_is_complete_structured_and_round_trips() {
        let (_d, store) = mk_store();
        // e1 carries every reserved key plus several extra frontmatter fields.
        write_doc(
            &store,
            "records/expenses/2026/05/e1.md",
            "expense",
            Some("Lunch with vendor"),
            Some("2026-05-10T10:00:00Z"),
            "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[records/concepts/spend]]\ntags:\n - food\nlinks:\n - records/concepts/spend\n - [[records/concepts/renewal]]\n",
        );
        write_doc(
            &store,
            "records/expenses/2026/06/e2.md",
            "expense",
            Some("Cloud bill"),
            Some("2026-06-01T10:00:00Z"),
            "amount: 100\n",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
        let jsonl = idx.to_jsonl();
        let lines: Vec<&str> = jsonl.lines().collect();
        assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");

        // Line 0 is the newer file (e2) and must deserialize back to the same record.
        let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
        assert_eq!(
            r0, idx.records[0],
            "jsonl line must round-trip to the record"
        );

        // Line 1 is e1: check the typed fields, then the free-form extras.
        let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
        assert_eq!(r1.type_, "expense");
        assert_eq!(r1.summary, "Lunch with vendor");
        assert_eq!(r1.tags, vec!["food".to_string()]);
        assert_eq!(
            r1.links,
            vec![
                "records/concepts/spend".to_string(),
                "[[records/concepts/renewal]]".to_string()
            ]
        );
        assert_eq!(
            r1.created,
            Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
        );
        assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
        assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
        assert_eq!(
            r1.fields.get("company"),
            Some(&Value::from("[[records/companies/acme]]"))
        );
        assert_eq!(
            r1.fields.get("related"),
            Some(&serde_json::json!(["[[records/concepts/spend]]"]))
        );
        for reserved in [
            "path", "type", "summary", "tags", "links", "created", "updated",
        ] {
            assert!(
                !r1.fields.contains_key(reserved),
                "reserved key {reserved} must not appear in fields"
            );
        }

        // Reserved keys come first, in struct declaration order.
        assert!(
            lines[1].starts_with(
                r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["records/concepts/spend","[[records/concepts/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
            ),
            "jsonl key order not stable:\n{}",
            lines[1]
        );
        // Extras follow in sorted key order.
        // NOTE(review): `meta-type` is not in the frontmatter written above; presumably
        // the record builder adds it — confirm against `record_from_file`.
        assert!(
            lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[records/concepts/spend]]"],"status":"paid"}"#),
            "extras must be sorted:\n{}",
            lines[1]
        );
    }
2180
    /// With more files than `MD_CAP`, the Markdown view lists exactly `MD_CAP` entries
    /// plus a "More" footer, while the records and JSONL keep every file.
    #[test]
    fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
        let (_d, store) = mk_store();
        let total = MD_CAP + 7;
        for i in 0..total {
            // Spread timestamps over days 1..=27 and seconds 0..=59 so they are valid.
            let day = 1 + (i % 27);
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
            write_doc(
                &store,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
        }
        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

        let md = idx.to_markdown();
        let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
        assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");

        assert!(
            md.contains("## More\n\n"),
            "over-cap md needs a More footer"
        );
        assert!(
            md.contains(&format!(
                "This folder has {total} files. The 500 most recent are listed above.\n"
            )),
            "footer count wrong:\n{md}"
        );
        // The footer's query hint names the folder's type and layer.
        assert!(
            md.contains(
                "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
            ),
            "footer must infer type=email layer=sources:\n{md}"
        );

        let jsonl = idx.to_jsonl();
        assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
    }
2228
    /// `sort_records` orders by `updated` descending, breaks equal timestamps by path
    /// ascending, and places records without `updated` at the end.
    #[test]
    fn sort_breaks_ties_by_path_and_puts_undated_last() {
        let mut recs = vec![
            rec("z/a.md", Some("2026-05-01T00:00:00Z")),
            // Same timestamp as `z/a.md`: path decides, so this one sorts before it.
            rec("a/b.md", Some("2026-05-01T00:00:00Z")),
            // No timestamp: expected last.
            rec("m/c.md", None),
            // Newest timestamp: expected first.
            rec("b/d.md", Some("2026-06-01T00:00:00Z")),
        ];
        sort_records(&mut recs);
        let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
    }
2243
2244 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2245 IndexRecord {
2246 path: PathBuf::from(path),
2247 type_: "t".into(),
2248 summary: "s".into(),
2249 tags: vec![],
2250 links: vec![],
2251 created: None,
2252 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2253 fields: BTreeMap::new(),
2254 }
2255 }
2256
    /// The layer-level `index.md` lists each type folder alphabetically with its file
    /// count, quotes no member summaries, and takes `updated` from the newest child.
    #[test]
    fn layer_index_lists_type_folders_with_counts() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A older"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/b.md",
            "contact",
            Some("Contact B newest"),
            Some("2026-05-09T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        // Write both type-folder indexes first, then the layer index on top of them.
        Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
        Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();

        Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
        let md = read(&store, "records/index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
            "layer fm:\n{md}"
        );
        let companies_at = md.find("companies/index").unwrap();
        let contacts_at = md.find("contacts/index").unwrap();
        assert!(
            companies_at < contacts_at,
            "type folders must be alphabetical"
        );
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (2)\n"),
            "contacts entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/companies/index|Companies]] (1)\n"),
            "companies entry:\n{md}"
        );
        assert!(
            !md.contains("Contact B newest") && !md.contains("Acme Inc"),
            "layer rollup must not quote a member summary:\n{md}"
        );
        // 2026-05-09 is the newest `updated` among all three documents.
        assert!(
            md.contains("updated: 2026-05-09T00:00:00Z\n"),
            "layer updated must be max child:\n{md}"
        );
    }
2325
    /// Folder metadata from the store config drives rollup labels: an authored
    /// `display` overrides the derived label and an authored `description` is appended,
    /// in both the layer index and the root index.
    #[test]
    fn folders_section_supplies_authored_display_and_description() {
        let (_d, mut store) = mk_store();
        // contacts: description only, so the display label falls back to the default.
        store.config.folders.insert(
            "records/contacts".into(),
            crate::parser::FolderMeta {
                display: None,
                description: Some("people across customer + prospect accounts".into()),
            },
        );
        // hubspot-exports: both display override and description.
        store.config.folders.insert(
            "sources/hubspot-exports".into(),
            crate::parser::FolderMeta {
                display: Some("HubSpot exports".into()),
                description: Some("deal + pipeline exports".into()),
            },
        );
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        // companies has no folder metadata at all.
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/hubspot-exports/d.md",
            "hubspot-export",
            Some("a single deal export"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );

        Index::rebuild_all(&store).unwrap();

        let records_layer = read(&store, "records/index.md");
        assert!(
            records_layer.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
            "authored description must surface:\n{records_layer}"
        );
        assert!(
            records_layer.contains("- [[records/companies/index|Companies]] (1)\n")
                && !records_layer.contains("Acme Inc"),
            "un-described folder is counts-only:\n{records_layer}"
        );

        let sources_layer = read(&store, "sources/index.md");
        assert!(
            sources_layer.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
            "display override + description must surface:\n{sources_layer}"
        );

        let root = read(&store, "index.md");
        assert!(
            root.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
            "root surfaces authored description:\n{root}"
        );
        assert!(
            root.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
            "root surfaces display override:\n{root}"
        );
    }
2405
    /// `default_display` replaces `-` / `_` with spaces and upper-cases only the first
    /// character.
    #[test]
    fn default_display_turns_separators_to_spaces_and_caps() {
        assert_eq!(default_display("contacts"), "Contacts");
        assert_eq!(default_display("hubspot-exports"), "Hubspot exports");
        assert_eq!(default_display("usage_exports"), "Usage exports");
    }
2412
    /// The root `index.md` has one section per non-empty layer (Sources before Records)
    /// with the layer total in the heading and a counts-only entry per type folder.
    #[test]
    fn root_index_groups_layers_with_totals_and_per_type_counts() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "sources/emails/2026/05/a.md",
            "email",
            Some("Mail"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/docs/d.md",
            "doc",
            Some("Doc"),
            Some("2026-05-02T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/c.md",
            "contact",
            Some("C"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );
        Index::rebuild_all(&store).unwrap();
        let md = read(&store, "index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: root\n"),
            "root fm:\n{md}"
        );
        assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
        // Two source files and one record file were written above.
        let sources_h = md
            .find("## Sources (2)")
            .expect("sources heading w/ total 2");
        let records_h = md
            .find("## Records (1)")
            .expect("records heading w/ total 1");
        assert!(sources_h < records_h, "Sources must precede Records");
        assert!(!md.contains("## Wiki"), "empty layer gets no section");
        assert!(
            md.contains("- [[sources/docs/index|Docs]] (1)\n"),
            "root docs entry:\n{md}"
        );
        assert!(
            md.contains("- [[sources/emails/index|Emails]] (1)\n"),
            "root emails entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root contacts entry:\n{md}"
        );
        // No folder metadata is configured, so no ` — description` suffix may appear.
        assert!(!md.contains("— "), "root entries carry no preview text");
    }
2474
    /// Indexing incrementally with `on_write` after each document must leave exactly the
    /// same index artifacts, byte for byte, as a single `rebuild_all` over the same
    /// documents.
    #[test]
    fn on_write_matches_rebuild_byte_for_byte() {
        // `wt` is indexed write-through; `rb` is indexed by one full rebuild.
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        // (path, type, summary, updated, extra frontmatter)
        let docs: &[(&str, &str, &str, &str, &str)] = &[
            (
                "sources/emails/2026/05/e1.md",
                "email",
                "First mail",
                "2026-05-01T10:00:00Z",
                "tags:\n - inbox\n",
            ),
            (
                "sources/emails/2026/06/e2.md",
                "email",
                "Second mail",
                "2026-06-01T10:00:00Z",
                "",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T10:00:00Z",
                "links:\n - records/profiles/sarah\n",
            ),
            (
                "records/contacts/elena.md",
                "contact",
                "Elena",
                "2026-05-20T10:00:00Z",
                "status: active\n",
            ),
            (
                "records/profiles/sarah.md",
                "profile",
                "Sarah bio",
                "2026-05-21T10:00:00Z",
                "",
            ),
        ];

        for (rel, t, sum, upd, extra) in docs {
            write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
            write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
            Index::on_write(&wt, Path::new(rel)).unwrap();
        }
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "same set of index artifacts must exist"
        );
        for (k, v) in &a {
            assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
        }
        // Spot-check that artifacts exist at root, type-folder jsonl and type-folder md.
        assert!(a.contains_key("index.md"));
        assert!(a.contains_key("sources/emails/index.jsonl"));
        assert!(a.contains_key("records/contacts/index.md"));
    }
2544
    /// `on_write` for one folder must build the layer/root rollups from existing index
    /// sidecars only: a sibling folder that has files but no `index.jsonl` yet must not
    /// be counted. Once the sibling is indexed too, the result must equal a rebuild.
    #[test]
    fn loop_op_does_not_walk_sibling_content_tree() {
        let (_d, store) = mk_store();

        // Sibling folder with two files on disk that is never indexed in phase one.
        write_doc(
            &store,
            "records/companies/acme.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/globex.md",
            "company",
            Some("Globex"),
            Some("2026-05-06T00:00:00Z"),
            "",
        );
        assert!(
            !exists(&store, "records/companies/index.jsonl"),
            "precondition: companies must be un-indexed"
        );

        // The only indexed write goes to records/contacts.
        write_doc(
            &store,
            "records/contacts/sarah.md",
            "contact",
            Some("Sarah"),
            Some("2026-05-15T00:00:00Z"),
            "",
        );
        Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();

        let layer_md = read(&store, "records/index.md");
        let root_md = read(&store, "index.md");
        assert!(
            layer_md.contains("- [[records/contacts/index|Contacts]] (1)\n")
                && !layer_md.contains("Sarah"),
            "layer must reflect the written folder, counts only:\n{layer_md}"
        );
        assert!(
            root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root must reflect the written folder:\n{root_md}"
        );

        // The un-indexed sibling must be absent from both rollups.
        assert!(
            !layer_md.contains("companies"),
            "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
        );
        assert!(
            !root_md.contains("companies"),
            "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
        );
        assert!(
            root_md.contains("## Records (1)"),
            "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
        );

        // Phase two: index the sibling via `on_write` and compare with a store holding
        // the same three documents that is indexed by `rebuild_all`.
        let (_d2, rb) = mk_store();
        for (rel, t, s, u) in [
            (
                "records/companies/acme.md",
                "company",
                "Acme Inc",
                "2026-05-05T00:00:00Z",
            ),
            (
                "records/companies/globex.md",
                "company",
                "Globex",
                "2026-05-06T00:00:00Z",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T00:00:00Z",
            ),
        ] {
            write_doc(&rb, rel, t, Some(s), Some(u), "");
        }
        Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
        Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
        Index::rebuild_all(&rb).unwrap();
        let a = snapshot_artifacts(&store);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<BTreeSet<_>>(),
            b.keys().collect::<BTreeSet<_>>(),
            "same artifact set after indexing both folders"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "after indexing the sibling too, loop result must equal rebuild for {k}"
            );
        }
        assert!(
            read(&store, "index.md").contains("## Records (3)"),
            "now that both folders are indexed, the root total is 3"
        );
    }
2679
    /// A path computed by `Store::shard_path_for` for the `profile` type must be
    /// accepted by the index: `type_folder_of` resolves it, `on_write` succeeds, and the
    /// write-through artifacts equal those of a full rebuild.
    #[test]
    fn custom_type_at_shard_path_for_is_indexable_end_to_end() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        let rel = wt
            .shard_path_for(
                "profile",
                &crate::parser::Frontmatter::default(),
                "renewal-theme",
            )
            .unwrap();
        let rel_str = path_to_unix(&rel);
        assert!(
            type_folder_of(&rel).is_some(),
            "shard_path_for produced a path the index cannot file: {rel_str}"
        );

        // Same document in both stores; `wt` is indexed incrementally, `rb` rebuilt.
        write_doc(
            &wt,
            &rel_str,
            "profile",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );
        write_doc(
            &rb,
            &rel_str,
            "profile",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );

        Index::on_write(&wt, &rel)
            .expect("on_write must succeed for a toolkit-computed custom-type path");
        Index::rebuild_all(&rb).unwrap();

        // The assertions below expect the type folder to be `records/profile`.
        let page_link = wiki_target(&rel);
        let tf_md = read(&rb, "records/profile/index.md");
        assert!(
            tf_md.contains(&format!("[[{page_link}]]")),
            "type-folder index must list the page link, got:\n{tf_md}"
        );
        assert!(
            exists(&rb, "records/profile/index.jsonl"),
            "type-folder jsonl must exist"
        );
        assert!(
            read(&rb, "records/profile/index.jsonl").contains(&rel_str),
            "type-folder jsonl must contain the page row"
        );
        let layer_md = read(&rb, "records/index.md");
        assert!(
            layer_md.contains("records/profile/index"),
            "layer index must roll up the records/profile type-folder, got:\n{layer_md}"
        );

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "loop and sweep must produce the same artifact set"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "custom-type artifact {k} differs between on_write and rebuild"
            );
        }
    }
2778
    /// Removing the newest file from an over-cap folder via `on_remove` must match a
    /// rebuild, keep the Markdown view at `MD_CAP` entries by pulling in the next most
    /// recent file, and drop exactly one row from the JSONL.
    #[test]
    fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();
        let total = MD_CAP + 3;
        let mut all_rels = Vec::new();
        for i in 0..total {
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            // Minutes/seconds derived from `i`, so `updated` strictly increases with `i`.
            let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
            write_doc(
                &wt,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            write_doc(
                &rb,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            all_rels.push(rel);
        }
        Index::rebuild_all(&wt).unwrap();
        // The last file written has the latest timestamp.
        let newest = &all_rels[total - 1];
        fs::remove_file(wt.root.join(newest)).unwrap();
        Index::on_remove(&wt, Path::new(newest)).unwrap();

        fs::remove_file(rb.root.join(newest)).unwrap();
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        for (k, v) in &a {
            assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
        }

        let md = read(&wt, "sources/emails/index.md");
        assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
        assert!(
            !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
            "removed file must not be listed in md"
        );
        // Before the removal indices 3..=total-1 filled the cap, so index 2 was the
        // 501st most recent and must now be listed.
        let pulled_in = &all_rels[2];
        assert!(
            md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
            "the 501st-most-recent must be pulled into the browse view after a removal"
        );
        assert!(
            md.contains(&format!("This folder has {} files.", total - 1)),
            "footer count must decrement:\n{}",
            md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
        );
        let jsonl = read(&wt, "sources/emails/index.jsonl");
        assert_eq!(
            jsonl.lines().count(),
            total - 1,
            "jsonl loses exactly the removed file"
        );
        assert!(
            !jsonl.contains(&path_to_unix(Path::new(newest))),
            "removed file must be gone from the jsonl too"
        );
    }
2856
/// Moving a file from one type folder to another through `Index::on_rename`
/// must yield the same artifacts as renaming on disk and running
/// `Index::rebuild_all`.
#[test]
fn on_rename_cross_folder_matches_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // (relative path, type, summary, updated)
    let seed = [
        ("records/contacts/a.md", "contact", "A", "2026-05-01T00:00:00Z"),
        ("records/contacts/b.md", "contact", "B", "2026-05-02T00:00:00Z"),
        ("records/companies/x.md", "company", "X", "2026-05-03T00:00:00Z"),
    ];
    for (rel, kind, summary, stamp) in seed {
        write_doc(&wt, rel, kind, Some(summary), Some(stamp), "");
        write_doc(&rb, rel, kind, Some(summary), Some(stamp), "");
    }
    Index::rebuild_all(&wt).unwrap();

    let (old, new) = ("records/contacts/b.md", "records/companies/b.md");
    let target_dir = "records/companies";

    // Write-through store: rename on disk, then notify the index.
    fs::create_dir_all(wt.root.join(target_dir)).unwrap();
    fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
    Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();

    // Reference store: same rename, then a full sweep.
    fs::create_dir_all(rb.root.join(target_dir)).unwrap();
    fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let incremental = snapshot_artifacts(&wt);
    let rebuilt = snapshot_artifacts(&rb);
    let incremental_names: Vec<_> = incremental.keys().collect();
    let rebuilt_names: Vec<_> = rebuilt.keys().collect();
    assert_eq!(incremental_names, rebuilt_names);
    for (name, content) in &incremental {
        assert_eq!(
            content, &rebuilt[name],
            "rename: artifact {name} drifted from rebuild"
        );
    }

    // The source folder forgets the file; the destination folder lists it.
    let source_md = read(&wt, "records/contacts/index.md");
    assert!(!source_md.contains("records/contacts/b]]"));
    let dest_md = read(&wt, "records/companies/index.md");
    assert!(dest_md.contains("[[records/companies/b]]"));
}
2915
/// Writing the same path twice through `Index::on_write` must replace the
/// existing catalog row rather than append a second one, and the rendered
/// index must reflect the newer summary and timestamp.
#[test]
fn on_write_updates_existing_entry_in_place() {
    let (_d, store) = mk_store();
    let member = "records/contacts/a.md";

    // Two successive revisions of the same file, each followed by on_write.
    let revisions = [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ];
    for (summary, stamp) in revisions {
        write_doc(&store, member, "contact", Some(summary), Some(stamp), "");
        Index::on_write(&store, Path::new(member)).unwrap();
    }

    let catalog = read(&store, "records/contacts/index.jsonl");
    let row_count = catalog.lines().count();
    assert_eq!(row_count, 1, "upsert must not duplicate the line");
    assert!(catalog.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !catalog.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let rendered = read(&store, "records/contacts/index.md");
    assert!(rendered.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        rendered.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
2957
/// `Index::render_dry_run` must return both artifacts, each introduced by a
/// `--- <path> ---` separator line, without creating either file on disk.
#[test]
fn dry_run_emits_separators_and_writes_nothing() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder(PathBuf::from("sources/emails"));
    let out = Index::render_dry_run(&store, &level).unwrap();

    let has_md_separator = out.contains("--- sources/emails/index.md ---\n");
    assert!(has_md_separator, "md separator:\n{out}");
    let has_jsonl_separator = out.contains("--- sources/emails/index.jsonl ---\n");
    assert!(has_jsonl_separator, "jsonl separator:\n{out}");
    let has_entry = out.contains("- [[sources/emails/2026/05/a]] — Mail");
    assert!(has_entry, "md body present");

    // Nothing may have been persisted.
    for artifact in ["sources/emails/index.md", "sources/emails/index.jsonl"] {
        assert!(!exists(&store, artifact), "dry-run must not write");
    }
}
2995
/// `Index::cleanup` must delete index sidecars sitting in a date-shard
/// sub-folder and in a folder with no content files, while leaving the
/// actual content file alone.
#[test]
fn cleanup_removes_noncanonical_and_empty_indexes() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // Plant stale sidecars next to the content file, inside the shard.
    let stale_shard_md = store.root.join("sources/emails/2026/05/index.md");
    let stale_shard_jsonl = store.root.join("sources/emails/2026/05/index.jsonl");
    fs::write(stale_shard_md, "stale\n").unwrap();
    fs::write(stale_shard_jsonl, "stale\n").unwrap();

    // Plant a stale index in a folder that holds nothing else.
    let empty_dir = store.root.join("records/empty");
    fs::create_dir_all(empty_dir).unwrap();
    fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();

    Index::cleanup(&store).unwrap();

    let shard_md_left = exists(&store, "sources/emails/2026/05/index.md");
    assert!(!shard_md_left, "shard index must be deleted");
    let shard_jsonl_left = exists(&store, "sources/emails/2026/05/index.jsonl");
    assert!(!shard_jsonl_left, "shard jsonl must be deleted");
    let empty_md_left = exists(&store, "records/empty/index.md");
    assert!(!empty_md_left, "empty-folder index must be deleted");
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
3039
/// After the only content file is deleted, a second `Index::rebuild_all`
/// must remove the type-folder, layer and root indexes it created earlier.
#[test]
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    let (_d, store) = mk_store();
    let only_member = "records/contacts/a.md";
    write_doc(
        &store,
        only_member,
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // First sweep: every level gets an index.
    Index::rebuild_all(&store).unwrap();
    for present in ["records/contacts/index.md", "records/index.md", "index.md"] {
        assert!(exists(&store, present));
    }

    // Empty the store and sweep again: every level's index must vanish.
    fs::remove_file(store.root.join(only_member)).unwrap();
    Index::rebuild_all(&store).unwrap();

    let type_folder_left = exists(&store, "records/contacts/index.md");
    assert!(!type_folder_left, "emptied type-folder index gone");
    let layer_left = exists(&store, "records/index.md");
    assert!(!layer_left, "now-empty layer index gone");
    let root_left = exists(&store, "index.md");
    assert!(!root_left, "now-empty root index gone");
}
3069
/// Property-style check: a pseudo-random mix of writes, removals and renames
/// applied incrementally (`on_write` / `on_remove` / `on_rename`) to one store
/// must leave exactly the same artifacts as applying the same file-system
/// changes to a second store and running a single `Index::rebuild_all`.
///
/// The sequence is driven by a fixed-seed generator, so the run is
/// deterministic; the order of `next()` calls therefore defines the scenario
/// and must not be changed casually.
#[test]
fn property_writethrough_equals_rebuild_under_mixed_ops() {
    // `wt` receives write-through index updates; `rb` only mirrors the files.
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // Fixed-seed wrapping multiply-add generator; the upper bits of the state
    // are returned as the sample.
    let mut seed: u64 = 0x9E3779B97F4A7C15;
    let mut next = || {
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        (seed >> 33) as u32
    };

    // `folders[i]` is paired with `types[i]` when a file is written.
    let folders = ["sources/emails", "records/contacts", "records/profiles"];
    let types = ["email", "contact", "profile"];
    // Relative paths currently present on disk in both stores.
    let mut live: Vec<String> = Vec::new();
    for step in 0..120u32 {
        let r = next();
        let op = r % 10;
        if op < 6 || live.is_empty() {
            // Write (op 0..=5, or forced while nothing is live): create or
            // overwrite a file whose id is in 0..40.
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = next() % 40;
            let rel = if folder == "sources/emails" {
                // Emails live under a year/month shard; the month is picked
                // from the id, so a given id always maps to the same path.
                let month = 5 + (id % 2);
                format!("{folder}/2026/{month:02}/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            let updated = format!(
                "2026-05-{:02}T{:02}:{:02}:00Z",
                1 + (step % 27),
                step % 24,
                id % 60
            );
            // Every third id also carries a `tags` list in its frontmatter.
            let extra = if id % 3 == 0 {
                "tags:\n - x\n - y\n"
            } else {
                ""
            };
            write_doc(
                &wt,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            write_doc(
                &rb,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            Index::on_write(&wt, Path::new(&rel)).unwrap();
            if !live.contains(&rel) {
                live.push(rel);
            }
        } else if op < 8 {
            // Remove (op 6..=7): delete a random live file from both stores.
            let idx = (next() as usize) % live.len();
            let rel = live.remove(idx);
            fs::remove_file(wt.root.join(&rel)).unwrap();
            // NOTE(review): the mirror removal ignores errors while the
            // write-through removal unwraps — presumably deliberate; confirm.
            fs::remove_file(rb.root.join(&rel)).ok();
            Index::on_remove(&wt, Path::new(&rel)).unwrap();
        } else {
            // Rename (op 8..=9): move a random live file to a path whose id is
            // in 50..90, a range the write branch never produces.
            let idx = (next() as usize) % live.len();
            let old = live[idx].clone();
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = 50 + (next() % 40);
            let new = if folder == "sources/emails" {
                format!("{folder}/2026/05/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            // Skip no-op renames and renames onto an existing live path.
            if new == old || live.contains(&new) {
                continue;
            }
            fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
            fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
            fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
            fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
            Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
            live[idx] = new;
        }
    }

    // Only now index the mirror store, in one sweep, and compare.
    Index::rebuild_all(&rb).unwrap();
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild must produce the same set of artifacts"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
    // Guard against a vacuous pass where neither store produced anything.
    assert!(
        !a.is_empty(),
        "the run must have produced at least one artifact"
    );
}
3188
/// A content record that happens to be named `index.md` inside a shard folder
/// must survive both `Index::cleanup` and `Index::rebuild_all` with its
/// content untouched.
#[test]
fn cleanup_preserves_user_content_named_index_md_in_shard() {
    let (_d, store) = mk_store();
    let user_file = "sources/emails/2026/06/index.md";
    write_doc(
        &store,
        user_file,
        "email",
        Some("Important imported mail"),
        Some("2026-06-11T04:23:25Z"),
        "",
    );

    Index::cleanup(&store).unwrap();
    let survived_cleanup = exists(&store, user_file);
    assert!(
        survived_cleanup,
        "cleanup must not delete a user content file named index.md"
    );

    Index::rebuild_all(&store).unwrap();
    let survived_rebuild = exists(&store, user_file);
    assert!(
        survived_rebuild,
        "rebuild_all must not delete a user content file named index.md"
    );

    let content_intact = read(&store, user_file).contains("Important imported mail");
    assert!(content_intact, "the user's record content must be intact");
}
3223
/// `Index::cleanup` must leave the `index.md` / `index.jsonl` pair at the root
/// of a non-empty type folder in place.
#[test]
fn cleanup_keeps_canonical_type_folder_root_sidecars() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder("records/contacts".into());
    Index::write_level(&store, &level).unwrap();
    let md = "records/contacts/index.md";
    let jsonl = "records/contacts/index.jsonl";
    assert!(exists(&store, md));
    assert!(exists(&store, jsonl));

    Index::cleanup(&store).unwrap();

    let md_kept = exists(&store, md);
    assert!(
        md_kept,
        "cleanup must keep the canonical type-folder index.md (non-empty folder)"
    );
    let jsonl_kept = exists(&store, jsonl);
    assert!(
        jsonl_kept,
        "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)"
    );
}
3252
/// Calling `Index::on_write` with the path of the generated `index.md` itself
/// must not add a row for it to the catalog nor inflate the rollup count.
#[test]
fn on_write_ignores_index_artifact_no_phantom_row() {
    let (_d, store) = mk_store();
    let catalog_path = "records/contacts/index.jsonl";
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
    let rows_before = read(&store, catalog_path).lines().count();
    assert_eq!(rows_before, 1);

    // Report a write to the index artifact itself.
    Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();

    let catalog_after = read(&store, catalog_path);
    let rows_after = catalog_after.lines().count();
    assert_eq!(
        rows_after, 1,
        "on_write on index.md must not add a phantom self-row"
    );
    let has_index_row = catalog_after.contains("\"type\":\"index\"");
    assert!(
        !has_index_row,
        "the catalog artifact must never appear as a catalogued row"
    );

    let root = read(&store, "index.md");
    let count_is_one = root.contains("[[records/contacts/index|Contacts]] (1)");
    assert!(count_is_one, "count must not inflate:\n{root}");
}
3293
/// A block-scalar summary whose second line looks like an index entry must be
/// rendered on a single line, so it cannot inject a fake entry into `index.md`.
#[test]
fn multiline_summary_is_single_lined_in_index_md() {
    let (_d, store) = mk_store();
    let frontmatter = "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry";
    write_raw(&store, "records/notes/evil.md", frontmatter, "\nbody\n");

    let md = Index::build_type_folder(&store, Path::new("records/notes"))
        .unwrap()
        .to_markdown();

    // Exactly one line may look like an entry.
    let entry_lines = md
        .lines()
        .filter(|line| line.starts_with("- [["))
        .count();
    assert_eq!(
        entry_lines, 1,
        "a multi-line summary must not produce extra entry lines:\n{md}"
    );

    let flattened = "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n";
    assert!(
        md.contains(flattened),
        "summary newlines must collapse to spaces inline:\n{md}"
    );
}
3324
/// Frontmatter scalars that YAML would parse as non-strings (`summary: 2026`,
/// `type: true`) must come through `record_from_file` as their textual form.
#[test]
fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
    let (_d, store) = mk_store();

    // Numeric-looking summary.
    let numeric_rel = "records/contacts/a.md";
    write_raw(
        &store,
        numeric_rel,
        "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
        "\nbody\n",
    );
    let numeric_abs = store.root.join(numeric_rel);
    let rec = record_from_file(&numeric_abs, PathBuf::from(numeric_rel)).unwrap();
    assert_eq!(rec.summary, "2026");
    assert_eq!(rec.type_, "contact");

    let md = Index::build_type_folder(&store, Path::new("records/contacts"))
        .unwrap()
        .to_markdown();
    let lists_scalar = md.contains("- [[records/contacts/a]] — 2026\n");
    assert!(
        lists_scalar,
        "index entry must hold the coerced scalar, not the placeholder:\n{md}"
    );

    // Boolean-looking type.
    let boolean_rel = "records/contacts/b.md";
    write_raw(
        &store,
        boolean_rel,
        "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
        "\nbody\n",
    );
    let boolean_abs = store.root.join(boolean_rel);
    let rec_b = record_from_file(&boolean_abs, PathBuf::from(boolean_rel)).unwrap();
    assert_eq!(rec_b.type_, "true");
}
3373
/// A file whose body contains a byte that is not valid UTF-8 must still have
/// its frontmatter read by `record_from_file`, and must still be catalogued by
/// `Index::rebuild_all`.
#[test]
fn non_utf8_body_does_not_abort_record_projection() {
    let (_d, store) = mk_store();
    let rel = "sources/emails/2026/06/x.md";
    let abs = store.root.join(rel);
    fs::create_dir_all(abs.parent().unwrap()).unwrap();

    // Valid frontmatter followed by a body with a lone 0xE9 byte.
    let raw: &[u8] =
        b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf\xE9 meeting notes\n";
    fs::write(&abs, raw).unwrap();

    let rec = record_from_file(&abs, PathBuf::from(rel))
        .expect("non-UTF-8 body must not abort the frontmatter read");
    assert_eq!(rec.summary, "An imported email");
    assert_eq!(rec.type_, "email");

    Index::rebuild_all(&store).unwrap();
    let catalog_path = "sources/emails/index.jsonl";
    assert!(
        exists(&store, catalog_path),
        "rebuild must produce the catalog despite a non-UTF-8 body byte"
    );
    let catalogued = read(&store, catalog_path).contains("An imported email");
    assert!(catalogued, "the record must be catalogued");
}
3411
/// When a file with malformed frontmatter appears, `Index::rebuild_all` must
/// return an error and leave the catalogs written by the previous successful
/// rebuild on disk.
#[test]
fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
    let (_d, store) = mk_store();
    let well_formed = [
        ("records/contacts/alice.md", "contact", "Alice", "2026-05-01T00:00:00Z"),
        ("records/companies/acme.md", "company", "Acme", "2026-05-02T00:00:00Z"),
    ];
    for (rel, kind, summary, stamp) in well_formed {
        write_doc(&store, rel, kind, Some(summary), Some(stamp), "");
    }

    let contacts_catalog = "records/contacts/index.jsonl";
    let companies_catalog = "records/companies/index.jsonl";

    Index::rebuild_all(&store).expect("clean rebuild succeeds");
    assert!(exists(&store, contacts_catalog));
    assert!(exists(&store, companies_catalog));

    // Drop in a file whose quoted summary is never closed.
    let bad = store.root.join("records/contacts/broken.md");
    let broken_doc = "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n";
    fs::write(&bad, broken_doc).unwrap();

    Index::rebuild_all(&store)
        .expect_err("rebuild must abort, not silently skip, on a malformed file");

    let sibling_kept = exists(&store, companies_catalog);
    assert!(
        sibling_kept,
        "an aborted rebuild must not destroy a clean sibling folder's catalog"
    );
    let affected_kept = exists(&store, contacts_catalog);
    assert!(
        affected_kept,
        "an aborted rebuild must not destroy the affected folder's prior catalog"
    );
    let contacts_jsonl = read(&store, contacts_catalog);
    assert!(contacts_jsonl.contains("records/contacts/alice.md"));
}
3472
/// The `(N)` counts in the layer and root rollups written by
/// `Index::rebuild_all` must equal the number of rows in the type-folder
/// jsonl, and the write-through path must produce identical artifacts for the
/// same content.
#[test]
fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
    // The same two contacts are seeded into both stores.
    let members = [
        ("records/contacts/alice.md", "Alice", "2026-05-01T00:00:00Z"),
        ("records/contacts/bob.md", "Bob", "2026-05-02T00:00:00Z"),
    ];

    let (_d, store) = mk_store();
    for (rel, summary, stamp) in members {
        write_doc(&store, rel, "contact", Some(summary), Some(stamp), "");
    }
    Index::rebuild_all(&store).expect("clean rebuild succeeds");

    let catalog = read(&store, "records/contacts/index.jsonl");
    let jsonl_lines = catalog
        .lines()
        .filter(|line| !line.trim().is_empty())
        .count();
    assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");

    let layer_md = read(&store, "records/index.md");
    let root_md = read(&store, "index.md");
    let layer_count_ok = layer_md.contains("- [[records/contacts/index|Contacts]] (2)");
    assert!(
        layer_count_ok,
        "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
    );
    let root_entry_ok = root_md.contains("- [[records/contacts/index|Contacts]] (2)\n");
    let root_total_ok = root_md.contains("## Records (2)");
    assert!(
        root_entry_ok && root_total_ok,
        "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
    );

    // Second store: same content, indexed one write at a time.
    let (_d2, wt) = mk_store();
    for (rel, summary, stamp) in members {
        write_doc(&wt, rel, "contact", Some(summary), Some(stamp), "");
    }
    for (rel, _, _) in members {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&store);
    let incremental_names = a.keys().collect::<BTreeSet<_>>();
    let rebuilt_names = b.keys().collect::<BTreeSet<_>>();
    assert_eq!(
        incremental_names, rebuilt_names,
        "write-through and rebuild_all must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "rollup bytes diverged between write-through and rebuild_all for {k} \
             (a skip-version inflates rebuild_all's (N) above the jsonl record \
             count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
}
3570
/// `path_to_unix` must keep a file name that is not valid UTF-8 instead of
/// dropping the component and collapsing the path to its parent folder.
#[cfg(unix)]
#[test]
fn non_utf8_path_component_is_kept_not_dropped() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // "caf" + a lone 0xE9 byte + ".md": not decodable as UTF-8.
    let leaf: &[u8] = b"caf\xE9.md";
    let p = Path::new("sources/emails").join(OsStr::from_bytes(leaf));

    let unix = path_to_unix(&p);
    assert_ne!(
        unix, "sources/emails",
        "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
    );
    let stays_under_folder = unix.starts_with("sources/emails/caf");
    assert!(
        stays_under_folder,
        "the lossy leaf must remain under its folder: {unix}"
    );
}
3597
/// A file placed directly in a layer folder (outside any type folder) must be
/// listed in the layer's `index.jsonl` only, while the layer's `index.md`
/// stays a rollup of type folders.
#[test]
fn loose_file_is_catalogued_in_layer_jsonl_not_type_folder() {
    let (_d, store) = mk_store();
    let in_type_folder = "records/contacts/alice.md";
    let loose = "records/loose.md";
    let stamp = "2026-06-01T08:00:00Z";
    write_doc(&store, in_type_folder, "contact", Some("Alice"), Some(stamp), "id: alice\n");
    write_doc(&store, loose, "contact", Some("Loose"), Some(stamp), "id: loose\n");
    Index::rebuild_all(&store).unwrap();

    // Layer catalog: the loose file only.
    let layer_jsonl_exists = exists(&store, "records/index.jsonl");
    assert!(
        layer_jsonl_exists,
        "layer jsonl must exist when loose files are present"
    );
    let layer_jsonl = read(&store, "records/index.jsonl");
    let lists_loose = layer_jsonl.contains(loose);
    assert!(
        lists_loose,
        "layer jsonl must list the loose file, got:\n{layer_jsonl}"
    );
    let lists_member = layer_jsonl.contains(in_type_folder);
    assert!(!lists_member, "layer jsonl must NOT list type-folder files");

    // Type-folder catalog: the member only.
    let tf_jsonl = read(&store, "records/contacts/index.jsonl");
    assert!(tf_jsonl.contains(in_type_folder));
    assert!(!tf_jsonl.contains(loose));

    // Layer markdown: a rollup with no loose-file entry.
    let layer_md = read(&store, "records/index.md");
    let rolls_up = layer_md.contains("records/contacts/index");
    assert!(
        rolls_up,
        "layer md must roll up the type-folder, got:\n{layer_md}"
    );
    let mentions_loose = layer_md.contains("records/loose");
    assert!(
        !mentions_loose,
        "layer md must stay a rollup, not list loose files, got:\n{layer_md}"
    );
}
3652
/// With one type-folder file and one loose layer file, indexing through
/// `Index::on_write` must produce the same artifacts as `Index::rebuild_all`.
#[test]
fn loose_file_write_through_equals_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // (relative path, summary, updated, extra frontmatter)
    let docs = [
        ("records/contacts/alice.md", "Alice", "2026-06-01T08:00:00Z", "id: alice\n"),
        ("records/loose.md", "Loose", "2026-06-02T08:00:00Z", "id: loose\n"),
    ];
    for target in [&wt, &rb] {
        for (rel, summary, stamp, extra) in docs {
            write_doc(target, rel, "contact", Some(summary), Some(stamp), extra);
        }
    }

    // Incremental path on one store, full sweep on the other.
    for (rel, _, _, _) in docs {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let incremental = snapshot_artifacts(&wt);
    let rebuilt = snapshot_artifacts(&rb);
    let incremental_names: Vec<_> = incremental.keys().collect();
    let rebuilt_names: Vec<_> = rebuilt.keys().collect();
    assert_eq!(
        incremental_names, rebuilt_names,
        "loose-file loop and sweep must produce the same artifact set"
    );
    for (k, content) in &incremental {
        assert_eq!(
            content, &rebuilt[k],
            "loose-file artifact {k} differs between loop and sweep"
        );
    }
}
3694
/// Once the only loose file of a layer is removed through `Index::on_remove`,
/// the layer's `index.jsonl` must no longer exist.
#[test]
fn removing_last_loose_file_clears_layer_jsonl() {
    let (_d, store) = mk_store();
    let loose = "records/loose.md";
    let layer_catalog = "records/index.jsonl";
    write_doc(
        &store,
        loose,
        "contact",
        Some("Loose"),
        Some("2026-06-01T08:00:00Z"),
        "id: loose\n",
    );

    Index::on_write(&store, Path::new(loose)).unwrap();
    let present_after_write = exists(&store, layer_catalog);
    assert!(
        present_after_write,
        "layer jsonl present after a loose write"
    );

    fs::remove_file(store.root.join(loose)).unwrap();
    Index::on_remove(&store, Path::new(loose)).unwrap();
    let present_after_remove = exists(&store, layer_catalog);
    assert!(
        !present_after_remove,
        "layer jsonl must be removed once the last loose file is gone"
    );
}
3718
/// Two threads, each calling `Index::on_write` for the files of its own type
/// folder in the same store, must leave artifacts identical to what a
/// subsequent `Index::rebuild_all` produces.
#[test]
fn concurrent_writes_to_different_type_folders_match_rebuild() {
    use std::sync::Arc;
    use std::thread;

    let (_d, store) = mk_store();
    let folders = ["records/contacts", "records/companies"];
    let n = 12usize;

    // Put every file on disk up front; only the indexing runs concurrently.
    for (fi, folder) in folders.iter().enumerate() {
        for i in 0..n {
            let rel = format!("{folder}/f{fi}_{i}.md");
            let summary = format!("Summary {fi}-{i}");
            let updated = format!("2026-06-{:02}T08:00:00Z", i + 1);
            let extra = format!("id: f{fi}_{i}\n");
            write_doc(&store, &rel, "contact", Some(&summary), Some(&updated), &extra);
        }
    }

    // One worker per folder, all sharing the same store.
    let store = Arc::new(store);
    let mut workers = Vec::with_capacity(folders.len());
    for (fi, folder) in folders.iter().enumerate() {
        let store = Arc::clone(&store);
        let folder = folder.to_string();
        workers.push(thread::spawn(move || {
            for i in 0..n {
                let rel = format!("{folder}/f{fi}_{i}.md");
                Index::on_write(&store, Path::new(&rel)).unwrap();
            }
        }));
    }
    for worker in workers {
        worker.join().unwrap();
    }

    // Snapshot the concurrent result, then rebuild in place for the reference.
    let got = snapshot_artifacts(&store);
    Index::rebuild_all(&store).unwrap();
    let want = snapshot_artifacts(&store);

    let got_names: Vec<_> = got.keys().collect();
    let want_names: Vec<_> = want.keys().collect();
    assert_eq!(
        got_names, want_names,
        "artifact set after concurrent write-through must match rebuild"
    );
    for (k, expected) in &want {
        assert_eq!(
            &got[k], expected,
            "rollup artifact {k} diverged from rebuild after concurrent writes"
        );
    }
}
3790}