1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::parser::FolderMeta;
62use crate::store::{Layer, Store};
63
/// Maximum number of records listed in a type-folder `index.md`. When a folder
/// holds more records than this, the markdown rendering appends a "More" footer.
const MD_CAP: usize = 500;

/// Summary text stored on a record whose frontmatter yields no `summary` value.
const MISSING_SUMMARY: &str = "(no summary)";

/// Top-level heading of the root markdown index.
const ROOT_TITLE: &str = "Knowledge base index";
74
/// The level of the store that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store root.
    Root,
    /// A single layer directory.
    Layer(Layer),
    /// A type folder, identified by its path relative to the store root.
    TypeFolder(PathBuf),
}
85
/// One catalog entry describing a single content file; serialized as one JSON
/// object per line in `index.jsonl`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Path of the file relative to the store root; (de)serialized through the
    /// `path_serde` module.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// The frontmatter `type` value; stored under the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// The frontmatter `summary`, or a placeholder when the file has none.
    pub summary: String,
    /// Frontmatter tags; an absent key deserializes to an empty list.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Frontmatter links; an absent key deserializes to an empty list.
    #[serde(default)]
    pub links: Vec<String>,
    /// Parsed `created` timestamp, when the frontmatter has a valid one.
    pub created: Option<DateTime<FixedOffset>>,
    /// Parsed `updated` timestamp; drives the recency ordering of records.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other frontmatter key, flattened into the same JSON object.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
119
/// An in-memory index for one level of the store.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// The level this index was built for.
    pub level: IndexLevel,
    /// Records for the files catalogued at this level (left empty by the root builder).
    pub records: Vec<IndexRecord>,
    /// File counts keyed by type-folder path; filled by the layer and root
    /// builders and left empty for type-folder indexes.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
132
133impl Index {
134 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
140 let rel = normalize_rel(type_folder);
141 let abs = store.root.join(&rel);
142 let mut records = Vec::new();
143 for file_abs in walk_type_folder_files(&abs) {
144 let rel_path =
145 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
146 records.push(record_from_file(&file_abs, rel_path)?);
158 }
159 sort_records(&mut records);
160 Ok(Index {
161 level: IndexLevel::TypeFolder(rel),
162 records,
163 child_counts: BTreeMap::new(),
164 })
165 }
166
167 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
176 let mut child_counts = BTreeMap::new();
177 for tf in type_folders_in_layer(store, layer) {
178 let abs = store.root.join(&tf);
179 let n = walk_type_folder_files(&abs).len();
180 if n > 0 {
181 child_counts.insert(tf, n);
182 }
183 }
184 let mut records = Vec::new();
185 for file_abs in loose_files_in_layer(store, layer) {
186 let rel_path =
187 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
188 records.push(record_from_file(&file_abs, rel_path)?);
193 }
194 sort_records(&mut records);
195 Ok(Index {
196 level: IndexLevel::Layer(layer),
197 records,
198 child_counts,
199 })
200 }
201
202 pub fn build_root(store: &Store) -> crate::Result<Index> {
205 let mut child_counts = BTreeMap::new();
206 for layer in Layer::all() {
207 for tf in type_folders_in_layer(store, layer) {
208 let abs = store.root.join(&tf);
209 let n = walk_type_folder_files(&abs).len();
210 if n > 0 {
211 child_counts.insert(tf, n);
212 }
213 }
214 }
215 Ok(Index {
216 level: IndexLevel::Root,
217 records: Vec::new(),
218 child_counts,
219 })
220 }
221
222 pub fn to_markdown(&self) -> String {
224 match &self.level {
225 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
226 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
227 IndexLevel::Root => self.render_root_md(),
228 }
229 }
230
231 pub fn to_jsonl(&self) -> String {
237 let mut out = String::new();
238 for rec in &self.records {
239 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
242 out.push_str(&line);
243 out.push('\n');
244 }
245 out
246 }
247
248 fn render_type_folder_md(&self, folder: &Path) -> String {
251 let folder_disp = path_to_unix(folder);
252 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
253 let mut s = String::new();
254 s.push_str("---\n");
255 s.push_str("type: index\n");
256 s.push_str("scope: type-folder\n");
257 s.push_str(&format!("folder: {folder_disp}\n"));
258 if let Some(ts) = updated {
259 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
260 }
261 s.push_str("---\n\n");
262 s.push_str(&format!("# {folder_disp}\n\n"));
263
264 let shown = self.records.len().min(MD_CAP);
265 for rec in self.records.iter().take(shown) {
266 s.push_str(&format_md_entry(rec));
267 s.push('\n');
268 }
269
270 if self.records.len() > MD_CAP {
271 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
272 let layer = folder
273 .components()
274 .next()
275 .and_then(|c| c.as_os_str().to_str())
276 .unwrap_or("");
277 s.push('\n');
278 s.push_str(&more_footer(self.records.len(), type_, layer));
279 }
280 s
281 }
282
283 fn render_layer_md(&self, layer: Layer) -> String {
288 let layer_dir = layer_dir_name(layer);
289 let mut s = String::new();
290 s.push_str("---\n");
291 s.push_str("type: index\n");
292 s.push_str("scope: layer\n");
293 s.push_str(&format!("folder: {layer_dir}\n"));
294 s.push_str("---\n\n");
295 s.push_str(&format!("# {layer_dir}\n\n"));
296 for (tf, n) in &self.child_counts {
297 let tf_unix = path_to_unix(tf);
298 let display = capitalize(folder_basename(tf));
299 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
300 }
301 s
302 }
303
304 fn render_root_md(&self) -> String {
307 let mut s = String::new();
308 s.push_str("---\n");
309 s.push_str("type: index\n");
310 s.push_str("scope: root\n");
311 s.push_str("---\n\n");
312 s.push_str(&format!("# {ROOT_TITLE}\n"));
313 for layer in Layer::all() {
314 let layer_dir = layer_dir_name(layer);
315 let prefix = format!("{layer_dir}/");
316 let children: Vec<(&PathBuf, &usize)> = self
317 .child_counts
318 .iter()
319 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
320 .collect();
321 if children.is_empty() {
322 continue;
323 }
324 let total: usize = children.iter().map(|(_, n)| **n).sum();
325 s.push('\n');
326 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
327 for (tf, n) in children {
328 let tf_unix = path_to_unix(tf);
329 let display = capitalize(folder_basename(tf));
330 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
331 }
332 }
333 s
334 }
335}
336
337impl Index {
342 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
349 let file_rel = normalize_rel(file);
350 if is_index_artifact(&file_rel) {
357 return Ok(());
358 }
359 if let Some(layer) = loose_layer_of(&file_rel) {
363 return apply_loose_change(store, layer, &file_rel, false);
364 }
365 let file_abs = store.root.join(&file_rel);
366 let folder = type_folder_of(&file_rel)
367 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
368 let record = record_from_file(&file_abs, file_rel.clone())?;
369
370 let _lock = FolderLock::acquire(&store.root.join(&folder));
373 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
374 records.retain(|r| r.path != record.path);
375 records.push(record);
376 sort_records(&mut records);
377
378 write_type_folder_artifacts(store, &folder, &records)?;
379 update_parents(store, &folder)?;
380 Ok(())
381 }
382
    /// Updates the index after a file was renamed from `old` to `new`.
    ///
    /// Renames that touch an index artifact are ignored. When either endpoint
    /// is a loose layer file the rename is handled as a removal of `old`
    /// followed by a write of `new`. Otherwise the record is moved between the
    /// `index.jsonl` files of the source and target type folders (which may be
    /// the same folder) and the parent indexes are refreshed.
    ///
    /// # Errors
    /// Fails when either path is not inside a layer/type-folder, when the new
    /// file's frontmatter cannot be read, or when an index file cannot be read
    /// or written.
    pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
        let old_rel = normalize_rel(old);
        let new_rel = normalize_rel(new);
        if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
            return Ok(());
        }
        if loose_layer_of(&old_rel).is_some() || loose_layer_of(&new_rel).is_some() {
            Self::on_remove(store, &old_rel)?;
            Self::on_write(store, &new_rel)?;
            return Ok(());
        }
        let old_folder = type_folder_of(&old_rel)
            .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
        let new_folder = type_folder_of(&new_rel)
            .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;

        // Held until the function returns.
        let _locks = lock_folders(store, &old_folder, &new_folder);

        // Drop the entry for the old path from the source folder's catalog.
        let mut old_records =
            read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
        old_records.retain(|r| r.path != old_rel);

        if old_folder == new_folder {
            // Same folder: swap the record in place and rewrite the folder once.
            let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
            old_records.retain(|r| r.path != record.path);
            old_records.push(record);
            sort_records(&mut old_records);
            write_type_folder_artifacts(store, &old_folder, &old_records)?;
            update_parents(store, &old_folder)?;
            return Ok(());
        }

        // Different folders: the source folder is rewritten first, before the
        // new file is read.
        sort_records(&mut old_records);
        write_type_folder_artifacts(store, &old_folder, &old_records)?;

        let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
        let mut new_records =
            read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
        new_records.retain(|r| r.path != record.path);
        new_records.push(record);
        sort_records(&mut new_records);
        write_type_folder_artifacts(store, &new_folder, &new_records)?;

        // Parent indexes are refreshed for both folders.
        update_parents(store, &old_folder)?;
        update_parents(store, &new_folder)?;
        Ok(())
    }
448
449 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
454 let file_rel = normalize_rel(file);
455 if is_index_artifact(&file_rel) {
458 return Ok(());
459 }
460 if let Some(layer) = loose_layer_of(&file_rel) {
462 return apply_loose_change(store, layer, &file_rel, true);
463 }
464 let folder = type_folder_of(&file_rel)
465 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
466 let _lock = FolderLock::acquire(&store.root.join(&folder));
468 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
469 let before = records.len();
470 records.retain(|r| r.path != file_rel);
471 if records.len() == before {
472 }
475 sort_records(&mut records);
476 write_type_folder_artifacts(store, &folder, &records)?;
477 update_parents(store, &folder)?;
478 Ok(())
479 }
480
    /// Rebuilds every index artifact in the store from the files on disk.
    ///
    /// Runs [`Index::cleanup`] first, then rewrites the artifacts of each
    /// non-empty type folder, each layer's `index.md` and `index.jsonl`, and
    /// finally the root `index.md`.
    ///
    /// # Errors
    /// Returns the first error raised while reading frontmatter or while
    /// writing or removing an artifact.
    pub fn rebuild_all(store: &Store) -> crate::Result<()> {
        Index::cleanup(store)?;
        for layer in Layer::all() {
            for tf in type_folders_in_layer(store, layer) {
                let idx = Index::build_type_folder(store, &tf)?;
                // Folders without content files are skipped here.
                if idx.records.is_empty() {
                    continue;
                }
                write_type_folder_artifacts(store, &tf, &idx.records)?;
            }
            let layer_idx = Index::build_layer(store, layer)?;
            let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
            // A layer with no populated type folder gets no `index.md`.
            if layer_idx.child_counts.is_empty() {
                remove_if_exists(&layer_index_md)?;
            } else {
                write_atomic(
                    &layer_index_md,
                    render_layer_md_with_store(store, &layer_idx),
                )?;
            }
            // Loose files of the layer are catalogued in the layer's `index.jsonl`.
            write_layer_jsonl(store, layer, &layer_idx.records)?;
        }
        let root_idx = Index::build_root(store)?;
        let root_index_md = store.root.join("index.md");
        if root_idx.child_counts.is_empty() {
            remove_if_exists(&root_index_md)?;
        } else {
            write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
        }
        Ok(())
    }
518
519 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
526 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
527 update_parents(store, folder)
528 }
529
530 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
532 match level {
533 IndexLevel::TypeFolder(folder) => {
534 let idx = Index::build_type_folder(store, folder)?;
535 if idx.records.is_empty() {
536 remove_if_exists(&store.root.join(folder).join("index.md"))?;
537 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
538 } else {
539 write_type_folder_artifacts(store, folder, &idx.records)?;
540 }
541 }
542 IndexLevel::Layer(layer) => {
543 let idx = Index::build_layer(store, *layer)?;
544 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
545 if idx.child_counts.is_empty() {
546 remove_if_exists(&p)?;
547 } else {
548 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
549 }
550 write_layer_jsonl(store, *layer, &idx.records)?;
551 }
552 IndexLevel::Root => {
553 let idx = Index::build_root(store)?;
554 let p = store.root.join("index.md");
555 if idx.child_counts.is_empty() {
556 remove_if_exists(&p)?;
557 } else {
558 write_atomic(&p, render_root_md_with_store(store, &idx))?;
559 }
560 }
561 }
562 Ok(())
563 }
564
565 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
568 let mut out = String::new();
569 match level {
570 IndexLevel::TypeFolder(folder) => {
571 let idx = Index::build_type_folder(store, folder)?;
572 let md_path = path_to_unix(&folder.join("index.md"));
573 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
574 out.push_str(&format!("--- {md_path} ---\n"));
575 out.push_str(&idx.to_markdown());
576 out.push_str(&format!("--- {jsonl_path} ---\n"));
577 out.push_str(&idx.to_jsonl());
578 }
579 IndexLevel::Layer(layer) => {
580 let idx = Index::build_layer(store, *layer)?;
581 let md_path = format!("{}/index.md", layer_dir_name(*layer));
582 out.push_str(&format!("--- {md_path} ---\n"));
583 out.push_str(&render_layer_md_with_store(store, &idx));
584 }
585 IndexLevel::Root => {
586 let idx = Index::build_root(store)?;
587 out.push_str("--- index.md ---\n");
588 out.push_str(&render_root_md_with_store(store, &idx));
589 }
590 }
591 Ok(out)
592 }
593
    /// Removes index artifacts that should not exist.
    ///
    /// For every type folder of every existing layer directory:
    /// - index artifacts nested below the folder's top level are removed when
    ///   `is_deletable_catalog_artifact` approves them;
    /// - when the folder has no content files, its own `index.md` (if
    ///   deletable) and `index.jsonl` are removed.
    ///
    /// # Errors
    /// Returns the first error raised while removing a file.
    pub fn cleanup(store: &Store) -> crate::Result<()> {
        for layer in Layer::all() {
            let layer_dir = store.root.join(layer_dir_name(layer));
            if !layer_dir.is_dir() {
                continue;
            }
            for tf in type_folders_in_layer(store, layer) {
                let tf_abs = store.root.join(&tf);
                // `min_depth(2)` skips the type folder itself and its direct
                // children, so only artifacts inside subdirectories are visited.
                // Entries that fail to walk are ignored.
                for entry in walkdir::WalkDir::new(&tf_abs)
                    .min_depth(2)
                    .into_iter()
                    .filter_map(|e| e.ok())
                {
                    let p = entry.path();
                    if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
                        remove_if_exists(p)?;
                    }
                }
                if walk_type_folder_files(&tf_abs).is_empty() {
                    // `index.md` is removed only when deletable; `index.jsonl`
                    // is removed unconditionally.
                    let md = tf_abs.join("index.md");
                    if is_deletable_catalog_artifact(&md) {
                        remove_if_exists(&md)?;
                    }
                    remove_if_exists(&tf_abs.join("index.jsonl"))?;
                }
            }
        }
        Ok(())
    }
646}
647
648fn write_type_folder_artifacts(
656 store: &Store,
657 folder: &Path,
658 records: &[IndexRecord],
659) -> crate::Result<()> {
660 let folder_abs = store.root.join(folder);
661 let md_path = folder_abs.join("index.md");
662 let jsonl_path = folder_abs.join("index.jsonl");
663 if records.is_empty() {
664 remove_if_exists(&md_path)?;
665 remove_if_exists(&jsonl_path)?;
666 return Ok(());
667 }
668 let idx = Index {
669 level: IndexLevel::TypeFolder(folder.to_path_buf()),
670 records: records.to_vec(),
671 child_counts: BTreeMap::new(),
672 };
673 write_atomic(&md_path, idx.to_markdown())?;
674 write_atomic(&jsonl_path, idx.to_jsonl())?;
675 Ok(())
676}
677
678fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
691 let stats = collect_child_stats(store, &Layer::all())?;
711
712 let layer = folder
713 .components()
714 .next()
715 .and_then(|c| c.as_os_str().to_str())
716 .and_then(layer_from_dir_name);
717 if let Some(layer) = layer {
718 let p = store.root.join(layer_dir_name(layer)).join("index.md");
719 if layer_has_children(&stats, layer) {
720 write_atomic(
721 &p,
722 render_layer_md_from_stats(layer, &stats, &store.config.folders),
723 )?;
724 } else {
725 remove_if_exists(&p)?;
726 }
727 }
728 let rp = store.root.join("index.md");
729 if stats.values().any(|s| s.count > 0) {
730 write_atomic(
731 &rp,
732 render_root_md_from_stats(&stats, &store.config.folders),
733 )?;
734 } else {
735 remove_if_exists(&rp)?;
736 }
737 Ok(())
738}
739
740fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
742 let prefix = format!("{}/", layer_dir_name(layer));
743 stats
744 .iter()
745 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
746}
747
748fn render_layer_md_from_stats(
753 layer: Layer,
754 stats: &BTreeMap<PathBuf, FolderStat>,
755 folders: &BTreeMap<String, FolderMeta>,
756) -> String {
757 let layer_dir = layer_dir_name(layer);
758 let prefix = format!("{layer_dir}/");
759 let mut max_upd: Option<DateTime<FixedOffset>> = None;
760 let mut entries = String::new();
761 for (tf, stat) in stats {
762 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
763 continue;
764 }
765 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
766 max_upd = Some(match max_upd {
767 Some(cur) if cur >= u => cur,
768 _ => u,
769 });
770 }
771 let tf_unix = path_to_unix(tf);
772 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
773 entries.push_str(&folder_entry(&tf_unix, &display, stat.count, description));
774 }
775 let mut s = String::new();
776 s.push_str("---\n");
777 s.push_str("type: index\n");
778 s.push_str("scope: layer\n");
779 s.push_str(&format!("folder: {layer_dir}\n"));
780 if let Some(ts) = max_upd {
781 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
782 }
783 s.push_str("---\n\n");
784 s.push_str(&format!("# {layer_dir}\n\n"));
785 s.push_str(&entries);
786 s
787}
788
789fn render_root_md_from_stats(
791 stats: &BTreeMap<PathBuf, FolderStat>,
792 folders: &BTreeMap<String, FolderMeta>,
793) -> String {
794 let mut max_upd: Option<DateTime<FixedOffset>> = None;
795 for stat in stats.values() {
796 if stat.count == 0 {
797 continue;
798 }
799 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
800 max_upd = Some(match max_upd {
801 Some(cur) if cur >= u => cur,
802 _ => u,
803 });
804 }
805 }
806 let mut s = String::new();
807 s.push_str("---\n");
808 s.push_str("type: index\n");
809 s.push_str("scope: root\n");
810 if let Some(ts) = max_upd {
811 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
812 }
813 s.push_str("---\n\n");
814 s.push_str(&format!("# {ROOT_TITLE}\n"));
815 for layer in Layer::all() {
816 let layer_dir = layer_dir_name(layer);
817 let prefix = format!("{layer_dir}/");
818 let children: Vec<(&PathBuf, usize)> = stats
819 .iter()
820 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
821 .map(|(tf, s)| (tf, s.count))
822 .collect();
823 if children.is_empty() {
824 continue;
825 }
826 let total: usize = children.iter().map(|(_, n)| *n).sum();
827 s.push('\n');
828 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
829 for (tf, n) in children {
830 let tf_unix = path_to_unix(tf);
831 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
832 s.push_str(&folder_entry(&tf_unix, &display, n, description));
833 }
834 }
835 s
836}
837
838fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
845 let layer = match idx.level {
846 IndexLevel::Layer(l) => l,
847 _ => unreachable!("render_layer_md_with_store called on non-layer"),
848 };
849 let layer_dir = layer_dir_name(layer);
850 let mut max_upd: Option<DateTime<FixedOffset>> = None;
851 let mut entries = String::new();
852 for (tf, n) in &idx.child_counts {
853 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
854 let newest = recs.first();
855 if let Some(u) = newest.and_then(|r| r.updated) {
856 max_upd = Some(match max_upd {
857 Some(cur) if cur >= u => cur,
858 _ => u,
859 });
860 }
861 let tf_unix = path_to_unix(tf);
862 let (display, description) =
863 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
864 entries.push_str(&folder_entry(&tf_unix, &display, *n, description));
865 }
866 let mut s = String::new();
867 s.push_str("---\n");
868 s.push_str("type: index\n");
869 s.push_str("scope: layer\n");
870 s.push_str(&format!("folder: {layer_dir}\n"));
871 if let Some(ts) = max_upd {
872 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
873 }
874 s.push_str("---\n\n");
875 s.push_str(&format!("# {layer_dir}\n\n"));
876 s.push_str(&entries);
877 s
878}
879
880fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
884 let mut max_upd: Option<DateTime<FixedOffset>> = None;
885 for tf in idx.child_counts.keys() {
886 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
887 if let Some(u) = recs.first().and_then(|r| r.updated) {
888 max_upd = Some(match max_upd {
889 Some(cur) if cur >= u => cur,
890 _ => u,
891 });
892 }
893 }
894 let mut s = String::new();
895 s.push_str("---\n");
896 s.push_str("type: index\n");
897 s.push_str("scope: root\n");
898 if let Some(ts) = max_upd {
899 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
900 }
901 s.push_str("---\n\n");
902 s.push_str(&format!("# {ROOT_TITLE}\n"));
903 for layer in Layer::all() {
904 let layer_dir = layer_dir_name(layer);
905 let prefix = format!("{layer_dir}/");
906 let children: Vec<(&PathBuf, &usize)> = idx
907 .child_counts
908 .iter()
909 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
910 .collect();
911 if children.is_empty() {
912 continue;
913 }
914 let total: usize = children.iter().map(|(_, n)| **n).sum();
915 s.push('\n');
916 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
917 for (tf, n) in children {
918 let tf_unix = path_to_unix(tf);
919 let (display, description) =
920 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
921 s.push_str(&folder_entry(&tf_unix, &display, *n, description));
922 }
923 }
924 s
925}
926
927fn format_md_entry(rec: &IndexRecord) -> String {
933 let path = wiki_target(&rec.path);
934 let summary = collapse_whitespace(&rec.summary);
943 let mut line = format!("- [[{path}]] — {summary}");
944 if !rec.tags.is_empty() {
945 let tags = rec
946 .tags
947 .iter()
948 .map(|t| format!("#{t}"))
949 .collect::<Vec<_>>()
950 .join(" ");
951 line.push_str(&format!(" · {tags}"));
952 }
953 line
954}
955
956fn more_footer(total: usize, type_: &str, layer: &str) -> String {
958 format!(
959 "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
960 )
961}
962
963fn sort_records(records: &mut [IndexRecord]) {
967 records.sort_by(record_recency_cmp);
968}
969
970impl IndexRecord {
971 pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
983 record_from_file(abs, rel)
984 }
985}
986
987fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
990 let mut meta = read_frontmatter(abs)?;
991 if rel.starts_with("records") {
996 meta.fields
997 .entry("meta-type".to_string())
998 .or_insert_with(|| Value::String("fact".to_string()));
999 }
1000 Ok(IndexRecord {
1001 path: rel,
1002 type_: meta.type_.unwrap_or_default(),
1003 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
1004 tags: meta.tags,
1005 links: meta.links,
1006 created: meta.created,
1007 updated: meta.updated,
1008 fields: meta.fields,
1009 })
1010}
1011
/// Frontmatter values extracted from one file by `read_frontmatter`.
struct FileMeta {
    /// The `type` key, when it is a scalar.
    type_: Option<String>,
    /// The `summary` key, when it is a scalar.
    summary: Option<String>,
    /// The `tags` key as a list of strings (empty when absent).
    tags: Vec<String>,
    /// The `links` key as a list of strings (empty when absent).
    links: Vec<String>,
    /// The `created` key, when it is a string that parses as RFC 3339.
    created: Option<DateTime<FixedOffset>>,
    /// The `updated` key, when it is a string that parses as RFC 3339.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining string-keyed entries (except `path`), converted to JSON values.
    fields: BTreeMap<String, Value>,
}
1022
1023fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
1037 let bytes = fs::read(abs)?;
1038 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
1039 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
1040 serde_norway::Mapping::new()
1041 } else {
1042 serde_norway::from_str(&yaml).map_err(|e| {
1043 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1044 path: abs.to_path_buf(),
1045 message: format!("frontmatter YAML: {e}"),
1046 })
1047 })?
1048 };
1049
1050 let mut type_ = None;
1051 let mut summary = None;
1052 let mut tags = Vec::new();
1053 let mut links = Vec::new();
1054 let mut created = None;
1055 let mut updated = None;
1056 let mut fields = BTreeMap::new();
1057
1058 for (k, v) in map {
1059 let key = match k.as_str() {
1060 Some(s) => s.to_string(),
1061 None => continue,
1062 };
1063 match key.as_str() {
1064 "type" => type_ = scalar_string(&v),
1074 "summary" => summary = scalar_string(&v),
1075 "tags" => tags = yaml_string_list(&v),
1076 "links" => links = yaml_string_list(&v),
1077 "created" => created = v.as_str().and_then(parse_ts),
1078 "updated" => updated = v.as_str().and_then(parse_ts),
1079 "path" => {}
1083 _ => {
1084 fields.insert(key, yaml_to_json_value(&v));
1085 }
1086 }
1087 }
1088
1089 Ok(FileMeta {
1090 type_,
1091 summary,
1092 tags,
1093 links,
1094 created,
1095 updated,
1096 fields,
1097 })
1098}
1099
1100fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1106 match v {
1107 serde_norway::Value::String(s) => Some(s.clone()),
1108 serde_norway::Value::Number(n) => Some(n.to_string()),
1109 serde_norway::Value::Bool(b) => Some(b.to_string()),
1110 _ => None,
1111 }
1112}
1113
1114fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1120 let text = String::from_utf8_lossy(bytes);
1125 extract_frontmatter_block(&text)
1126}
1127
/// Returns the text between the opening and closing `---` fences of a
/// frontmatter block, with every line terminated by `\n`.
///
/// A leading byte-order mark is ignored and trailing whitespace on a fence
/// line is tolerated. Returns `None` when the first line is not a fence or
/// when no closing fence follows.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let is_fence = |line: &str| line.trim_end() == "---";

    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut rows = body.lines();
    if !is_fence(rows.next()?) {
        return None;
    }

    let mut block = String::new();
    loop {
        // Running out of lines before a closing fence means there is no block.
        let row = rows.next()?;
        if is_fence(row) {
            return Some(block);
        }
        block.push_str(row);
        block.push('\n');
    }
}
1147
1148fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1151 match v {
1152 serde_norway::Value::String(s) => vec![s.clone()],
1153 serde_norway::Value::Sequence(seq) => seq
1154 .iter()
1155 .filter_map(yaml_string_or_wiki_link_literal)
1156 .collect(),
1157 _ => Vec::new(),
1158 }
1159}
1160
1161fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1162 v.as_str()
1163 .map(str::to_string)
1164 .or_else(|| unquoted_wiki_link_literal(v))
1165}
1166
1167fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1168 if let Some(link) = unquoted_wiki_link_literal(v) {
1169 return Value::String(link);
1170 }
1171 match v {
1172 serde_norway::Value::String(s) => Value::String(s.clone()),
1173 serde_norway::Value::Bool(b) => Value::Bool(*b),
1174 serde_norway::Value::Number(n) => {
1175 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1176 }
1177 serde_norway::Value::Sequence(seq) => {
1178 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1179 }
1180 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1181 serde_json::to_value(v).unwrap_or(Value::Null)
1182 }
1183 serde_norway::Value::Null => Value::Null,
1184 }
1185}
1186
1187fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1188 let serde_norway::Value::Sequence(outer) = v else {
1189 return None;
1190 };
1191 if outer.len() != 1 {
1192 return None;
1193 }
1194 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1195 return None;
1196 };
1197 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1198 return None;
1199 };
1200 Some(format!("[[{target}]]"))
1201}
1202
1203fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
1205 DateTime::parse_from_rfc3339(s.trim()).ok()
1206}
1207
1208fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
1212 ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
1213}
1214
1215fn max_updated<'a>(
1217 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1218) -> Option<DateTime<FixedOffset>> {
1219 let mut best: Option<DateTime<FixedOffset>> = None;
1220 for ts in it.flatten() {
1221 best = Some(match best {
1222 Some(cur) if cur >= *ts => cur,
1223 _ => *ts,
1224 });
1225 }
1226 best
1227}
1228
1229fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1233 let text = match fs::read_to_string(jsonl) {
1234 Ok(t) => t,
1235 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1236 Err(e) => return Err(e.into()),
1237 };
1238 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1240 for (i, line) in text.lines().enumerate() {
1241 if line.trim().is_empty() {
1242 continue;
1243 }
1244 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1245 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1246 path: jsonl.to_path_buf(),
1247 message: format!("line {}: {e}", i + 1),
1248 })
1249 })?;
1250 by_path.insert(rec.path.clone(), rec);
1251 }
1252 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1253 sort_records(&mut records);
1254 Ok(records)
1255}
1256
/// Summary of one type folder's `index.jsonl`, as produced by `read_folder_stat`.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the catalog.
    count: usize,
    /// The record that sorts first under `record_recency_cmp`, if any.
    newest: Option<IndexRecord>,
}
1268
1269fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1279 let text = match fs::read_to_string(jsonl) {
1280 Ok(t) => t,
1281 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1282 Err(e) => return Err(e.into()),
1283 };
1284 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1287 for (i, line) in text.lines().enumerate() {
1288 if line.trim().is_empty() {
1289 continue;
1290 }
1291 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1292 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1293 path: jsonl.to_path_buf(),
1294 message: format!("line {}: {e}", i + 1),
1295 })
1296 })?;
1297 by_path.insert(rec.path.clone(), rec);
1298 }
1299 let count = by_path.len();
1300 let newest = by_path.into_values().min_by(record_recency_cmp);
1304 Ok(FolderStat { count, newest })
1305}
1306
1307fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
1312 match (b.updated, a.updated) {
1313 (Some(bu), Some(au)) => bu.cmp(&au),
1314 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
1317 }
1318 .then_with(|| a.path.cmp(&b.path))
1319}
1320
1321fn collect_child_stats(
1334 store: &Store,
1335 layers: &[Layer],
1336) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1337 let mut stats = BTreeMap::new();
1338 for &layer in layers {
1339 for tf in type_folders_in_layer(store, layer) {
1340 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1341 if stat.count > 0 {
1342 stats.insert(tf, stat);
1343 }
1344 }
1345 }
1346 Ok(stats)
1347}
1348
1349fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1352 let mut out = Vec::new();
1353 if !folder_abs.is_dir() {
1354 return out;
1355 }
1356 for entry in walkdir::WalkDir::new(folder_abs)
1357 .into_iter()
1358 .filter_entry(|e| !is_hidden(e.file_name()))
1359 .filter_map(|e| e.ok())
1360 {
1361 if !entry.file_type().is_file() {
1362 continue;
1363 }
1364 let p = entry.path();
1365 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1366 continue;
1367 }
1368 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1369 continue;
1370 }
1371 out.push(p.to_path_buf());
1372 }
1373 out
1374}
1375
1376fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1379 let layer_dir = store.root.join(layer_dir_name(layer));
1380 let mut out = Vec::new();
1381 let rd = match fs::read_dir(&layer_dir) {
1382 Ok(rd) => rd,
1383 Err(_) => return out,
1384 };
1385 for entry in rd.flatten() {
1386 if !entry.path().is_dir() {
1387 continue;
1388 }
1389 let name = entry.file_name();
1390 let name = match name.to_str() {
1391 Some(n) => n,
1392 None => continue,
1393 };
1394 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1395 continue;
1396 }
1397 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1398 }
1399 out.sort();
1400 out
1401}
1402
1403fn loose_layer_of(file_rel: &Path) -> Option<Layer> {
1409 let mut comps = file_rel.components();
1410 let layer = layer_from_dir_name(comps.next()?.as_os_str().to_str()?)?;
1411 comps.next()?; if comps.next().is_some() {
1413 return None; }
1415 Some(layer)
1416}
1417
1418fn loose_files_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1422 let layer_dir = store.root.join(layer_dir_name(layer));
1423 let mut out = Vec::new();
1424 let rd = match fs::read_dir(&layer_dir) {
1425 Ok(rd) => rd,
1426 Err(_) => return out,
1427 };
1428 for entry in rd.flatten() {
1429 let p = entry.path();
1430 if !p.is_file() {
1431 continue;
1432 }
1433 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1434 continue;
1435 }
1436 if is_index_artifact(&p) || is_hidden(entry.file_name().as_os_str()) {
1437 continue;
1438 }
1439 out.push(p);
1440 }
1441 out
1442}
1443
1444fn write_layer_jsonl(store: &Store, layer: Layer, records: &[IndexRecord]) -> crate::Result<()> {
1449 let path = store.root.join(layer_dir_name(layer)).join("index.jsonl");
1450 if records.is_empty() {
1451 remove_if_exists(&path)?;
1452 return Ok(());
1453 }
1454 let idx = Index {
1455 level: IndexLevel::Layer(layer),
1456 records: records.to_vec(),
1457 child_counts: BTreeMap::new(),
1458 };
1459 write_atomic(&path, idx.to_jsonl())
1460}
1461
1462fn apply_loose_change(
1467 store: &Store,
1468 layer: Layer,
1469 file_rel: &Path,
1470 removing: bool,
1471) -> crate::Result<()> {
1472 let layer_dir = store.root.join(layer_dir_name(layer));
1473 let _lock = FolderLock::acquire(&layer_dir);
1474 let jsonl = layer_dir.join("index.jsonl");
1475 let mut records = read_jsonl_records(&jsonl)?;
1476 records.retain(|r| r.path != file_rel);
1477 if !removing {
1478 records.push(record_from_file(
1479 &store.root.join(file_rel),
1480 file_rel.to_path_buf(),
1481 )?);
1482 }
1483 sort_records(&mut records);
1484 write_layer_jsonl(store, layer, &records)
1485}
1486
1487fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1491 let mut comps = file_rel.components();
1492 let layer = comps.next()?.as_os_str().to_str()?;
1493 layer_from_dir_name(layer)?;
1494 let type_seg = comps.next()?.as_os_str().to_str()?;
1495 Some(PathBuf::from(layer).join(type_seg))
1496}
1497
/// Converts `abs` into a path relative to `root`.
///
/// Returns `None` when `abs` does not start with `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rel) => Some(rel.to_owned()),
        Err(_) => None,
    }
}
1502
1503fn normalize_rel(p: &Path) -> PathBuf {
1506 let s = path_to_unix(p);
1507 let s = s.strip_prefix("./").unwrap_or(&s);
1508 PathBuf::from(s)
1509}
1510
/// Returns `true` when the final path component is exactly `index.md` or
/// `index.jsonl` (case-sensitive).
fn is_index_artifact(p: &Path) -> bool {
    const ARTIFACT_NAMES: [&str; 2] = ["index.md", "index.jsonl"];
    p.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| ARTIFACT_NAMES.contains(&name))
}
1517
1518fn is_deletable_catalog_artifact(p: &Path) -> bool {
1532 match p.file_name().and_then(|n| n.to_str()) {
1533 Some("index.jsonl") => true,
1534 Some("index.md") => match read_frontmatter(p) {
1535 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1537 Err(_) => true,
1539 },
1540 _ => false,
1541 }
1542}
1543
/// Returns `true` when `name` is a dot-file/dot-directory name, i.e. its first
/// byte is `.`.
///
/// The check looks at the encoded bytes rather than requiring the whole name
/// to be valid UTF-8, so a dot-prefixed name with non-UTF-8 bytes later on is
/// still treated as hidden. An empty name is not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    // `.` is ASCII, and the platform encoding is a superset of UTF-8, so the
    // first encoded byte is `b'.'` exactly when the name starts with a dot.
    name.as_encoded_bytes().first() == Some(&b'.')
}
1547
1548fn layer_dir_name(layer: Layer) -> &'static str {
1549 match layer {
1550 Layer::Sources => "sources",
1551 Layer::Records => "records",
1552 }
1553}
1554
1555fn layer_from_dir_name(name: &str) -> Option<Layer> {
1558 match name {
1559 "sources" => Some(Layer::Sources),
1560 "records" => Some(Layer::Records),
1561 _ => None,
1562 }
1563}
1564
/// Returns the final component of `p` as UTF-8 text.
///
/// Yields an empty string when the path has no final component or when that
/// component is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1569
1570fn wiki_target(p: &Path) -> String {
1574 let unix = path_to_unix(p);
1575 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1576}
1577
/// Renders a path as text with `/` between its components.
///
/// Components are taken from `Path::components`, so repeated separators and
/// interior `.` segments disappear. Non-UTF-8 components are converted
/// lossily.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    for (position, component) in p.components().enumerate() {
        if position > 0 {
            unix.push('/');
        }
        unix.push_str(&component.as_os_str().to_string_lossy());
    }
    unix
}
1595
1596mod path_serde {
1602 use super::path_to_unix;
1603 use serde::{Deserialize, Deserializer, Serializer};
1604 use std::path::{Path, PathBuf};
1605
1606 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1607 s.serialize_str(&path_to_unix(p))
1608 }
1609
1610 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1611 Ok(PathBuf::from(String::deserialize(d)?))
1612 }
1613}
1614
/// Upper-cases the first character of `s` and leaves the rest untouched.
///
/// Uses Unicode upper-casing, so one character may expand to several
/// (e.g. `ß` becomes `SS`). An empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };
    let mut out = String::with_capacity(s.len());
    out.extend(first.to_uppercase());
    out.push_str(rest.as_str());
    out
}
1623
/// Trims `s` and collapses every run of whitespace (including newlines and
/// tabs) into a single space.
fn collapse_whitespace(s: &str) -> String {
    let mut collapsed = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
1631
1632fn default_display(basename: &str) -> String {
1638 let spaced: String = basename
1639 .chars()
1640 .map(|c| if c == '-' || c == '_' { ' ' } else { c })
1641 .collect();
1642 capitalize(&spaced)
1643}
1644
1645fn folder_label<'a>(
1652 tf_unix: &str,
1653 basename: &str,
1654 folders: &'a BTreeMap<String, FolderMeta>,
1655) -> (String, Option<&'a str>) {
1656 let meta = folders.get(tf_unix);
1657 let display = meta
1658 .and_then(|m| m.display.as_deref())
1659 .map(str::to_string)
1660 .unwrap_or_else(|| default_display(basename));
1661 (display, meta.and_then(|m| m.description.as_deref()))
1662}
1663
/// Formats one rollup bullet for a type folder:
/// `- [[<folder>/index|<display>]] (<count>)`, followed by ` — <description>`
/// when a description is given, and terminated by a newline.
fn folder_entry(tf_unix: &str, display: &str, count: usize, description: Option<&str>) -> String {
    let mut line = format!("- [[{tf_unix}/index|{display}]] ({count})");
    if let Some(text) = description {
        line.push_str(" — ");
        line.push_str(text);
    }
    line.push('\n');
    line
}
1672
1673fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1680 if let Some(parent) = path.parent() {
1681 fs::create_dir_all(parent)?;
1682 }
1683 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1684 let mut tmp = tempfile_in(dir)?;
1685 tmp.write_all(contents.as_bytes())?;
1686 tmp.flush()?;
1687 tmp.persist(path)?;
1688 Ok(())
1689}
1690
1691fn remove_if_exists(path: &Path) -> crate::Result<()> {
1692 match fs::remove_file(path) {
1693 Ok(()) => Ok(()),
1694 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1695 Err(e) => Err(e.into()),
1696 }
1697}
1698
1699fn bad_index(path: &Path, msg: &str) -> crate::Error {
1700 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1701 path: path.to_path_buf(),
1702 message: msg.to_string(),
1703 })
1704}
1705
/// Best-effort lock on a folder, backed by a `.index.lock` file inside it.
///
/// `held` records whether this guard created the lock file; only then does
/// dropping the guard delete it.
struct FolderLock {
    path: PathBuf,
    held: bool,
}

impl FolderLock {
    /// Tries to take the lock for `folder_abs`, creating the folder if needed.
    ///
    /// Always returns a guard. It makes up to 600 attempts at exclusively
    /// creating the lock file, sleeping 10 ms after each attempt that finds a
    /// fresh lock already in place. A lock file whose modification time is
    /// more than 30 s old is treated as stale: it is removed and the next
    /// attempt follows immediately. If the attempts run out, or creation
    /// fails for any reason other than "already exists", the returned guard
    /// has `held == false` and the caller proceeds unlocked.
    fn acquire(folder_abs: &Path) -> Self {
        use std::time::{Duration, SystemTime};
        const MAX_ATTEMPTS: u32 = 600;
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let path = folder_abs.join(".index.lock");
        // Failure here surfaces below as a failed open, which yields an unheld guard.
        let _ = fs::create_dir_all(folder_abs);

        for _ in 0..MAX_ATTEMPTS {
            let attempt = fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path);
            match attempt {
                Ok(_) => return FolderLock { path, held: true },
                Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
                    // Unreadable metadata or a modification time in the
                    // future counts as "not stale".
                    let stale = fs::metadata(&path)
                        .and_then(|meta| meta.modified())
                        .ok()
                        .and_then(|modified| SystemTime::now().duration_since(modified).ok())
                        .is_some_and(|age| age > STALE_AFTER);
                    if stale {
                        let _ = fs::remove_file(&path);
                    } else {
                        std::thread::sleep(SPIN);
                    }
                }
                Err(_) => return FolderLock { path, held: false },
            }
        }
        FolderLock { path, held: false }
    }
}

impl Drop for FolderLock {
    /// Removes the lock file, but only if this guard created it.
    fn drop(&mut self) {
        if !self.held {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1788
1789fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1795 if a == b {
1796 return vec![FolderLock::acquire(&store.root.join(a))];
1797 }
1798 let (first, second) = if a < b { (a, b) } else { (b, a) };
1799 vec![
1800 FolderLock::acquire(&store.root.join(first)),
1801 FolderLock::acquire(&store.root.join(second)),
1802 ]
1803}
1804
/// A temp file that is either renamed into place with `persist` or deleted
/// when dropped.
struct AtomicTemp {
    file: Option<fs::File>,
    path: PathBuf,
    persisted: bool,
}

impl AtomicTemp {
    /// The open file handle; panics if it has already been taken by `persist`.
    fn handle(&mut self) -> &mut fs::File {
        self.file.as_mut().expect("temp file open")
    }

    /// Writes all of `bytes` to the temp file.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        self.handle().write_all(bytes)
    }

    /// Flushes the temp file's writer.
    fn flush(&mut self) -> std::io::Result<()> {
        self.handle().flush()
    }

    /// Closes the temp file and renames it to `dest`.
    ///
    /// A failed `sync_all` is ignored. If the rename fails, the error is
    /// returned and the temp file is removed when `self` drops.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(open) = self.file.take() {
            let _ = open.sync_all();
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temp file unless it was persisted.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1842
1843fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1844 use std::time::{SystemTime, UNIX_EPOCH};
1845 let nanos = SystemTime::now()
1846 .duration_since(UNIX_EPOCH)
1847 .map(|d| d.as_nanos())
1848 .unwrap_or(0);
1849 let pid = std::process::id();
1850 let counter = next_temp_counter();
1853 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1854 let path = dir.join(name);
1855 let file = fs::OpenOptions::new()
1856 .write(true)
1857 .create_new(true)
1858 .open(&path)?;
1859 Ok(AtomicTemp {
1860 file: Some(file),
1861 path,
1862 persisted: false,
1863 })
1864}
1865
/// Returns the next value of a process-wide counter, starting at 0 and
/// increasing by one per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering::Relaxed};
    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Relaxed)
}
1871
1872#[cfg(test)]
1873mod tests {
1874 use super::*;
1875 use std::collections::BTreeSet;
1876 use std::fs;
1877 use tempfile::TempDir;
1878
1879 fn mk_store() -> (TempDir, Store) {
1884 let dir = TempDir::new().unwrap();
1885 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1886 let store = Store {
1887 root: dir.path().to_path_buf(),
1888 config: crate::parser::Config::default(),
1889 };
1890 (dir, store)
1891 }
1892
1893 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1896 let abs = store.root.join(rel);
1897 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1898 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1899 }
1900
1901 fn write_doc(
1903 store: &Store,
1904 rel: &str,
1905 type_: &str,
1906 summary: Option<&str>,
1907 updated: Option<&str>,
1908 extra_yaml: &str,
1909 ) {
1910 let mut fm = format!("type: {type_}\n");
1911 if let Some(s) = summary {
1912 fm.push_str(&format!("summary: {s}\n"));
1913 }
1914 if let Some(u) = updated {
1915 fm.push_str(&format!("updated: {u}\n"));
1916 }
1917 fm.push_str(extra_yaml);
1918 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1919 }
1920
1921 fn read(store: &Store, rel: &str) -> String {
1922 fs::read_to_string(store.root.join(rel)).unwrap()
1923 }
1924
1925 fn exists(store: &Store, rel: &str) -> bool {
1926 store.root.join(rel).exists()
1927 }
1928
1929 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1932 let mut out = BTreeMap::new();
1933 for entry in walkdir::WalkDir::new(&store.root)
1934 .into_iter()
1935 .filter_map(|e| e.ok())
1936 {
1937 let p = entry.path();
1938 if is_index_artifact(p) {
1939 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1940 out.insert(rel, fs::read_to_string(p).unwrap());
1941 }
1942 }
1943 out
1944 }
1945
    // A type-folder index must gather files from every nested shard directory
    // and order them by `updated`, newest first.
    #[test]
    fn type_folder_aggregates_across_shards_in_recency_order() {
        let (_d, store) = mk_store();
        // Files are written out of date order and across two month shards.
        write_doc(
            &store,
            "sources/emails/2026/05/b-old.md",
            "email",
            Some("Older mail"),
            Some("2026-05-01T09:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/06/c-new.md",
            "email",
            Some("Newest mail"),
            Some("2026-06-15T12:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/05/a-mid.md",
            "email",
            Some("Middle mail"),
            Some("2026-05-20T08:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(
            paths,
            vec![
                "sources/emails/2026/06/c-new.md",
                "sources/emails/2026/05/a-mid.md",
                "sources/emails/2026/05/b-old.md",
            ],
            "records must aggregate across shards, newest `updated` first"
        );
    }
1990
    // Pins the markdown rendering of a type-folder index: the frontmatter and
    // heading, the tagged and untagged entry formats, and the absence of the
    // "More" footer when the folder is under the cap.
    #[test]
    fn type_folder_md_format_entries_tags_and_derived_updated() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/sarah-chen.md",
            "contact",
            Some("Renewal champion at Acme"),
            Some("2026-05-27T10:00:00Z"),
            "tags:\n - renewal\n - acme\n",
        );
        write_doc(
            &store,
            "records/contacts/no-tags.md",
            "contact",
            Some("Plain contact"),
            Some("2026-05-26T10:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
        let md = idx.to_markdown();

        // The index's `updated` is expected to equal the newest member's `updated`.
        assert!(md.starts_with(
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
        ), "frontmatter/heading wrong:\n{md}");

        // Tagged entry: tags follow the summary after a ` · ` separator.
        assert!(
            md.contains(
                "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
            ),
            "tagged entry wrong:\n{md}"
        );
        // Untagged entry: summary only, no trailing separator.
        assert!(
            md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
            "untagged entry wrong:\n{md}"
        );
        assert!(
            !md.contains("Plain contact ·"),
            "untagged entry must not emit a tag separator"
        );
        assert!(!md.contains("## More"), "no footer expected under the cap");
    }
2039
    // A document without a `summary` must be recorded and rendered with the
    // `MISSING_SUMMARY` placeholder rather than text derived from elsewhere.
    #[test]
    fn missing_summary_becomes_placeholder_not_invented() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/notes/x.md",
            "note",
            None,
            Some("2026-05-27T10:00:00Z"),
            "",
        );
        let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
        assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
        let md = idx.to_markdown();
        assert!(
            md.contains("- [[records/notes/x]] — (no summary)\n"),
            "missing summary must render the placeholder, not invent text:\n{md}"
        );
    }
2059
    // Pins the JSONL form: one object per file, each line deserializes back
    // to the in-memory record, reserved keys stay out of `fields`, and the
    // serialized key order is stable (fixed keys first, extras sorted).
    #[test]
    fn jsonl_is_complete_structured_and_round_trips() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/expenses/2026/05/e1.md",
            "expense",
            Some("Lunch with vendor"),
            Some("2026-05-10T10:00:00Z"),
            "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[records/concepts/spend]]\ntags:\n - food\nlinks:\n - records/concepts/spend\n - [[records/concepts/renewal]]\n",
        );
        write_doc(
            &store,
            "records/expenses/2026/06/e2.md",
            "expense",
            Some("Cloud bill"),
            Some("2026-06-01T10:00:00Z"),
            "amount: 100\n",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
        let jsonl = idx.to_jsonl();
        let lines: Vec<&str> = jsonl.lines().collect();
        assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");

        // Line 0 is the newer file (e2); it must round-trip to the record.
        let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
        assert_eq!(
            r0, idx.records[0],
            "jsonl line must round-trip to the record"
        );

        // Line 1 is e1: check each typed field and the free-form extras.
        let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
        assert_eq!(r1.type_, "expense");
        assert_eq!(r1.summary, "Lunch with vendor");
        assert_eq!(r1.tags, vec!["food".to_string()]);
        assert_eq!(
            r1.links,
            vec![
                "records/concepts/spend".to_string(),
                "[[records/concepts/renewal]]".to_string()
            ]
        );
        assert_eq!(
            r1.created,
            Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
        );
        assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
        assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
        assert_eq!(
            r1.fields.get("company"),
            Some(&Value::from("[[records/companies/acme]]"))
        );
        assert_eq!(
            r1.fields.get("related"),
            Some(&serde_json::json!(["[[records/concepts/spend]]"]))
        );
        // Keys with dedicated struct fields must not be duplicated in `fields`.
        for reserved in [
            "path", "type", "summary", "tags", "links", "created", "updated",
        ] {
            assert!(
                !r1.fields.contains_key(reserved),
                "reserved key {reserved} must not appear in fields"
            );
        }

        // Exact prefix: the fixed keys appear in struct-declaration order.
        assert!(
            lines[1].starts_with(
                r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["records/concepts/spend","[[records/concepts/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
            ),
            "jsonl key order not stable:\n{}",
            lines[1]
        );
        // Exact suffix: extras are sorted by key. NOTE(review): `meta-type` is
        // not in the fixture frontmatter, so it is presumably added while the
        // record is built; confirm against `record_from_file`.
        assert!(
            lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[records/concepts/spend]]"],"status":"paid"}"#),
            "extras must be sorted:\n{}",
            lines[1]
        );
    }
2150
    // With more than `MD_CAP` files, the markdown view lists exactly `MD_CAP`
    // entries plus a "More" footer, while the records and JSONL keep every file.
    #[test]
    fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
        let (_d, store) = mk_store();
        let total = MD_CAP + 7;
        for i in 0..total {
            // Spread `updated` over days 1..=27 and seconds 0..=59.
            let day = 1 + (i % 27);
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
            write_doc(
                &store,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
        }
        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

        let md = idx.to_markdown();
        // Entry lines are the bullets that start with a wiki link.
        let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
        assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");

        assert!(
            md.contains("## More\n\n"),
            "over-cap md needs a More footer"
        );
        assert!(
            md.contains(&format!(
                "This folder has {total} files. The 500 most recent are listed above.\n"
            )),
            "footer count wrong:\n{md}"
        );
        assert!(
            md.contains(
                "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
            ),
            "footer must infer type=email layer=sources:\n{md}"
        );

        let jsonl = idx.to_jsonl();
        assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
    }
2198
    // `sort_records` orders by `updated` descending, breaks equal timestamps
    // by ascending path, and places records without `updated` last.
    #[test]
    fn sort_breaks_ties_by_path_and_puts_undated_last() {
        let mut recs = vec![
            // Same timestamp as `a/b.md`: the tie is decided by path.
            rec("z/a.md", Some("2026-05-01T00:00:00Z")),
            rec("a/b.md", Some("2026-05-01T00:00:00Z")),
            // No timestamp: must sort after every dated record.
            rec("m/c.md", None),
            // Newest timestamp: must sort first.
            rec("b/d.md", Some("2026-06-01T00:00:00Z")),
        ];
        sort_records(&mut recs);
        let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
    }
2213
2214 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2215 IndexRecord {
2216 path: PathBuf::from(path),
2217 type_: "t".into(),
2218 summary: "s".into(),
2219 tags: vec![],
2220 links: vec![],
2221 created: None,
2222 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2223 fields: BTreeMap::new(),
2224 }
2225 }
2226
    // The layer-level `index.md` lists each type folder alphabetically with a
    // file count, quotes no member summaries, and carries the newest child
    // `updated` in its frontmatter.
    #[test]
    fn layer_index_lists_type_folders_with_counts() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A older"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/b.md",
            "contact",
            Some("Contact B newest"),
            Some("2026-05-09T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        // Write both type-folder indexes first, then the layer index.
        Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
        Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();

        Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
        let md = read(&store, "records/index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
            "layer fm:\n{md}"
        );
        let companies_at = md.find("companies/index").unwrap();
        let contacts_at = md.find("contacts/index").unwrap();
        assert!(
            companies_at < contacts_at,
            "type folders must be alphabetical"
        );
        // Default display names are the capitalized folder basenames.
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (2)\n"),
            "contacts entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/companies/index|Companies]] (1)\n"),
            "companies entry:\n{md}"
        );
        assert!(
            !md.contains("Contact B newest") && !md.contains("Acme Inc"),
            "layer rollup must not quote a member summary:\n{md}"
        );
        // 2026-05-09 is the newest `updated` among all three fixture files.
        assert!(
            md.contains("updated: 2026-05-09T00:00:00Z\n"),
            "layer updated must be max child:\n{md}"
        );
    }
2295
    // Folder metadata from the store config (`display`, `description`) must
    // show up in both the layer and root rollups; folders without metadata
    // get the default display name and a counts-only entry.
    #[test]
    fn folders_section_supplies_authored_display_and_description() {
        let (_d, mut store) = mk_store();
        // Description only: the display name falls back to the default.
        store.config.folders.insert(
            "records/contacts".into(),
            crate::parser::FolderMeta {
                display: None,
                description: Some("people across customer + prospect accounts".into()),
            },
        );
        // Both display override and description.
        store.config.folders.insert(
            "sources/hubspot-exports".into(),
            crate::parser::FolderMeta {
                display: Some("HubSpot exports".into()),
                description: Some("deal + pipeline exports".into()),
            },
        );
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        // `records/companies` has no config entry at all.
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/hubspot-exports/d.md",
            "hubspot-export",
            Some("a single deal export"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );

        Index::rebuild_all(&store).unwrap();

        let records_layer = read(&store, "records/index.md");
        assert!(
            records_layer.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
            "authored description must surface:\n{records_layer}"
        );
        assert!(
            records_layer.contains("- [[records/companies/index|Companies]] (1)\n")
                && !records_layer.contains("Acme Inc"),
            "un-described folder is counts-only:\n{records_layer}"
        );

        let sources_layer = read(&store, "sources/index.md");
        assert!(
            sources_layer.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
            "display override + description must surface:\n{sources_layer}"
        );

        // The root index repeats the same per-folder entries.
        let root = read(&store, "index.md");
        assert!(
            root.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
            "root surfaces authored description:\n{root}"
        );
        assert!(
            root.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
            "root surfaces display override:\n{root}"
        );
    }
2375
2376 #[test]
2377 fn default_display_turns_separators_to_spaces_and_caps() {
2378 assert_eq!(default_display("contacts"), "Contacts");
2379 assert_eq!(default_display("hubspot-exports"), "Hubspot exports");
2380 assert_eq!(default_display("usage_exports"), "Usage exports");
2381 }
2382
    // The root `index.md` has one section per non-empty layer (Sources before
    // Records) with the layer total in the heading, and one counts-only
    // bullet per type folder.
    #[test]
    fn root_index_groups_layers_with_totals_and_per_type_counts() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "sources/emails/2026/05/a.md",
            "email",
            Some("Mail"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/docs/d.md",
            "doc",
            Some("Doc"),
            Some("2026-05-02T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/c.md",
            "contact",
            Some("C"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );
        Index::rebuild_all(&store).unwrap();
        let md = read(&store, "index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: root\n"),
            "root fm:\n{md}"
        );
        assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
        // Headings carry the per-layer file totals: 2 sources, 1 record.
        let sources_h = md
            .find("## Sources (2)")
            .expect("sources heading w/ total 2");
        let records_h = md
            .find("## Records (1)")
            .expect("records heading w/ total 1");
        assert!(sources_h < records_h, "Sources must precede Records");
        assert!(!md.contains("## Wiki"), "empty layer gets no section");
        assert!(
            md.contains("- [[sources/docs/index|Docs]] (1)\n"),
            "root docs entry:\n{md}"
        );
        assert!(
            md.contains("- [[sources/emails/index|Emails]] (1)\n"),
            "root emails entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root contacts entry:\n{md}"
        );
        // No folder has an authored description here, so no bullet may carry
        // the ` — ` suffix.
        assert!(!md.contains("— "), "root entries carry no preview text");
    }
2444
    // Incrementally indexing each file with `on_write` must leave exactly the
    // same index artifacts, byte for byte, as a single `rebuild_all` over the
    // same files.
    #[test]
    fn on_write_matches_rebuild_byte_for_byte() {
        // `wt` is updated write-through; `rb` is rebuilt once at the end.
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        // (path, type, summary, updated, extra frontmatter)
        let docs: &[(&str, &str, &str, &str, &str)] = &[
            (
                "sources/emails/2026/05/e1.md",
                "email",
                "First mail",
                "2026-05-01T10:00:00Z",
                "tags:\n - inbox\n",
            ),
            (
                "sources/emails/2026/06/e2.md",
                "email",
                "Second mail",
                "2026-06-01T10:00:00Z",
                "",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T10:00:00Z",
                "links:\n - records/profiles/sarah\n",
            ),
            (
                "records/contacts/elena.md",
                "contact",
                "Elena",
                "2026-05-20T10:00:00Z",
                "status: active\n",
            ),
            (
                "records/profiles/sarah.md",
                "profile",
                "Sarah bio",
                "2026-05-21T10:00:00Z",
                "",
            ),
        ];

        for (rel, t, sum, upd, extra) in docs {
            write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
            write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
            Index::on_write(&wt, Path::new(rel)).unwrap();
        }
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "same set of index artifacts must exist"
        );
        for (k, v) in &a {
            assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
        }
        // Guard against a vacuous pass: artifacts at every level must exist.
        assert!(a.contains_key("index.md"));
        assert!(a.contains_key("sources/emails/index.jsonl"));
        assert!(a.contains_key("records/contacts/index.md"));
    }
2514
    // `on_write` for one folder must build the layer and root rollups without
    // counting sibling folders that have never been indexed. Once the
    // siblings are indexed too, the result must equal a full rebuild.
    #[test]
    fn loop_op_does_not_walk_sibling_content_tree() {
        let (_d, store) = mk_store();

        // Two company files exist on disk but are never passed to the index.
        write_doc(
            &store,
            "records/companies/acme.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/globex.md",
            "company",
            Some("Globex"),
            Some("2026-05-06T00:00:00Z"),
            "",
        );
        assert!(
            !exists(&store, "records/companies/index.jsonl"),
            "precondition: companies must be un-indexed"
        );

        // Only this contact goes through `on_write`.
        write_doc(
            &store,
            "records/contacts/sarah.md",
            "contact",
            Some("Sarah"),
            Some("2026-05-15T00:00:00Z"),
            "",
        );
        Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();

        let layer_md = read(&store, "records/index.md");
        let root_md = read(&store, "index.md");
        assert!(
            layer_md.contains("- [[records/contacts/index|Contacts]] (1)\n")
                && !layer_md.contains("Sarah"),
            "layer must reflect the written folder, counts only:\n{layer_md}"
        );
        assert!(
            root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root must reflect the written folder:\n{root_md}"
        );

        // The un-indexed sibling must not appear in either rollup.
        assert!(
            !layer_md.contains("companies"),
            "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
        );
        assert!(
            !root_md.contains("companies"),
            "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
        );
        assert!(
            root_md.contains("## Records (1)"),
            "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
        );

        // Second half: index the siblings as well and compare with a store
        // that holds the same three files and is rebuilt from scratch.
        let (_d2, rb) = mk_store();
        for (rel, t, s, u) in [
            (
                "records/companies/acme.md",
                "company",
                "Acme Inc",
                "2026-05-05T00:00:00Z",
            ),
            (
                "records/companies/globex.md",
                "company",
                "Globex",
                "2026-05-06T00:00:00Z",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T00:00:00Z",
            ),
        ] {
            write_doc(&rb, rel, t, Some(s), Some(u), "");
        }
        Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
        Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
        Index::rebuild_all(&rb).unwrap();
        let a = snapshot_artifacts(&store);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<BTreeSet<_>>(),
            b.keys().collect::<BTreeSet<_>>(),
            "same artifact set after indexing both folders"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "after indexing the sibling too, loop result must equal rebuild for {k}"
            );
        }
        assert!(
            read(&store, "index.md").contains("## Records (3)"),
            "now that both folders are indexed, the root total is 3"
        );
    }
2649
    // A path produced by `Store::shard_path_for` for the `profile` type must
    // be accepted by the index: `type_folder_of` resolves it, `on_write`
    // succeeds, and write-through and rebuild produce identical artifacts.
    #[test]
    fn custom_type_at_shard_path_for_is_indexable_end_to_end() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        // Let the store compute where a `profile` document should live.
        let rel = wt
            .shard_path_for(
                "profile",
                &crate::parser::Frontmatter::default(),
                "renewal-theme",
            )
            .unwrap();
        let rel_str = path_to_unix(&rel);
        assert!(
            type_folder_of(&rel).is_some(),
            "shard_path_for produced a path the index cannot file: {rel_str}"
        );

        write_doc(
            &wt,
            &rel_str,
            "profile",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );
        write_doc(
            &rb,
            &rel_str,
            "profile",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );

        Index::on_write(&wt, &rel)
            .expect("on_write must succeed for a toolkit-computed custom-type path");
        Index::rebuild_all(&rb).unwrap();

        // The assertions below expect the type folder to be `records/profile`.
        let page_link = wiki_target(&rel);
        let tf_md = read(&rb, "records/profile/index.md");
        assert!(
            tf_md.contains(&format!("[[{page_link}]]")),
            "type-folder index must list the page link, got:\n{tf_md}"
        );
        assert!(
            exists(&rb, "records/profile/index.jsonl"),
            "type-folder jsonl must exist"
        );
        assert!(
            read(&rb, "records/profile/index.jsonl").contains(&rel_str),
            "type-folder jsonl must contain the page row"
        );
        let layer_md = read(&rb, "records/index.md");
        assert!(
            layer_md.contains("records/profile/index"),
            "layer index must roll up the records/profile type-folder, got:\n{layer_md}"
        );

        // Write-through and rebuild must agree on the artifact set and bytes.
        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "loop and sweep must produce the same artifact set"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "custom-type artifact {k} differs between on_write and rebuild"
            );
        }
    }
2748
    // Removing the newest file from an over-cap folder via `on_remove` must
    // match a rebuild, keep the markdown view at `MD_CAP` entries by pulling
    // in the next-most-recent file, and drop the file from the JSONL.
    #[test]
    fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();
        let total = MD_CAP + 3;
        let mut all_rels = Vec::new();
        for i in 0..total {
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            // Minutes/seconds grow with `i`, so a higher index means newer.
            let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
            write_doc(
                &wt,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            write_doc(
                &rb,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            all_rels.push(rel);
        }
        Index::rebuild_all(&wt).unwrap();
        // Remove the newest file from `wt` and update its index incrementally.
        let newest = &all_rels[total - 1];
        fs::remove_file(wt.root.join(newest)).unwrap();
        Index::on_remove(&wt, Path::new(newest)).unwrap();

        // Remove the same file from `rb` and rebuild from scratch.
        fs::remove_file(rb.root.join(newest)).unwrap();
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        for (k, v) in &a {
            assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
        }

        let md = read(&wt, "sources/emails/index.md");
        assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
        assert!(
            !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
            "removed file must not be listed in md"
        );
        // Before the removal the listed entries were indices 3..=total-1;
        // afterwards they are 2..=total-2, so index 2 is the one pulled in.
        let pulled_in = &all_rels[2];
        assert!(
            md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
            "the 501st-most-recent must be pulled into the browse view after a removal"
        );
        assert!(
            md.contains(&format!("This folder has {} files.", total - 1)),
            "footer count must decrement:\n{}",
            md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
        );
        let jsonl = read(&wt, "sources/emails/index.jsonl");
        assert_eq!(
            jsonl.lines().count(),
            total - 1,
            "jsonl loses exactly the removed file"
        );
        assert!(
            !jsonl.contains(&path_to_unix(Path::new(newest))),
            "removed file must be gone from the jsonl too"
        );
    }
2826
2827 #[test]
2828 fn on_rename_cross_folder_matches_rebuild() {
2829 let (_d1, wt) = mk_store();
2830 let (_d2, rb) = mk_store();
2831 let seed: &[(&str, &str, &str, &str)] = &[
2833 (
2834 "records/contacts/a.md",
2835 "contact",
2836 "A",
2837 "2026-05-01T00:00:00Z",
2838 ),
2839 (
2840 "records/contacts/b.md",
2841 "contact",
2842 "B",
2843 "2026-05-02T00:00:00Z",
2844 ),
2845 (
2846 "records/companies/x.md",
2847 "company",
2848 "X",
2849 "2026-05-03T00:00:00Z",
2850 ),
2851 ];
2852 for (rel, t, s, u) in seed {
2853 write_doc(&wt, rel, t, Some(s), Some(u), "");
2854 write_doc(&rb, rel, t, Some(s), Some(u), "");
2855 }
2856 Index::rebuild_all(&wt).unwrap();
2857
2858 let old = "records/contacts/b.md";
2861 let new = "records/companies/b.md";
2862 fs::create_dir_all(wt.root.join("records/companies")).unwrap();
2863 fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
2864 Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();
2867
2868 fs::create_dir_all(rb.root.join("records/companies")).unwrap();
2870 fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
2871 Index::rebuild_all(&rb).unwrap();
2872
2873 let a = snapshot_artifacts(&wt);
2874 let b = snapshot_artifacts(&rb);
2875 assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
2876 for (k, v) in &a {
2877 assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
2878 }
2879 let contacts = read(&wt, "records/contacts/index.md");
2881 assert!(!contacts.contains("records/contacts/b]]"));
2882 let companies = read(&wt, "records/companies/index.md");
2883 assert!(companies.contains("[[records/companies/b]]"));
2884 }
2885
2886 #[test]
2887 fn on_write_updates_existing_entry_in_place() {
2888 let (_d, store) = mk_store();
2889 write_doc(
2890 &store,
2891 "records/contacts/a.md",
2892 "contact",
2893 Some("Original"),
2894 Some("2026-05-01T00:00:00Z"),
2895 "",
2896 );
2897 Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2898 write_doc(
2900 &store,
2901 "records/contacts/a.md",
2902 "contact",
2903 Some("Revised"),
2904 Some("2026-05-09T00:00:00Z"),
2905 "",
2906 );
2907 Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2908
2909 let jsonl = read(&store, "records/contacts/index.jsonl");
2910 assert_eq!(
2911 jsonl.lines().count(),
2912 1,
2913 "upsert must not duplicate the line"
2914 );
2915 assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
2916 assert!(
2917 !jsonl.contains("Original"),
2918 "stale line must be gone (compacted)"
2919 );
2920 let md = read(&store, "records/contacts/index.md");
2921 assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
2922 assert!(
2923 md.contains("updated: 2026-05-09T00:00:00Z\n"),
2924 "index updated must track the newer member"
2925 );
2926 }
2927
2928 #[test]
2931 fn dry_run_emits_separators_and_writes_nothing() {
2932 let (_d, store) = mk_store();
2933 write_doc(
2934 &store,
2935 "sources/emails/2026/05/a.md",
2936 "email",
2937 Some("Mail"),
2938 Some("2026-05-01T00:00:00Z"),
2939 "",
2940 );
2941 let out = Index::render_dry_run(&store, &IndexLevel::TypeFolder("sources/emails".into()))
2942 .unwrap();
2943 assert!(
2944 out.contains("--- sources/emails/index.md ---\n"),
2945 "md separator:\n{out}"
2946 );
2947 assert!(
2948 out.contains("--- sources/emails/index.jsonl ---\n"),
2949 "jsonl separator:\n{out}"
2950 );
2951 assert!(
2952 out.contains("- [[sources/emails/2026/05/a]] — Mail"),
2953 "md body present"
2954 );
2955 assert!(
2957 !exists(&store, "sources/emails/index.md"),
2958 "dry-run must not write"
2959 );
2960 assert!(
2961 !exists(&store, "sources/emails/index.jsonl"),
2962 "dry-run must not write"
2963 );
2964 }
2965
2966 #[test]
2967 fn cleanup_removes_noncanonical_and_empty_indexes() {
2968 let (_d, store) = mk_store();
2969 write_doc(
2970 &store,
2971 "sources/emails/2026/05/a.md",
2972 "email",
2973 Some("Mail"),
2974 Some("2026-05-01T00:00:00Z"),
2975 "",
2976 );
2977 fs::write(
2979 store.root.join("sources/emails/2026/05/index.md"),
2980 "stale\n",
2981 )
2982 .unwrap();
2983 fs::write(
2984 store.root.join("sources/emails/2026/05/index.jsonl"),
2985 "stale\n",
2986 )
2987 .unwrap();
2988 fs::create_dir_all(store.root.join("records/empty")).unwrap();
2990 fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();
2991
2992 Index::cleanup(&store).unwrap();
2993
2994 assert!(
2995 !exists(&store, "sources/emails/2026/05/index.md"),
2996 "shard index must be deleted"
2997 );
2998 assert!(
2999 !exists(&store, "sources/emails/2026/05/index.jsonl"),
3000 "shard jsonl must be deleted"
3001 );
3002 assert!(
3003 !exists(&store, "records/empty/index.md"),
3004 "empty-folder index must be deleted"
3005 );
3006 assert!(exists(&store, "sources/emails/2026/05/a.md"));
3008 }
3009
3010 #[test]
3011 fn rebuild_deletes_stale_indexes_for_emptied_folders() {
3012 let (_d, store) = mk_store();
3013 write_doc(
3014 &store,
3015 "records/contacts/a.md",
3016 "contact",
3017 Some("A"),
3018 Some("2026-05-01T00:00:00Z"),
3019 "",
3020 );
3021 Index::rebuild_all(&store).unwrap();
3022 assert!(exists(&store, "records/contacts/index.md"));
3023 assert!(exists(&store, "records/index.md"));
3024 assert!(exists(&store, "index.md"));
3025
3026 fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
3028 Index::rebuild_all(&store).unwrap();
3029 assert!(
3030 !exists(&store, "records/contacts/index.md"),
3031 "emptied type-folder index gone"
3032 );
3033 assert!(
3034 !exists(&store, "records/index.md"),
3035 "now-empty layer index gone"
3036 );
3037 assert!(!exists(&store, "index.md"), "now-empty root index gone");
3038 }
3039
3040 #[test]
3043 fn property_writethrough_equals_rebuild_under_mixed_ops() {
3044 let (_d1, wt) = mk_store();
3046 let (_d2, rb) = mk_store();
3047 let mut seed: u64 = 0x9E3779B97F4A7C15;
3048 let mut next = || {
3049 seed = seed
3050 .wrapping_mul(6364136223846793005)
3051 .wrapping_add(1442695040888963407);
3052 (seed >> 33) as u32
3053 };
3054
3055 let folders = ["sources/emails", "records/contacts", "records/profiles"];
3056 let types = ["email", "contact", "profile"];
3057 let mut live: Vec<String> = Vec::new(); for step in 0..120u32 {
3060 let r = next();
3061 let op = r % 10;
3062 if op < 6 || live.is_empty() {
3063 let fi = (next() as usize) % folders.len();
3065 let folder = folders[fi];
3066 let id = next() % 40;
3067 let rel = if folder == "sources/emails" {
3068 let month = 5 + (id % 2); format!("{folder}/2026/{month:02}/f-{id:02}.md")
3070 } else {
3071 format!("{folder}/f-{id:02}.md")
3072 };
3073 let updated = format!(
3075 "2026-05-{:02}T{:02}:{:02}:00Z",
3076 1 + (step % 27),
3077 step % 24,
3078 id % 60
3079 );
3080 let extra = if id % 3 == 0 {
3081 "tags:\n - x\n - y\n"
3082 } else {
3083 ""
3084 };
3085 write_doc(
3086 &wt,
3087 &rel,
3088 types[fi],
3089 Some(&format!("sum {step}")),
3090 Some(&updated),
3091 extra,
3092 );
3093 write_doc(
3094 &rb,
3095 &rel,
3096 types[fi],
3097 Some(&format!("sum {step}")),
3098 Some(&updated),
3099 extra,
3100 );
3101 Index::on_write(&wt, Path::new(&rel)).unwrap();
3102 if !live.contains(&rel) {
3103 live.push(rel);
3104 }
3105 } else if op < 8 {
3106 let idx = (next() as usize) % live.len();
3108 let rel = live.remove(idx);
3109 fs::remove_file(wt.root.join(&rel)).unwrap();
3110 fs::remove_file(rb.root.join(&rel)).ok();
3111 Index::on_remove(&wt, Path::new(&rel)).unwrap();
3112 } else {
3113 let idx = (next() as usize) % live.len();
3115 let old = live[idx].clone();
3116 let fi = (next() as usize) % folders.len();
3118 let folder = folders[fi];
3119 let id = 50 + (next() % 40);
3120 let new = if folder == "sources/emails" {
3121 format!("{folder}/2026/05/f-{id:02}.md")
3122 } else {
3123 format!("{folder}/f-{id:02}.md")
3124 };
3125 if new == old || live.contains(&new) {
3126 continue;
3127 }
3128 fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
3129 fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
3130 fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
3131 fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
3132 Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
3133 live[idx] = new;
3134 }
3135 }
3136
3137 Index::rebuild_all(&rb).unwrap();
3139 let a = snapshot_artifacts(&wt);
3140 let b = snapshot_artifacts(&rb);
3141 assert_eq!(
3142 a.keys().collect::<BTreeSet<_>>(),
3143 b.keys().collect::<BTreeSet<_>>(),
3144 "write-through and rebuild must produce the same set of artifacts"
3145 );
3146 for (k, v) in &a {
3147 assert_eq!(
3148 v, &b[k],
3149 "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
3150 b[k]
3151 );
3152 }
3153 assert!(
3154 !a.is_empty(),
3155 "the run must have produced at least one artifact"
3156 );
3157 }
3158
3159 #[test]
3165 fn cleanup_preserves_user_content_named_index_md_in_shard() {
3166 let (_d, store) = mk_store();
3167 write_doc(
3169 &store,
3170 "sources/emails/2026/06/index.md",
3171 "email",
3172 Some("Important imported mail"),
3173 Some("2026-06-11T04:23:25Z"),
3174 "",
3175 );
3176 Index::cleanup(&store).unwrap();
3177 assert!(
3178 exists(&store, "sources/emails/2026/06/index.md"),
3179 "cleanup must not delete a user content file named index.md"
3180 );
3181 Index::rebuild_all(&store).unwrap();
3183 assert!(
3184 exists(&store, "sources/emails/2026/06/index.md"),
3185 "rebuild_all must not delete a user content file named index.md"
3186 );
3187 let kept = read(&store, "sources/emails/2026/06/index.md");
3188 assert!(
3189 kept.contains("Important imported mail"),
3190 "the user's record content must be intact"
3191 );
3192 }
3193
3194 #[test]
3199 fn cleanup_keeps_canonical_type_folder_root_sidecars() {
3200 let (_d, store) = mk_store();
3201 write_doc(
3202 &store,
3203 "records/contacts/alice.md",
3204 "contact",
3205 Some("Alice"),
3206 Some("2026-05-01T00:00:00Z"),
3207 "",
3208 );
3209 Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
3210 assert!(exists(&store, "records/contacts/index.md"));
3211 assert!(exists(&store, "records/contacts/index.jsonl"));
3212 Index::cleanup(&store).unwrap();
3213 assert!(
3214 exists(&store, "records/contacts/index.md"),
3215 "cleanup must keep the canonical type-folder index.md (non-empty folder)"
3216 );
3217 assert!(
3218 exists(&store, "records/contacts/index.jsonl"),
3219 "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)"
3220 );
3221 }
3222
3223 #[test]
3229 fn on_write_ignores_index_artifact_no_phantom_row() {
3230 let (_d, store) = mk_store();
3231 write_doc(
3232 &store,
3233 "records/contacts/alice.md",
3234 "contact",
3235 Some("Alice"),
3236 Some("2026-05-01T00:00:00Z"),
3237 "",
3238 );
3239 Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
3240 let jsonl_before = read(&store, "records/contacts/index.jsonl");
3241 assert_eq!(jsonl_before.lines().count(), 1);
3242
3243 Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();
3245
3246 let jsonl_after = read(&store, "records/contacts/index.jsonl");
3247 assert_eq!(
3248 jsonl_after.lines().count(),
3249 1,
3250 "on_write on index.md must not add a phantom self-row"
3251 );
3252 assert!(
3253 !jsonl_after.contains("\"type\":\"index\""),
3254 "the catalog artifact must never appear as a catalogued row"
3255 );
3256 let root = read(&store, "index.md");
3258 assert!(
3259 root.contains("[[records/contacts/index|Contacts]] (1)"),
3260 "count must not inflate:\n{root}"
3261 );
3262 }
3263
3264 #[test]
3270 fn multiline_summary_is_single_lined_in_index_md() {
3271 let (_d, store) = mk_store();
3272 write_raw(
3274 &store,
3275 "records/notes/evil.md",
3276 "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry",
3277 "\nbody\n",
3278 );
3279 let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
3280 let md = idx.to_markdown();
3281 let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
3283 assert_eq!(
3284 entry_lines, 1,
3285 "a multi-line summary must not produce extra entry lines:\n{md}"
3286 );
3287 assert!(
3288 md.contains(
3289 "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n"
3290 ),
3291 "summary newlines must collapse to spaces inline:\n{md}"
3292 );
3293 }
3294
3295 #[test]
3303 fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
3304 let (_d, store) = mk_store();
3305 write_raw(
3306 &store,
3307 "records/contacts/a.md",
3308 "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
3309 "\nbody\n",
3310 );
3311 let rec = record_from_file(
3312 &store.root.join("records/contacts/a.md"),
3313 PathBuf::from("records/contacts/a.md"),
3314 )
3315 .unwrap();
3316 assert_eq!(rec.summary, "2026");
3319 assert_eq!(rec.type_, "contact");
3320
3321 let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
3323 let md = idx.to_markdown();
3324 assert!(
3325 md.contains("- [[records/contacts/a]] — 2026\n"),
3326 "index entry must hold the coerced scalar, not the placeholder:\n{md}"
3327 );
3328
3329 write_raw(
3331 &store,
3332 "records/contacts/b.md",
3333 "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
3334 "\nbody\n",
3335 );
3336 let rec_b = record_from_file(
3337 &store.root.join("records/contacts/b.md"),
3338 PathBuf::from("records/contacts/b.md"),
3339 )
3340 .unwrap();
3341 assert_eq!(rec_b.type_, "true");
3342 }
3343
3344 #[test]
3352 fn non_utf8_body_does_not_abort_record_projection() {
3353 let (_d, store) = mk_store();
3354 let rel = "sources/emails/2026/06/x.md";
3355 let abs = store.root.join(rel);
3356 fs::create_dir_all(abs.parent().unwrap()).unwrap();
3357 let mut bytes: Vec<u8> =
3359 b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf"
3360 .to_vec();
3361 bytes.push(0xE9);
3362 bytes.extend_from_slice(b" meeting notes\n");
3363 fs::write(&abs, bytes).unwrap();
3364
3365 let rec = record_from_file(&abs, PathBuf::from(rel))
3366 .expect("non-UTF-8 body must not abort the frontmatter read");
3367 assert_eq!(rec.summary, "An imported email");
3368 assert_eq!(rec.type_, "email");
3369
3370 Index::rebuild_all(&store).unwrap();
3372 assert!(
3373 exists(&store, "sources/emails/index.jsonl"),
3374 "rebuild must produce the catalog despite a non-UTF-8 body byte"
3375 );
3376 assert!(
3377 read(&store, "sources/emails/index.jsonl").contains("An imported email"),
3378 "the record must be catalogued"
3379 );
3380 }
3381
3382 #[test]
3391 fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
3392 let (_d, store) = mk_store();
3393 write_doc(
3394 &store,
3395 "records/contacts/alice.md",
3396 "contact",
3397 Some("Alice"),
3398 Some("2026-05-01T00:00:00Z"),
3399 "",
3400 );
3401 write_doc(
3402 &store,
3403 "records/companies/acme.md",
3404 "company",
3405 Some("Acme"),
3406 Some("2026-05-02T00:00:00Z"),
3407 "",
3408 );
3409
3410 Index::rebuild_all(&store).expect("clean rebuild succeeds");
3412 assert!(exists(&store, "records/contacts/index.jsonl"));
3413 assert!(exists(&store, "records/companies/index.jsonl"));
3414
3415 let bad = store.root.join("records/contacts/broken.md");
3417 fs::write(
3418 &bad,
3419 "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n",
3420 )
3421 .unwrap();
3422
3423 Index::rebuild_all(&store)
3426 .expect_err("rebuild must abort, not silently skip, on a malformed file");
3427
3428 assert!(
3432 exists(&store, "records/companies/index.jsonl"),
3433 "an aborted rebuild must not destroy a clean sibling folder's catalog"
3434 );
3435 assert!(
3436 exists(&store, "records/contacts/index.jsonl"),
3437 "an aborted rebuild must not destroy the affected folder's prior catalog"
3438 );
3439 let contacts_jsonl = read(&store, "records/contacts/index.jsonl");
3440 assert!(contacts_jsonl.contains("records/contacts/alice.md"));
3441 }
3442
3443 #[test]
3456 fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
3457 let (_d, store) = mk_store();
3458 write_doc(
3462 &store,
3463 "records/contacts/alice.md",
3464 "contact",
3465 Some("Alice"),
3466 Some("2026-05-01T00:00:00Z"),
3467 "",
3468 );
3469 write_doc(
3470 &store,
3471 "records/contacts/bob.md",
3472 "contact",
3473 Some("Bob"),
3474 Some("2026-05-02T00:00:00Z"),
3475 "",
3476 );
3477 Index::rebuild_all(&store).expect("clean rebuild succeeds");
3478
3479 let jsonl_lines = read(&store, "records/contacts/index.jsonl")
3481 .lines()
3482 .filter(|l| !l.trim().is_empty())
3483 .count();
3484 assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");
3485 let layer_md = read(&store, "records/index.md");
3486 let root_md = read(&store, "index.md");
3487 assert!(
3488 layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
3489 "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
3490 );
3491 assert!(
3492 root_md.contains("- [[records/contacts/index|Contacts]] (2)\n")
3493 && root_md.contains("## Records (2)"),
3494 "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
3495 );
3496
3497 let (_d2, wt) = mk_store();
3504 write_doc(
3505 &wt,
3506 "records/contacts/alice.md",
3507 "contact",
3508 Some("Alice"),
3509 Some("2026-05-01T00:00:00Z"),
3510 "",
3511 );
3512 write_doc(
3513 &wt,
3514 "records/contacts/bob.md",
3515 "contact",
3516 Some("Bob"),
3517 Some("2026-05-02T00:00:00Z"),
3518 "",
3519 );
3520 Index::on_write(&wt, Path::new("records/contacts/alice.md")).unwrap();
3521 Index::on_write(&wt, Path::new("records/contacts/bob.md")).unwrap();
3522
3523 let a = snapshot_artifacts(&wt);
3524 let b = snapshot_artifacts(&store);
3525 assert_eq!(
3526 a.keys().collect::<BTreeSet<_>>(),
3527 b.keys().collect::<BTreeSet<_>>(),
3528 "write-through and rebuild_all must produce the same artifact set"
3529 );
3530 for (k, v) in &a {
3531 assert_eq!(
3532 v, &b[k],
3533 "rollup bytes diverged between write-through and rebuild_all for {k} \
3534 (a skip-version inflates rebuild_all's (N) above the jsonl record \
3535 count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
3536 b[k]
3537 );
3538 }
3539 }
3540
3541 #[cfg(unix)]
3546 #[test]
3547 fn non_utf8_path_component_is_kept_not_dropped() {
3548 use std::ffi::OsStr;
3549 use std::os::unix::ffi::OsStrExt;
3550 let mut leaf = b"caf".to_vec();
3552 leaf.push(0xE9);
3553 leaf.extend_from_slice(b".md");
3554 let p = Path::new("sources/emails").join(OsStr::from_bytes(&leaf));
3555 let unix = path_to_unix(&p);
3556 assert_ne!(
3559 unix, "sources/emails",
3560 "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
3561 );
3562 assert!(
3563 unix.starts_with("sources/emails/caf"),
3564 "the lossy leaf must remain under its folder: {unix}"
3565 );
3566 }
3567
3568 #[test]
3571 fn loose_file_is_catalogued_in_layer_jsonl_not_type_folder() {
3572 let (_d, store) = mk_store();
3573 write_doc(
3575 &store,
3576 "records/contacts/alice.md",
3577 "contact",
3578 Some("Alice"),
3579 Some("2026-06-01T08:00:00Z"),
3580 "id: alice\n",
3581 );
3582 write_doc(
3583 &store,
3584 "records/loose.md",
3585 "contact",
3586 Some("Loose"),
3587 Some("2026-06-01T08:00:00Z"),
3588 "id: loose\n",
3589 );
3590 Index::rebuild_all(&store).unwrap();
3591
3592 assert!(
3595 exists(&store, "records/index.jsonl"),
3596 "layer jsonl must exist when loose files are present"
3597 );
3598 let layer_jsonl = read(&store, "records/index.jsonl");
3599 assert!(
3600 layer_jsonl.contains("records/loose.md"),
3601 "layer jsonl must list the loose file, got:\n{layer_jsonl}"
3602 );
3603 assert!(
3604 !layer_jsonl.contains("records/contacts/alice.md"),
3605 "layer jsonl must NOT list type-folder files"
3606 );
3607 let tf_jsonl = read(&store, "records/contacts/index.jsonl");
3608 assert!(tf_jsonl.contains("records/contacts/alice.md"));
3609 assert!(!tf_jsonl.contains("records/loose.md"));
3610
3611 let layer_md = read(&store, "records/index.md");
3613 assert!(
3614 layer_md.contains("records/contacts/index"),
3615 "layer md must roll up the type-folder, got:\n{layer_md}"
3616 );
3617 assert!(
3618 !layer_md.contains("records/loose"),
3619 "layer md must stay a rollup, not list loose files, got:\n{layer_md}"
3620 );
3621 }
3622
3623 #[test]
3624 fn loose_file_write_through_equals_rebuild() {
3625 let (_d1, wt) = mk_store();
3626 let (_d2, rb) = mk_store();
3627 for s in [&wt, &rb] {
3628 write_doc(
3629 s,
3630 "records/contacts/alice.md",
3631 "contact",
3632 Some("Alice"),
3633 Some("2026-06-01T08:00:00Z"),
3634 "id: alice\n",
3635 );
3636 write_doc(
3637 s,
3638 "records/loose.md",
3639 "contact",
3640 Some("Loose"),
3641 Some("2026-06-02T08:00:00Z"),
3642 "id: loose\n",
3643 );
3644 }
3645 Index::on_write(&wt, Path::new("records/contacts/alice.md")).unwrap();
3647 Index::on_write(&wt, Path::new("records/loose.md")).unwrap();
3648 Index::rebuild_all(&rb).unwrap();
3649
3650 let a = snapshot_artifacts(&wt);
3651 let b = snapshot_artifacts(&rb);
3652 assert_eq!(
3653 a.keys().collect::<Vec<_>>(),
3654 b.keys().collect::<Vec<_>>(),
3655 "loose-file loop and sweep must produce the same artifact set"
3656 );
3657 for (k, v) in &a {
3658 assert_eq!(
3659 v, &b[k],
3660 "loose-file artifact {k} differs between loop and sweep"
3661 );
3662 }
3663 }
3664
3665 #[test]
3666 fn removing_last_loose_file_clears_layer_jsonl() {
3667 let (_d, store) = mk_store();
3668 write_doc(
3669 &store,
3670 "records/loose.md",
3671 "contact",
3672 Some("Loose"),
3673 Some("2026-06-01T08:00:00Z"),
3674 "id: loose\n",
3675 );
3676 Index::on_write(&store, Path::new("records/loose.md")).unwrap();
3677 assert!(
3678 exists(&store, "records/index.jsonl"),
3679 "layer jsonl present after a loose write"
3680 );
3681 fs::remove_file(store.root.join("records/loose.md")).unwrap();
3682 Index::on_remove(&store, Path::new("records/loose.md")).unwrap();
3683 assert!(
3684 !exists(&store, "records/index.jsonl"),
3685 "layer jsonl must be removed once the last loose file is gone"
3686 );
3687 }
3688}