1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::parser::FolderMeta;
62use crate::store::{Layer, Store};
63
/// Maximum number of records listed in a type-folder `index.md`. When a folder
/// holds more records than this, the rendered markdown ends with a "More" footer.
const MD_CAP: usize = 500;

/// Summary text stored for a file whose frontmatter has no usable `summary` value.
const MISSING_SUMMARY: &str = "(no summary)";

/// Heading written at the top of the root-level `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
74
/// The level of the store hierarchy that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The index for the whole store.
    Root,
    /// The index for a single layer.
    Layer(Layer),
    /// The index for one type folder; the builders in this file store the
    /// folder path relative to the store root.
    TypeFolder(PathBuf),
}
85
/// One catalog entry describing a single file, as written to `index.jsonl`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Path of the described file; (de)serialized through the `path_serde` module.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// Value of the frontmatter `type` key; serialized under the name `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// Value of the frontmatter `summary` key.
    pub summary: String,
    /// Tags of the file; an absent key deserializes to an empty list.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Links of the file; an absent key deserializes to an empty list.
    #[serde(default)]
    pub links: Vec<String>,
    /// Creation timestamp, when one is available.
    pub created: Option<DateTime<FixedOffset>>,
    /// Last-update timestamp, when one is available; used to order records.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other key, flattened into the same JSON object as the fields above.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
122
/// An in-memory index for one level of the store, renderable as markdown or JSONL.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// Which level of the hierarchy this index describes.
    pub level: IndexLevel,
    /// Records listed directly at this level (always empty for the root index).
    pub records: Vec<IndexRecord>,
    /// File count per child type folder; the type-folder builder leaves it empty.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
135
136impl Index {
137 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
143 let rel = normalize_rel(type_folder);
144 let abs = store.root.join(&rel);
145 let mut records = Vec::new();
146 for file_abs in walk_type_folder_files(&abs) {
147 let rel_path =
148 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
149 records.push(record_from_file(&file_abs, rel_path)?);
161 }
162 sort_records(&mut records);
163 Ok(Index {
164 level: IndexLevel::TypeFolder(rel),
165 records,
166 child_counts: BTreeMap::new(),
167 })
168 }
169
170 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
179 let mut child_counts = BTreeMap::new();
180 for tf in type_folders_in_layer(store, layer) {
181 let abs = store.root.join(&tf);
182 let n = walk_type_folder_files(&abs).len();
183 if n > 0 {
184 child_counts.insert(tf, n);
185 }
186 }
187 let mut records = Vec::new();
188 for file_abs in loose_files_in_layer(store, layer) {
189 let rel_path =
190 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
191 records.push(record_from_file(&file_abs, rel_path)?);
196 }
197 sort_records(&mut records);
198 Ok(Index {
199 level: IndexLevel::Layer(layer),
200 records,
201 child_counts,
202 })
203 }
204
205 pub fn build_root(store: &Store) -> crate::Result<Index> {
208 let mut child_counts = BTreeMap::new();
209 for layer in Layer::all() {
210 for tf in type_folders_in_layer(store, layer) {
211 let abs = store.root.join(&tf);
212 let n = walk_type_folder_files(&abs).len();
213 if n > 0 {
214 child_counts.insert(tf, n);
215 }
216 }
217 }
218 Ok(Index {
219 level: IndexLevel::Root,
220 records: Vec::new(),
221 child_counts,
222 })
223 }
224
225 pub fn to_markdown(&self) -> String {
227 match &self.level {
228 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
229 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
230 IndexLevel::Root => self.render_root_md(),
231 }
232 }
233
234 pub fn to_jsonl(&self) -> String {
240 let mut out = String::new();
241 for rec in &self.records {
242 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
245 out.push_str(&line);
246 out.push('\n');
247 }
248 out
249 }
250
251 fn render_type_folder_md(&self, folder: &Path) -> String {
254 let folder_disp = path_to_unix(folder);
255 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
256 let mut s = String::new();
257 s.push_str("---\n");
258 s.push_str("type: index\n");
259 s.push_str("scope: type-folder\n");
260 s.push_str(&format!("folder: {folder_disp}\n"));
261 if let Some(ts) = updated {
262 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
263 }
264 s.push_str("---\n\n");
265 s.push_str(&format!("# {folder_disp}\n\n"));
266
267 let shown = self.records.len().min(MD_CAP);
268 for rec in self.records.iter().take(shown) {
269 s.push_str(&format_md_entry(rec));
270 s.push('\n');
271 }
272
273 if self.records.len() > MD_CAP {
274 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
275 let layer = folder
276 .components()
277 .next()
278 .and_then(|c| c.as_os_str().to_str())
279 .unwrap_or("");
280 s.push('\n');
281 s.push_str(&more_footer(self.records.len(), type_, layer));
282 }
283 s
284 }
285
286 fn render_layer_md(&self, layer: Layer) -> String {
291 let layer_dir = layer_dir_name(layer);
292 let mut s = String::new();
293 s.push_str("---\n");
294 s.push_str("type: index\n");
295 s.push_str("scope: layer\n");
296 s.push_str(&format!("folder: {layer_dir}\n"));
297 s.push_str("---\n\n");
298 s.push_str(&format!("# {layer_dir}\n\n"));
299 for (tf, n) in &self.child_counts {
300 let tf_unix = path_to_unix(tf);
301 let display = capitalize(folder_basename(tf));
302 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
303 }
304 s
305 }
306
307 fn render_root_md(&self) -> String {
310 let mut s = String::new();
311 s.push_str("---\n");
312 s.push_str("type: index\n");
313 s.push_str("scope: root\n");
314 s.push_str("---\n\n");
315 s.push_str(&format!("# {ROOT_TITLE}\n"));
316 for layer in Layer::all() {
317 let layer_dir = layer_dir_name(layer);
318 let prefix = format!("{layer_dir}/");
319 let children: Vec<(&PathBuf, &usize)> = self
320 .child_counts
321 .iter()
322 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
323 .collect();
324 if children.is_empty() {
325 continue;
326 }
327 let total: usize = children.iter().map(|(_, n)| **n).sum();
328 s.push('\n');
329 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
330 for (tf, n) in children {
331 let tf_unix = path_to_unix(tf);
332 let display = capitalize(folder_basename(tf));
333 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
334 }
335 }
336 s
337 }
338}
339
340impl Index {
345 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
352 let file_rel = normalize_rel(file);
353 if is_index_artifact(&file_rel) {
360 return Ok(());
361 }
362 if let Some(layer) = loose_layer_of(&file_rel) {
366 return apply_loose_change(store, layer, &file_rel, false);
367 }
368 let file_abs = store.root.join(&file_rel);
369 let folder = type_folder_of(&file_rel)
370 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
371 let record = record_from_file(&file_abs, file_rel.clone())?;
372
373 let _lock = FolderLock::acquire(&store.root.join(&folder));
376 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
377 records.retain(|r| r.path != record.path);
378 records.push(record);
379 sort_records(&mut records);
380
381 write_type_folder_artifacts(store, &folder, &records)?;
382 update_parents(store, &folder)?;
383 Ok(())
384 }
385
386 pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
390 let old_rel = normalize_rel(old);
391 let new_rel = normalize_rel(new);
392 if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
396 return Ok(());
397 }
398 if loose_layer_of(&old_rel).is_some() || loose_layer_of(&new_rel).is_some() {
404 Self::on_remove(store, &old_rel)?;
405 Self::on_write(store, &new_rel)?;
406 return Ok(());
407 }
408 let old_folder = type_folder_of(&old_rel)
409 .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
410 let new_folder = type_folder_of(&new_rel)
411 .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
412
413 let _locks = lock_folders(store, &old_folder, &new_folder);
417
418 let mut old_records =
420 read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
421 old_records.retain(|r| r.path != old_rel);
422
423 if old_folder == new_folder {
424 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
426 old_records.retain(|r| r.path != record.path);
427 old_records.push(record);
428 sort_records(&mut old_records);
429 write_type_folder_artifacts(store, &old_folder, &old_records)?;
430 update_parents(store, &old_folder)?;
431 return Ok(());
432 }
433
434 sort_records(&mut old_records);
437 write_type_folder_artifacts(store, &old_folder, &old_records)?;
438
439 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
440 let mut new_records =
441 read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
442 new_records.retain(|r| r.path != record.path);
443 new_records.push(record);
444 sort_records(&mut new_records);
445 write_type_folder_artifacts(store, &new_folder, &new_records)?;
446
447 update_parents(store, &old_folder)?;
448 update_parents(store, &new_folder)?;
449 Ok(())
450 }
451
452 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
457 let file_rel = normalize_rel(file);
458 if is_index_artifact(&file_rel) {
461 return Ok(());
462 }
463 if let Some(layer) = loose_layer_of(&file_rel) {
465 return apply_loose_change(store, layer, &file_rel, true);
466 }
467 let folder = type_folder_of(&file_rel)
468 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
469 let _lock = FolderLock::acquire(&store.root.join(&folder));
471 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
472 let before = records.len();
473 records.retain(|r| r.path != file_rel);
474 if records.len() == before {
475 }
478 sort_records(&mut records);
479 write_type_folder_artifacts(store, &folder, &records)?;
480 update_parents(store, &folder)?;
481 Ok(())
482 }
483
484 pub fn rebuild_all(store: &Store) -> crate::Result<()> {
488 Index::cleanup(store)?;
489 for layer in Layer::all() {
490 for tf in type_folders_in_layer(store, layer) {
491 let idx = Index::build_type_folder(store, &tf)?;
492 if idx.records.is_empty() {
493 continue;
494 }
495 write_type_folder_artifacts(store, &tf, &idx.records)?;
496 }
497 let layer_idx = Index::build_layer(store, layer)?;
498 let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
499 if layer_idx.child_counts.is_empty() {
500 remove_if_exists(&layer_index_md)?;
501 } else {
502 write_atomic(
503 &layer_index_md,
504 render_layer_md_with_store(store, &layer_idx),
505 )?;
506 }
507 write_layer_jsonl(store, layer, &layer_idx.records)?;
511 }
512 let root_idx = Index::build_root(store)?;
513 let root_index_md = store.root.join("index.md");
514 if root_idx.child_counts.is_empty() {
515 remove_if_exists(&root_index_md)?;
516 } else {
517 write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
518 }
519 Ok(())
520 }
521
522 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
529 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
530 update_parents(store, folder)
531 }
532
533 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
535 match level {
536 IndexLevel::TypeFolder(folder) => {
537 let idx = Index::build_type_folder(store, folder)?;
538 if idx.records.is_empty() {
539 remove_if_exists(&store.root.join(folder).join("index.md"))?;
540 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
541 } else {
542 write_type_folder_artifacts(store, folder, &idx.records)?;
543 }
544 }
545 IndexLevel::Layer(layer) => {
546 let idx = Index::build_layer(store, *layer)?;
547 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
548 if idx.child_counts.is_empty() {
549 remove_if_exists(&p)?;
550 } else {
551 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
552 }
553 write_layer_jsonl(store, *layer, &idx.records)?;
554 }
555 IndexLevel::Root => {
556 let idx = Index::build_root(store)?;
557 let p = store.root.join("index.md");
558 if idx.child_counts.is_empty() {
559 remove_if_exists(&p)?;
560 } else {
561 write_atomic(&p, render_root_md_with_store(store, &idx))?;
562 }
563 }
564 }
565 Ok(())
566 }
567
568 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
571 let mut out = String::new();
572 match level {
573 IndexLevel::TypeFolder(folder) => {
574 let idx = Index::build_type_folder(store, folder)?;
575 let md_path = path_to_unix(&folder.join("index.md"));
576 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
577 out.push_str(&format!("--- {md_path} ---\n"));
578 out.push_str(&idx.to_markdown());
579 out.push_str(&format!("--- {jsonl_path} ---\n"));
580 out.push_str(&idx.to_jsonl());
581 }
582 IndexLevel::Layer(layer) => {
583 let idx = Index::build_layer(store, *layer)?;
584 let md_path = format!("{}/index.md", layer_dir_name(*layer));
585 out.push_str(&format!("--- {md_path} ---\n"));
586 out.push_str(&render_layer_md_with_store(store, &idx));
587 }
588 IndexLevel::Root => {
589 let idx = Index::build_root(store)?;
590 out.push_str("--- index.md ---\n");
591 out.push_str(&render_root_md_with_store(store, &idx));
592 }
593 }
594 Ok(out)
595 }
596
597 pub fn cleanup(store: &Store) -> crate::Result<()> {
615 for layer in Layer::all() {
616 let layer_dir = store.root.join(layer_dir_name(layer));
617 if !layer_dir.is_dir() {
618 continue;
619 }
620 for tf in type_folders_in_layer(store, layer) {
621 let tf_abs = store.root.join(&tf);
622 for entry in walkdir::WalkDir::new(&tf_abs)
626 .min_depth(2)
627 .into_iter()
628 .filter_map(|e| e.ok())
629 {
630 let p = entry.path();
631 if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
632 remove_if_exists(p)?;
633 }
634 }
635 if walk_type_folder_files(&tf_abs).is_empty() {
639 let md = tf_abs.join("index.md");
640 if is_deletable_catalog_artifact(&md) {
641 remove_if_exists(&md)?;
642 }
643 remove_if_exists(&tf_abs.join("index.jsonl"))?;
644 }
645 }
646 }
647 Ok(())
648 }
649}
650
651fn write_type_folder_artifacts(
659 store: &Store,
660 folder: &Path,
661 records: &[IndexRecord],
662) -> crate::Result<()> {
663 let folder_abs = store.root.join(folder);
664 let md_path = folder_abs.join("index.md");
665 let jsonl_path = folder_abs.join("index.jsonl");
666 if records.is_empty() {
667 remove_if_exists(&md_path)?;
668 remove_if_exists(&jsonl_path)?;
669 return Ok(());
670 }
671 let idx = Index {
672 level: IndexLevel::TypeFolder(folder.to_path_buf()),
673 records: records.to_vec(),
674 child_counts: BTreeMap::new(),
675 };
676 write_atomic(&md_path, idx.to_markdown())?;
677 write_atomic(&jsonl_path, idx.to_jsonl())?;
678 Ok(())
679}
680
681fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
694 let _root_lock = FolderLock::acquire(&store.root);
727 let stats = collect_child_stats(store, &Layer::all())?;
728
729 let layer = folder
730 .components()
731 .next()
732 .and_then(|c| c.as_os_str().to_str())
733 .and_then(layer_from_dir_name);
734 if let Some(layer) = layer {
735 let p = store.root.join(layer_dir_name(layer)).join("index.md");
736 if layer_has_children(&stats, layer) {
737 write_atomic(
738 &p,
739 render_layer_md_from_stats(layer, &stats, &store.config.folders),
740 )?;
741 } else {
742 remove_if_exists(&p)?;
743 }
744 }
745 let rp = store.root.join("index.md");
746 if stats.values().any(|s| s.count > 0) {
747 write_atomic(
748 &rp,
749 render_root_md_from_stats(&stats, &store.config.folders),
750 )?;
751 } else {
752 remove_if_exists(&rp)?;
753 }
754 Ok(())
755}
756
757fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
759 let prefix = format!("{}/", layer_dir_name(layer));
760 stats
761 .iter()
762 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
763}
764
765fn render_layer_md_from_stats(
770 layer: Layer,
771 stats: &BTreeMap<PathBuf, FolderStat>,
772 folders: &BTreeMap<String, FolderMeta>,
773) -> String {
774 let layer_dir = layer_dir_name(layer);
775 let prefix = format!("{layer_dir}/");
776 let mut max_upd: Option<DateTime<FixedOffset>> = None;
777 let mut entries = String::new();
778 for (tf, stat) in stats {
779 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
780 continue;
781 }
782 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
783 max_upd = Some(match max_upd {
784 Some(cur) if cur >= u => cur,
785 _ => u,
786 });
787 }
788 let tf_unix = path_to_unix(tf);
789 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
790 entries.push_str(&folder_entry(&tf_unix, &display, stat.count, description));
791 }
792 let mut s = String::new();
793 s.push_str("---\n");
794 s.push_str("type: index\n");
795 s.push_str("scope: layer\n");
796 s.push_str(&format!("folder: {layer_dir}\n"));
797 if let Some(ts) = max_upd {
798 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
799 }
800 s.push_str("---\n\n");
801 s.push_str(&format!("# {layer_dir}\n\n"));
802 s.push_str(&entries);
803 s
804}
805
806fn render_root_md_from_stats(
808 stats: &BTreeMap<PathBuf, FolderStat>,
809 folders: &BTreeMap<String, FolderMeta>,
810) -> String {
811 let mut max_upd: Option<DateTime<FixedOffset>> = None;
812 for stat in stats.values() {
813 if stat.count == 0 {
814 continue;
815 }
816 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
817 max_upd = Some(match max_upd {
818 Some(cur) if cur >= u => cur,
819 _ => u,
820 });
821 }
822 }
823 let mut s = String::new();
824 s.push_str("---\n");
825 s.push_str("type: index\n");
826 s.push_str("scope: root\n");
827 if let Some(ts) = max_upd {
828 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
829 }
830 s.push_str("---\n\n");
831 s.push_str(&format!("# {ROOT_TITLE}\n"));
832 for layer in Layer::all() {
833 let layer_dir = layer_dir_name(layer);
834 let prefix = format!("{layer_dir}/");
835 let children: Vec<(&PathBuf, usize)> = stats
836 .iter()
837 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
838 .map(|(tf, s)| (tf, s.count))
839 .collect();
840 if children.is_empty() {
841 continue;
842 }
843 let total: usize = children.iter().map(|(_, n)| *n).sum();
844 s.push('\n');
845 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
846 for (tf, n) in children {
847 let tf_unix = path_to_unix(tf);
848 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
849 s.push_str(&folder_entry(&tf_unix, &display, n, description));
850 }
851 }
852 s
853}
854
855fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
862 let layer = match idx.level {
863 IndexLevel::Layer(l) => l,
864 _ => unreachable!("render_layer_md_with_store called on non-layer"),
865 };
866 let layer_dir = layer_dir_name(layer);
867 let mut max_upd: Option<DateTime<FixedOffset>> = None;
868 let mut entries = String::new();
869 for (tf, n) in &idx.child_counts {
870 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
871 let newest = recs.first();
872 if let Some(u) = newest.and_then(|r| r.updated) {
873 max_upd = Some(match max_upd {
874 Some(cur) if cur >= u => cur,
875 _ => u,
876 });
877 }
878 let tf_unix = path_to_unix(tf);
879 let (display, description) =
880 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
881 entries.push_str(&folder_entry(&tf_unix, &display, *n, description));
882 }
883 let mut s = String::new();
884 s.push_str("---\n");
885 s.push_str("type: index\n");
886 s.push_str("scope: layer\n");
887 s.push_str(&format!("folder: {layer_dir}\n"));
888 if let Some(ts) = max_upd {
889 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
890 }
891 s.push_str("---\n\n");
892 s.push_str(&format!("# {layer_dir}\n\n"));
893 s.push_str(&entries);
894 s
895}
896
897fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
901 let mut max_upd: Option<DateTime<FixedOffset>> = None;
902 for tf in idx.child_counts.keys() {
903 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
904 if let Some(u) = recs.first().and_then(|r| r.updated) {
905 max_upd = Some(match max_upd {
906 Some(cur) if cur >= u => cur,
907 _ => u,
908 });
909 }
910 }
911 let mut s = String::new();
912 s.push_str("---\n");
913 s.push_str("type: index\n");
914 s.push_str("scope: root\n");
915 if let Some(ts) = max_upd {
916 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
917 }
918 s.push_str("---\n\n");
919 s.push_str(&format!("# {ROOT_TITLE}\n"));
920 for layer in Layer::all() {
921 let layer_dir = layer_dir_name(layer);
922 let prefix = format!("{layer_dir}/");
923 let children: Vec<(&PathBuf, &usize)> = idx
924 .child_counts
925 .iter()
926 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
927 .collect();
928 if children.is_empty() {
929 continue;
930 }
931 let total: usize = children.iter().map(|(_, n)| **n).sum();
932 s.push('\n');
933 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
934 for (tf, n) in children {
935 let tf_unix = path_to_unix(tf);
936 let (display, description) =
937 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
938 s.push_str(&folder_entry(&tf_unix, &display, *n, description));
939 }
940 }
941 s
942}
943
944fn format_md_entry(rec: &IndexRecord) -> String {
950 let path = wiki_target(&rec.path);
951 let summary = collapse_whitespace(&rec.summary);
960 let mut line = format!("- [[{path}]] — {summary}");
961 if !rec.tags.is_empty() {
962 let tags = rec
963 .tags
964 .iter()
965 .map(|t| format!("#{t}"))
966 .collect::<Vec<_>>()
967 .join(" ");
968 line.push_str(&format!(" · {tags}"));
969 }
970 line
971}
972
973fn more_footer(total: usize, type_: &str, layer: &str) -> String {
975 format!(
976 "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd query --type {type_} --in {layer}` for the complete catalog.\n"
977 )
978}
979
980fn sort_records(records: &mut [IndexRecord]) {
984 records.sort_by(record_recency_cmp);
985}
986
987impl IndexRecord {
988 pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
1000 record_from_file(abs, rel)
1001 }
1002}
1003
1004fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
1007 let mut meta = read_frontmatter(abs)?;
1008 if rel.starts_with("records") {
1013 meta.fields
1014 .entry("meta-type".to_string())
1015 .or_insert_with(|| Value::String("fact".to_string()));
1016 }
1017 Ok(IndexRecord {
1018 path: rel,
1019 type_: meta.type_.unwrap_or_default(),
1020 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
1021 tags: meta.tags,
1022 links: meta.links,
1023 created: meta.created,
1024 updated: meta.updated,
1025 fields: meta.fields,
1026 })
1027}
1028
/// Frontmatter values extracted from one file by `read_frontmatter`.
struct FileMeta {
    /// Value of the `type` key, when it is a string, number or boolean.
    type_: Option<String>,
    /// Value of the `summary` key, when it is a string, number or boolean.
    summary: Option<String>,
    /// Entries of the `tags` key.
    tags: Vec<String>,
    /// Entries of the `links` key.
    links: Vec<String>,
    /// `created` key parsed as an RFC 3339 timestamp, when it parses.
    created: Option<DateTime<FixedOffset>>,
    /// `updated` key parsed as an RFC 3339 timestamp, when it parses.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining string-keyed entries (except `path`), converted to JSON values.
    fields: BTreeMap<String, Value>,
}
1039
1040fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
1054 let bytes = fs::read(abs)?;
1055 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
1056 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
1057 serde_norway::Mapping::new()
1058 } else {
1059 serde_norway::from_str(&yaml).map_err(|e| {
1060 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1061 path: abs.to_path_buf(),
1062 message: format!("frontmatter YAML: {e}"),
1063 })
1064 })?
1065 };
1066
1067 let mut type_ = None;
1068 let mut summary = None;
1069 let mut tags = Vec::new();
1070 let mut links = Vec::new();
1071 let mut created = None;
1072 let mut updated = None;
1073 let mut fields = BTreeMap::new();
1074
1075 for (k, v) in map {
1076 let key = match k.as_str() {
1077 Some(s) => s.to_string(),
1078 None => continue,
1079 };
1080 match key.as_str() {
1081 "type" => type_ = scalar_string(&v),
1091 "summary" => summary = scalar_string(&v),
1092 "tags" => tags = yaml_string_list(&v),
1093 "links" => links = yaml_string_list(&v),
1094 "created" => created = v.as_str().and_then(parse_ts),
1095 "updated" => updated = v.as_str().and_then(parse_ts),
1096 "path" => {}
1100 _ => {
1101 fields.insert(key, yaml_to_json_value(&v));
1102 }
1103 }
1104 }
1105
1106 Ok(FileMeta {
1107 type_,
1108 summary,
1109 tags,
1110 links,
1111 created,
1112 updated,
1113 fields,
1114 })
1115}
1116
1117fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1123 match v {
1124 serde_norway::Value::String(s) => Some(s.clone()),
1125 serde_norway::Value::Number(n) => Some(n.to_string()),
1126 serde_norway::Value::Bool(b) => Some(b.to_string()),
1127 _ => None,
1128 }
1129}
1130
1131fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1137 let text = String::from_utf8_lossy(bytes);
1142 extract_frontmatter_block(&text)
1143}
1144
/// Extracts the text between the opening and closing `---` fence lines.
///
/// A leading byte-order mark is ignored and trailing whitespace on fence lines
/// is tolerated. Every line of the returned block ends with `\n`. Returns
/// `None` when the first line is not a fence or the closing fence is missing.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = body.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }
    let mut block = String::new();
    loop {
        // Running out of lines before the closing fence means there is no block.
        let line = lines.next()?;
        if line.trim_end() == "---" {
            return Some(block);
        }
        block.push_str(line);
        block.push('\n');
    }
}
1164
1165fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1168 match v {
1169 serde_norway::Value::String(s) => vec![s.clone()],
1170 serde_norway::Value::Sequence(seq) => seq
1171 .iter()
1172 .filter_map(yaml_string_or_wiki_link_literal)
1173 .collect(),
1174 _ => Vec::new(),
1175 }
1176}
1177
1178fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1179 v.as_str()
1180 .map(str::to_string)
1181 .or_else(|| unquoted_wiki_link_literal(v))
1182}
1183
1184fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1185 if let Some(link) = unquoted_wiki_link_literal(v) {
1186 return Value::String(link);
1187 }
1188 match v {
1189 serde_norway::Value::String(s) => Value::String(s.clone()),
1190 serde_norway::Value::Bool(b) => Value::Bool(*b),
1191 serde_norway::Value::Number(n) => {
1192 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1193 }
1194 serde_norway::Value::Sequence(seq) => {
1195 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1196 }
1197 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1198 serde_json::to_value(v).unwrap_or(Value::Null)
1199 }
1200 serde_norway::Value::Null => Value::Null,
1201 }
1202}
1203
1204fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1205 let serde_norway::Value::Sequence(outer) = v else {
1206 return None;
1207 };
1208 if outer.len() != 1 {
1209 return None;
1210 }
1211 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1212 return None;
1213 };
1214 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1215 return None;
1216 };
1217 Some(format!("[[{target}]]"))
1218}
1219
1220fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
1222 DateTime::parse_from_rfc3339(s.trim()).ok()
1223}
1224
1225fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
1229 ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
1230}
1231
1232fn max_updated<'a>(
1234 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1235) -> Option<DateTime<FixedOffset>> {
1236 let mut best: Option<DateTime<FixedOffset>> = None;
1237 for ts in it.flatten() {
1238 best = Some(match best {
1239 Some(cur) if cur >= *ts => cur,
1240 _ => *ts,
1241 });
1242 }
1243 best
1244}
1245
1246fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1250 let text = match fs::read_to_string(jsonl) {
1251 Ok(t) => t,
1252 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1253 Err(e) => return Err(e.into()),
1254 };
1255 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1257 for (i, line) in text.lines().enumerate() {
1258 if line.trim().is_empty() {
1259 continue;
1260 }
1261 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1262 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1263 path: jsonl.to_path_buf(),
1264 message: format!("line {}: {e}", i + 1),
1265 })
1266 })?;
1267 by_path.insert(rec.path.clone(), rec);
1268 }
1269 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1270 sort_records(&mut records);
1271 Ok(records)
1272}
1273
/// Summary of one type folder's JSONL catalog, as computed by `read_folder_stat`.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the catalog.
    count: usize,
    /// The record that sorts first under `record_recency_cmp`, if any.
    newest: Option<IndexRecord>,
}
1285
1286fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1296 let text = match fs::read_to_string(jsonl) {
1297 Ok(t) => t,
1298 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1299 Err(e) => return Err(e.into()),
1300 };
1301 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1304 for (i, line) in text.lines().enumerate() {
1305 if line.trim().is_empty() {
1306 continue;
1307 }
1308 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1309 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1310 path: jsonl.to_path_buf(),
1311 message: format!("line {}: {e}", i + 1),
1312 })
1313 })?;
1314 by_path.insert(rec.path.clone(), rec);
1315 }
1316 let count = by_path.len();
1317 let newest = by_path.into_values().min_by(record_recency_cmp);
1321 Ok(FolderStat { count, newest })
1322}
1323
1324fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
1329 match (b.updated, a.updated) {
1330 (Some(bu), Some(au)) => bu.cmp(&au),
1331 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
1334 }
1335 .then_with(|| a.path.cmp(&b.path))
1336}
1337
1338fn collect_child_stats(
1351 store: &Store,
1352 layers: &[Layer],
1353) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1354 let mut stats = BTreeMap::new();
1355 for &layer in layers {
1356 for tf in type_folders_in_layer(store, layer) {
1357 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1358 if stat.count > 0 {
1359 stats.insert(tf, stat);
1360 }
1361 }
1362 }
1363 Ok(stats)
1364}
1365
1366fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1369 let mut out = Vec::new();
1370 if !folder_abs.is_dir() {
1371 return out;
1372 }
1373 for entry in walkdir::WalkDir::new(folder_abs)
1374 .into_iter()
1375 .filter_entry(|e| !is_hidden(e.file_name()))
1376 .filter_map(|e| e.ok())
1377 {
1378 if !entry.file_type().is_file() {
1379 continue;
1380 }
1381 let p = entry.path();
1382 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1383 continue;
1384 }
1385 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1386 continue;
1387 }
1388 out.push(p.to_path_buf());
1389 }
1390 out
1391}
1392
1393fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1396 let layer_dir = store.root.join(layer_dir_name(layer));
1397 let mut out = Vec::new();
1398 let rd = match fs::read_dir(&layer_dir) {
1399 Ok(rd) => rd,
1400 Err(_) => return out,
1401 };
1402 for entry in rd.flatten() {
1403 if !entry.path().is_dir() {
1404 continue;
1405 }
1406 let name = entry.file_name();
1407 let name = match name.to_str() {
1408 Some(n) => n,
1409 None => continue,
1410 };
1411 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1412 continue;
1413 }
1414 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1415 }
1416 out.sort();
1417 out
1418}
1419
1420fn loose_layer_of(file_rel: &Path) -> Option<Layer> {
1426 let mut comps = file_rel.components();
1427 let layer = layer_from_dir_name(comps.next()?.as_os_str().to_str()?)?;
1428 comps.next()?; if comps.next().is_some() {
1430 return None; }
1432 Some(layer)
1433}
1434
1435fn loose_files_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1439 let layer_dir = store.root.join(layer_dir_name(layer));
1440 let mut out = Vec::new();
1441 let rd = match fs::read_dir(&layer_dir) {
1442 Ok(rd) => rd,
1443 Err(_) => return out,
1444 };
1445 for entry in rd.flatten() {
1446 let p = entry.path();
1447 if !p.is_file() {
1448 continue;
1449 }
1450 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1451 continue;
1452 }
1453 if is_index_artifact(&p) || is_hidden(entry.file_name().as_os_str()) {
1454 continue;
1455 }
1456 out.push(p);
1457 }
1458 out
1459}
1460
1461fn write_layer_jsonl(store: &Store, layer: Layer, records: &[IndexRecord]) -> crate::Result<()> {
1466 let path = store.root.join(layer_dir_name(layer)).join("index.jsonl");
1467 if records.is_empty() {
1468 remove_if_exists(&path)?;
1469 return Ok(());
1470 }
1471 let idx = Index {
1472 level: IndexLevel::Layer(layer),
1473 records: records.to_vec(),
1474 child_counts: BTreeMap::new(),
1475 };
1476 write_atomic(&path, idx.to_jsonl())
1477}
1478
1479fn apply_loose_change(
1484 store: &Store,
1485 layer: Layer,
1486 file_rel: &Path,
1487 removing: bool,
1488) -> crate::Result<()> {
1489 let layer_dir = store.root.join(layer_dir_name(layer));
1490 let _lock = FolderLock::acquire(&layer_dir);
1491 let jsonl = layer_dir.join("index.jsonl");
1492 let mut records = read_jsonl_records(&jsonl)?;
1493 records.retain(|r| r.path != file_rel);
1494 if !removing {
1495 records.push(record_from_file(
1496 &store.root.join(file_rel),
1497 file_rel.to_path_buf(),
1498 )?);
1499 }
1500 sort_records(&mut records);
1501 write_layer_jsonl(store, layer, &records)
1502}
1503
1504fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1508 let mut comps = file_rel.components();
1509 let layer = comps.next()?.as_os_str().to_str()?;
1510 layer_from_dir_name(layer)?;
1511 let type_seg = comps.next()?.as_os_str().to_str()?;
1512 Some(PathBuf::from(layer).join(type_seg))
1513}
1514
/// Converts an absolute path into one relative to the store `root`.
///
/// Returns `None` when `abs` does not lie under `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(relative) => Some(relative.to_path_buf()),
        Err(_) => None,
    }
}
1519
1520fn normalize_rel(p: &Path) -> PathBuf {
1523 let s = path_to_unix(p);
1524 let s = s.strip_prefix("./").unwrap_or(&s);
1525 PathBuf::from(s)
1526}
1527
/// Reports whether the path's file name is one of the generated catalog
/// files, `index.md` or `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    p.file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| name == "index.md" || name == "index.jsonl")
}
1534
1535fn is_deletable_catalog_artifact(p: &Path) -> bool {
1549 match p.file_name().and_then(|n| n.to_str()) {
1550 Some("index.jsonl") => true,
1551 Some("index.md") => match read_frontmatter(p) {
1552 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1554 Err(_) => true,
1556 },
1557 _ => false,
1558 }
1559}
1560
/// Reports whether a file or directory name is hidden, i.e. starts with `.`.
///
/// The check is done on the name's encoded bytes rather than on its UTF-8
/// view, so a dot-name that is not valid UTF-8 is still recognized as hidden
/// instead of being classified as visible content. For valid UTF-8 names the
/// result is the same as `starts_with('.')`. An empty name is not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    // The encoding is a superset of UTF-8, so an ASCII `.` is always the
    // single byte 0x2E at the front.
    name.as_encoded_bytes().first() == Some(&b'.')
}
1564
1565fn layer_dir_name(layer: Layer) -> &'static str {
1566 match layer {
1567 Layer::Sources => "sources",
1568 Layer::Records => "records",
1569 }
1570}
1571
1572fn layer_from_dir_name(name: &str) -> Option<Layer> {
1575 match name {
1576 "sources" => Some(Layer::Sources),
1577 "records" => Some(Layer::Records),
1578 _ => None,
1579 }
1580}
1581
/// Returns the final path component as text, or `""` when the path has no
/// file name or that name is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name().map(|name| name.to_str()) {
        Some(Some(text)) => text,
        _ => "",
    }
}
1586
1587fn wiki_target(p: &Path) -> String {
1591 let unix = path_to_unix(p);
1592 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1593}
1594
/// Renders a path as its components joined by `/`.
///
/// Components are taken from [`Path::components`], so repeated separators
/// and trailing slashes collapse. Component text that is not valid UTF-8 is
/// converted lossily.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    for (position, part) in p.components().enumerate() {
        if position > 0 {
            unix.push('/');
        }
        unix.push_str(&part.as_os_str().to_string_lossy());
    }
    unix
}
1612
1613mod path_serde {
1619 use super::path_to_unix;
1620 use serde::{Deserialize, Deserializer, Serializer};
1621 use std::path::{Path, PathBuf};
1622
1623 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1624 s.serialize_str(&path_to_unix(p))
1625 }
1626
1627 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1628 Ok(PathBuf::from(String::deserialize(d)?))
1629 }
1630}
1631
/// Upper-cases the first character of `s` and leaves the rest untouched.
///
/// Uses Unicode upper-casing, so the first character may expand to several
/// (for example `ß` becomes `SS`). An empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return String::new();
    };
    let mut out: String = first.to_uppercase().collect();
    out.push_str(rest.as_str());
    out
}
1640
/// Replaces every run of whitespace with a single space and trims both ends.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means this is the first.
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
1648
1649fn default_display(basename: &str) -> String {
1655 let spaced: String = basename
1656 .chars()
1657 .map(|c| if c == '-' || c == '_' { ' ' } else { c })
1658 .collect();
1659 capitalize(&spaced)
1660}
1661
1662fn folder_label<'a>(
1669 tf_unix: &str,
1670 basename: &str,
1671 folders: &'a BTreeMap<String, FolderMeta>,
1672) -> (String, Option<&'a str>) {
1673 let meta = folders.get(tf_unix);
1674 let display = meta
1675 .and_then(|m| m.display.as_deref())
1676 .map(str::to_string)
1677 .unwrap_or_else(|| default_display(basename));
1678 (display, meta.and_then(|m| m.description.as_deref()))
1679}
1680
/// Formats one rollup bullet for a type folder: a wiki link to the folder's
/// index with its display text, the file count in parentheses, and, when a
/// description is given, the description after an em dash. The line always
/// ends with a newline.
fn folder_entry(tf_unix: &str, display: &str, count: usize, description: Option<&str>) -> String {
    description.map_or_else(
        || format!("- [[{tf_unix}/index|{display}]] ({count})\n"),
        |d| format!("- [[{tf_unix}/index|{display}]] ({count}) — {d}\n"),
    )
}
1689
1690fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1697 if let Some(parent) = path.parent() {
1698 fs::create_dir_all(parent)?;
1699 }
1700 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1701 let mut tmp = tempfile_in(dir)?;
1702 tmp.write_all(contents.as_bytes())?;
1703 tmp.flush()?;
1704 tmp.persist(path)?;
1705 Ok(())
1706}
1707
1708fn remove_if_exists(path: &Path) -> crate::Result<()> {
1709 match fs::remove_file(path) {
1710 Ok(()) => Ok(()),
1711 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1712 Err(e) => Err(e.into()),
1713 }
1714}
1715
1716fn bad_index(path: &Path, msg: &str) -> crate::Error {
1717 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1718 path: path.to_path_buf(),
1719 message: msg.to_string(),
1720 })
1721}
1722
/// Advisory per-folder lock backed by a `.index.lock` file inside the
/// folder.
///
/// The lock is released (the file removed) when the value is dropped, but
/// only if this instance actually created the file.
struct FolderLock {
    /// Location of the lock file.
    path: PathBuf,
    /// Whether this instance created the lock file and must remove it.
    held: bool,
}

impl FolderLock {
    /// Acquires the lock for `folder_abs`, blocking until it is available.
    ///
    /// The folder is created if missing (failure to create it is ignored).
    /// Acquisition is an exclusive create of `.index.lock`:
    ///
    /// * If the file already exists and was modified more than 30 seconds
    ///   ago, it is treated as stale and removed before retrying.
    /// * If it exists and is not stale (or its age cannot be determined),
    ///   the call sleeps 10 ms and retries.
    /// * On any other I/O error the call gives up and returns an *unheld*
    ///   lock, so the caller proceeds without mutual exclusion.
    ///
    /// A stale lock that cannot be removed also yields an unheld lock.
    /// Retrying in that case would spin forever without sleeping, because
    /// the path would keep reporting "already exists".
    fn acquire(folder_abs: &Path) -> Self {
        use std::io::ErrorKind;
        use std::time::{Duration, SystemTime};
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let path = folder_abs.join(".index.lock");
        let _ = fs::create_dir_all(folder_abs);
        loop {
            match fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path)
            {
                Ok(_) => return FolderLock { path, held: true },
                Err(e) if e.kind() == ErrorKind::AlreadyExists => {
                    let stale = fs::metadata(&path)
                        .and_then(|m| m.modified())
                        .ok()
                        .and_then(|t| SystemTime::now().duration_since(t).ok())
                        .map(|age| age > STALE_AFTER)
                        .unwrap_or(false);
                    if stale {
                        match fs::remove_file(&path) {
                            // Removed by us, or by a competitor in the
                            // meantime: either way, try to create it again.
                            Ok(()) => continue,
                            Err(e) if e.kind() == ErrorKind::NotFound => continue,
                            // The stale lock cannot be cleared; degrade to
                            // best-effort instead of busy-looping.
                            Err(_) => return FolderLock { path, held: false },
                        }
                    }
                    std::thread::sleep(SPIN);
                }
                Err(_) => return FolderLock { path, held: false },
            }
        }
    }
}

impl Drop for FolderLock {
    /// Removes the lock file if this instance holds it; errors are ignored.
    fn drop(&mut self) {
        if self.held {
            let _ = fs::remove_file(&self.path);
        }
    }
}
1821
1822fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1828 if a == b {
1829 return vec![FolderLock::acquire(&store.root.join(a))];
1830 }
1831 let (first, second) = if a < b { (a, b) } else { (b, a) };
1832 vec![
1833 FolderLock::acquire(&store.root.join(first)),
1834 FolderLock::acquire(&store.root.join(second)),
1835 ]
1836}
1837
/// A temporary file that is either renamed into place with
/// [`AtomicTemp::persist`] or deleted when dropped.
struct AtomicTemp {
    /// Open handle; taken (and thereby closed) by `persist` before the rename.
    file: Option<fs::File>,
    /// Location of the temporary file.
    path: PathBuf,
    /// Set once the rename succeeded, which disables cleanup on drop.
    persisted: bool,
}

impl AtomicTemp {
    /// Returns the open handle. Panics if it has already been taken.
    fn handle(&mut self) -> &mut fs::File {
        self.file.as_mut().expect("temp file open")
    }

    /// Writes all of `bytes` to the temporary file.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        self.handle().write_all(bytes)
    }

    /// Flushes the temporary file's writer.
    fn flush(&mut self) -> std::io::Result<()> {
        self.handle().flush()
    }

    /// Syncs the file to disk (ignoring sync failures), closes it, and
    /// renames it to `dest`.
    ///
    /// If the rename fails the error is returned and the temporary file is
    /// removed when `self` is dropped.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(open) = self.file.take() {
            let _ = open.sync_all();
            // `open` is dropped here, closing the handle before the rename.
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temporary file unless it was persisted; errors are ignored.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1875
1876fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1877 use std::time::{SystemTime, UNIX_EPOCH};
1878 let nanos = SystemTime::now()
1879 .duration_since(UNIX_EPOCH)
1880 .map(|d| d.as_nanos())
1881 .unwrap_or(0);
1882 let pid = std::process::id();
1883 let counter = next_temp_counter();
1886 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1887 let path = dir.join(name);
1888 let file = fs::OpenOptions::new()
1889 .write(true)
1890 .create_new(true)
1891 .open(&path)?;
1892 Ok(AtomicTemp {
1893 file: Some(file),
1894 path,
1895 persisted: false,
1896 })
1897}
1898
/// Returns the next value of a process-wide counter, starting at 0 and
/// increasing by one per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};

    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
1904
1905#[cfg(test)]
1906mod tests {
1907 use super::*;
1908 use std::collections::BTreeSet;
1909 use std::fs;
1910 use tempfile::TempDir;
1911
1912 fn mk_store() -> (TempDir, Store) {
1917 let dir = TempDir::new().unwrap();
1918 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1919 let store = Store {
1920 root: dir.path().to_path_buf(),
1921 config: crate::parser::Config::default(),
1922 };
1923 (dir, store)
1924 }
1925
1926 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1929 let abs = store.root.join(rel);
1930 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1931 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1932 }
1933
1934 fn write_doc(
1936 store: &Store,
1937 rel: &str,
1938 type_: &str,
1939 summary: Option<&str>,
1940 updated: Option<&str>,
1941 extra_yaml: &str,
1942 ) {
1943 let mut fm = format!("type: {type_}\n");
1944 if let Some(s) = summary {
1945 fm.push_str(&format!("summary: {s}\n"));
1946 }
1947 if let Some(u) = updated {
1948 fm.push_str(&format!("updated: {u}\n"));
1949 }
1950 fm.push_str(extra_yaml);
1951 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1952 }
1953
1954 fn read(store: &Store, rel: &str) -> String {
1955 fs::read_to_string(store.root.join(rel)).unwrap()
1956 }
1957
1958 fn exists(store: &Store, rel: &str) -> bool {
1959 store.root.join(rel).exists()
1960 }
1961
1962 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1965 let mut out = BTreeMap::new();
1966 for entry in walkdir::WalkDir::new(&store.root)
1967 .into_iter()
1968 .filter_map(|e| e.ok())
1969 {
1970 let p = entry.path();
1971 if is_index_artifact(p) {
1972 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1973 out.insert(rel, fs::read_to_string(p).unwrap());
1974 }
1975 }
1976 out
1977 }
1978
1979 #[test]
1982 fn type_folder_aggregates_across_shards_in_recency_order() {
1983 let (_d, store) = mk_store();
1984 write_doc(
1987 &store,
1988 "sources/emails/2026/05/b-old.md",
1989 "email",
1990 Some("Older mail"),
1991 Some("2026-05-01T09:00:00Z"),
1992 "",
1993 );
1994 write_doc(
1995 &store,
1996 "sources/emails/2026/06/c-new.md",
1997 "email",
1998 Some("Newest mail"),
1999 Some("2026-06-15T12:00:00Z"),
2000 "",
2001 );
2002 write_doc(
2003 &store,
2004 "sources/emails/2026/05/a-mid.md",
2005 "email",
2006 Some("Middle mail"),
2007 Some("2026-05-20T08:00:00Z"),
2008 "",
2009 );
2010
2011 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
2012 let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
2013 assert_eq!(
2014 paths,
2015 vec![
2016 "sources/emails/2026/06/c-new.md",
2017 "sources/emails/2026/05/a-mid.md",
2018 "sources/emails/2026/05/b-old.md",
2019 ],
2020 "records must aggregate across shards, newest `updated` first"
2021 );
2022 }
2023
2024 #[test]
2025 fn type_folder_md_format_entries_tags_and_derived_updated() {
2026 let (_d, store) = mk_store();
2027 write_doc(
2028 &store,
2029 "records/contacts/sarah-chen.md",
2030 "contact",
2031 Some("Renewal champion at Acme"),
2032 Some("2026-05-27T10:00:00Z"),
2033 "tags:\n - renewal\n - acme\n",
2034 );
2035 write_doc(
2036 &store,
2037 "records/contacts/no-tags.md",
2038 "contact",
2039 Some("Plain contact"),
2040 Some("2026-05-26T10:00:00Z"),
2041 "",
2042 );
2043
2044 let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
2045 let md = idx.to_markdown();
2046
2047 assert!(md.starts_with(
2050 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
2051 ), "frontmatter/heading wrong:\n{md}");
2052
2053 assert!(
2055 md.contains(
2056 "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
2057 ),
2058 "tagged entry wrong:\n{md}"
2059 );
2060 assert!(
2062 md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
2063 "untagged entry wrong:\n{md}"
2064 );
2065 assert!(
2066 !md.contains("Plain contact ·"),
2067 "untagged entry must not emit a tag separator"
2068 );
2069 assert!(!md.contains("## More"), "no footer expected under the cap");
2071 }
2072
2073 #[test]
2074 fn missing_summary_becomes_placeholder_not_invented() {
2075 let (_d, store) = mk_store();
2076 write_doc(
2077 &store,
2078 "records/notes/x.md",
2079 "note",
2080 None,
2081 Some("2026-05-27T10:00:00Z"),
2082 "",
2083 );
2084 let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
2085 assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
2086 let md = idx.to_markdown();
2087 assert!(
2088 md.contains("- [[records/notes/x]] — (no summary)\n"),
2089 "missing summary must render the placeholder, not invent text:\n{md}"
2090 );
2091 }
2092
2093 #[test]
2096 fn jsonl_is_complete_structured_and_round_trips() {
2097 let (_d, store) = mk_store();
2098 write_doc(
2099 &store,
2100 "records/expenses/2026/05/e1.md",
2101 "expense",
2102 Some("Lunch with vendor"),
2103 Some("2026-05-10T10:00:00Z"),
2104 "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[records/concepts/spend]]\ntags:\n - food\nlinks:\n - records/concepts/spend\n - [[records/concepts/renewal]]\n",
2105 );
2106 write_doc(
2107 &store,
2108 "records/expenses/2026/06/e2.md",
2109 "expense",
2110 Some("Cloud bill"),
2111 Some("2026-06-01T10:00:00Z"),
2112 "amount: 100\n",
2113 );
2114
2115 let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
2116 let jsonl = idx.to_jsonl();
2117 let lines: Vec<&str> = jsonl.lines().collect();
2118 assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
2119
2120 let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
2122 assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
2123 assert_eq!(
2124 r0, idx.records[0],
2125 "jsonl line must round-trip to the record"
2126 );
2127
2128 let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
2131 assert_eq!(r1.type_, "expense");
2132 assert_eq!(r1.summary, "Lunch with vendor");
2133 assert_eq!(r1.tags, vec!["food".to_string()]);
2134 assert_eq!(
2135 r1.links,
2136 vec![
2137 "records/concepts/spend".to_string(),
2138 "[[records/concepts/renewal]]".to_string()
2139 ]
2140 );
2141 assert_eq!(
2142 r1.created,
2143 Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
2144 );
2145 assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
2146 assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
2147 assert_eq!(
2148 r1.fields.get("company"),
2149 Some(&Value::from("[[records/companies/acme]]"))
2150 );
2151 assert_eq!(
2152 r1.fields.get("related"),
2153 Some(&serde_json::json!(["[[records/concepts/spend]]"]))
2154 );
2155 for reserved in [
2157 "path", "type", "summary", "tags", "links", "created", "updated",
2158 ] {
2159 assert!(
2160 !r1.fields.contains_key(reserved),
2161 "reserved key {reserved} must not appear in fields"
2162 );
2163 }
2164
2165 assert!(
2167 lines[1].starts_with(
2168 r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["records/concepts/spend","[[records/concepts/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
2169 ),
2170 "jsonl key order not stable:\n{}",
2171 lines[1]
2172 );
2173 assert!(
2178 lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[records/concepts/spend]]"],"status":"paid"}"#),
2179 "extras must be sorted:\n{}",
2180 lines[1]
2181 );
2182 }
2183
2184 #[test]
2187 fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
2188 let (_d, store) = mk_store();
2189 let total = MD_CAP + 7;
2190 for i in 0..total {
2191 let day = 1 + (i % 27);
2193 let rel = format!("sources/emails/2026/05/m-{i:04}.md");
2194 let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
2195 write_doc(
2196 &store,
2197 &rel,
2198 "email",
2199 Some(&format!("mail {i}")),
2200 Some(&updated),
2201 "",
2202 );
2203 }
2204 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
2205 assert_eq!(idx.records.len(), total, "jsonl/records keep every file");
2206
2207 let md = idx.to_markdown();
2208 let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
2209 assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");
2210
2211 assert!(
2212 md.contains("## More\n\n"),
2213 "over-cap md needs a More footer"
2214 );
2215 assert!(
2216 md.contains(&format!(
2217 "This folder has {total} files. The 500 most recent are listed above.\n"
2218 )),
2219 "footer count wrong:\n{md}"
2220 );
2221 assert!(
2222 md.contains("Use `dbmd query --type email --in sources` for the complete catalog.\n"),
2223 "footer must infer type=email layer=sources:\n{md}"
2224 );
2225
2226 let jsonl = idx.to_jsonl();
2227 assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
2228 }
2229
2230 #[test]
2233 fn sort_breaks_ties_by_path_and_puts_undated_last() {
2234 let mut recs = vec![
2235 rec("z/a.md", Some("2026-05-01T00:00:00Z")),
2236 rec("a/b.md", Some("2026-05-01T00:00:00Z")), rec("m/c.md", None), rec("b/d.md", Some("2026-06-01T00:00:00Z")), ];
2240 sort_records(&mut recs);
2241 let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
2242 assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
2243 }
2244
2245 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2246 IndexRecord {
2247 path: PathBuf::from(path),
2248 type_: "t".into(),
2249 summary: "s".into(),
2250 tags: vec![],
2251 links: vec![],
2252 created: None,
2253 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2254 fields: BTreeMap::new(),
2255 }
2256 }
2257
2258 #[test]
2261 fn layer_index_lists_type_folders_with_counts() {
2262 let (_d, store) = mk_store();
2263 write_doc(
2264 &store,
2265 "records/contacts/a.md",
2266 "contact",
2267 Some("Contact A older"),
2268 Some("2026-05-01T00:00:00Z"),
2269 "",
2270 );
2271 write_doc(
2272 &store,
2273 "records/contacts/b.md",
2274 "contact",
2275 Some("Contact B newest"),
2276 Some("2026-05-09T00:00:00Z"),
2277 "",
2278 );
2279 write_doc(
2280 &store,
2281 "records/companies/x.md",
2282 "company",
2283 Some("Acme Inc"),
2284 Some("2026-05-05T00:00:00Z"),
2285 "",
2286 );
2287 Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
2289 Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();
2290
2291 Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
2292 let md = read(&store, "records/index.md");
2293
2294 assert!(
2295 md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
2296 "layer fm:\n{md}"
2297 );
2298 let companies_at = md.find("companies/index").unwrap();
2300 let contacts_at = md.find("contacts/index").unwrap();
2301 assert!(
2302 companies_at < contacts_at,
2303 "type folders must be alphabetical"
2304 );
2305 assert!(
2308 md.contains("- [[records/contacts/index|Contacts]] (2)\n"),
2309 "contacts entry:\n{md}"
2310 );
2311 assert!(
2312 md.contains("- [[records/companies/index|Companies]] (1)\n"),
2313 "companies entry:\n{md}"
2314 );
2315 assert!(
2317 !md.contains("Contact B newest") && !md.contains("Acme Inc"),
2318 "layer rollup must not quote a member summary:\n{md}"
2319 );
2320 assert!(
2322 md.contains("updated: 2026-05-09T00:00:00Z\n"),
2323 "layer updated must be max child:\n{md}"
2324 );
2325 }
2326
2327 #[test]
2328 fn folders_section_supplies_authored_display_and_description() {
2329 let (_d, mut store) = mk_store();
2333 store.config.folders.insert(
2334 "records/contacts".into(),
2335 crate::parser::FolderMeta {
2336 display: None,
2337 description: Some("people across customer + prospect accounts".into()),
2338 },
2339 );
2340 store.config.folders.insert(
2341 "sources/hubspot-exports".into(),
2342 crate::parser::FolderMeta {
2343 display: Some("HubSpot exports".into()),
2344 description: Some("deal + pipeline exports".into()),
2345 },
2346 );
2347 write_doc(
2348 &store,
2349 "records/contacts/a.md",
2350 "contact",
2351 Some("Contact A"),
2352 Some("2026-05-01T00:00:00Z"),
2353 "",
2354 );
2355 write_doc(
2357 &store,
2358 "records/companies/x.md",
2359 "company",
2360 Some("Acme Inc"),
2361 Some("2026-05-05T00:00:00Z"),
2362 "",
2363 );
2364 write_doc(
2365 &store,
2366 "sources/hubspot-exports/d.md",
2367 "hubspot-export",
2368 Some("a single deal export"),
2369 Some("2026-05-03T00:00:00Z"),
2370 "",
2371 );
2372
2373 Index::rebuild_all(&store).unwrap();
2374
2375 let records_layer = read(&store, "records/index.md");
2377 assert!(
2378 records_layer.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
2379 "authored description must surface:\n{records_layer}"
2380 );
2381 assert!(
2383 records_layer.contains("- [[records/companies/index|Companies]] (1)\n")
2384 && !records_layer.contains("Acme Inc"),
2385 "un-described folder is counts-only:\n{records_layer}"
2386 );
2387
2388 let sources_layer = read(&store, "sources/index.md");
2390 assert!(
2391 sources_layer.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
2392 "display override + description must surface:\n{sources_layer}"
2393 );
2394
2395 let root = read(&store, "index.md");
2397 assert!(
2398 root.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
2399 "root surfaces authored description:\n{root}"
2400 );
2401 assert!(
2402 root.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
2403 "root surfaces display override:\n{root}"
2404 );
2405 }
2406
2407 #[test]
2408 fn default_display_turns_separators_to_spaces_and_caps() {
2409 assert_eq!(default_display("contacts"), "Contacts");
2410 assert_eq!(default_display("hubspot-exports"), "Hubspot exports");
2411 assert_eq!(default_display("usage_exports"), "Usage exports");
2412 }
2413
2414 #[test]
2415 fn root_index_groups_layers_with_totals_and_per_type_counts() {
2416 let (_d, store) = mk_store();
2417 write_doc(
2418 &store,
2419 "sources/emails/2026/05/a.md",
2420 "email",
2421 Some("Mail"),
2422 Some("2026-05-01T00:00:00Z"),
2423 "",
2424 );
2425 write_doc(
2426 &store,
2427 "sources/docs/d.md",
2428 "doc",
2429 Some("Doc"),
2430 Some("2026-05-02T00:00:00Z"),
2431 "",
2432 );
2433 write_doc(
2434 &store,
2435 "records/contacts/c.md",
2436 "contact",
2437 Some("C"),
2438 Some("2026-05-03T00:00:00Z"),
2439 "",
2440 );
2441 Index::rebuild_all(&store).unwrap();
2444 let md = read(&store, "index.md");
2445
2446 assert!(
2447 md.starts_with("---\ntype: index\nscope: root\n"),
2448 "root fm:\n{md}"
2449 );
2450 assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
2451 let sources_h = md
2453 .find("## Sources (2)")
2454 .expect("sources heading w/ total 2");
2455 let records_h = md
2456 .find("## Records (1)")
2457 .expect("records heading w/ total 1");
2458 assert!(sources_h < records_h, "Sources must precede Records");
2459 assert!(!md.contains("## Wiki"), "empty layer gets no section");
2460 assert!(
2462 md.contains("- [[sources/docs/index|Docs]] (1)\n"),
2463 "root docs entry:\n{md}"
2464 );
2465 assert!(
2466 md.contains("- [[sources/emails/index|Emails]] (1)\n"),
2467 "root emails entry:\n{md}"
2468 );
2469 assert!(
2470 md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
2471 "root contacts entry:\n{md}"
2472 );
2473 assert!(!md.contains("— "), "root entries carry no preview text");
2474 }
2475
2476 #[test]
2479 fn on_write_matches_rebuild_byte_for_byte() {
2480 let (_d1, wt) = mk_store();
2483 let (_d2, rb) = mk_store();
2484
2485 let docs: &[(&str, &str, &str, &str, &str)] = &[
2486 (
2487 "sources/emails/2026/05/e1.md",
2488 "email",
2489 "First mail",
2490 "2026-05-01T10:00:00Z",
2491 "tags:\n - inbox\n",
2492 ),
2493 (
2494 "sources/emails/2026/06/e2.md",
2495 "email",
2496 "Second mail",
2497 "2026-06-01T10:00:00Z",
2498 "",
2499 ),
2500 (
2501 "records/contacts/sarah.md",
2502 "contact",
2503 "Sarah",
2504 "2026-05-15T10:00:00Z",
2505 "links:\n - records/profiles/sarah\n",
2506 ),
2507 (
2508 "records/contacts/elena.md",
2509 "contact",
2510 "Elena",
2511 "2026-05-20T10:00:00Z",
2512 "status: active\n",
2513 ),
2514 (
2515 "records/profiles/sarah.md",
2516 "profile",
2517 "Sarah bio",
2518 "2026-05-21T10:00:00Z",
2519 "",
2520 ),
2521 ];
2522
2523 for (rel, t, sum, upd, extra) in docs {
2524 write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
2525 write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
2526 Index::on_write(&wt, Path::new(rel)).unwrap();
2527 }
2528 Index::rebuild_all(&rb).unwrap();
2529
2530 let a = snapshot_artifacts(&wt);
2531 let b = snapshot_artifacts(&rb);
2532 assert_eq!(
2533 a.keys().collect::<Vec<_>>(),
2534 b.keys().collect::<Vec<_>>(),
2535 "same set of index artifacts must exist"
2536 );
2537 for (k, v) in &a {
2538 assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
2539 }
2540 assert!(a.contains_key("index.md"));
2542 assert!(a.contains_key("sources/emails/index.jsonl"));
2543 assert!(a.contains_key("records/contacts/index.md"));
2544 }
2545
2546 #[test]
2563 fn loop_op_does_not_walk_sibling_content_tree() {
2564 let (_d, store) = mk_store();
2565
2566 write_doc(
2570 &store,
2571 "records/companies/acme.md",
2572 "company",
2573 Some("Acme Inc"),
2574 Some("2026-05-05T00:00:00Z"),
2575 "",
2576 );
2577 write_doc(
2578 &store,
2579 "records/companies/globex.md",
2580 "company",
2581 Some("Globex"),
2582 Some("2026-05-06T00:00:00Z"),
2583 "",
2584 );
2585 assert!(
2586 !exists(&store, "records/companies/index.jsonl"),
2587 "precondition: companies must be un-indexed"
2588 );
2589
2590 write_doc(
2592 &store,
2593 "records/contacts/sarah.md",
2594 "contact",
2595 Some("Sarah"),
2596 Some("2026-05-15T00:00:00Z"),
2597 "",
2598 );
2599 Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();
2600
2601 let layer_md = read(&store, "records/index.md");
2603 let root_md = read(&store, "index.md");
2604 assert!(
2606 layer_md.contains("- [[records/contacts/index|Contacts]] (1)\n")
2607 && !layer_md.contains("Sarah"),
2608 "layer must reflect the written folder, counts only:\n{layer_md}"
2609 );
2610 assert!(
2611 root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
2612 "root must reflect the written folder:\n{root_md}"
2613 );
2614
2615 assert!(
2619 !layer_md.contains("companies"),
2620 "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
2621 );
2622 assert!(
2623 !root_md.contains("companies"),
2624 "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
2625 );
2626 assert!(
2628 root_md.contains("## Records (1)"),
2629 "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
2630 );
2631
2632 let (_d2, rb) = mk_store();
2637 for (rel, t, s, u) in [
2638 (
2639 "records/companies/acme.md",
2640 "company",
2641 "Acme Inc",
2642 "2026-05-05T00:00:00Z",
2643 ),
2644 (
2645 "records/companies/globex.md",
2646 "company",
2647 "Globex",
2648 "2026-05-06T00:00:00Z",
2649 ),
2650 (
2651 "records/contacts/sarah.md",
2652 "contact",
2653 "Sarah",
2654 "2026-05-15T00:00:00Z",
2655 ),
2656 ] {
2657 write_doc(&rb, rel, t, Some(s), Some(u), "");
2658 }
2659 Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
2660 Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
2661 Index::rebuild_all(&rb).unwrap();
2662 let a = snapshot_artifacts(&store);
2663 let b = snapshot_artifacts(&rb);
2664 assert_eq!(
2665 a.keys().collect::<BTreeSet<_>>(),
2666 b.keys().collect::<BTreeSet<_>>(),
2667 "same artifact set after indexing both folders"
2668 );
2669 for (k, v) in &a {
2670 assert_eq!(
2671 v, &b[k],
2672 "after indexing the sibling too, loop result must equal rebuild for {k}"
2673 );
2674 }
2675 assert!(
2676 read(&store, "index.md").contains("## Records (3)"),
2677 "now that both folders are indexed, the root total is 3"
2678 );
2679 }
2680
2681 #[test]
2694 fn custom_type_at_shard_path_for_is_indexable_end_to_end() {
2695 let (_d1, wt) = mk_store();
2696 let (_d2, rb) = mk_store();
2697
2698 let rel = wt
2700 .shard_path_for(
2701 "profile",
2702 &crate::parser::Frontmatter::default(),
2703 "renewal-theme",
2704 )
2705 .unwrap();
2706 let rel_str = path_to_unix(&rel);
2707 assert!(
2710 type_folder_of(&rel).is_some(),
2711 "shard_path_for produced a path the index cannot file: {rel_str}"
2712 );
2713
2714 write_doc(
2715 &wt,
2716 &rel_str,
2717 "profile",
2718 Some("Renewal theme"),
2719 Some("2026-05-21T10:00:00Z"),
2720 "",
2721 );
2722 write_doc(
2723 &rb,
2724 &rel_str,
2725 "profile",
2726 Some("Renewal theme"),
2727 Some("2026-05-21T10:00:00Z"),
2728 "",
2729 );
2730
2731 Index::on_write(&wt, &rel)
2734 .expect("on_write must succeed for a toolkit-computed custom-type path");
2735 Index::rebuild_all(&rb).unwrap();
2736
2737 let page_link = wiki_target(&rel); let tf_md = read(&rb, "records/profile/index.md");
2744 assert!(
2745 tf_md.contains(&format!("[[{page_link}]]")),
2746 "type-folder index must list the page link, got:\n{tf_md}"
2747 );
2748 assert!(
2749 exists(&rb, "records/profile/index.jsonl"),
2750 "type-folder jsonl must exist"
2751 );
2752 assert!(
2753 read(&rb, "records/profile/index.jsonl").contains(&rel_str),
2754 "type-folder jsonl must contain the page row"
2755 );
2756 let layer_md = read(&rb, "records/index.md");
2759 assert!(
2760 layer_md.contains("records/profile/index"),
2761 "layer index must roll up the records/profile type-folder, got:\n{layer_md}"
2762 );
2763
2764 let a = snapshot_artifacts(&wt);
2766 let b = snapshot_artifacts(&rb);
2767 assert_eq!(
2768 a.keys().collect::<Vec<_>>(),
2769 b.keys().collect::<Vec<_>>(),
2770 "loop and sweep must produce the same artifact set"
2771 );
2772 for (k, v) in &a {
2773 assert_eq!(
2774 v, &b[k],
2775 "custom-type artifact {k} differs between on_write and rebuild"
2776 );
2777 }
2778 }
2779
/// Removing the most recent file through `Index::on_remove` must leave the
/// store's artifacts identical to a store rebuilt from scratch, and the capped
/// markdown listing must backfill with the next most recent file.
#[test]
fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
    // `wt` is kept current by write-through calls; `rb` only by full rebuilds.
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // Three files over the cap, so the listing is still full after one removal.
    let total = MD_CAP + 3;
    let mut all_rels = Vec::with_capacity(total);
    for i in 0..total {
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        // Timestamps grow with `i`, so a higher index is a more recent file.
        let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
        let summary = format!("mail {i}");
        for store in [&wt, &rb] {
            write_doc(store, &rel, "email", Some(&summary), Some(&updated), "");
        }
        all_rels.push(rel);
    }
    Index::rebuild_all(&wt).unwrap();

    // Write-through side: delete the newest file and notify the index.
    let newest = &all_rels[total - 1];
    fs::remove_file(wt.root.join(newest)).unwrap();
    Index::on_remove(&wt, Path::new(newest)).unwrap();

    // Rebuild side: delete the same file, then regenerate everything.
    fs::remove_file(rb.root.join(newest)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    // Compare the key sets first: iterating `a` alone would not notice an
    // artifact that exists only in the rebuilt store.
    assert_eq!(
        a.keys().collect::<Vec<_>>(),
        b.keys().collect::<Vec<_>>(),
        "after remove, write-through and rebuild must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
    }

    let md = read(&wt, "sources/emails/index.md");
    assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
    assert!(
        !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
        "removed file must not be listed in md"
    );
    // `total - 1` files remain; the `MD_CAP` most recent of them start at this
    // index, which sat just outside the listing before the removal.
    let pulled_in = &all_rels[total - 1 - MD_CAP];
    assert!(
        md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
        "the 501st-most-recent must be pulled into the browse view after a removal"
    );
    assert!(
        md.contains(&format!("This folder has {} files.", total - 1)),
        "footer count must decrement:\n{}",
        md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
    );
    let jsonl = read(&wt, "sources/emails/index.jsonl");
    assert_eq!(
        jsonl.lines().count(),
        total - 1,
        "jsonl loses exactly the removed file"
    );
    assert!(
        !jsonl.contains(&path_to_unix(Path::new(newest))),
        "removed file must be gone from the jsonl too"
    );
}
2857
/// Moving a file from one type folder to another via `Index::on_rename` must
/// yield the same artifacts as performing the move and rebuilding everything.
#[test]
fn on_rename_cross_folder_matches_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // (relative path, type, summary, updated)
    let seed: [(&str, &str, &str, &str); 3] = [
        ("records/contacts/a.md", "contact", "A", "2026-05-01T00:00:00Z"),
        ("records/contacts/b.md", "contact", "B", "2026-05-02T00:00:00Z"),
        ("records/companies/x.md", "company", "X", "2026-05-03T00:00:00Z"),
    ];
    for &(rel, doc_type, summary, updated) in &seed {
        for store in [&wt, &rb] {
            write_doc(store, rel, doc_type, Some(summary), Some(updated), "");
        }
    }
    Index::rebuild_all(&wt).unwrap();

    let (old, new) = ("records/contacts/b.md", "records/companies/b.md");

    // Write-through store: move on disk, then notify the index.
    fs::create_dir_all(wt.root.join("records/companies")).unwrap();
    fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
    Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();

    // Reference store: same move, followed by a full rebuild.
    fs::create_dir_all(rb.root.join("records/companies")).unwrap();
    fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let incremental = snapshot_artifacts(&wt);
    let rebuilt = snapshot_artifacts(&rb);
    assert_eq!(
        incremental.keys().collect::<Vec<_>>(),
        rebuilt.keys().collect::<Vec<_>>()
    );
    for (k, v) in &incremental {
        assert_eq!(v, &rebuilt[k], "rename: artifact {k} drifted from rebuild");
    }

    // The entry left the source folder's listing and shows up in the target's.
    let contacts = read(&wt, "records/contacts/index.md");
    assert!(!contacts.contains("records/contacts/b]]"));
    let companies = read(&wt, "records/companies/index.md");
    assert!(companies.contains("[[records/companies/b]]"));
}
2916
/// Writing the same path twice must replace the catalogued entry instead of
/// appending a second one.
#[test]
fn on_write_updates_existing_entry_in_place() {
    let (_d, store) = mk_store();
    let rel = "records/contacts/a.md";
    // Two successive revisions of the same document: (summary, updated).
    let revisions = [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ];
    for (summary, updated) in revisions {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
        Index::on_write(&store, Path::new(rel)).unwrap();
    }

    let jsonl = read(&store, "records/contacts/index.jsonl");
    let line_count = jsonl.lines().count();
    assert_eq!(line_count, 1, "upsert must not duplicate the line");
    assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !jsonl.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let md = read(&store, "records/contacts/index.md");
    assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
2958
/// A dry run returns both rendered artifacts, each introduced by a
/// `--- <path> ---` separator, and leaves the store untouched.
#[test]
fn dry_run_emits_separators_and_writes_nothing() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder(PathBuf::from("sources/emails"));
    let out = Index::render_dry_run(&store, &level).unwrap();

    let has_md_separator = out.contains("--- sources/emails/index.md ---\n");
    assert!(has_md_separator, "md separator:\n{out}");
    let has_jsonl_separator = out.contains("--- sources/emails/index.jsonl ---\n");
    assert!(has_jsonl_separator, "jsonl separator:\n{out}");
    assert!(
        out.contains("- [[sources/emails/2026/05/a]] — Mail"),
        "md body present"
    );

    // Neither artifact may have been created on disk.
    for artifact in ["sources/emails/index.md", "sources/emails/index.jsonl"] {
        assert!(!exists(&store, artifact), "dry-run must not write");
    }
}
2996
/// `Index::cleanup` deletes index files found in a shard directory and in a
/// folder without content, while leaving the content file alone.
#[test]
fn cleanup_removes_noncanonical_and_empty_indexes() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // Plant stale index files inside the date shard ...
    for stale in [
        "sources/emails/2026/05/index.md",
        "sources/emails/2026/05/index.jsonl",
    ] {
        fs::write(store.root.join(stale), "stale\n").unwrap();
    }
    // ... and in a folder that holds nothing but the index itself.
    fs::create_dir_all(store.root.join("records/empty")).unwrap();
    fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();

    Index::cleanup(&store).unwrap();

    let must_be_gone = [
        ("sources/emails/2026/05/index.md", "shard index must be deleted"),
        ("sources/emails/2026/05/index.jsonl", "shard jsonl must be deleted"),
        ("records/empty/index.md", "empty-folder index must be deleted"),
    ];
    for (path, why) in must_be_gone {
        assert!(!exists(&store, path), "{why}");
    }
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
3040
/// After the only document is deleted, a rebuild must remove the index at
/// every level (type folder, layer, root).
#[test]
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/a.md",
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::rebuild_all(&store).unwrap();
    for present in ["records/contacts/index.md", "records/index.md", "index.md"] {
        assert!(exists(&store, present));
    }

    fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
    Index::rebuild_all(&store).unwrap();

    let must_be_gone = [
        ("records/contacts/index.md", "emptied type-folder index gone"),
        ("records/index.md", "now-empty layer index gone"),
        ("index.md", "now-empty root index gone"),
    ];
    for (path, why) in must_be_gone {
        assert!(!exists(&store, path), "{why}");
    }
}
3070
/// Drives a reproducible pseudo-random mix of writes, removals and renames
/// against a write-through store and a mirror store, then checks that one
/// final rebuild of the mirror produces exactly the same artifacts.
#[test]
fn property_writethrough_equals_rebuild_under_mixed_ops() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // Fixed-seed linear congruential generator, so every run replays the
    // same operation sequence.
    let mut state: u64 = 0x9E3779B97F4A7C15;
    let mut next = || {
        state = state
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        (state >> 33) as u32
    };

    let folders = ["sources/emails", "records/contacts", "records/profiles"];
    let types = ["email", "contact", "profile"];
    // Paths currently present on disk in both stores.
    let mut live: Vec<String> = Vec::new();

    for step in 0..120u32 {
        match next() % 10 {
            // Write (create or overwrite). Also the fallback when nothing is
            // live, since removal and rename need an existing file.
            op if op < 6 || live.is_empty() => {
                let fi = (next() as usize) % folders.len();
                let folder = folders[fi];
                let id = next() % 40;
                let rel = match folder {
                    "sources/emails" => {
                        let month = 5 + (id % 2);
                        format!("{folder}/2026/{month:02}/f-{id:02}.md")
                    }
                    _ => format!("{folder}/f-{id:02}.md"),
                };
                let updated = format!(
                    "2026-05-{:02}T{:02}:{:02}:00Z",
                    1 + (step % 27),
                    step % 24,
                    id % 60
                );
                let extra = match id % 3 {
                    0 => "tags:\n - x\n - y\n",
                    _ => "",
                };
                let summary = format!("sum {step}");
                for store in [&wt, &rb] {
                    write_doc(store, &rel, types[fi], Some(&summary), Some(&updated), extra);
                }
                Index::on_write(&wt, Path::new(&rel)).unwrap();
                if !live.contains(&rel) {
                    live.push(rel);
                }
            }
            // Remove a live file.
            6 | 7 => {
                let idx = (next() as usize) % live.len();
                let rel = live.remove(idx);
                fs::remove_file(wt.root.join(&rel)).unwrap();
                // A failure to remove the mirror copy is ignored here.
                let _ = fs::remove_file(rb.root.join(&rel));
                Index::on_remove(&wt, Path::new(&rel)).unwrap();
            }
            // Rename a live file, possibly into another folder.
            _ => {
                let idx = (next() as usize) % live.len();
                let old = live[idx].clone();
                let fi = (next() as usize) % folders.len();
                let folder = folders[fi];
                // Ids start at 50 here; the write arm only uses ids below 40.
                let id = 50 + (next() % 40);
                let new = match folder {
                    "sources/emails" => format!("{folder}/2026/05/f-{id:02}.md"),
                    _ => format!("{folder}/f-{id:02}.md"),
                };
                if new == old || live.contains(&new) {
                    continue;
                }
                fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
                fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
                fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
                fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
                Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
                live[idx] = new;
            }
        }
    }

    // The mirror is indexed exactly once, after all operations.
    Index::rebuild_all(&rb).unwrap();
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild must produce the same set of artifacts"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
    assert!(
        !a.is_empty(),
        "the run must have produced at least one artifact"
    );
}
3189
/// A content document that happens to be named `index.md` inside a shard must
/// survive both `cleanup` and `rebuild_all` with its content intact.
#[test]
fn cleanup_preserves_user_content_named_index_md_in_shard() {
    let (_d, store) = mk_store();
    let user_doc = "sources/emails/2026/06/index.md";
    write_doc(
        &store,
        user_doc,
        "email",
        Some("Important imported mail"),
        Some("2026-06-11T04:23:25Z"),
        "",
    );

    Index::cleanup(&store).unwrap();
    let survived_cleanup = exists(&store, user_doc);
    assert!(
        survived_cleanup,
        "cleanup must not delete a user content file named index.md"
    );

    Index::rebuild_all(&store).unwrap();
    let survived_rebuild = exists(&store, user_doc);
    assert!(
        survived_rebuild,
        "rebuild_all must not delete a user content file named index.md"
    );

    let kept = read(&store, user_doc);
    assert!(
        kept.contains("Important imported mail"),
        "the user's record content must be intact"
    );
}
3224
/// `cleanup` must leave the index files at the root of a non-empty type
/// folder in place.
#[test]
fn cleanup_keeps_canonical_type_folder_root_sidecars() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    let level = IndexLevel::TypeFolder(PathBuf::from("records/contacts"));
    Index::write_level(&store, &level).unwrap();

    let sidecars = [
        (
            "records/contacts/index.md",
            "cleanup must keep the canonical type-folder index.md (non-empty folder)",
        ),
        (
            "records/contacts/index.jsonl",
            "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)",
        ),
    ];
    // Precondition: both artifacts were written.
    for (path, _) in sidecars {
        assert!(exists(&store, path));
    }

    Index::cleanup(&store).unwrap();

    for (path, why) in sidecars {
        assert!(exists(&store, path), "{why}");
    }
}
3253
/// Calling `on_write` with the path of the generated `index.md` must not
/// catalogue the index as if it were a record.
#[test]
fn on_write_ignores_index_artifact_no_phantom_row() {
    let (_d, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
    let rows_before = read(&store, "records/contacts/index.jsonl")
        .lines()
        .count();
    assert_eq!(rows_before, 1);

    // Report a write to the index artifact itself.
    Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();

    let jsonl_after = read(&store, "records/contacts/index.jsonl");
    let rows_after = jsonl_after.lines().count();
    assert_eq!(
        rows_after, 1,
        "on_write on index.md must not add a phantom self-row"
    );
    assert!(
        !jsonl_after.contains("\"type\":\"index\""),
        "the catalog artifact must never appear as a catalogued row"
    );

    let root = read(&store, "index.md");
    assert!(
        root.contains("[[records/contacts/index|Contacts]] (1)"),
        "count must not inflate:\n{root}"
    );
}
3294
/// A summary spanning several lines, one of which looks like an index entry,
/// must be rendered as a single entry line in the markdown index.
#[test]
fn multiline_summary_is_single_lined_in_index_md() {
    let (_d, store) = mk_store();
    // The second summary line imitates a list entry with a wiki link.
    let frontmatter = "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry";
    write_raw(&store, "records/notes/evil.md", frontmatter, "\nbody\n");

    let md = Index::build_type_folder(&store, Path::new("records/notes"))
        .unwrap()
        .to_markdown();

    let entry_lines = md
        .lines()
        .filter(|line| line.starts_with("- [["))
        .count();
    assert_eq!(
        entry_lines, 1,
        "a multi-line summary must not produce extra entry lines:\n{md}"
    );

    let expected_entry =
        "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n";
    assert!(
        md.contains(expected_entry),
        "summary newlines must collapse to spaces inline:\n{md}"
    );
}
3325
/// Frontmatter values written as bare scalars (`summary: 2026`, `type: true`)
/// must end up in the record as their string form.
#[test]
fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
    let (_d, store) = mk_store();

    // Case 1: bare-number summary.
    let rel_a = "records/contacts/a.md";
    write_raw(
        &store,
        rel_a,
        "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
        "\nbody\n",
    );
    let rec = record_from_file(&store.root.join(rel_a), PathBuf::from(rel_a)).unwrap();
    assert_eq!(rec.summary, "2026");
    assert_eq!(rec.type_, "contact");

    let md = Index::build_type_folder(&store, Path::new("records/contacts"))
        .unwrap()
        .to_markdown();
    assert!(
        md.contains("- [[records/contacts/a]] — 2026\n"),
        "index entry must hold the coerced scalar, not the placeholder:\n{md}"
    );

    // Case 2: bare-boolean type.
    let rel_b = "records/contacts/b.md";
    write_raw(
        &store,
        rel_b,
        "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
        "\nbody\n",
    );
    let rec_b = record_from_file(&store.root.join(rel_b), PathBuf::from(rel_b)).unwrap();
    assert_eq!(rec_b.type_, "true");
}
3374
/// A document whose body contains a byte that is not valid UTF-8 must still
/// be read into a record and catalogued by a rebuild.
#[test]
fn non_utf8_body_does_not_abort_record_projection() {
    let (_d, store) = mk_store();
    let rel = "sources/emails/2026/06/x.md";
    let abs = store.root.join(rel);
    fs::create_dir_all(abs.parent().unwrap()).unwrap();

    // Plain frontmatter followed by a body containing a lone 0xE9 byte,
    // which is not valid UTF-8 on its own.
    let raw: &[u8] =
        b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf\xE9 meeting notes\n";
    fs::write(&abs, raw).unwrap();

    let rec = record_from_file(&abs, PathBuf::from(rel))
        .expect("non-UTF-8 body must not abort the frontmatter read");
    assert_eq!(rec.summary, "An imported email");
    assert_eq!(rec.type_, "email");

    Index::rebuild_all(&store).unwrap();
    assert!(
        exists(&store, "sources/emails/index.jsonl"),
        "rebuild must produce the catalog despite a non-UTF-8 body byte"
    );
    let catalog = read(&store, "sources/emails/index.jsonl");
    assert!(
        catalog.contains("An imported email"),
        "the record must be catalogued"
    );
}
3412
/// A rebuild that hits a malformed document must return an error and leave
/// the catalogs written by the previous successful rebuild on disk.
#[test]
fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
    let (_d, store) = mk_store();
    // (path, type, summary, updated)
    let docs = [
        ("records/contacts/alice.md", "contact", "Alice", "2026-05-01T00:00:00Z"),
        ("records/companies/acme.md", "company", "Acme", "2026-05-02T00:00:00Z"),
    ];
    for (rel, doc_type, summary, updated) in docs {
        write_doc(&store, rel, doc_type, Some(summary), Some(updated), "");
    }

    Index::rebuild_all(&store).expect("clean rebuild succeeds");
    assert!(exists(&store, "records/contacts/index.jsonl"));
    assert!(exists(&store, "records/companies/index.jsonl"));

    // Frontmatter with a double-quoted string that is never closed.
    let malformed = "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n";
    fs::write(store.root.join("records/contacts/broken.md"), malformed).unwrap();

    Index::rebuild_all(&store)
        .expect_err("rebuild must abort, not silently skip, on a malformed file");

    assert!(
        exists(&store, "records/companies/index.jsonl"),
        "an aborted rebuild must not destroy a clean sibling folder's catalog"
    );
    assert!(
        exists(&store, "records/contacts/index.jsonl"),
        "an aborted rebuild must not destroy the affected folder's prior catalog"
    );
    let contacts_jsonl = read(&store, "records/contacts/index.jsonl");
    assert!(contacts_jsonl.contains("records/contacts/alice.md"));
}
3473
/// The `(N)` counts in the layer and root rollups must equal the number of
/// jsonl records, and write-through must produce the same bytes as a rebuild.
#[test]
fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
    // Same two documents for both stores: (path, summary, updated).
    let docs = [
        ("records/contacts/alice.md", "Alice", "2026-05-01T00:00:00Z"),
        ("records/contacts/bob.md", "Bob", "2026-05-02T00:00:00Z"),
    ];

    // Store indexed by a full rebuild.
    let (_d, store) = mk_store();
    for (rel, summary, updated) in docs {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&store).expect("clean rebuild succeeds");

    let jsonl = read(&store, "records/contacts/index.jsonl");
    let jsonl_lines = jsonl
        .lines()
        .filter(|line| !line.trim().is_empty())
        .count();
    assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");

    let layer_md = read(&store, "records/index.md");
    let root_md = read(&store, "index.md");
    assert!(
        layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
        "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
    );
    let root_counts_ok = root_md.contains("- [[records/contacts/index|Contacts]] (2)\n")
        && root_md.contains("## Records (2)");
    assert!(
        root_counts_ok,
        "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
    );

    // Store indexed by write-through: all documents on disk first, then one
    // `on_write` per document.
    let (_d2, wt) = mk_store();
    for (rel, summary, updated) in docs {
        write_doc(&wt, rel, "contact", Some(summary), Some(updated), "");
    }
    for (rel, _, _) in docs {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&store);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild_all must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "rollup bytes diverged between write-through and rebuild_all for {k} \
             (a skip-version inflates rebuild_all's (N) above the jsonl record \
             count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
}
3571
/// `path_to_unix` must keep a file name that is not valid UTF-8 instead of
/// dropping it and returning only the parent directory.
#[cfg(unix)]
#[test]
fn non_utf8_path_component_is_kept_not_dropped() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // "caf" + lone 0xE9 byte + ".md": not valid UTF-8.
    let leaf = OsStr::from_bytes(b"caf\xE9.md");
    let path = Path::new("sources/emails").join(leaf);

    let unix = path_to_unix(&path);
    assert_ne!(
        unix, "sources/emails",
        "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
    );
    assert!(
        unix.starts_with("sources/emails/caf"),
        "the lossy leaf must remain under its folder: {unix}"
    );
}
3598
/// A document sitting directly in a layer directory is listed in the layer's
/// jsonl only; the layer markdown remains a rollup of type folders.
#[test]
fn loose_file_is_catalogued_in_layer_jsonl_not_type_folder() {
    let (_d, store) = mk_store();
    // One document inside a type folder, one directly under the layer:
    // (path, summary, extra frontmatter).
    let docs = [
        ("records/contacts/alice.md", "Alice", "id: alice\n"),
        ("records/loose.md", "Loose", "id: loose\n"),
    ];
    for (rel, summary, extra) in docs {
        write_doc(
            &store,
            rel,
            "contact",
            Some(summary),
            Some("2026-06-01T08:00:00Z"),
            extra,
        );
    }
    Index::rebuild_all(&store).unwrap();

    // Layer jsonl: holds the loose file and nothing from the type folder.
    assert!(
        exists(&store, "records/index.jsonl"),
        "layer jsonl must exist when loose files are present"
    );
    let layer_jsonl = read(&store, "records/index.jsonl");
    assert!(
        layer_jsonl.contains("records/loose.md"),
        "layer jsonl must list the loose file, got:\n{layer_jsonl}"
    );
    assert!(
        !layer_jsonl.contains("records/contacts/alice.md"),
        "layer jsonl must NOT list type-folder files"
    );

    // Type-folder jsonl: the mirror image.
    let tf_jsonl = read(&store, "records/contacts/index.jsonl");
    assert!(tf_jsonl.contains("records/contacts/alice.md"));
    assert!(!tf_jsonl.contains("records/loose.md"));

    // Layer markdown: rollup entries only.
    let layer_md = read(&store, "records/index.md");
    assert!(
        layer_md.contains("records/contacts/index"),
        "layer md must roll up the type-folder, got:\n{layer_md}"
    );
    assert!(
        !layer_md.contains("records/loose"),
        "layer md must stay a rollup, not list loose files, got:\n{layer_md}"
    );
}
3653
/// With a loose layer-level file present, write-through and a full rebuild
/// must produce identical artifacts.
#[test]
fn loose_file_write_through_equals_rebuild() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // (path, summary, updated, extra frontmatter)
    let docs = [
        (
            "records/contacts/alice.md",
            "Alice",
            "2026-06-01T08:00:00Z",
            "id: alice\n",
        ),
        (
            "records/loose.md",
            "Loose",
            "2026-06-02T08:00:00Z",
            "id: loose\n",
        ),
    ];
    for store in [&wt, &rb] {
        for (rel, summary, updated, extra) in docs {
            write_doc(store, rel, "contact", Some(summary), Some(updated), extra);
        }
    }

    for (rel, ..) in docs {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let incremental = snapshot_artifacts(&wt);
    let rebuilt = snapshot_artifacts(&rb);
    assert_eq!(
        incremental.keys().collect::<Vec<_>>(),
        rebuilt.keys().collect::<Vec<_>>(),
        "loose-file loop and sweep must produce the same artifact set"
    );
    for (k, v) in &incremental {
        assert_eq!(
            v, &rebuilt[k],
            "loose-file artifact {k} differs between loop and sweep"
        );
    }
}
3695
/// Once the only loose file of a layer is removed, the layer's jsonl must be
/// deleted as well.
#[test]
fn removing_last_loose_file_clears_layer_jsonl() {
    let (_d, store) = mk_store();
    let loose = "records/loose.md";
    let layer_jsonl = "records/index.jsonl";

    write_doc(
        &store,
        loose,
        "contact",
        Some("Loose"),
        Some("2026-06-01T08:00:00Z"),
        "id: loose\n",
    );
    Index::on_write(&store, Path::new(loose)).unwrap();
    assert!(
        exists(&store, layer_jsonl),
        "layer jsonl present after a loose write"
    );

    fs::remove_file(store.root.join(loose)).unwrap();
    Index::on_remove(&store, Path::new(loose)).unwrap();
    assert!(
        !exists(&store, layer_jsonl),
        "layer jsonl must be removed once the last loose file is gone"
    );
}
3719
/// Two threads, each issuing `on_write` calls for its own type folder, must
/// leave the same artifacts as a subsequent full rebuild of the same store.
#[test]
fn concurrent_writes_to_different_type_folders_match_rebuild() {
    use std::sync::Arc;
    use std::thread;

    let (_d, store) = mk_store();
    let folders = ["records/contacts", "records/companies"];
    let n = 12usize;

    // All documents are on disk before any thread starts; the threads only
    // run the index updates.
    for (fi, folder) in folders.iter().enumerate() {
        for i in 0..n {
            let rel = format!("{folder}/f{fi}_{i}.md");
            let summary = format!("Summary {fi}-{i}");
            let updated = format!("2026-06-{:02}T08:00:00Z", i + 1);
            let extra = format!("id: f{fi}_{i}\n");
            write_doc(
                &store,
                &rel,
                "contact",
                Some(&summary),
                Some(&updated),
                &extra,
            );
        }
    }

    // One worker per folder, all sharing the same store.
    let store = Arc::new(store);
    let mut workers = Vec::with_capacity(folders.len());
    for (fi, folder) in folders.iter().enumerate() {
        let shared = Arc::clone(&store);
        let folder = folder.to_string();
        workers.push(thread::spawn(move || {
            for i in 0..n {
                let rel = format!("{folder}/f{fi}_{i}.md");
                Index::on_write(&shared, Path::new(&rel)).unwrap();
            }
        }));
    }
    for worker in workers {
        worker.join().unwrap();
    }

    // Snapshot the write-through result, then rebuild in place for reference.
    let got = snapshot_artifacts(&store);
    Index::rebuild_all(&store).unwrap();
    let want = snapshot_artifacts(&store);

    assert_eq!(
        got.keys().collect::<Vec<_>>(),
        want.keys().collect::<Vec<_>>(),
        "artifact set after concurrent write-through must match rebuild"
    );
    for (k, expected) in &want {
        assert_eq!(
            &got[k], expected,
            "rollup artifact {k} diverged from rebuild after concurrent writes"
        );
    }
}
3791}