1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::store::{Layer, Store};
62
/// Maximum number of records listed in a type-folder `index.md`. When a
/// folder holds more records than this, a "More" footer is appended instead
/// of further entries.
const MD_CAP: usize = 500;

/// Summary stored on a record whose frontmatter yields no `summary` scalar.
const MISSING_SUMMARY: &str = "(no summary)";

/// Heading text of the root-level `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
73
/// The level of the store hierarchy that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store root; rendered with `scope: root`.
    Root,
    /// A single layer directory; rendered with `scope: layer`.
    Layer(Layer),
    /// A single type folder, identified by its store-relative path;
    /// rendered with `scope: type-folder`.
    TypeFolder(PathBuf),
}
84
/// One catalog entry: the indexed metadata of a single content file.
///
/// Serialized as one JSON object per line of `index.jsonl`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Path of the file; written as a `/`-separated string by `path_serde`.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// Value of the frontmatter `type` key (serialized under the name `type`).
    #[serde(rename = "type")]
    pub type_: String,
    /// Value of the frontmatter `summary` key, or the missing-summary
    /// placeholder when the file provides none.
    pub summary: String,
    /// Entries of the frontmatter `tags` key; defaults to empty when absent
    /// from the serialized form.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Entries of the frontmatter `links` key; defaults to empty when absent
    /// from the serialized form.
    #[serde(default)]
    pub links: Vec<String>,
    /// Parsed RFC 3339 value of the frontmatter `created` key, if any.
    pub created: Option<DateTime<FixedOffset>>,
    /// Parsed RFC 3339 value of the frontmatter `updated` key, if any.
    /// Records are ordered by this timestamp, newest first.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other frontmatter key, converted to JSON and flattened into the
    /// same object as the fields above.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
118
/// An in-memory index for one level of the store.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// Which level of the hierarchy this index describes.
    pub level: IndexLevel,
    /// Per-file records. The builders in this file fill this only for
    /// type-folder indexes; layer and root indexes leave it empty.
    pub records: Vec<IndexRecord>,
    /// Number of content files per type folder (keyed by store-relative
    /// folder path). The builders fill this only for layer and root indexes,
    /// and only with folders that contain at least one file.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
131
132impl Index {
133 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
139 let rel = normalize_rel(type_folder);
140 let abs = store.root.join(&rel);
141 let mut records = Vec::new();
142 for file_abs in walk_type_folder_files(&abs) {
143 let rel_path =
144 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
145 records.push(record_from_file(&file_abs, rel_path)?);
157 }
158 sort_records(&mut records);
159 Ok(Index {
160 level: IndexLevel::TypeFolder(rel),
161 records,
162 child_counts: BTreeMap::new(),
163 })
164 }
165
166 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
169 let mut child_counts = BTreeMap::new();
170 for tf in type_folders_in_layer(store, layer) {
171 let abs = store.root.join(&tf);
172 let n = walk_type_folder_files(&abs).len();
173 if n > 0 {
174 child_counts.insert(tf, n);
175 }
176 }
177 Ok(Index {
178 level: IndexLevel::Layer(layer),
179 records: Vec::new(),
180 child_counts,
181 })
182 }
183
184 pub fn build_root(store: &Store) -> crate::Result<Index> {
187 let mut child_counts = BTreeMap::new();
188 for layer in Layer::all() {
189 for tf in type_folders_in_layer(store, layer) {
190 let abs = store.root.join(&tf);
191 let n = walk_type_folder_files(&abs).len();
192 if n > 0 {
193 child_counts.insert(tf, n);
194 }
195 }
196 }
197 Ok(Index {
198 level: IndexLevel::Root,
199 records: Vec::new(),
200 child_counts,
201 })
202 }
203
204 pub fn to_markdown(&self) -> String {
206 match &self.level {
207 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
208 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
209 IndexLevel::Root => self.render_root_md(),
210 }
211 }
212
213 pub fn to_jsonl(&self) -> String {
217 let mut out = String::new();
218 for rec in &self.records {
219 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
222 out.push_str(&line);
223 out.push('\n');
224 }
225 out
226 }
227
228 fn render_type_folder_md(&self, folder: &Path) -> String {
231 let folder_disp = path_to_unix(folder);
232 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
233 let mut s = String::new();
234 s.push_str("---\n");
235 s.push_str("type: index\n");
236 s.push_str("scope: type-folder\n");
237 s.push_str(&format!("folder: {folder_disp}\n"));
238 if let Some(ts) = updated {
239 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
240 }
241 s.push_str("---\n\n");
242 s.push_str(&format!("# {folder_disp}\n\n"));
243
244 let shown = self.records.len().min(MD_CAP);
245 for rec in self.records.iter().take(shown) {
246 s.push_str(&format_md_entry(rec));
247 s.push('\n');
248 }
249
250 if self.records.len() > MD_CAP {
251 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
252 let layer = folder
253 .components()
254 .next()
255 .and_then(|c| c.as_os_str().to_str())
256 .unwrap_or("");
257 s.push('\n');
258 s.push_str(&more_footer(self.records.len(), type_, layer));
259 }
260 s
261 }
262
263 fn render_layer_md(&self, layer: Layer) -> String {
268 let layer_dir = layer_dir_name(layer);
269 let mut s = String::new();
270 s.push_str("---\n");
271 s.push_str("type: index\n");
272 s.push_str("scope: layer\n");
273 s.push_str(&format!("folder: {layer_dir}\n"));
274 s.push_str("---\n\n");
275 s.push_str(&format!("# {layer_dir}\n\n"));
276 for (tf, n) in &self.child_counts {
277 let tf_unix = path_to_unix(tf);
278 let display = capitalize(folder_basename(tf));
279 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
280 }
281 s
282 }
283
284 fn render_root_md(&self) -> String {
287 let mut s = String::new();
288 s.push_str("---\n");
289 s.push_str("type: index\n");
290 s.push_str("scope: root\n");
291 s.push_str("---\n\n");
292 s.push_str(&format!("# {ROOT_TITLE}\n"));
293 for layer in Layer::all() {
294 let layer_dir = layer_dir_name(layer);
295 let prefix = format!("{layer_dir}/");
296 let children: Vec<(&PathBuf, &usize)> = self
297 .child_counts
298 .iter()
299 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
300 .collect();
301 if children.is_empty() {
302 continue;
303 }
304 let total: usize = children.iter().map(|(_, n)| **n).sum();
305 s.push('\n');
306 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
307 for (tf, n) in children {
308 let tf_unix = path_to_unix(tf);
309 let display = capitalize(folder_basename(tf));
310 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
311 }
312 }
313 s
314 }
315}
316
317impl Index {
322 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
329 let file_rel = normalize_rel(file);
330 if is_index_artifact(&file_rel) {
337 return Ok(());
338 }
339 let file_abs = store.root.join(&file_rel);
340 let folder = type_folder_of(&file_rel)
341 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
342 let record = record_from_file(&file_abs, file_rel.clone())?;
343
344 let _lock = FolderLock::acquire(&store.root.join(&folder));
347 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
348 records.retain(|r| r.path != record.path);
349 records.push(record);
350 sort_records(&mut records);
351
352 write_type_folder_artifacts(store, &folder, &records)?;
353 update_parents(store, &folder)?;
354 Ok(())
355 }
356
    /// Updates the indexes after a file has moved from `old` to `new`.
    ///
    /// Nothing happens when either path is an index artifact. Otherwise the
    /// record for `old` is dropped from its type folder's index and a record
    /// built from the file at `new` is added to the target folder's index
    /// (the same folder for an in-place rename). Parent pages are refreshed
    /// for every folder touched.
    ///
    /// # Errors
    /// Fails when either path does not resolve to a type folder, when an
    /// `index.jsonl` or the file at `new` cannot be read or parsed, or when
    /// writing an artifact fails.
    pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
        let old_rel = normalize_rel(old);
        let new_rel = normalize_rel(new);
        if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
            return Ok(());
        }
        let old_folder = type_folder_of(&old_rel)
            .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
        let new_folder = type_folder_of(&new_rel)
            .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;

        // Held until the function returns; covers both folders.
        let _locks = lock_folders(store, &old_folder, &new_folder);

        // Drop the old path from the source folder's records.
        let mut old_records =
            read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
        old_records.retain(|r| r.path != old_rel);

        if old_folder == new_folder {
            // Same folder: one list receives the new record, one rewrite.
            let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
            old_records.retain(|r| r.path != record.path);
            old_records.push(record);
            sort_records(&mut old_records);
            write_type_folder_artifacts(store, &old_folder, &old_records)?;
            update_parents(store, &old_folder)?;
            return Ok(());
        }

        // Different folders: the source folder is rewritten first ...
        sort_records(&mut old_records);
        write_type_folder_artifacts(store, &old_folder, &old_records)?;

        // ... then the new file is read and merged into the target folder.
        let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
        let mut new_records =
            read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
        new_records.retain(|r| r.path != record.path);
        new_records.push(record);
        sort_records(&mut new_records);
        write_type_folder_artifacts(store, &new_folder, &new_records)?;

        update_parents(store, &old_folder)?;
        update_parents(store, &new_folder)?;
        Ok(())
    }
412
413 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
418 let file_rel = normalize_rel(file);
419 if is_index_artifact(&file_rel) {
422 return Ok(());
423 }
424 let folder = type_folder_of(&file_rel)
425 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
426 let _lock = FolderLock::acquire(&store.root.join(&folder));
428 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
429 let before = records.len();
430 records.retain(|r| r.path != file_rel);
431 if records.len() == before {
432 }
435 sort_records(&mut records);
436 write_type_folder_artifacts(store, &folder, &records)?;
437 update_parents(store, &folder)?;
438 Ok(())
439 }
440
441 pub fn rebuild_all(store: &Store) -> crate::Result<()> {
445 Index::cleanup(store)?;
446 for layer in Layer::all() {
447 for tf in type_folders_in_layer(store, layer) {
448 let idx = Index::build_type_folder(store, &tf)?;
449 if idx.records.is_empty() {
450 continue;
451 }
452 write_type_folder_artifacts(store, &tf, &idx.records)?;
453 }
454 let layer_idx = Index::build_layer(store, layer)?;
455 let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
456 if layer_idx.child_counts.is_empty() {
457 remove_if_exists(&layer_index_md)?;
458 } else {
459 write_atomic(
460 &layer_index_md,
461 render_layer_md_with_store(store, &layer_idx),
462 )?;
463 }
464 }
465 let root_idx = Index::build_root(store)?;
466 let root_index_md = store.root.join("index.md");
467 if root_idx.child_counts.is_empty() {
468 remove_if_exists(&root_index_md)?;
469 } else {
470 write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
471 }
472 Ok(())
473 }
474
475 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
482 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
483 update_parents(store, folder)
484 }
485
486 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
488 match level {
489 IndexLevel::TypeFolder(folder) => {
490 let idx = Index::build_type_folder(store, folder)?;
491 if idx.records.is_empty() {
492 remove_if_exists(&store.root.join(folder).join("index.md"))?;
493 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
494 } else {
495 write_type_folder_artifacts(store, folder, &idx.records)?;
496 }
497 }
498 IndexLevel::Layer(layer) => {
499 let idx = Index::build_layer(store, *layer)?;
500 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
501 if idx.child_counts.is_empty() {
502 remove_if_exists(&p)?;
503 } else {
504 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
505 }
506 }
507 IndexLevel::Root => {
508 let idx = Index::build_root(store)?;
509 let p = store.root.join("index.md");
510 if idx.child_counts.is_empty() {
511 remove_if_exists(&p)?;
512 } else {
513 write_atomic(&p, render_root_md_with_store(store, &idx))?;
514 }
515 }
516 }
517 Ok(())
518 }
519
520 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
523 let mut out = String::new();
524 match level {
525 IndexLevel::TypeFolder(folder) => {
526 let idx = Index::build_type_folder(store, folder)?;
527 let md_path = path_to_unix(&folder.join("index.md"));
528 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
529 out.push_str(&format!("--- {md_path} ---\n"));
530 out.push_str(&idx.to_markdown());
531 out.push_str(&format!("--- {jsonl_path} ---\n"));
532 out.push_str(&idx.to_jsonl());
533 }
534 IndexLevel::Layer(layer) => {
535 let idx = Index::build_layer(store, *layer)?;
536 let md_path = format!("{}/index.md", layer_dir_name(*layer));
537 out.push_str(&format!("--- {md_path} ---\n"));
538 out.push_str(&render_layer_md_with_store(store, &idx));
539 }
540 IndexLevel::Root => {
541 let idx = Index::build_root(store)?;
542 out.push_str("--- index.md ---\n");
543 out.push_str(&render_root_md_with_store(store, &idx));
544 }
545 }
546 Ok(out)
547 }
548
549 pub fn cleanup(store: &Store) -> crate::Result<()> {
567 for layer in Layer::all() {
568 let layer_dir = store.root.join(layer_dir_name(layer));
569 if !layer_dir.is_dir() {
570 continue;
571 }
572 for tf in type_folders_in_layer(store, layer) {
573 let tf_abs = store.root.join(&tf);
574 for entry in walkdir::WalkDir::new(&tf_abs)
578 .min_depth(2)
579 .into_iter()
580 .filter_map(|e| e.ok())
581 {
582 let p = entry.path();
583 if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
584 remove_if_exists(p)?;
585 }
586 }
587 if walk_type_folder_files(&tf_abs).is_empty() {
591 let md = tf_abs.join("index.md");
592 if is_deletable_catalog_artifact(&md) {
593 remove_if_exists(&md)?;
594 }
595 remove_if_exists(&tf_abs.join("index.jsonl"))?;
596 }
597 }
598 }
599 Ok(())
600 }
601}
602
603fn write_type_folder_artifacts(
611 store: &Store,
612 folder: &Path,
613 records: &[IndexRecord],
614) -> crate::Result<()> {
615 let folder_abs = store.root.join(folder);
616 let md_path = folder_abs.join("index.md");
617 let jsonl_path = folder_abs.join("index.jsonl");
618 if records.is_empty() {
619 remove_if_exists(&md_path)?;
620 remove_if_exists(&jsonl_path)?;
621 return Ok(());
622 }
623 let idx = Index {
624 level: IndexLevel::TypeFolder(folder.to_path_buf()),
625 records: records.to_vec(),
626 child_counts: BTreeMap::new(),
627 };
628 write_atomic(&md_path, idx.to_markdown())?;
629 write_atomic(&jsonl_path, idx.to_jsonl())?;
630 Ok(())
631}
632
633fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
646 let stats = collect_child_stats(store, &Layer::all())?;
660
661 let layer = folder
662 .components()
663 .next()
664 .and_then(|c| c.as_os_str().to_str())
665 .and_then(layer_from_dir_name);
666 if let Some(layer) = layer {
667 let p = store.root.join(layer_dir_name(layer)).join("index.md");
668 if layer_has_children(&stats, layer) {
669 write_atomic(&p, render_layer_md_from_stats(layer, &stats))?;
670 } else {
671 remove_if_exists(&p)?;
672 }
673 }
674 let rp = store.root.join("index.md");
675 if stats.values().any(|s| s.count > 0) {
676 write_atomic(&rp, render_root_md_from_stats(&stats))?;
677 } else {
678 remove_if_exists(&rp)?;
679 }
680 Ok(())
681}
682
683fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
685 let prefix = format!("{}/", layer_dir_name(layer));
686 stats
687 .iter()
688 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
689}
690
691fn render_layer_md_from_stats(layer: Layer, stats: &BTreeMap<PathBuf, FolderStat>) -> String {
696 let layer_dir = layer_dir_name(layer);
697 let prefix = format!("{layer_dir}/");
698 let mut max_upd: Option<DateTime<FixedOffset>> = None;
699 let mut entries = String::new();
700 for (tf, stat) in stats {
701 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
702 continue;
703 }
704 let newest = stat.newest.as_ref();
705 if let Some(u) = newest.and_then(|r| r.updated) {
706 max_upd = Some(match max_upd {
707 Some(cur) if cur >= u => cur,
708 _ => u,
709 });
710 }
711 let tf_unix = path_to_unix(tf);
712 let display = capitalize(folder_basename(tf));
713 let preview = newest
714 .map(|r| truncate(&r.summary, 80))
715 .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
716 match preview {
717 Some(p) => entries.push_str(&format!(
718 "- [[{tf_unix}/index|{display}]] ({}) — {p}\n",
719 stat.count
720 )),
721 None => entries.push_str(&format!(
722 "- [[{tf_unix}/index|{display}]] ({})\n",
723 stat.count
724 )),
725 }
726 }
727 let mut s = String::new();
728 s.push_str("---\n");
729 s.push_str("type: index\n");
730 s.push_str("scope: layer\n");
731 s.push_str(&format!("folder: {layer_dir}\n"));
732 if let Some(ts) = max_upd {
733 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
734 }
735 s.push_str("---\n\n");
736 s.push_str(&format!("# {layer_dir}\n\n"));
737 s.push_str(&entries);
738 s
739}
740
741fn render_root_md_from_stats(stats: &BTreeMap<PathBuf, FolderStat>) -> String {
743 let mut max_upd: Option<DateTime<FixedOffset>> = None;
744 for stat in stats.values() {
745 if stat.count == 0 {
746 continue;
747 }
748 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
749 max_upd = Some(match max_upd {
750 Some(cur) if cur >= u => cur,
751 _ => u,
752 });
753 }
754 }
755 let mut s = String::new();
756 s.push_str("---\n");
757 s.push_str("type: index\n");
758 s.push_str("scope: root\n");
759 if let Some(ts) = max_upd {
760 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
761 }
762 s.push_str("---\n\n");
763 s.push_str(&format!("# {ROOT_TITLE}\n"));
764 for layer in Layer::all() {
765 let layer_dir = layer_dir_name(layer);
766 let prefix = format!("{layer_dir}/");
767 let children: Vec<(&PathBuf, usize)> = stats
768 .iter()
769 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
770 .map(|(tf, s)| (tf, s.count))
771 .collect();
772 if children.is_empty() {
773 continue;
774 }
775 let total: usize = children.iter().map(|(_, n)| *n).sum();
776 s.push('\n');
777 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
778 for (tf, n) in children {
779 let tf_unix = path_to_unix(tf);
780 let display = capitalize(folder_basename(tf));
781 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
782 }
783 }
784 s
785}
786
787fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
794 let layer = match idx.level {
795 IndexLevel::Layer(l) => l,
796 _ => unreachable!("render_layer_md_with_store called on non-layer"),
797 };
798 let layer_dir = layer_dir_name(layer);
799 let mut max_upd: Option<DateTime<FixedOffset>> = None;
800 let mut entries = String::new();
801 for (tf, n) in &idx.child_counts {
802 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
803 let newest = recs.first();
804 if let Some(u) = newest.and_then(|r| r.updated) {
805 max_upd = Some(match max_upd {
806 Some(cur) if cur >= u => cur,
807 _ => u,
808 });
809 }
810 let tf_unix = path_to_unix(tf);
811 let display = capitalize(folder_basename(tf));
812 let preview = newest
813 .map(|r| truncate(&r.summary, 80))
814 .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
815 match preview {
816 Some(p) => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n}) — {p}\n")),
817 None => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n")),
818 }
819 }
820 let mut s = String::new();
821 s.push_str("---\n");
822 s.push_str("type: index\n");
823 s.push_str("scope: layer\n");
824 s.push_str(&format!("folder: {layer_dir}\n"));
825 if let Some(ts) = max_upd {
826 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
827 }
828 s.push_str("---\n\n");
829 s.push_str(&format!("# {layer_dir}\n\n"));
830 s.push_str(&entries);
831 s
832}
833
834fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
838 let mut max_upd: Option<DateTime<FixedOffset>> = None;
839 for tf in idx.child_counts.keys() {
840 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
841 if let Some(u) = recs.first().and_then(|r| r.updated) {
842 max_upd = Some(match max_upd {
843 Some(cur) if cur >= u => cur,
844 _ => u,
845 });
846 }
847 }
848 let mut s = String::new();
849 s.push_str("---\n");
850 s.push_str("type: index\n");
851 s.push_str("scope: root\n");
852 if let Some(ts) = max_upd {
853 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
854 }
855 s.push_str("---\n\n");
856 s.push_str(&format!("# {ROOT_TITLE}\n"));
857 for layer in Layer::all() {
858 let layer_dir = layer_dir_name(layer);
859 let prefix = format!("{layer_dir}/");
860 let children: Vec<(&PathBuf, &usize)> = idx
861 .child_counts
862 .iter()
863 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
864 .collect();
865 if children.is_empty() {
866 continue;
867 }
868 let total: usize = children.iter().map(|(_, n)| **n).sum();
869 s.push('\n');
870 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
871 for (tf, n) in children {
872 let tf_unix = path_to_unix(tf);
873 let display = capitalize(folder_basename(tf));
874 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
875 }
876 }
877 s
878}
879
880fn format_md_entry(rec: &IndexRecord) -> String {
886 let path = wiki_target(&rec.path);
887 let summary = collapse_whitespace(&rec.summary);
897 let mut line = format!("- [[{path}]] — {summary}");
898 if !rec.tags.is_empty() {
899 let tags = rec
900 .tags
901 .iter()
902 .map(|t| format!("#{t}"))
903 .collect::<Vec<_>>()
904 .join(" ");
905 line.push_str(&format!(" · {tags}"));
906 }
907 line
908}
909
/// Builds the "More" footer appended to a type-folder page that has more
/// records than `MD_CAP`.
///
/// `total` is the full record count; `type_` and `layer` are inserted into
/// the suggested `dbmd index query` command line.
fn more_footer(total: usize, type_: &str, layer: &str) -> String {
    format!(
        "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
    )
}
916
917fn sort_records(records: &mut [IndexRecord]) {
921 records.sort_by(record_recency_cmp);
922}
923
924impl IndexRecord {
925 pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
937 record_from_file(abs, rel)
938 }
939}
940
941fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
944 let meta = read_frontmatter(abs)?;
945 Ok(IndexRecord {
946 path: rel,
947 type_: meta.type_.unwrap_or_default(),
948 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
949 tags: meta.tags,
950 links: meta.links,
951 created: meta.created,
952 updated: meta.updated,
953 fields: meta.fields,
954 })
955}
956
/// Frontmatter metadata extracted from one content file.
struct FileMeta {
    /// Scalar value of the `type` key, if present.
    type_: Option<String>,
    /// Scalar value of the `summary` key, if present.
    summary: Option<String>,
    /// Entries of the `tags` key; empty when absent.
    tags: Vec<String>,
    /// Entries of the `links` key; empty when absent.
    links: Vec<String>,
    /// `created` parsed as RFC 3339; `None` when absent or unparseable.
    created: Option<DateTime<FixedOffset>>,
    /// `updated` parsed as RFC 3339; `None` when absent or unparseable.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining string-keyed entries (except `path`), converted to JSON.
    fields: BTreeMap<String, Value>,
}
967
968fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
982 let bytes = fs::read(abs)?;
983 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
984 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
985 serde_norway::Mapping::new()
986 } else {
987 serde_norway::from_str(&yaml).map_err(|e| {
988 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
989 path: abs.to_path_buf(),
990 message: format!("frontmatter YAML: {e}"),
991 })
992 })?
993 };
994
995 let mut type_ = None;
996 let mut summary = None;
997 let mut tags = Vec::new();
998 let mut links = Vec::new();
999 let mut created = None;
1000 let mut updated = None;
1001 let mut fields = BTreeMap::new();
1002
1003 for (k, v) in map {
1004 let key = match k.as_str() {
1005 Some(s) => s.to_string(),
1006 None => continue,
1007 };
1008 match key.as_str() {
1009 "type" => type_ = scalar_string(&v),
1019 "summary" => summary = scalar_string(&v),
1020 "tags" => tags = yaml_string_list(&v),
1021 "links" => links = yaml_string_list(&v),
1022 "created" => created = v.as_str().and_then(parse_ts),
1023 "updated" => updated = v.as_str().and_then(parse_ts),
1024 "path" => {}
1028 _ => {
1029 fields.insert(key, yaml_to_json_value(&v));
1030 }
1031 }
1032 }
1033
1034 Ok(FileMeta {
1035 type_,
1036 summary,
1037 tags,
1038 links,
1039 created,
1040 updated,
1041 fields,
1042 })
1043}
1044
1045fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1051 match v {
1052 serde_norway::Value::String(s) => Some(s.clone()),
1053 serde_norway::Value::Number(n) => Some(n.to_string()),
1054 serde_norway::Value::Bool(b) => Some(b.to_string()),
1055 _ => None,
1056 }
1057}
1058
1059fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1065 let text = String::from_utf8_lossy(bytes);
1070 extract_frontmatter_block(&text)
1071}
1072
/// Returns the text between the opening and closing `---` fences of a
/// frontmatter block, with each line terminated by `\n`.
///
/// A leading byte-order mark is skipped. The opening fence must be the very
/// first line; trailing whitespace on a fence line is tolerated. Returns
/// `None` when the text does not start with a fence or when the closing
/// fence is missing.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let without_bom = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut remaining = without_bom.lines();

    if remaining.next()?.trim_end() != "---" {
        return None;
    }

    let mut collected = String::new();
    loop {
        // Running out of lines before the closing fence means no block.
        let line = remaining.next()?;
        if line.trim_end() == "---" {
            return Some(collected);
        }
        collected.push_str(line);
        collected.push('\n');
    }
}
1092
1093fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1096 match v {
1097 serde_norway::Value::String(s) => vec![s.clone()],
1098 serde_norway::Value::Sequence(seq) => seq
1099 .iter()
1100 .filter_map(yaml_string_or_wiki_link_literal)
1101 .collect(),
1102 _ => Vec::new(),
1103 }
1104}
1105
1106fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1107 v.as_str()
1108 .map(str::to_string)
1109 .or_else(|| unquoted_wiki_link_literal(v))
1110}
1111
1112fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1113 if let Some(link) = unquoted_wiki_link_literal(v) {
1114 return Value::String(link);
1115 }
1116 match v {
1117 serde_norway::Value::String(s) => Value::String(s.clone()),
1118 serde_norway::Value::Bool(b) => Value::Bool(*b),
1119 serde_norway::Value::Number(n) => {
1120 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1121 }
1122 serde_norway::Value::Sequence(seq) => {
1123 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1124 }
1125 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1126 serde_json::to_value(v).unwrap_or(Value::Null)
1127 }
1128 serde_norway::Value::Null => Value::Null,
1129 }
1130}
1131
1132fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1133 let serde_norway::Value::Sequence(outer) = v else {
1134 return None;
1135 };
1136 if outer.len() != 1 {
1137 return None;
1138 }
1139 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1140 return None;
1141 };
1142 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1143 return None;
1144 };
1145 Some(format!("[[{target}]]"))
1146}
1147
1148fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
1150 DateTime::parse_from_rfc3339(s.trim()).ok()
1151}
1152
1153fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
1157 ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
1158}
1159
1160fn max_updated<'a>(
1162 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1163) -> Option<DateTime<FixedOffset>> {
1164 let mut best: Option<DateTime<FixedOffset>> = None;
1165 for ts in it.flatten() {
1166 best = Some(match best {
1167 Some(cur) if cur >= *ts => cur,
1168 _ => *ts,
1169 });
1170 }
1171 best
1172}
1173
1174fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1178 let text = match fs::read_to_string(jsonl) {
1179 Ok(t) => t,
1180 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1181 Err(e) => return Err(e.into()),
1182 };
1183 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1185 for (i, line) in text.lines().enumerate() {
1186 if line.trim().is_empty() {
1187 continue;
1188 }
1189 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1190 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1191 path: jsonl.to_path_buf(),
1192 message: format!("line {}: {e}", i + 1),
1193 })
1194 })?;
1195 by_path.insert(rec.path.clone(), rec);
1196 }
1197 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1198 sort_records(&mut records);
1199 Ok(records)
1200}
1201
/// Summary of one type folder's `index.jsonl`.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the file.
    count: usize,
    /// The record that comes first in catalog order (the most recently
    /// updated one), or `None` when the file has no records.
    newest: Option<IndexRecord>,
}
1213
1214fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1224 let text = match fs::read_to_string(jsonl) {
1225 Ok(t) => t,
1226 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1227 Err(e) => return Err(e.into()),
1228 };
1229 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1232 for (i, line) in text.lines().enumerate() {
1233 if line.trim().is_empty() {
1234 continue;
1235 }
1236 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1237 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1238 path: jsonl.to_path_buf(),
1239 message: format!("line {}: {e}", i + 1),
1240 })
1241 })?;
1242 by_path.insert(rec.path.clone(), rec);
1243 }
1244 let count = by_path.len();
1245 let newest = by_path.into_values().min_by(record_recency_cmp);
1249 Ok(FolderStat { count, newest })
1250}
1251
1252fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
1257 match (b.updated, a.updated) {
1258 (Some(bu), Some(au)) => bu.cmp(&au),
1259 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
1262 }
1263 .then_with(|| a.path.cmp(&b.path))
1264}
1265
1266fn collect_child_stats(
1274 store: &Store,
1275 layers: &[Layer],
1276) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1277 let mut stats = BTreeMap::new();
1278 for &layer in layers {
1279 for tf in type_folders_in_layer(store, layer) {
1280 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1281 if stat.count > 0 {
1282 stats.insert(tf, stat);
1283 }
1284 }
1285 }
1286 Ok(stats)
1287}
1288
1289fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1292 let mut out = Vec::new();
1293 if !folder_abs.is_dir() {
1294 return out;
1295 }
1296 for entry in walkdir::WalkDir::new(folder_abs)
1297 .into_iter()
1298 .filter_entry(|e| !is_hidden(e.file_name()))
1299 .filter_map(|e| e.ok())
1300 {
1301 if !entry.file_type().is_file() {
1302 continue;
1303 }
1304 let p = entry.path();
1305 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1306 continue;
1307 }
1308 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1309 continue;
1310 }
1311 out.push(p.to_path_buf());
1312 }
1313 out
1314}
1315
1316fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1319 let layer_dir = store.root.join(layer_dir_name(layer));
1320 let mut out = Vec::new();
1321 let rd = match fs::read_dir(&layer_dir) {
1322 Ok(rd) => rd,
1323 Err(_) => return out,
1324 };
1325 for entry in rd.flatten() {
1326 if !entry.path().is_dir() {
1327 continue;
1328 }
1329 let name = entry.file_name();
1330 let name = match name.to_str() {
1331 Some(n) => n,
1332 None => continue,
1333 };
1334 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1335 continue;
1336 }
1337 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1338 }
1339 out.sort();
1340 out
1341}
1342
1343fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1347 let mut comps = file_rel.components();
1348 let layer = comps.next()?.as_os_str().to_str()?;
1349 layer_from_dir_name(layer)?;
1350 let type_seg = comps.next()?.as_os_str().to_str()?;
1351 Some(PathBuf::from(layer).join(type_seg))
1352}
1353
/// Returns `abs` relative to `root`, or `None` when `abs` is not located
/// under `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rel) => Some(rel.to_path_buf()),
        Err(_) => None,
    }
}
1358
1359fn normalize_rel(p: &Path) -> PathBuf {
1362 let s = path_to_unix(p);
1363 let s = s.strip_prefix("./").unwrap_or(&s);
1364 PathBuf::from(s)
1365}
1366
/// Reports whether the file name of `p` is `index.md` or `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
1373
1374fn is_deletable_catalog_artifact(p: &Path) -> bool {
1388 match p.file_name().and_then(|n| n.to_str()) {
1389 Some("index.jsonl") => true,
1390 Some("index.md") => match read_frontmatter(p) {
1391 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1393 Err(_) => true,
1395 },
1396 _ => false,
1397 }
1398}
1399
/// Reports whether a file name starts with `.`.
///
/// Names that are not valid UTF-8 are never considered hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    name.to_str().is_some_and(|text| text.starts_with('.'))
}
1403
/// Returns the directory name of `layer` inside the store root.
fn layer_dir_name(layer: Layer) -> &'static str {
    match layer {
        Layer::Sources => "sources",
        Layer::Records => "records",
        Layer::Wiki => "wiki",
    }
}
1411
/// Maps a directory name back to its layer; the inverse of `layer_dir_name`.
///
/// The match is exact and case-sensitive; unknown names yield `None`.
fn layer_from_dir_name(name: &str) -> Option<Layer> {
    match name {
        "sources" => Some(Layer::Sources),
        "records" => Some(Layer::Records),
        "wiki" => Some(Layer::Wiki),
        _ => None,
    }
}
1422
/// Returns the final component of `p` as a string slice, or `""` when the
/// path has no final component or it is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(last) => last.to_str().unwrap_or(""),
        None => "",
    }
}
1427
1428fn wiki_target(p: &Path) -> String {
1432 let unix = path_to_unix(p);
1433 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1434}
1435
/// Renders `p` as a string whose components are separated by `/`.
///
/// Components come from [`Path::components`], so repeated separators,
/// interior `.` segments and a trailing separator are normalized away.
/// Components that are not valid UTF-8 are converted lossily.
///
/// A root component is emitted as a single `/` and is not followed by an
/// extra separator, so `/a/b` renders as `/a/b` rather than `//a/b`.
/// Relative paths render exactly as a plain `/`-join of their components.
fn path_to_unix(p: &Path) -> String {
    let mut out = String::new();
    // Whether the next non-root component must be preceded by a separator.
    let mut need_sep = false;
    for component in p.components() {
        if matches!(component, std::path::Component::RootDir) {
            // The root already *is* the separator; do not double it.
            out.push('/');
            need_sep = false;
            continue;
        }
        if need_sep {
            out.push('/');
        }
        out.push_str(&component.as_os_str().to_string_lossy());
        need_sep = true;
    }
    out
}
1453
1454mod path_serde {
1460 use super::path_to_unix;
1461 use serde::{Deserialize, Deserializer, Serializer};
1462 use std::path::{Path, PathBuf};
1463
1464 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1465 s.serialize_str(&path_to_unix(p))
1466 }
1467
1468 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1469 Ok(PathBuf::from(String::deserialize(d)?))
1470 }
1471}
1472
/// Upper-cases the first character of `s` (using the full Unicode mapping,
/// which may expand to several characters) and leaves the rest untouched.
/// An empty input yields an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let Some(head) = rest.next() else {
        return String::new();
    };
    let mut out: String = head.to_uppercase().collect();
    out.push_str(rest.as_str());
    out
}
1481
/// Replaces every run of whitespace in `s` with a single space and drops
/// leading and trailing whitespace.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
1489
1490fn truncate(s: &str, max: usize) -> String {
1492 let one_line = collapse_whitespace(s);
1493 if one_line.chars().count() <= max {
1494 one_line
1495 } else {
1496 one_line.chars().take(max).collect()
1497 }
1498}
1499
1500fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1507 if let Some(parent) = path.parent() {
1508 fs::create_dir_all(parent)?;
1509 }
1510 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1511 let mut tmp = tempfile_in(dir)?;
1512 tmp.write_all(contents.as_bytes())?;
1513 tmp.flush()?;
1514 tmp.persist(path)?;
1515 Ok(())
1516}
1517
1518fn remove_if_exists(path: &Path) -> crate::Result<()> {
1519 match fs::remove_file(path) {
1520 Ok(()) => Ok(()),
1521 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1522 Err(e) => Err(e.into()),
1523 }
1524}
1525
1526fn bad_index(path: &Path, msg: &str) -> crate::Error {
1527 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1528 path: path.to_path_buf(),
1529 message: msg.to_string(),
1530 })
1531}
1532
/// Guard for a per-folder `.index.lock` file.
///
/// `held` records whether this guard actually created the lock file; only a
/// held guard removes the file when dropped.
struct FolderLock {
    /// Absolute path of the lock file (`<folder>/.index.lock`).
    path: PathBuf,
    /// Whether this guard created the lock file and therefore owns it.
    held: bool,
}

impl FolderLock {
    /// Tries to take the lock for `folder_abs`; never fails.
    ///
    /// The folder is created if missing (errors ignored), then the lock file
    /// is created exclusively. While another lock file exists this retries up
    /// to 600 times, sleeping 10 ms between attempts; a lock file whose
    /// modification time is more than 30 s old is removed and the attempt is
    /// retried immediately. On any other I/O error, or once the attempts are
    /// exhausted, a guard with `held == false` is returned.
    fn acquire(folder_abs: &Path) -> Self {
        use std::time::{Duration, SystemTime};

        const MAX_ATTEMPTS: u32 = 600;
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        // A lock is stale only when its mtime is readable and lies more than
        // `STALE_AFTER` in the past; unreadable or future mtimes are not stale.
        let lock_is_stale = |lock: &Path| -> bool {
            let Ok(modified) = fs::metadata(lock).and_then(|meta| meta.modified()) else {
                return false;
            };
            match SystemTime::now().duration_since(modified) {
                Ok(age) => age > STALE_AFTER,
                Err(_) => false,
            }
        };

        let path = folder_abs.join(".index.lock");
        let _ = fs::create_dir_all(folder_abs);

        for _attempt in 0..MAX_ATTEMPTS {
            let created = fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path);
            match created {
                Ok(_) => return FolderLock { path, held: true },
                Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
                    if lock_is_stale(&path) {
                        let _ = fs::remove_file(&path);
                    } else {
                        std::thread::sleep(SPIN);
                    }
                }
                // Any other failure: give up and hand back an unheld guard.
                Err(_) => break,
            }
        }
        FolderLock { path, held: false }
    }
}

impl Drop for FolderLock {
    /// Removes the lock file, but only if this guard created it.
    fn drop(&mut self) {
        if !self.held {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1615
1616fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1622 if a == b {
1623 return vec![FolderLock::acquire(&store.root.join(a))];
1624 }
1625 let (first, second) = if a < b { (a, b) } else { (b, a) };
1626 vec![
1627 FolderLock::acquire(&store.root.join(first)),
1628 FolderLock::acquire(&store.root.join(second)),
1629 ]
1630}
1631
/// A temp file that is either renamed into place or deleted on drop.
struct AtomicTemp {
    /// Open handle; taken (and thereby closed) by `persist`.
    file: Option<fs::File>,
    /// Location of the temp file on disk.
    path: PathBuf,
    /// Set once the rename succeeded, which disarms the cleanup in `Drop`.
    persisted: bool,
}

impl AtomicTemp {
    /// Writes all of `bytes` to the temp file.
    ///
    /// Panics if the handle has already been taken.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.write_all(bytes)
    }

    /// Flushes the temp file.
    ///
    /// Panics if the handle has already been taken.
    fn flush(&mut self) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.flush()
    }

    /// Syncs (errors ignored) and closes the handle, then renames the temp
    /// file to `dest`. If the rename fails the error is returned and the temp
    /// file is removed when `self` is dropped.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            let _ = handle.sync_all();
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temp file unless it was successfully persisted.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1669
1670fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1671 use std::time::{SystemTime, UNIX_EPOCH};
1672 let nanos = SystemTime::now()
1673 .duration_since(UNIX_EPOCH)
1674 .map(|d| d.as_nanos())
1675 .unwrap_or(0);
1676 let pid = std::process::id();
1677 let counter = next_temp_counter();
1680 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1681 let path = dir.join(name);
1682 let file = fs::OpenOptions::new()
1683 .write(true)
1684 .create_new(true)
1685 .open(&path)?;
1686 Ok(AtomicTemp {
1687 file: Some(file),
1688 path,
1689 persisted: false,
1690 })
1691}
1692
/// Returns the current value of a process-wide counter and increments it.
/// The first call returns 0.
fn next_temp_counter() -> u64 {
    static NEXT_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(0);
    NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
}
1698
1699#[cfg(test)]
1700mod tests {
1701 use super::*;
1702 use std::collections::BTreeSet;
1703 use std::fs;
1704 use tempfile::TempDir;
1705
1706 fn mk_store() -> (TempDir, Store) {
1711 let dir = TempDir::new().unwrap();
1712 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1713 let store = Store {
1714 root: dir.path().to_path_buf(),
1715 config: crate::parser::Config::default(),
1716 };
1717 (dir, store)
1718 }
1719
1720 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1723 let abs = store.root.join(rel);
1724 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1725 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1726 }
1727
1728 fn write_doc(
1730 store: &Store,
1731 rel: &str,
1732 type_: &str,
1733 summary: Option<&str>,
1734 updated: Option<&str>,
1735 extra_yaml: &str,
1736 ) {
1737 let mut fm = format!("type: {type_}\n");
1738 if let Some(s) = summary {
1739 fm.push_str(&format!("summary: {s}\n"));
1740 }
1741 if let Some(u) = updated {
1742 fm.push_str(&format!("updated: {u}\n"));
1743 }
1744 fm.push_str(extra_yaml);
1745 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1746 }
1747
1748 fn read(store: &Store, rel: &str) -> String {
1749 fs::read_to_string(store.root.join(rel)).unwrap()
1750 }
1751
1752 fn exists(store: &Store, rel: &str) -> bool {
1753 store.root.join(rel).exists()
1754 }
1755
1756 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1759 let mut out = BTreeMap::new();
1760 for entry in walkdir::WalkDir::new(&store.root)
1761 .into_iter()
1762 .filter_map(|e| e.ok())
1763 {
1764 let p = entry.path();
1765 if is_index_artifact(p) {
1766 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1767 out.insert(rel, fs::read_to_string(p).unwrap());
1768 }
1769 }
1770 out
1771 }
1772
1773 #[test]
1776 fn type_folder_aggregates_across_shards_in_recency_order() {
1777 let (_d, store) = mk_store();
1778 write_doc(
1781 &store,
1782 "sources/emails/2026/05/b-old.md",
1783 "email",
1784 Some("Older mail"),
1785 Some("2026-05-01T09:00:00Z"),
1786 "",
1787 );
1788 write_doc(
1789 &store,
1790 "sources/emails/2026/06/c-new.md",
1791 "email",
1792 Some("Newest mail"),
1793 Some("2026-06-15T12:00:00Z"),
1794 "",
1795 );
1796 write_doc(
1797 &store,
1798 "sources/emails/2026/05/a-mid.md",
1799 "email",
1800 Some("Middle mail"),
1801 Some("2026-05-20T08:00:00Z"),
1802 "",
1803 );
1804
1805 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1806 let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
1807 assert_eq!(
1808 paths,
1809 vec![
1810 "sources/emails/2026/06/c-new.md",
1811 "sources/emails/2026/05/a-mid.md",
1812 "sources/emails/2026/05/b-old.md",
1813 ],
1814 "records must aggregate across shards, newest `updated` first"
1815 );
1816 }
1817
1818 #[test]
1819 fn type_folder_md_format_entries_tags_and_derived_updated() {
1820 let (_d, store) = mk_store();
1821 write_doc(
1822 &store,
1823 "records/contacts/sarah-chen.md",
1824 "contact",
1825 Some("Renewal champion at Acme"),
1826 Some("2026-05-27T10:00:00Z"),
1827 "tags:\n - renewal\n - acme\n",
1828 );
1829 write_doc(
1830 &store,
1831 "records/contacts/no-tags.md",
1832 "contact",
1833 Some("Plain contact"),
1834 Some("2026-05-26T10:00:00Z"),
1835 "",
1836 );
1837
1838 let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
1839 let md = idx.to_markdown();
1840
1841 assert!(md.starts_with(
1844 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
1845 ), "frontmatter/heading wrong:\n{md}");
1846
1847 assert!(
1849 md.contains(
1850 "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
1851 ),
1852 "tagged entry wrong:\n{md}"
1853 );
1854 assert!(
1856 md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
1857 "untagged entry wrong:\n{md}"
1858 );
1859 assert!(
1860 !md.contains("Plain contact ·"),
1861 "untagged entry must not emit a tag separator"
1862 );
1863 assert!(!md.contains("## More"), "no footer expected under the cap");
1865 }
1866
1867 #[test]
1868 fn missing_summary_becomes_placeholder_not_invented() {
1869 let (_d, store) = mk_store();
1870 write_doc(
1871 &store,
1872 "records/notes/x.md",
1873 "note",
1874 None,
1875 Some("2026-05-27T10:00:00Z"),
1876 "",
1877 );
1878 let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
1879 assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
1880 let md = idx.to_markdown();
1881 assert!(
1882 md.contains("- [[records/notes/x]] — (no summary)\n"),
1883 "missing summary must render the placeholder, not invent text:\n{md}"
1884 );
1885 }
1886
1887 #[test]
1890 fn jsonl_is_complete_structured_and_round_trips() {
1891 let (_d, store) = mk_store();
1892 write_doc(
1893 &store,
1894 "records/expenses/2026/05/e1.md",
1895 "expense",
1896 Some("Lunch with vendor"),
1897 Some("2026-05-10T10:00:00Z"),
1898 "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[wiki/themes/spend]]\ntags:\n - food\nlinks:\n - wiki/themes/spend\n - [[wiki/themes/renewal]]\n",
1899 );
1900 write_doc(
1901 &store,
1902 "records/expenses/2026/06/e2.md",
1903 "expense",
1904 Some("Cloud bill"),
1905 Some("2026-06-01T10:00:00Z"),
1906 "amount: 100\n",
1907 );
1908
1909 let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
1910 let jsonl = idx.to_jsonl();
1911 let lines: Vec<&str> = jsonl.lines().collect();
1912 assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
1913
1914 let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
1916 assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
1917 assert_eq!(
1918 r0, idx.records[0],
1919 "jsonl line must round-trip to the record"
1920 );
1921
1922 let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
1925 assert_eq!(r1.type_, "expense");
1926 assert_eq!(r1.summary, "Lunch with vendor");
1927 assert_eq!(r1.tags, vec!["food".to_string()]);
1928 assert_eq!(
1929 r1.links,
1930 vec![
1931 "wiki/themes/spend".to_string(),
1932 "[[wiki/themes/renewal]]".to_string()
1933 ]
1934 );
1935 assert_eq!(
1936 r1.created,
1937 Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
1938 );
1939 assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
1940 assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
1941 assert_eq!(
1942 r1.fields.get("company"),
1943 Some(&Value::from("[[records/companies/acme]]"))
1944 );
1945 assert_eq!(
1946 r1.fields.get("related"),
1947 Some(&serde_json::json!(["[[wiki/themes/spend]]"]))
1948 );
1949 for reserved in [
1951 "path", "type", "summary", "tags", "links", "created", "updated",
1952 ] {
1953 assert!(
1954 !r1.fields.contains_key(reserved),
1955 "reserved key {reserved} must not appear in fields"
1956 );
1957 }
1958
1959 assert!(
1961 lines[1].starts_with(
1962 r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend","[[wiki/themes/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
1963 ),
1964 "jsonl key order not stable:\n{}",
1965 lines[1]
1966 );
1967 assert!(
1969 lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","related":["[[wiki/themes/spend]]"],"status":"paid"}"#),
1970 "extras must be sorted:\n{}",
1971 lines[1]
1972 );
1973 }
1974
1975 #[test]
1978 fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
1979 let (_d, store) = mk_store();
1980 let total = MD_CAP + 7;
1981 for i in 0..total {
1982 let day = 1 + (i % 27);
1984 let rel = format!("sources/emails/2026/05/m-{i:04}.md");
1985 let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
1986 write_doc(
1987 &store,
1988 &rel,
1989 "email",
1990 Some(&format!("mail {i}")),
1991 Some(&updated),
1992 "",
1993 );
1994 }
1995 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1996 assert_eq!(idx.records.len(), total, "jsonl/records keep every file");
1997
1998 let md = idx.to_markdown();
1999 let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
2000 assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");
2001
2002 assert!(
2003 md.contains("## More\n\n"),
2004 "over-cap md needs a More footer"
2005 );
2006 assert!(
2007 md.contains(&format!(
2008 "This folder has {total} files. The 500 most recent are listed above.\n"
2009 )),
2010 "footer count wrong:\n{md}"
2011 );
2012 assert!(
2013 md.contains(
2014 "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
2015 ),
2016 "footer must infer type=email layer=sources:\n{md}"
2017 );
2018
2019 let jsonl = idx.to_jsonl();
2020 assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
2021 }
2022
2023 #[test]
2026 fn sort_breaks_ties_by_path_and_puts_undated_last() {
2027 let mut recs = vec![
2028 rec("z/a.md", Some("2026-05-01T00:00:00Z")),
2029 rec("a/b.md", Some("2026-05-01T00:00:00Z")), rec("m/c.md", None), rec("b/d.md", Some("2026-06-01T00:00:00Z")), ];
2033 sort_records(&mut recs);
2034 let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
2035 assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
2036 }
2037
2038 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2039 IndexRecord {
2040 path: PathBuf::from(path),
2041 type_: "t".into(),
2042 summary: "s".into(),
2043 tags: vec![],
2044 links: vec![],
2045 created: None,
2046 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2047 fields: BTreeMap::new(),
2048 }
2049 }
2050
2051 #[test]
2054 fn layer_index_lists_type_folders_with_counts_and_preview() {
2055 let (_d, store) = mk_store();
2056 write_doc(
2057 &store,
2058 "records/contacts/a.md",
2059 "contact",
2060 Some("Contact A older"),
2061 Some("2026-05-01T00:00:00Z"),
2062 "",
2063 );
2064 write_doc(
2065 &store,
2066 "records/contacts/b.md",
2067 "contact",
2068 Some("Contact B newest"),
2069 Some("2026-05-09T00:00:00Z"),
2070 "",
2071 );
2072 write_doc(
2073 &store,
2074 "records/companies/x.md",
2075 "company",
2076 Some("Acme Inc"),
2077 Some("2026-05-05T00:00:00Z"),
2078 "",
2079 );
2080 Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
2082 Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();
2083
2084 Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
2085 let md = read(&store, "records/index.md");
2086
2087 assert!(
2088 md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
2089 "layer fm:\n{md}"
2090 );
2091 let companies_at = md.find("companies/index").unwrap();
2093 let contacts_at = md.find("contacts/index").unwrap();
2094 assert!(
2095 companies_at < contacts_at,
2096 "type folders must be alphabetical"
2097 );
2098 assert!(
2100 md.contains("- [[records/contacts/index|Contacts]] (2) — Contact B newest\n"),
2101 "contacts entry:\n{md}"
2102 );
2103 assert!(
2104 md.contains("- [[records/companies/index|Companies]] (1) — Acme Inc\n"),
2105 "companies entry:\n{md}"
2106 );
2107 assert!(
2109 md.contains("updated: 2026-05-09T00:00:00Z\n"),
2110 "layer updated must be max child:\n{md}"
2111 );
2112 }
2113
2114 #[test]
2115 fn root_index_groups_layers_with_totals_and_per_type_counts() {
2116 let (_d, store) = mk_store();
2117 write_doc(
2118 &store,
2119 "sources/emails/2026/05/a.md",
2120 "email",
2121 Some("Mail"),
2122 Some("2026-05-01T00:00:00Z"),
2123 "",
2124 );
2125 write_doc(
2126 &store,
2127 "sources/docs/d.md",
2128 "doc",
2129 Some("Doc"),
2130 Some("2026-05-02T00:00:00Z"),
2131 "",
2132 );
2133 write_doc(
2134 &store,
2135 "records/contacts/c.md",
2136 "contact",
2137 Some("C"),
2138 Some("2026-05-03T00:00:00Z"),
2139 "",
2140 );
2141 Index::rebuild_all(&store).unwrap();
2144 let md = read(&store, "index.md");
2145
2146 assert!(
2147 md.starts_with("---\ntype: index\nscope: root\n"),
2148 "root fm:\n{md}"
2149 );
2150 assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
2151 let sources_h = md
2153 .find("## Sources (2)")
2154 .expect("sources heading w/ total 2");
2155 let records_h = md
2156 .find("## Records (1)")
2157 .expect("records heading w/ total 1");
2158 assert!(sources_h < records_h, "Sources must precede Records");
2159 assert!(!md.contains("## Wiki"), "empty layer gets no section");
2160 assert!(
2162 md.contains("- [[sources/docs/index|Docs]] (1)\n"),
2163 "root docs entry:\n{md}"
2164 );
2165 assert!(
2166 md.contains("- [[sources/emails/index|Emails]] (1)\n"),
2167 "root emails entry:\n{md}"
2168 );
2169 assert!(
2170 md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
2171 "root contacts entry:\n{md}"
2172 );
2173 assert!(!md.contains("— "), "root entries carry no preview text");
2174 }
2175
2176 #[test]
2179 fn on_write_matches_rebuild_byte_for_byte() {
2180 let (_d1, wt) = mk_store();
2183 let (_d2, rb) = mk_store();
2184
2185 let docs: &[(&str, &str, &str, &str, &str)] = &[
2186 (
2187 "sources/emails/2026/05/e1.md",
2188 "email",
2189 "First mail",
2190 "2026-05-01T10:00:00Z",
2191 "tags:\n - inbox\n",
2192 ),
2193 (
2194 "sources/emails/2026/06/e2.md",
2195 "email",
2196 "Second mail",
2197 "2026-06-01T10:00:00Z",
2198 "",
2199 ),
2200 (
2201 "records/contacts/sarah.md",
2202 "contact",
2203 "Sarah",
2204 "2026-05-15T10:00:00Z",
2205 "links:\n - wiki/people/sarah\n",
2206 ),
2207 (
2208 "records/contacts/elena.md",
2209 "contact",
2210 "Elena",
2211 "2026-05-20T10:00:00Z",
2212 "status: active\n",
2213 ),
2214 (
2215 "wiki/people/sarah.md",
2216 "wiki-page",
2217 "Sarah bio",
2218 "2026-05-21T10:00:00Z",
2219 "",
2220 ),
2221 ];
2222
2223 for (rel, t, sum, upd, extra) in docs {
2224 write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
2225 write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
2226 Index::on_write(&wt, Path::new(rel)).unwrap();
2227 }
2228 Index::rebuild_all(&rb).unwrap();
2229
2230 let a = snapshot_artifacts(&wt);
2231 let b = snapshot_artifacts(&rb);
2232 assert_eq!(
2233 a.keys().collect::<Vec<_>>(),
2234 b.keys().collect::<Vec<_>>(),
2235 "same set of index artifacts must exist"
2236 );
2237 for (k, v) in &a {
2238 assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
2239 }
2240 assert!(a.contains_key("index.md"));
2242 assert!(a.contains_key("sources/emails/index.jsonl"));
2243 assert!(a.contains_key("records/contacts/index.md"));
2244 }
2245
2246 #[test]
2263 fn loop_op_does_not_walk_sibling_content_tree() {
2264 let (_d, store) = mk_store();
2265
2266 write_doc(
2270 &store,
2271 "records/companies/acme.md",
2272 "company",
2273 Some("Acme Inc"),
2274 Some("2026-05-05T00:00:00Z"),
2275 "",
2276 );
2277 write_doc(
2278 &store,
2279 "records/companies/globex.md",
2280 "company",
2281 Some("Globex"),
2282 Some("2026-05-06T00:00:00Z"),
2283 "",
2284 );
2285 assert!(
2286 !exists(&store, "records/companies/index.jsonl"),
2287 "precondition: companies must be un-indexed"
2288 );
2289
2290 write_doc(
2292 &store,
2293 "records/contacts/sarah.md",
2294 "contact",
2295 Some("Sarah"),
2296 Some("2026-05-15T00:00:00Z"),
2297 "",
2298 );
2299 Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();
2300
2301 let layer_md = read(&store, "records/index.md");
2303 let root_md = read(&store, "index.md");
2304 assert!(
2306 layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
2307 "layer must reflect the written folder:\n{layer_md}"
2308 );
2309 assert!(
2310 root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
2311 "root must reflect the written folder:\n{root_md}"
2312 );
2313
2314 assert!(
2318 !layer_md.contains("companies"),
2319 "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
2320 );
2321 assert!(
2322 !root_md.contains("companies"),
2323 "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
2324 );
2325 assert!(
2327 root_md.contains("## Records (1)"),
2328 "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
2329 );
2330
2331 let (_d2, rb) = mk_store();
2336 for (rel, t, s, u) in [
2337 (
2338 "records/companies/acme.md",
2339 "company",
2340 "Acme Inc",
2341 "2026-05-05T00:00:00Z",
2342 ),
2343 (
2344 "records/companies/globex.md",
2345 "company",
2346 "Globex",
2347 "2026-05-06T00:00:00Z",
2348 ),
2349 (
2350 "records/contacts/sarah.md",
2351 "contact",
2352 "Sarah",
2353 "2026-05-15T00:00:00Z",
2354 ),
2355 ] {
2356 write_doc(&rb, rel, t, Some(s), Some(u), "");
2357 }
2358 Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
2359 Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
2360 Index::rebuild_all(&rb).unwrap();
2361 let a = snapshot_artifacts(&store);
2362 let b = snapshot_artifacts(&rb);
2363 assert_eq!(
2364 a.keys().collect::<BTreeSet<_>>(),
2365 b.keys().collect::<BTreeSet<_>>(),
2366 "same artifact set after indexing both folders"
2367 );
2368 for (k, v) in &a {
2369 assert_eq!(
2370 v, &b[k],
2371 "after indexing the sibling too, loop result must equal rebuild for {k}"
2372 );
2373 }
2374 assert!(
2375 read(&store, "index.md").contains("## Records (3)"),
2376 "now that both folders are indexed, the root total is 3"
2377 );
2378 }
2379
2380 #[test]
2391 fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
2392 let (_d1, wt) = mk_store();
2393 let (_d2, rb) = mk_store();
2394
2395 let rel = wt
2397 .shard_path_for(
2398 "wiki-page",
2399 &crate::parser::Frontmatter::default(),
2400 "renewal-theme",
2401 )
2402 .unwrap();
2403 let rel_str = path_to_unix(&rel);
2404 assert!(
2407 type_folder_of(&rel).is_some(),
2408 "shard_path_for produced a path the index cannot file: {rel_str}"
2409 );
2410
2411 write_doc(
2412 &wt,
2413 &rel_str,
2414 "wiki-page",
2415 Some("Renewal theme"),
2416 Some("2026-05-21T10:00:00Z"),
2417 "",
2418 );
2419 write_doc(
2420 &rb,
2421 &rel_str,
2422 "wiki-page",
2423 Some("Renewal theme"),
2424 Some("2026-05-21T10:00:00Z"),
2425 "",
2426 );
2427
2428 Index::on_write(&wt, &rel)
2431 .expect("on_write must succeed for a toolkit-computed wiki-page path");
2432 Index::rebuild_all(&rb).unwrap();
2433
2434 let page_link = wiki_target(&rel); let tf_md = read(&rb, "wiki/topics/index.md");
2440 assert!(
2441 tf_md.contains(&format!("[[{page_link}]]")),
2442 "type-folder index must list the page link, got:\n{tf_md}"
2443 );
2444 assert!(
2445 exists(&rb, "wiki/topics/index.jsonl"),
2446 "type-folder jsonl must exist"
2447 );
2448 assert!(
2449 read(&rb, "wiki/topics/index.jsonl").contains(&rel_str),
2450 "type-folder jsonl must contain the page row"
2451 );
2452 let layer_md = read(&rb, "wiki/index.md");
2455 assert!(
2456 layer_md.contains("wiki/topics/index"),
2457 "layer index must roll up the wiki/topics type-folder, got:\n{layer_md}"
2458 );
2459
2460 let a = snapshot_artifacts(&wt);
2462 let b = snapshot_artifacts(&rb);
2463 assert_eq!(
2464 a.keys().collect::<Vec<_>>(),
2465 b.keys().collect::<Vec<_>>(),
2466 "loop and sweep must produce the same artifact set"
2467 );
2468 for (k, v) in &a {
2469 assert_eq!(
2470 v, &b[k],
2471 "wiki-page artifact {k} differs between on_write and rebuild"
2472 );
2473 }
2474 }
2475
2476 #[test]
2477 fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
2478 let (_d1, wt) = mk_store();
2479 let (_d2, rb) = mk_store();
2480 let total = MD_CAP + 3; let mut all_rels = Vec::new();
2482 for i in 0..total {
2483 let rel = format!("sources/emails/2026/05/m-{i:04}.md");
2484 let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
2486 write_doc(
2487 &wt,
2488 &rel,
2489 "email",
2490 Some(&format!("mail {i}")),
2491 Some(&updated),
2492 "",
2493 );
2494 write_doc(
2495 &rb,
2496 &rel,
2497 "email",
2498 Some(&format!("mail {i}")),
2499 Some(&updated),
2500 "",
2501 );
2502 all_rels.push(rel);
2503 }
2504 Index::rebuild_all(&wt).unwrap();
2506 let newest = &all_rels[total - 1]; fs::remove_file(wt.root.join(newest)).unwrap();
2508 Index::on_remove(&wt, Path::new(newest)).unwrap();
2509
2510 fs::remove_file(rb.root.join(newest)).unwrap();
2512 Index::rebuild_all(&rb).unwrap();
2513
2514 let a = snapshot_artifacts(&wt);
2515 let b = snapshot_artifacts(&rb);
2516 for (k, v) in &a {
2517 assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
2518 }
2519
2520 let md = read(&wt, "sources/emails/index.md");
2523 assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
2524 assert!(
2526 !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
2527 "removed file must not be listed in md"
2528 );
2529 let pulled_in = &all_rels[2];
2533 assert!(
2534 md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
2535 "the 501st-most-recent must be pulled into the browse view after a removal"
2536 );
2537 assert!(
2538 md.contains(&format!("This folder has {} files.", total - 1)),
2539 "footer count must decrement:\n{}",
2540 md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
2541 );
2542 let jsonl = read(&wt, "sources/emails/index.jsonl");
2543 assert_eq!(
2544 jsonl.lines().count(),
2545 total - 1,
2546 "jsonl loses exactly the removed file"
2547 );
2548 assert!(
2549 !jsonl.contains(&path_to_unix(Path::new(newest))),
2550 "removed file must be gone from the jsonl too"
2551 );
2552 }
2553
2554 #[test]
2555 fn on_rename_cross_folder_matches_rebuild() {
2556 let (_d1, wt) = mk_store();
2557 let (_d2, rb) = mk_store();
2558 let seed: &[(&str, &str, &str, &str)] = &[
2560 (
2561 "records/contacts/a.md",
2562 "contact",
2563 "A",
2564 "2026-05-01T00:00:00Z",
2565 ),
2566 (
2567 "records/contacts/b.md",
2568 "contact",
2569 "B",
2570 "2026-05-02T00:00:00Z",
2571 ),
2572 (
2573 "records/companies/x.md",
2574 "company",
2575 "X",
2576 "2026-05-03T00:00:00Z",
2577 ),
2578 ];
2579 for (rel, t, s, u) in seed {
2580 write_doc(&wt, rel, t, Some(s), Some(u), "");
2581 write_doc(&rb, rel, t, Some(s), Some(u), "");
2582 }
2583 Index::rebuild_all(&wt).unwrap();
2584
2585 let old = "records/contacts/b.md";
2588 let new = "records/companies/b.md";
2589 fs::create_dir_all(wt.root.join("records/companies")).unwrap();
2590 fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
2591 Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();
2594
2595 fs::create_dir_all(rb.root.join("records/companies")).unwrap();
2597 fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
2598 Index::rebuild_all(&rb).unwrap();
2599
2600 let a = snapshot_artifacts(&wt);
2601 let b = snapshot_artifacts(&rb);
2602 assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
2603 for (k, v) in &a {
2604 assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
2605 }
2606 let contacts = read(&wt, "records/contacts/index.md");
2608 assert!(!contacts.contains("records/contacts/b]]"));
2609 let companies = read(&wt, "records/companies/index.md");
2610 assert!(companies.contains("[[records/companies/b]]"));
2611 }
2612
2613 #[test]
2614 fn on_write_updates_existing_entry_in_place() {
2615 let (_d, store) = mk_store();
2616 write_doc(
2617 &store,
2618 "records/contacts/a.md",
2619 "contact",
2620 Some("Original"),
2621 Some("2026-05-01T00:00:00Z"),
2622 "",
2623 );
2624 Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2625 write_doc(
2627 &store,
2628 "records/contacts/a.md",
2629 "contact",
2630 Some("Revised"),
2631 Some("2026-05-09T00:00:00Z"),
2632 "",
2633 );
2634 Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
2635
2636 let jsonl = read(&store, "records/contacts/index.jsonl");
2637 assert_eq!(
2638 jsonl.lines().count(),
2639 1,
2640 "upsert must not duplicate the line"
2641 );
2642 assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
2643 assert!(
2644 !jsonl.contains("Original"),
2645 "stale line must be gone (compacted)"
2646 );
2647 let md = read(&store, "records/contacts/index.md");
2648 assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
2649 assert!(
2650 md.contains("updated: 2026-05-09T00:00:00Z\n"),
2651 "index updated must track the newer member"
2652 );
2653 }
2654
2655 #[test]
2658 fn dry_run_emits_separators_and_writes_nothing() {
2659 let (_d, store) = mk_store();
2660 write_doc(
2661 &store,
2662 "sources/emails/2026/05/a.md",
2663 "email",
2664 Some("Mail"),
2665 Some("2026-05-01T00:00:00Z"),
2666 "",
2667 );
2668 let out = Index::render_dry_run(&store, &IndexLevel::TypeFolder("sources/emails".into()))
2669 .unwrap();
2670 assert!(
2671 out.contains("--- sources/emails/index.md ---\n"),
2672 "md separator:\n{out}"
2673 );
2674 assert!(
2675 out.contains("--- sources/emails/index.jsonl ---\n"),
2676 "jsonl separator:\n{out}"
2677 );
2678 assert!(
2679 out.contains("- [[sources/emails/2026/05/a]] — Mail"),
2680 "md body present"
2681 );
2682 assert!(
2684 !exists(&store, "sources/emails/index.md"),
2685 "dry-run must not write"
2686 );
2687 assert!(
2688 !exists(&store, "sources/emails/index.jsonl"),
2689 "dry-run must not write"
2690 );
2691 }
2692
2693 #[test]
2694 fn cleanup_removes_noncanonical_and_empty_indexes() {
2695 let (_d, store) = mk_store();
2696 write_doc(
2697 &store,
2698 "sources/emails/2026/05/a.md",
2699 "email",
2700 Some("Mail"),
2701 Some("2026-05-01T00:00:00Z"),
2702 "",
2703 );
2704 fs::write(
2706 store.root.join("sources/emails/2026/05/index.md"),
2707 "stale\n",
2708 )
2709 .unwrap();
2710 fs::write(
2711 store.root.join("sources/emails/2026/05/index.jsonl"),
2712 "stale\n",
2713 )
2714 .unwrap();
2715 fs::create_dir_all(store.root.join("records/empty")).unwrap();
2717 fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();
2718
2719 Index::cleanup(&store).unwrap();
2720
2721 assert!(
2722 !exists(&store, "sources/emails/2026/05/index.md"),
2723 "shard index must be deleted"
2724 );
2725 assert!(
2726 !exists(&store, "sources/emails/2026/05/index.jsonl"),
2727 "shard jsonl must be deleted"
2728 );
2729 assert!(
2730 !exists(&store, "records/empty/index.md"),
2731 "empty-folder index must be deleted"
2732 );
2733 assert!(exists(&store, "sources/emails/2026/05/a.md"));
2735 }
2736
2737 #[test]
2738 fn rebuild_deletes_stale_indexes_for_emptied_folders() {
2739 let (_d, store) = mk_store();
2740 write_doc(
2741 &store,
2742 "records/contacts/a.md",
2743 "contact",
2744 Some("A"),
2745 Some("2026-05-01T00:00:00Z"),
2746 "",
2747 );
2748 Index::rebuild_all(&store).unwrap();
2749 assert!(exists(&store, "records/contacts/index.md"));
2750 assert!(exists(&store, "records/index.md"));
2751 assert!(exists(&store, "index.md"));
2752
2753 fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
2755 Index::rebuild_all(&store).unwrap();
2756 assert!(
2757 !exists(&store, "records/contacts/index.md"),
2758 "emptied type-folder index gone"
2759 );
2760 assert!(
2761 !exists(&store, "records/index.md"),
2762 "now-empty layer index gone"
2763 );
2764 assert!(!exists(&store, "index.md"), "now-empty root index gone");
2765 }
2766
2767 #[test]
2770 fn property_writethrough_equals_rebuild_under_mixed_ops() {
2771 let (_d1, wt) = mk_store();
2773 let (_d2, rb) = mk_store();
2774 let mut seed: u64 = 0x9E3779B97F4A7C15;
2775 let mut next = || {
2776 seed = seed
2777 .wrapping_mul(6364136223846793005)
2778 .wrapping_add(1442695040888963407);
2779 (seed >> 33) as u32
2780 };
2781
2782 let folders = ["sources/emails", "records/contacts", "wiki/people"];
2783 let types = ["email", "contact", "wiki-page"];
2784 let mut live: Vec<String> = Vec::new(); for step in 0..120u32 {
2787 let r = next();
2788 let op = r % 10;
2789 if op < 6 || live.is_empty() {
2790 let fi = (next() as usize) % folders.len();
2792 let folder = folders[fi];
2793 let id = next() % 40;
2794 let rel = if folder == "sources/emails" {
2795 let month = 5 + (id % 2); format!("{folder}/2026/{month:02}/f-{id:02}.md")
2797 } else {
2798 format!("{folder}/f-{id:02}.md")
2799 };
2800 let updated = format!(
2802 "2026-05-{:02}T{:02}:{:02}:00Z",
2803 1 + (step % 27),
2804 step % 24,
2805 id % 60
2806 );
2807 let extra = if id % 3 == 0 {
2808 "tags:\n - x\n - y\n"
2809 } else {
2810 ""
2811 };
2812 write_doc(
2813 &wt,
2814 &rel,
2815 types[fi],
2816 Some(&format!("sum {step}")),
2817 Some(&updated),
2818 extra,
2819 );
2820 write_doc(
2821 &rb,
2822 &rel,
2823 types[fi],
2824 Some(&format!("sum {step}")),
2825 Some(&updated),
2826 extra,
2827 );
2828 Index::on_write(&wt, Path::new(&rel)).unwrap();
2829 if !live.contains(&rel) {
2830 live.push(rel);
2831 }
2832 } else if op < 8 {
2833 let idx = (next() as usize) % live.len();
2835 let rel = live.remove(idx);
2836 fs::remove_file(wt.root.join(&rel)).unwrap();
2837 fs::remove_file(rb.root.join(&rel)).ok();
2838 Index::on_remove(&wt, Path::new(&rel)).unwrap();
2839 } else {
2840 let idx = (next() as usize) % live.len();
2842 let old = live[idx].clone();
2843 let fi = (next() as usize) % folders.len();
2845 let folder = folders[fi];
2846 let id = 50 + (next() % 40);
2847 let new = if folder == "sources/emails" {
2848 format!("{folder}/2026/05/f-{id:02}.md")
2849 } else {
2850 format!("{folder}/f-{id:02}.md")
2851 };
2852 if new == old || live.contains(&new) {
2853 continue;
2854 }
2855 fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
2856 fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
2857 fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
2858 fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
2859 Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
2860 live[idx] = new;
2861 }
2862 }
2863
2864 Index::rebuild_all(&rb).unwrap();
2866 let a = snapshot_artifacts(&wt);
2867 let b = snapshot_artifacts(&rb);
2868 assert_eq!(
2869 a.keys().collect::<BTreeSet<_>>(),
2870 b.keys().collect::<BTreeSet<_>>(),
2871 "write-through and rebuild must produce the same set of artifacts"
2872 );
2873 for (k, v) in &a {
2874 assert_eq!(
2875 v, &b[k],
2876 "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
2877 b[k]
2878 );
2879 }
2880 assert!(
2881 !a.is_empty(),
2882 "the run must have produced at least one artifact"
2883 );
2884 }
2885
2886 #[test]
2892 fn cleanup_preserves_user_content_named_index_md_in_shard() {
2893 let (_d, store) = mk_store();
2894 write_doc(
2896 &store,
2897 "sources/emails/2026/06/index.md",
2898 "email",
2899 Some("Important imported mail"),
2900 Some("2026-06-11T04:23:25Z"),
2901 "",
2902 );
2903 Index::cleanup(&store).unwrap();
2904 assert!(
2905 exists(&store, "sources/emails/2026/06/index.md"),
2906 "cleanup must not delete a user content file named index.md"
2907 );
2908 Index::rebuild_all(&store).unwrap();
2910 assert!(
2911 exists(&store, "sources/emails/2026/06/index.md"),
2912 "rebuild_all must not delete a user content file named index.md"
2913 );
2914 let kept = read(&store, "sources/emails/2026/06/index.md");
2915 assert!(
2916 kept.contains("Important imported mail"),
2917 "the user's record content must be intact"
2918 );
2919 }
2920
2921 #[test]
2926 fn cleanup_keeps_canonical_type_folder_root_sidecars() {
2927 let (_d, store) = mk_store();
2928 write_doc(
2929 &store,
2930 "records/contacts/alice.md",
2931 "contact",
2932 Some("Alice"),
2933 Some("2026-05-01T00:00:00Z"),
2934 "",
2935 );
2936 Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
2937 assert!(exists(&store, "records/contacts/index.md"));
2938 assert!(exists(&store, "records/contacts/index.jsonl"));
2939 Index::cleanup(&store).unwrap();
2940 assert!(
2941 exists(&store, "records/contacts/index.md"),
2942 "cleanup must keep the canonical type-folder index.md (non-empty folder)"
2943 );
2944 assert!(
2945 exists(&store, "records/contacts/index.jsonl"),
2946 "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)"
2947 );
2948 }
2949
2950 #[test]
2956 fn on_write_ignores_index_artifact_no_phantom_row() {
2957 let (_d, store) = mk_store();
2958 write_doc(
2959 &store,
2960 "records/contacts/alice.md",
2961 "contact",
2962 Some("Alice"),
2963 Some("2026-05-01T00:00:00Z"),
2964 "",
2965 );
2966 Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
2967 let jsonl_before = read(&store, "records/contacts/index.jsonl");
2968 assert_eq!(jsonl_before.lines().count(), 1);
2969
2970 Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();
2972
2973 let jsonl_after = read(&store, "records/contacts/index.jsonl");
2974 assert_eq!(
2975 jsonl_after.lines().count(),
2976 1,
2977 "on_write on index.md must not add a phantom self-row"
2978 );
2979 assert!(
2980 !jsonl_after.contains("\"type\":\"index\""),
2981 "the catalog artifact must never appear as a catalogued row"
2982 );
2983 let root = read(&store, "index.md");
2985 assert!(
2986 root.contains("[[records/contacts/index|Contacts]] (1)"),
2987 "count must not inflate:\n{root}"
2988 );
2989 }
2990
2991 #[test]
2997 fn multiline_summary_is_single_lined_in_index_md() {
2998 let (_d, store) = mk_store();
2999 write_raw(
3001 &store,
3002 "records/notes/evil.md",
3003 "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry",
3004 "\nbody\n",
3005 );
3006 let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
3007 let md = idx.to_markdown();
3008 let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
3010 assert_eq!(
3011 entry_lines, 1,
3012 "a multi-line summary must not produce extra entry lines:\n{md}"
3013 );
3014 assert!(
3015 md.contains(
3016 "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n"
3017 ),
3018 "summary newlines must collapse to spaces inline:\n{md}"
3019 );
3020 }
3021
3022 #[test]
3030 fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
3031 let (_d, store) = mk_store();
3032 write_raw(
3033 &store,
3034 "records/contacts/a.md",
3035 "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
3036 "\nbody\n",
3037 );
3038 let rec = record_from_file(
3039 &store.root.join("records/contacts/a.md"),
3040 PathBuf::from("records/contacts/a.md"),
3041 )
3042 .unwrap();
3043 assert_eq!(rec.summary, "2026");
3046 assert_eq!(rec.type_, "contact");
3047
3048 let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
3050 let md = idx.to_markdown();
3051 assert!(
3052 md.contains("- [[records/contacts/a]] — 2026\n"),
3053 "index entry must hold the coerced scalar, not the placeholder:\n{md}"
3054 );
3055
3056 write_raw(
3058 &store,
3059 "records/contacts/b.md",
3060 "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
3061 "\nbody\n",
3062 );
3063 let rec_b = record_from_file(
3064 &store.root.join("records/contacts/b.md"),
3065 PathBuf::from("records/contacts/b.md"),
3066 )
3067 .unwrap();
3068 assert_eq!(rec_b.type_, "true");
3069 }
3070
3071 #[test]
3079 fn non_utf8_body_does_not_abort_record_projection() {
3080 let (_d, store) = mk_store();
3081 let rel = "sources/emails/2026/06/x.md";
3082 let abs = store.root.join(rel);
3083 fs::create_dir_all(abs.parent().unwrap()).unwrap();
3084 let mut bytes: Vec<u8> =
3086 b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf"
3087 .to_vec();
3088 bytes.push(0xE9);
3089 bytes.extend_from_slice(b" meeting notes\n");
3090 fs::write(&abs, bytes).unwrap();
3091
3092 let rec = record_from_file(&abs, PathBuf::from(rel))
3093 .expect("non-UTF-8 body must not abort the frontmatter read");
3094 assert_eq!(rec.summary, "An imported email");
3095 assert_eq!(rec.type_, "email");
3096
3097 Index::rebuild_all(&store).unwrap();
3099 assert!(
3100 exists(&store, "sources/emails/index.jsonl"),
3101 "rebuild must produce the catalog despite a non-UTF-8 body byte"
3102 );
3103 assert!(
3104 read(&store, "sources/emails/index.jsonl").contains("An imported email"),
3105 "the record must be catalogued"
3106 );
3107 }
3108
3109 #[test]
3118 fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
3119 let (_d, store) = mk_store();
3120 write_doc(
3121 &store,
3122 "records/contacts/alice.md",
3123 "contact",
3124 Some("Alice"),
3125 Some("2026-05-01T00:00:00Z"),
3126 "",
3127 );
3128 write_doc(
3129 &store,
3130 "records/companies/acme.md",
3131 "company",
3132 Some("Acme"),
3133 Some("2026-05-02T00:00:00Z"),
3134 "",
3135 );
3136
3137 Index::rebuild_all(&store).expect("clean rebuild succeeds");
3139 assert!(exists(&store, "records/contacts/index.jsonl"));
3140 assert!(exists(&store, "records/companies/index.jsonl"));
3141
3142 let bad = store.root.join("records/contacts/broken.md");
3144 fs::write(
3145 &bad,
3146 "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n",
3147 )
3148 .unwrap();
3149
3150 Index::rebuild_all(&store)
3153 .expect_err("rebuild must abort, not silently skip, on a malformed file");
3154
3155 assert!(
3159 exists(&store, "records/companies/index.jsonl"),
3160 "an aborted rebuild must not destroy a clean sibling folder's catalog"
3161 );
3162 assert!(
3163 exists(&store, "records/contacts/index.jsonl"),
3164 "an aborted rebuild must not destroy the affected folder's prior catalog"
3165 );
3166 let contacts_jsonl = read(&store, "records/contacts/index.jsonl");
3167 assert!(contacts_jsonl.contains("records/contacts/alice.md"));
3168 }
3169
3170 #[test]
3183 fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
3184 let (_d, store) = mk_store();
3185 write_doc(
3189 &store,
3190 "records/contacts/alice.md",
3191 "contact",
3192 Some("Alice"),
3193 Some("2026-05-01T00:00:00Z"),
3194 "",
3195 );
3196 write_doc(
3197 &store,
3198 "records/contacts/bob.md",
3199 "contact",
3200 Some("Bob"),
3201 Some("2026-05-02T00:00:00Z"),
3202 "",
3203 );
3204 Index::rebuild_all(&store).expect("clean rebuild succeeds");
3205
3206 let jsonl_lines = read(&store, "records/contacts/index.jsonl")
3208 .lines()
3209 .filter(|l| !l.trim().is_empty())
3210 .count();
3211 assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");
3212 let layer_md = read(&store, "records/index.md");
3213 let root_md = read(&store, "index.md");
3214 assert!(
3215 layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
3216 "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
3217 );
3218 assert!(
3219 root_md.contains("- [[records/contacts/index|Contacts]] (2)\n")
3220 && root_md.contains("## Records (2)"),
3221 "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
3222 );
3223
3224 let (_d2, wt) = mk_store();
3231 write_doc(
3232 &wt,
3233 "records/contacts/alice.md",
3234 "contact",
3235 Some("Alice"),
3236 Some("2026-05-01T00:00:00Z"),
3237 "",
3238 );
3239 write_doc(
3240 &wt,
3241 "records/contacts/bob.md",
3242 "contact",
3243 Some("Bob"),
3244 Some("2026-05-02T00:00:00Z"),
3245 "",
3246 );
3247 Index::on_write(&wt, Path::new("records/contacts/alice.md")).unwrap();
3248 Index::on_write(&wt, Path::new("records/contacts/bob.md")).unwrap();
3249
3250 let a = snapshot_artifacts(&wt);
3251 let b = snapshot_artifacts(&store);
3252 assert_eq!(
3253 a.keys().collect::<BTreeSet<_>>(),
3254 b.keys().collect::<BTreeSet<_>>(),
3255 "write-through and rebuild_all must produce the same artifact set"
3256 );
3257 for (k, v) in &a {
3258 assert_eq!(
3259 v, &b[k],
3260 "rollup bytes diverged between write-through and rebuild_all for {k} \
3261 (a skip-version inflates rebuild_all's (N) above the jsonl record \
3262 count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
3263 b[k]
3264 );
3265 }
3266 }
3267
3268 #[cfg(unix)]
3273 #[test]
3274 fn non_utf8_path_component_is_kept_not_dropped() {
3275 use std::ffi::OsStr;
3276 use std::os::unix::ffi::OsStrExt;
3277 let mut leaf = b"caf".to_vec();
3279 leaf.push(0xE9);
3280 leaf.extend_from_slice(b".md");
3281 let p = Path::new("sources/emails").join(OsStr::from_bytes(&leaf));
3282 let unix = path_to_unix(&p);
3283 assert_ne!(
3286 unix, "sources/emails",
3287 "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
3288 );
3289 assert!(
3290 unix.starts_with("sources/emails/caf"),
3291 "the lossy leaf must remain under its folder: {unix}"
3292 );
3293 }
3294}