1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::store::{Layer, Store};
62
/// Maximum number of records listed in a type-folder `index.md`. When a folder
/// holds more records than this, the listing stops at the cap and a "More"
/// footer is appended.
const MD_CAP: usize = 500;
65
/// Placeholder summary stored on a record whose frontmatter has no usable
/// `summary` value. Layer previews skip summaries equal to this text.
const MISSING_SUMMARY: &str = "(no summary)";
70
/// Text of the top-level heading written into the root `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
73
/// The level of the catalog that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store root; rendered to `index.md` directly under the store root.
    Root,
    /// One layer directory; rendered to `<layer>/index.md`.
    Layer(Layer),
    /// One type folder, identified by its store-relative path; rendered to
    /// `index.md` and `index.jsonl` inside that folder.
    TypeFolder(PathBuf),
}
84
/// One catalog entry describing a single content file; serialized as one JSON
/// object per line of `index.jsonl`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Store-relative path of the file. Serialized through `path_serde`, which
    /// writes the components joined with `/`.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// Value of the frontmatter `type` key (JSON key `type`); empty when the
    /// file does not declare one.
    #[serde(rename = "type")]
    pub type_: String,
    /// Value of the frontmatter `summary` key, or the missing-summary
    /// placeholder when absent.
    pub summary: String,
    /// Frontmatter `tags`; defaults to empty when the JSON key is missing.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Frontmatter `links`; defaults to empty when the JSON key is missing.
    #[serde(default)]
    pub links: Vec<String>,
    /// Frontmatter `created` timestamp, when present and parseable as RFC 3339.
    pub created: Option<DateTime<FixedOffset>>,
    /// Frontmatter `updated` timestamp, when present and parseable as RFC 3339.
    /// Drives the recency ordering of records.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other frontmatter key, flattened into the same JSON object.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
118
/// An in-memory index for one catalog level, ready to be rendered.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// Which level this index describes.
    pub level: IndexLevel,
    /// Per-file records. Filled by the type-folder builder; the layer and root
    /// builders leave it empty.
    pub records: Vec<IndexRecord>,
    /// Content-file count per type folder (store-relative path). Filled by the
    /// layer and root builders, which only insert folders with a non-zero
    /// count; the type-folder builder leaves it empty.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
131
132impl Index {
133 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
139 let rel = normalize_rel(type_folder);
140 let abs = store.root.join(&rel);
141 let mut records = Vec::new();
142 for file_abs in walk_type_folder_files(&abs) {
143 let rel_path =
144 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
145 records.push(record_from_file(&file_abs, rel_path)?);
157 }
158 sort_records(&mut records);
159 Ok(Index {
160 level: IndexLevel::TypeFolder(rel),
161 records,
162 child_counts: BTreeMap::new(),
163 })
164 }
165
166 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
169 let mut child_counts = BTreeMap::new();
170 for tf in type_folders_in_layer(store, layer) {
171 let abs = store.root.join(&tf);
172 let n = walk_type_folder_files(&abs).len();
173 if n > 0 {
174 child_counts.insert(tf, n);
175 }
176 }
177 Ok(Index {
178 level: IndexLevel::Layer(layer),
179 records: Vec::new(),
180 child_counts,
181 })
182 }
183
184 pub fn build_root(store: &Store) -> crate::Result<Index> {
187 let mut child_counts = BTreeMap::new();
188 for layer in Layer::all() {
189 for tf in type_folders_in_layer(store, layer) {
190 let abs = store.root.join(&tf);
191 let n = walk_type_folder_files(&abs).len();
192 if n > 0 {
193 child_counts.insert(tf, n);
194 }
195 }
196 }
197 Ok(Index {
198 level: IndexLevel::Root,
199 records: Vec::new(),
200 child_counts,
201 })
202 }
203
204 pub fn to_markdown(&self) -> String {
206 match &self.level {
207 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
208 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
209 IndexLevel::Root => self.render_root_md(),
210 }
211 }
212
213 pub fn to_jsonl(&self) -> String {
217 let mut out = String::new();
218 for rec in &self.records {
219 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
222 out.push_str(&line);
223 out.push('\n');
224 }
225 out
226 }
227
228 fn render_type_folder_md(&self, folder: &Path) -> String {
231 let folder_disp = path_to_unix(folder);
232 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
233 let mut s = String::new();
234 s.push_str("---\n");
235 s.push_str("type: index\n");
236 s.push_str("scope: type-folder\n");
237 s.push_str(&format!("folder: {folder_disp}\n"));
238 if let Some(ts) = updated {
239 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
240 }
241 s.push_str("---\n\n");
242 s.push_str(&format!("# {folder_disp}\n\n"));
243
244 let shown = self.records.len().min(MD_CAP);
245 for rec in self.records.iter().take(shown) {
246 s.push_str(&format_md_entry(rec));
247 s.push('\n');
248 }
249
250 if self.records.len() > MD_CAP {
251 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
252 let layer = folder
253 .components()
254 .next()
255 .and_then(|c| c.as_os_str().to_str())
256 .unwrap_or("");
257 s.push('\n');
258 s.push_str(&more_footer(self.records.len(), type_, layer));
259 }
260 s
261 }
262
263 fn render_layer_md(&self, layer: Layer) -> String {
268 let layer_dir = layer_dir_name(layer);
269 let mut s = String::new();
270 s.push_str("---\n");
271 s.push_str("type: index\n");
272 s.push_str("scope: layer\n");
273 s.push_str(&format!("folder: {layer_dir}\n"));
274 s.push_str("---\n\n");
275 s.push_str(&format!("# {layer_dir}\n\n"));
276 for (tf, n) in &self.child_counts {
277 let tf_unix = path_to_unix(tf);
278 let display = capitalize(folder_basename(tf));
279 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
280 }
281 s
282 }
283
284 fn render_root_md(&self) -> String {
287 let mut s = String::new();
288 s.push_str("---\n");
289 s.push_str("type: index\n");
290 s.push_str("scope: root\n");
291 s.push_str("---\n\n");
292 s.push_str(&format!("# {ROOT_TITLE}\n"));
293 for layer in Layer::all() {
294 let layer_dir = layer_dir_name(layer);
295 let prefix = format!("{layer_dir}/");
296 let children: Vec<(&PathBuf, &usize)> = self
297 .child_counts
298 .iter()
299 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
300 .collect();
301 if children.is_empty() {
302 continue;
303 }
304 let total: usize = children.iter().map(|(_, n)| **n).sum();
305 s.push('\n');
306 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
307 for (tf, n) in children {
308 let tf_unix = path_to_unix(tf);
309 let display = capitalize(folder_basename(tf));
310 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
311 }
312 }
313 s
314 }
315}
316
317impl Index {
322 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
329 let file_rel = normalize_rel(file);
330 if is_index_artifact(&file_rel) {
337 return Ok(());
338 }
339 let file_abs = store.root.join(&file_rel);
340 let folder = type_folder_of(&file_rel)
341 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
342 let record = record_from_file(&file_abs, file_rel.clone())?;
343
344 let _lock = FolderLock::acquire(&store.root.join(&folder));
347 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
348 records.retain(|r| r.path != record.path);
349 records.push(record);
350 sort_records(&mut records);
351
352 write_type_folder_artifacts(store, &folder, &records)?;
353 update_parents(store, &folder)?;
354 Ok(())
355 }
356
357 pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
361 let old_rel = normalize_rel(old);
362 let new_rel = normalize_rel(new);
363 if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
367 return Ok(());
368 }
369 let old_folder = type_folder_of(&old_rel)
370 .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
371 let new_folder = type_folder_of(&new_rel)
372 .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
373
374 let _locks = lock_folders(store, &old_folder, &new_folder);
378
379 let mut old_records =
381 read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
382 old_records.retain(|r| r.path != old_rel);
383
384 if old_folder == new_folder {
385 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
387 old_records.retain(|r| r.path != record.path);
388 old_records.push(record);
389 sort_records(&mut old_records);
390 write_type_folder_artifacts(store, &old_folder, &old_records)?;
391 update_parents(store, &old_folder)?;
392 return Ok(());
393 }
394
395 sort_records(&mut old_records);
398 write_type_folder_artifacts(store, &old_folder, &old_records)?;
399
400 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
401 let mut new_records =
402 read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
403 new_records.retain(|r| r.path != record.path);
404 new_records.push(record);
405 sort_records(&mut new_records);
406 write_type_folder_artifacts(store, &new_folder, &new_records)?;
407
408 update_parents(store, &old_folder)?;
409 update_parents(store, &new_folder)?;
410 Ok(())
411 }
412
413 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
418 let file_rel = normalize_rel(file);
419 if is_index_artifact(&file_rel) {
422 return Ok(());
423 }
424 let folder = type_folder_of(&file_rel)
425 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
426 let _lock = FolderLock::acquire(&store.root.join(&folder));
428 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
429 let before = records.len();
430 records.retain(|r| r.path != file_rel);
431 if records.len() == before {
432 }
435 sort_records(&mut records);
436 write_type_folder_artifacts(store, &folder, &records)?;
437 update_parents(store, &folder)?;
438 Ok(())
439 }
440
441 pub fn rebuild_all(store: &Store) -> crate::Result<()> {
445 Index::cleanup(store)?;
446 for layer in Layer::all() {
447 for tf in type_folders_in_layer(store, layer) {
448 let idx = Index::build_type_folder(store, &tf)?;
449 if idx.records.is_empty() {
450 continue;
451 }
452 write_type_folder_artifacts(store, &tf, &idx.records)?;
453 }
454 let layer_idx = Index::build_layer(store, layer)?;
455 let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
456 if layer_idx.child_counts.is_empty() {
457 remove_if_exists(&layer_index_md)?;
458 } else {
459 write_atomic(
460 &layer_index_md,
461 render_layer_md_with_store(store, &layer_idx),
462 )?;
463 }
464 }
465 let root_idx = Index::build_root(store)?;
466 let root_index_md = store.root.join("index.md");
467 if root_idx.child_counts.is_empty() {
468 remove_if_exists(&root_index_md)?;
469 } else {
470 write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
471 }
472 Ok(())
473 }
474
475 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
482 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
483 update_parents(store, folder)
484 }
485
486 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
488 match level {
489 IndexLevel::TypeFolder(folder) => {
490 let idx = Index::build_type_folder(store, folder)?;
491 if idx.records.is_empty() {
492 remove_if_exists(&store.root.join(folder).join("index.md"))?;
493 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
494 } else {
495 write_type_folder_artifacts(store, folder, &idx.records)?;
496 }
497 }
498 IndexLevel::Layer(layer) => {
499 let idx = Index::build_layer(store, *layer)?;
500 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
501 if idx.child_counts.is_empty() {
502 remove_if_exists(&p)?;
503 } else {
504 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
505 }
506 }
507 IndexLevel::Root => {
508 let idx = Index::build_root(store)?;
509 let p = store.root.join("index.md");
510 if idx.child_counts.is_empty() {
511 remove_if_exists(&p)?;
512 } else {
513 write_atomic(&p, render_root_md_with_store(store, &idx))?;
514 }
515 }
516 }
517 Ok(())
518 }
519
520 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
523 let mut out = String::new();
524 match level {
525 IndexLevel::TypeFolder(folder) => {
526 let idx = Index::build_type_folder(store, folder)?;
527 let md_path = path_to_unix(&folder.join("index.md"));
528 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
529 out.push_str(&format!("--- {md_path} ---\n"));
530 out.push_str(&idx.to_markdown());
531 out.push_str(&format!("--- {jsonl_path} ---\n"));
532 out.push_str(&idx.to_jsonl());
533 }
534 IndexLevel::Layer(layer) => {
535 let idx = Index::build_layer(store, *layer)?;
536 let md_path = format!("{}/index.md", layer_dir_name(*layer));
537 out.push_str(&format!("--- {md_path} ---\n"));
538 out.push_str(&render_layer_md_with_store(store, &idx));
539 }
540 IndexLevel::Root => {
541 let idx = Index::build_root(store)?;
542 out.push_str("--- index.md ---\n");
543 out.push_str(&render_root_md_with_store(store, &idx));
544 }
545 }
546 Ok(out)
547 }
548
549 pub fn cleanup(store: &Store) -> crate::Result<()> {
567 for layer in Layer::all() {
568 let layer_dir = store.root.join(layer_dir_name(layer));
569 if !layer_dir.is_dir() {
570 continue;
571 }
572 for tf in type_folders_in_layer(store, layer) {
573 let tf_abs = store.root.join(&tf);
574 for entry in walkdir::WalkDir::new(&tf_abs)
578 .min_depth(2)
579 .into_iter()
580 .filter_map(|e| e.ok())
581 {
582 let p = entry.path();
583 if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
584 remove_if_exists(p)?;
585 }
586 }
587 if walk_type_folder_files(&tf_abs).is_empty() {
591 let md = tf_abs.join("index.md");
592 if is_deletable_catalog_artifact(&md) {
593 remove_if_exists(&md)?;
594 }
595 remove_if_exists(&tf_abs.join("index.jsonl"))?;
596 }
597 }
598 }
599 Ok(())
600 }
601}
602
603fn write_type_folder_artifacts(
611 store: &Store,
612 folder: &Path,
613 records: &[IndexRecord],
614) -> crate::Result<()> {
615 let folder_abs = store.root.join(folder);
616 let md_path = folder_abs.join("index.md");
617 let jsonl_path = folder_abs.join("index.jsonl");
618 if records.is_empty() {
619 remove_if_exists(&md_path)?;
620 remove_if_exists(&jsonl_path)?;
621 return Ok(());
622 }
623 let idx = Index {
624 level: IndexLevel::TypeFolder(folder.to_path_buf()),
625 records: records.to_vec(),
626 child_counts: BTreeMap::new(),
627 };
628 write_atomic(&md_path, idx.to_markdown())?;
629 write_atomic(&jsonl_path, idx.to_jsonl())?;
630 Ok(())
631}
632
633fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
646 let stats = collect_child_stats(store, &Layer::all())?;
660
661 let layer = folder
662 .components()
663 .next()
664 .and_then(|c| c.as_os_str().to_str())
665 .and_then(layer_from_dir_name);
666 if let Some(layer) = layer {
667 let p = store.root.join(layer_dir_name(layer)).join("index.md");
668 if layer_has_children(&stats, layer) {
669 write_atomic(&p, render_layer_md_from_stats(layer, &stats))?;
670 } else {
671 remove_if_exists(&p)?;
672 }
673 }
674 let rp = store.root.join("index.md");
675 if stats.values().any(|s| s.count > 0) {
676 write_atomic(&rp, render_root_md_from_stats(&stats))?;
677 } else {
678 remove_if_exists(&rp)?;
679 }
680 Ok(())
681}
682
683fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
685 let prefix = format!("{}/", layer_dir_name(layer));
686 stats
687 .iter()
688 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
689}
690
691fn render_layer_md_from_stats(layer: Layer, stats: &BTreeMap<PathBuf, FolderStat>) -> String {
696 let layer_dir = layer_dir_name(layer);
697 let prefix = format!("{layer_dir}/");
698 let mut max_upd: Option<DateTime<FixedOffset>> = None;
699 let mut entries = String::new();
700 for (tf, stat) in stats {
701 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
702 continue;
703 }
704 let newest = stat.newest.as_ref();
705 if let Some(u) = newest.and_then(|r| r.updated) {
706 max_upd = Some(match max_upd {
707 Some(cur) if cur >= u => cur,
708 _ => u,
709 });
710 }
711 let tf_unix = path_to_unix(tf);
712 let display = capitalize(folder_basename(tf));
713 let preview = newest
714 .map(|r| truncate(&r.summary, 80))
715 .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
716 match preview {
717 Some(p) => entries.push_str(&format!(
718 "- [[{tf_unix}/index|{display}]] ({}) — {p}\n",
719 stat.count
720 )),
721 None => entries.push_str(&format!(
722 "- [[{tf_unix}/index|{display}]] ({})\n",
723 stat.count
724 )),
725 }
726 }
727 let mut s = String::new();
728 s.push_str("---\n");
729 s.push_str("type: index\n");
730 s.push_str("scope: layer\n");
731 s.push_str(&format!("folder: {layer_dir}\n"));
732 if let Some(ts) = max_upd {
733 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
734 }
735 s.push_str("---\n\n");
736 s.push_str(&format!("# {layer_dir}\n\n"));
737 s.push_str(&entries);
738 s
739}
740
741fn render_root_md_from_stats(stats: &BTreeMap<PathBuf, FolderStat>) -> String {
743 let mut max_upd: Option<DateTime<FixedOffset>> = None;
744 for stat in stats.values() {
745 if stat.count == 0 {
746 continue;
747 }
748 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
749 max_upd = Some(match max_upd {
750 Some(cur) if cur >= u => cur,
751 _ => u,
752 });
753 }
754 }
755 let mut s = String::new();
756 s.push_str("---\n");
757 s.push_str("type: index\n");
758 s.push_str("scope: root\n");
759 if let Some(ts) = max_upd {
760 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
761 }
762 s.push_str("---\n\n");
763 s.push_str(&format!("# {ROOT_TITLE}\n"));
764 for layer in Layer::all() {
765 let layer_dir = layer_dir_name(layer);
766 let prefix = format!("{layer_dir}/");
767 let children: Vec<(&PathBuf, usize)> = stats
768 .iter()
769 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
770 .map(|(tf, s)| (tf, s.count))
771 .collect();
772 if children.is_empty() {
773 continue;
774 }
775 let total: usize = children.iter().map(|(_, n)| *n).sum();
776 s.push('\n');
777 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
778 for (tf, n) in children {
779 let tf_unix = path_to_unix(tf);
780 let display = capitalize(folder_basename(tf));
781 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
782 }
783 }
784 s
785}
786
787fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
794 let layer = match idx.level {
795 IndexLevel::Layer(l) => l,
796 _ => unreachable!("render_layer_md_with_store called on non-layer"),
797 };
798 let layer_dir = layer_dir_name(layer);
799 let mut max_upd: Option<DateTime<FixedOffset>> = None;
800 let mut entries = String::new();
801 for (tf, n) in &idx.child_counts {
802 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
803 let newest = recs.first();
804 if let Some(u) = newest.and_then(|r| r.updated) {
805 max_upd = Some(match max_upd {
806 Some(cur) if cur >= u => cur,
807 _ => u,
808 });
809 }
810 let tf_unix = path_to_unix(tf);
811 let display = capitalize(folder_basename(tf));
812 let preview = newest
813 .map(|r| truncate(&r.summary, 80))
814 .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
815 match preview {
816 Some(p) => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n}) — {p}\n")),
817 None => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n")),
818 }
819 }
820 let mut s = String::new();
821 s.push_str("---\n");
822 s.push_str("type: index\n");
823 s.push_str("scope: layer\n");
824 s.push_str(&format!("folder: {layer_dir}\n"));
825 if let Some(ts) = max_upd {
826 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
827 }
828 s.push_str("---\n\n");
829 s.push_str(&format!("# {layer_dir}\n\n"));
830 s.push_str(&entries);
831 s
832}
833
834fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
838 let mut max_upd: Option<DateTime<FixedOffset>> = None;
839 for tf in idx.child_counts.keys() {
840 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
841 if let Some(u) = recs.first().and_then(|r| r.updated) {
842 max_upd = Some(match max_upd {
843 Some(cur) if cur >= u => cur,
844 _ => u,
845 });
846 }
847 }
848 let mut s = String::new();
849 s.push_str("---\n");
850 s.push_str("type: index\n");
851 s.push_str("scope: root\n");
852 if let Some(ts) = max_upd {
853 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
854 }
855 s.push_str("---\n\n");
856 s.push_str(&format!("# {ROOT_TITLE}\n"));
857 for layer in Layer::all() {
858 let layer_dir = layer_dir_name(layer);
859 let prefix = format!("{layer_dir}/");
860 let children: Vec<(&PathBuf, &usize)> = idx
861 .child_counts
862 .iter()
863 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
864 .collect();
865 if children.is_empty() {
866 continue;
867 }
868 let total: usize = children.iter().map(|(_, n)| **n).sum();
869 s.push('\n');
870 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
871 for (tf, n) in children {
872 let tf_unix = path_to_unix(tf);
873 let display = capitalize(folder_basename(tf));
874 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
875 }
876 }
877 s
878}
879
880fn format_md_entry(rec: &IndexRecord) -> String {
886 let path = wiki_target(&rec.path);
887 let summary = collapse_whitespace(&rec.summary);
897 let mut line = format!("- [[{path}]] — {summary}");
898 if !rec.tags.is_empty() {
899 let tags = rec
900 .tags
901 .iter()
902 .map(|t| format!("#{t}"))
903 .collect::<Vec<_>>()
904 .join(" ");
905 line.push_str(&format!(" · {tags}"));
906 }
907 line
908}
909
910fn more_footer(total: usize, type_: &str, layer: &str) -> String {
912 format!(
913 "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
914 )
915}
916
917fn sort_records(records: &mut [IndexRecord]) {
921 records.sort_by(record_recency_cmp);
922}
923
924impl IndexRecord {
925 pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
937 record_from_file(abs, rel)
938 }
939}
940
941fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
944 let mut meta = read_frontmatter(abs)?;
945 if rel.starts_with("records") {
950 meta.fields
951 .entry("meta-type".to_string())
952 .or_insert_with(|| Value::String("fact".to_string()));
953 }
954 Ok(IndexRecord {
955 path: rel,
956 type_: meta.type_.unwrap_or_default(),
957 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
958 tags: meta.tags,
959 links: meta.links,
960 created: meta.created,
961 updated: meta.updated,
962 fields: meta.fields,
963 })
964}
965
/// Frontmatter values extracted from one content file, before defaults are
/// applied to build an `IndexRecord`.
struct FileMeta {
    /// `type` key, when present as a string, number, or boolean scalar.
    type_: Option<String>,
    /// `summary` key, when present as a string, number, or boolean scalar.
    summary: Option<String>,
    /// `tags` key as a list of strings; empty when absent.
    tags: Vec<String>,
    /// `links` key as a list of strings; empty when absent.
    links: Vec<String>,
    /// `created` key, when it is a string that parses as RFC 3339.
    created: Option<DateTime<FixedOffset>>,
    /// `updated` key, when it is a string that parses as RFC 3339.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining keys (except `path`, which is dropped), converted to JSON.
    fields: BTreeMap<String, Value>,
}
976
977fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
991 let bytes = fs::read(abs)?;
992 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
993 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
994 serde_norway::Mapping::new()
995 } else {
996 serde_norway::from_str(&yaml).map_err(|e| {
997 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
998 path: abs.to_path_buf(),
999 message: format!("frontmatter YAML: {e}"),
1000 })
1001 })?
1002 };
1003
1004 let mut type_ = None;
1005 let mut summary = None;
1006 let mut tags = Vec::new();
1007 let mut links = Vec::new();
1008 let mut created = None;
1009 let mut updated = None;
1010 let mut fields = BTreeMap::new();
1011
1012 for (k, v) in map {
1013 let key = match k.as_str() {
1014 Some(s) => s.to_string(),
1015 None => continue,
1016 };
1017 match key.as_str() {
1018 "type" => type_ = scalar_string(&v),
1028 "summary" => summary = scalar_string(&v),
1029 "tags" => tags = yaml_string_list(&v),
1030 "links" => links = yaml_string_list(&v),
1031 "created" => created = v.as_str().and_then(parse_ts),
1032 "updated" => updated = v.as_str().and_then(parse_ts),
1033 "path" => {}
1037 _ => {
1038 fields.insert(key, yaml_to_json_value(&v));
1039 }
1040 }
1041 }
1042
1043 Ok(FileMeta {
1044 type_,
1045 summary,
1046 tags,
1047 links,
1048 created,
1049 updated,
1050 fields,
1051 })
1052}
1053
1054fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1060 match v {
1061 serde_norway::Value::String(s) => Some(s.clone()),
1062 serde_norway::Value::Number(n) => Some(n.to_string()),
1063 serde_norway::Value::Bool(b) => Some(b.to_string()),
1064 _ => None,
1065 }
1066}
1067
1068fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1074 let text = String::from_utf8_lossy(bytes);
1079 extract_frontmatter_block(&text)
1080}
1081
/// Returns the text between the opening and closing `---` fence lines at the
/// start of `text`, with each line terminated by `\n`.
///
/// A leading byte-order mark is ignored and trailing whitespace on the fence
/// lines is tolerated. Returns `None` when the first line is not a fence or
/// when no closing fence follows.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let body = match text.strip_prefix('\u{feff}') {
        Some(rest) => rest,
        None => text,
    };
    let mut remaining = body.lines();
    if remaining.next()?.trim_end() != "---" {
        return None;
    }
    let mut collected = String::new();
    loop {
        // Running out of lines before the closing fence means no block.
        let line = remaining.next()?;
        if line.trim_end() == "---" {
            return Some(collected);
        }
        collected.push_str(line);
        collected.push('\n');
    }
}
1101
1102fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1105 match v {
1106 serde_norway::Value::String(s) => vec![s.clone()],
1107 serde_norway::Value::Sequence(seq) => seq
1108 .iter()
1109 .filter_map(yaml_string_or_wiki_link_literal)
1110 .collect(),
1111 _ => Vec::new(),
1112 }
1113}
1114
1115fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1116 v.as_str()
1117 .map(str::to_string)
1118 .or_else(|| unquoted_wiki_link_literal(v))
1119}
1120
1121fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1122 if let Some(link) = unquoted_wiki_link_literal(v) {
1123 return Value::String(link);
1124 }
1125 match v {
1126 serde_norway::Value::String(s) => Value::String(s.clone()),
1127 serde_norway::Value::Bool(b) => Value::Bool(*b),
1128 serde_norway::Value::Number(n) => {
1129 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1130 }
1131 serde_norway::Value::Sequence(seq) => {
1132 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1133 }
1134 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1135 serde_json::to_value(v).unwrap_or(Value::Null)
1136 }
1137 serde_norway::Value::Null => Value::Null,
1138 }
1139}
1140
1141fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1142 let serde_norway::Value::Sequence(outer) = v else {
1143 return None;
1144 };
1145 if outer.len() != 1 {
1146 return None;
1147 }
1148 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1149 return None;
1150 };
1151 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1152 return None;
1153 };
1154 Some(format!("[[{target}]]"))
1155}
1156
1157fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
1159 DateTime::parse_from_rfc3339(s.trim()).ok()
1160}
1161
1162fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
1166 ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
1167}
1168
1169fn max_updated<'a>(
1171 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1172) -> Option<DateTime<FixedOffset>> {
1173 let mut best: Option<DateTime<FixedOffset>> = None;
1174 for ts in it.flatten() {
1175 best = Some(match best {
1176 Some(cur) if cur >= *ts => cur,
1177 _ => *ts,
1178 });
1179 }
1180 best
1181}
1182
1183fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1187 let text = match fs::read_to_string(jsonl) {
1188 Ok(t) => t,
1189 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1190 Err(e) => return Err(e.into()),
1191 };
1192 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1194 for (i, line) in text.lines().enumerate() {
1195 if line.trim().is_empty() {
1196 continue;
1197 }
1198 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1199 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1200 path: jsonl.to_path_buf(),
1201 message: format!("line {}: {e}", i + 1),
1202 })
1203 })?;
1204 by_path.insert(rec.path.clone(), rec);
1205 }
1206 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1207 sort_records(&mut records);
1208 Ok(records)
1209}
1210
/// Summary of one type folder's `index.jsonl`, used to render parent indexes.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the file.
    count: usize,
    /// The record that sorts first in catalog order, if the file has any.
    newest: Option<IndexRecord>,
}
1222
1223fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1233 let text = match fs::read_to_string(jsonl) {
1234 Ok(t) => t,
1235 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1236 Err(e) => return Err(e.into()),
1237 };
1238 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1241 for (i, line) in text.lines().enumerate() {
1242 if line.trim().is_empty() {
1243 continue;
1244 }
1245 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1246 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1247 path: jsonl.to_path_buf(),
1248 message: format!("line {}: {e}", i + 1),
1249 })
1250 })?;
1251 by_path.insert(rec.path.clone(), rec);
1252 }
1253 let count = by_path.len();
1254 let newest = by_path.into_values().min_by(record_recency_cmp);
1258 Ok(FolderStat { count, newest })
1259}
1260
1261fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
1266 match (b.updated, a.updated) {
1267 (Some(bu), Some(au)) => bu.cmp(&au),
1268 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
1271 }
1272 .then_with(|| a.path.cmp(&b.path))
1273}
1274
1275fn collect_child_stats(
1283 store: &Store,
1284 layers: &[Layer],
1285) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1286 let mut stats = BTreeMap::new();
1287 for &layer in layers {
1288 for tf in type_folders_in_layer(store, layer) {
1289 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1290 if stat.count > 0 {
1291 stats.insert(tf, stat);
1292 }
1293 }
1294 }
1295 Ok(stats)
1296}
1297
1298fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1301 let mut out = Vec::new();
1302 if !folder_abs.is_dir() {
1303 return out;
1304 }
1305 for entry in walkdir::WalkDir::new(folder_abs)
1306 .into_iter()
1307 .filter_entry(|e| !is_hidden(e.file_name()))
1308 .filter_map(|e| e.ok())
1309 {
1310 if !entry.file_type().is_file() {
1311 continue;
1312 }
1313 let p = entry.path();
1314 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1315 continue;
1316 }
1317 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1318 continue;
1319 }
1320 out.push(p.to_path_buf());
1321 }
1322 out
1323}
1324
1325fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1328 let layer_dir = store.root.join(layer_dir_name(layer));
1329 let mut out = Vec::new();
1330 let rd = match fs::read_dir(&layer_dir) {
1331 Ok(rd) => rd,
1332 Err(_) => return out,
1333 };
1334 for entry in rd.flatten() {
1335 if !entry.path().is_dir() {
1336 continue;
1337 }
1338 let name = entry.file_name();
1339 let name = match name.to_str() {
1340 Some(n) => n,
1341 None => continue,
1342 };
1343 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1344 continue;
1345 }
1346 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1347 }
1348 out.sort();
1349 out
1350}
1351
1352fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1356 let mut comps = file_rel.components();
1357 let layer = comps.next()?.as_os_str().to_str()?;
1358 layer_from_dir_name(layer)?;
1359 let type_seg = comps.next()?.as_os_str().to_str()?;
1360 Some(PathBuf::from(layer).join(type_seg))
1361}
1362
/// Strips `root` from the front of `abs`, returning the remainder as an owned
/// path, or `None` when `abs` does not start with `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(remainder) => Some(remainder.to_path_buf()),
        Err(_) => None,
    }
}
1367
1368fn normalize_rel(p: &Path) -> PathBuf {
1371 let s = path_to_unix(p);
1372 let s = s.strip_prefix("./").unwrap_or(&s);
1373 PathBuf::from(s)
1374}
1375
/// Returns `true` when the final component of `p` is `index.md` or
/// `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
1382
1383fn is_deletable_catalog_artifact(p: &Path) -> bool {
1397 match p.file_name().and_then(|n| n.to_str()) {
1398 Some("index.jsonl") => true,
1399 Some("index.md") => match read_frontmatter(p) {
1400 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1402 Err(_) => true,
1404 },
1405 _ => false,
1406 }
1407}
1408
/// Returns `true` when `name` is valid UTF-8 and starts with a dot.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    match name.to_str() {
        Some(text) => text.starts_with('.'),
        None => false,
    }
}
1412
1413fn layer_dir_name(layer: Layer) -> &'static str {
1414 match layer {
1415 Layer::Sources => "sources",
1416 Layer::Records => "records",
1417 }
1418}
1419
1420fn layer_from_dir_name(name: &str) -> Option<Layer> {
1423 match name {
1424 "sources" => Some(Layer::Sources),
1425 "records" => Some(Layer::Records),
1426 _ => None,
1427 }
1428}
1429
/// Returns the final component of `p` as a string slice, or `""` when the
/// path has no file name or the name is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1434
1435fn wiki_target(p: &Path) -> String {
1439 let unix = path_to_unix(p);
1440 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1441}
1442
/// Renders a path with `/` between components regardless of the host
/// platform's separator.
///
/// Components come from [`Path::components`], so repeated separators,
/// trailing separators and interior `.` segments are already normalized away.
/// Components that are not valid UTF-8 are converted lossily.
///
/// A root component is emitted as a single `/` and never followed by an extra
/// separator, so `/a/b` renders as `/a/b` (joining every component with `/`
/// would produce `//a/b`). Relative paths, which is what the index works
/// with, render exactly as a plain `/`-join of their components.
fn path_to_unix(p: &Path) -> String {
    let mut out = String::new();
    for component in p.components() {
        if matches!(component, std::path::Component::RootDir) {
            // The root *is* the separator; do not add another one around it.
            out.push('/');
            continue;
        }
        if !(out.is_empty() || out.ends_with('/')) {
            out.push('/');
        }
        out.push_str(&component.as_os_str().to_string_lossy());
    }
    out
}
1460
1461mod path_serde {
1467 use super::path_to_unix;
1468 use serde::{Deserialize, Deserializer, Serializer};
1469 use std::path::{Path, PathBuf};
1470
1471 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1472 s.serialize_str(&path_to_unix(p))
1473 }
1474
1475 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1476 Ok(PathBuf::from(String::deserialize(d)?))
1477 }
1478}
1479
/// Upper-cases the first character of `s` and leaves the rest untouched.
///
/// Uses Unicode upper-casing, so a single character may expand to several
/// (for example `ß` becomes `SS`). An empty input gives an empty string.
fn capitalize(s: &str) -> String {
    let mut rest = s.chars();
    let Some(head) = rest.next() else {
        return String::new();
    };
    let mut out: String = head.to_uppercase().collect();
    out.push_str(rest.as_str());
    out
}
1488
/// Replaces every run of whitespace (including newlines and tabs) with a
/// single space and trims both ends.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so an empty buffer means "first word".
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
1496
1497fn truncate(s: &str, max: usize) -> String {
1499 let one_line = collapse_whitespace(s);
1500 if one_line.chars().count() <= max {
1501 one_line
1502 } else {
1503 one_line.chars().take(max).collect()
1504 }
1505}
1506
1507fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1514 if let Some(parent) = path.parent() {
1515 fs::create_dir_all(parent)?;
1516 }
1517 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1518 let mut tmp = tempfile_in(dir)?;
1519 tmp.write_all(contents.as_bytes())?;
1520 tmp.flush()?;
1521 tmp.persist(path)?;
1522 Ok(())
1523}
1524
1525fn remove_if_exists(path: &Path) -> crate::Result<()> {
1526 match fs::remove_file(path) {
1527 Ok(()) => Ok(()),
1528 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1529 Err(e) => Err(e.into()),
1530 }
1531}
1532
1533fn bad_index(path: &Path, msg: &str) -> crate::Error {
1534 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1535 path: path.to_path_buf(),
1536 message: msg.to_string(),
1537 })
1538}
1539
/// Best-effort lock on a folder, represented by a `.index.lock` file inside
/// that folder.
///
/// The lock file is removed again when the guard is dropped, but only if this
/// guard actually created it (`held`).
struct FolderLock {
    /// Location of the lock file (`<folder>/.index.lock`).
    path: PathBuf,
    /// `true` when this guard created the lock file and must remove it.
    held: bool,
}

impl FolderLock {
    /// Tries to take the lock for `folder_abs`, creating the folder if needed.
    ///
    /// The lock file is created with `create_new`, so only one creator can
    /// succeed. While the file already exists the call retries up to 600
    /// times, sleeping 10 ms between tries. An existing lock file whose
    /// modification time is more than 30 seconds old is removed and the
    /// attempt is retried immediately.
    ///
    /// This never fails: if the lock cannot be taken (attempts exhausted, or
    /// any error other than "already exists"), a guard with `held == false`
    /// is returned and the caller proceeds without exclusivity.
    fn acquire(folder_abs: &Path) -> Self {
        use std::time::{Duration, SystemTime};
        const MAX_ATTEMPTS: u32 = 600;
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let path = folder_abs.join(".index.lock");
        // Failure here surfaces as an open error below.
        let _ = fs::create_dir_all(folder_abs);

        let mut attempts_left = MAX_ATTEMPTS;
        while attempts_left > 0 {
            attempts_left -= 1;
            let failure = match fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path)
            {
                Ok(_) => return FolderLock { path, held: true },
                Err(err) => err,
            };
            if failure.kind() != std::io::ErrorKind::AlreadyExists {
                return FolderLock { path, held: false };
            }
            // A clock that reports the lock as modified in the future counts
            // as "not stale".
            let stale = fs::metadata(&path)
                .and_then(|meta| meta.modified())
                .ok()
                .and_then(|modified| SystemTime::now().duration_since(modified).ok())
                .is_some_and(|age| age > STALE_AFTER);
            if stale {
                let _ = fs::remove_file(&path);
            } else {
                std::thread::sleep(SPIN);
            }
        }
        FolderLock { path, held: false }
    }
}

impl Drop for FolderLock {
    /// Removes the lock file if this guard created it; removal errors are
    /// ignored.
    fn drop(&mut self) {
        if !self.held {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1622
1623fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1629 if a == b {
1630 return vec![FolderLock::acquire(&store.root.join(a))];
1631 }
1632 let (first, second) = if a < b { (a, b) } else { (b, a) };
1633 vec![
1634 FolderLock::acquire(&store.root.join(first)),
1635 FolderLock::acquire(&store.root.join(second)),
1636 ]
1637}
1638
/// A temporary file that is either renamed into place with `persist` or
/// deleted when dropped.
struct AtomicTemp {
    /// Open handle to the temporary file; taken (and closed) by `persist`.
    file: Option<fs::File>,
    /// Location of the temporary file.
    path: PathBuf,
    /// Set once the file has been renamed to its destination.
    persisted: bool,
}

impl AtomicTemp {
    /// Writes all of `bytes` to the temporary file.
    ///
    /// # Panics
    /// Panics if the file handle has already been taken.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.write_all(bytes)
    }
    /// Flushes the temporary file's handle.
    ///
    /// # Panics
    /// Panics if the file handle has already been taken.
    fn flush(&mut self) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.flush()
    }
    /// Closes the temporary file and renames it to `dest`.
    ///
    /// The `sync_all` before closing is best-effort: its error is ignored.
    /// If the rename fails the error is returned and dropping `self` removes
    /// the temporary file.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        match self.file.take() {
            Some(handle) => {
                let _ = handle.sync_all();
                // Close the handle before renaming.
                drop(handle);
            }
            None => {}
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Deletes the temporary file unless it was persisted; errors are ignored.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1676
1677fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1678 use std::time::{SystemTime, UNIX_EPOCH};
1679 let nanos = SystemTime::now()
1680 .duration_since(UNIX_EPOCH)
1681 .map(|d| d.as_nanos())
1682 .unwrap_or(0);
1683 let pid = std::process::id();
1684 let counter = next_temp_counter();
1687 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1688 let path = dir.join(name);
1689 let file = fs::OpenOptions::new()
1690 .write(true)
1691 .create_new(true)
1692 .open(&path)?;
1693 Ok(AtomicTemp {
1694 file: Some(file),
1695 path,
1696 persisted: false,
1697 })
1698}
1699
/// Returns the next value of a process-wide counter, starting at 0.
///
/// Each call returns the value before incrementing, so every call in the
/// process observes a distinct number.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};
    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
1705
1706#[cfg(test)]
1707mod tests {
1708 use super::*;
1709 use std::collections::BTreeSet;
1710 use std::fs;
1711 use tempfile::TempDir;
1712
1713 fn mk_store() -> (TempDir, Store) {
1718 let dir = TempDir::new().unwrap();
1719 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1720 let store = Store {
1721 root: dir.path().to_path_buf(),
1722 config: crate::parser::Config::default(),
1723 };
1724 (dir, store)
1725 }
1726
1727 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1730 let abs = store.root.join(rel);
1731 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1732 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1733 }
1734
1735 fn write_doc(
1737 store: &Store,
1738 rel: &str,
1739 type_: &str,
1740 summary: Option<&str>,
1741 updated: Option<&str>,
1742 extra_yaml: &str,
1743 ) {
1744 let mut fm = format!("type: {type_}\n");
1745 if let Some(s) = summary {
1746 fm.push_str(&format!("summary: {s}\n"));
1747 }
1748 if let Some(u) = updated {
1749 fm.push_str(&format!("updated: {u}\n"));
1750 }
1751 fm.push_str(extra_yaml);
1752 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1753 }
1754
1755 fn read(store: &Store, rel: &str) -> String {
1756 fs::read_to_string(store.root.join(rel)).unwrap()
1757 }
1758
1759 fn exists(store: &Store, rel: &str) -> bool {
1760 store.root.join(rel).exists()
1761 }
1762
1763 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1766 let mut out = BTreeMap::new();
1767 for entry in walkdir::WalkDir::new(&store.root)
1768 .into_iter()
1769 .filter_map(|e| e.ok())
1770 {
1771 let p = entry.path();
1772 if is_index_artifact(p) {
1773 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1774 out.insert(rel, fs::read_to_string(p).unwrap());
1775 }
1776 }
1777 out
1778 }
1779
    /// Three emails are written into two month shards (`2026/05`, `2026/06`)
    /// in non-chronological order; the type-folder index must contain all of
    /// them, ordered by `updated` descending.
    #[test]
    fn type_folder_aggregates_across_shards_in_recency_order() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "sources/emails/2026/05/b-old.md",
            "email",
            Some("Older mail"),
            Some("2026-05-01T09:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/06/c-new.md",
            "email",
            Some("Newest mail"),
            Some("2026-06-15T12:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/emails/2026/05/a-mid.md",
            "email",
            Some("Middle mail"),
            Some("2026-05-20T08:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
        // File names are deliberately out of step with recency (`a-mid` is
        // newer than `b-old`), so a name-ordered result would fail here.
        assert_eq!(
            paths,
            vec![
                "sources/emails/2026/06/c-new.md",
                "sources/emails/2026/05/a-mid.md",
                "sources/emails/2026/05/b-old.md",
            ],
            "records must aggregate across shards, newest `updated` first"
        );
    }
1824
    /// Pins the exact markdown rendering of a type-folder index: the
    /// frontmatter and heading, a tagged entry, an untagged entry, and the
    /// absence of the over-cap footer.
    #[test]
    fn type_folder_md_format_entries_tags_and_derived_updated() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/sarah-chen.md",
            "contact",
            Some("Renewal champion at Acme"),
            Some("2026-05-27T10:00:00Z"),
            "tags:\n - renewal\n - acme\n",
        );
        write_doc(
            &store,
            "records/contacts/no-tags.md",
            "contact",
            Some("Plain contact"),
            Some("2026-05-26T10:00:00Z"),
            "",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
        let md = idx.to_markdown();

        // The index's own `updated` equals the newest member's `updated`.
        assert!(md.starts_with(
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
        ), "frontmatter/heading wrong:\n{md}");

        // Tags are appended after a ` · ` separator as `#tag` words.
        assert!(
            md.contains(
                "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
            ),
            "tagged entry wrong:\n{md}"
        );
        // Without tags the line ends right after the summary.
        assert!(
            md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
            "untagged entry wrong:\n{md}"
        );
        assert!(
            !md.contains("Plain contact ·"),
            "untagged entry must not emit a tag separator"
        );
        assert!(!md.contains("## More"), "no footer expected under the cap");
    }
1873
    /// A document written without a `summary` must be indexed with the
    /// `MISSING_SUMMARY` placeholder, both in the record and in the markdown.
    #[test]
    fn missing_summary_becomes_placeholder_not_invented() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/notes/x.md",
            "note",
            None,
            Some("2026-05-27T10:00:00Z"),
            "",
        );
        let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
        assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
        let md = idx.to_markdown();
        assert!(
            md.contains("- [[records/notes/x]] — (no summary)\n"),
            "missing summary must render the placeholder, not invent text:\n{md}"
        );
    }
1893
    /// Checks the JSONL rendering of a type-folder index: one line per file,
    /// each line deserializing back to its `IndexRecord`, reserved keys kept
    /// out of the flattened `fields`, and a stable key order in the raw text.
    #[test]
    fn jsonl_is_complete_structured_and_round_trips() {
        let (_d, store) = mk_store();
        // `e1` carries every reserved key plus several extra frontmatter keys
        // (scalars, a wiki-link string and a list).
        write_doc(
            &store,
            "records/expenses/2026/05/e1.md",
            "expense",
            Some("Lunch with vendor"),
            Some("2026-05-10T10:00:00Z"),
            "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[wiki/themes/spend]]\ntags:\n - food\nlinks:\n - wiki/themes/spend\n - [[wiki/themes/renewal]]\n",
        );
        write_doc(
            &store,
            "records/expenses/2026/06/e2.md",
            "expense",
            Some("Cloud bill"),
            Some("2026-06-01T10:00:00Z"),
            "amount: 100\n",
        );

        let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
        let jsonl = idx.to_jsonl();
        let lines: Vec<&str> = jsonl.lines().collect();
        assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");

        // Line 0 is the newer document (`e2`).
        let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
        assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
        assert_eq!(
            r0, idx.records[0],
            "jsonl line must round-trip to the record"
        );

        let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
        assert_eq!(r1.type_, "expense");
        assert_eq!(r1.summary, "Lunch with vendor");
        assert_eq!(r1.tags, vec!["food".to_string()]);
        // Link values are kept as written, with or without `[[...]]`.
        assert_eq!(
            r1.links,
            vec![
                "wiki/themes/spend".to_string(),
                "[[wiki/themes/renewal]]".to_string()
            ]
        );
        assert_eq!(
            r1.created,
            Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
        );
        assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
        assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
        assert_eq!(
            r1.fields.get("company"),
            Some(&Value::from("[[records/companies/acme]]"))
        );
        assert_eq!(
            r1.fields.get("related"),
            Some(&serde_json::json!(["[[wiki/themes/spend]]"]))
        );
        // Reserved keys have dedicated struct fields and must not also show
        // up in the flattened extras.
        for reserved in [
            "path", "type", "summary", "tags", "links", "created", "updated",
        ] {
            assert!(
                !r1.fields.contains_key(reserved),
                "reserved key {reserved} must not appear in fields"
            );
        }

        // Raw text: reserved keys come first, in struct declaration order.
        assert!(
            lines[1].starts_with(
                r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend","[[wiki/themes/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
            ),
            "jsonl key order not stable:\n{}",
            lines[1]
        );
        // Extras follow in sorted key order.
        // NOTE(review): `meta-type` is not written by this fixture, so it is
        // presumably added while the record is built — confirm against
        // `record_from_file`.
        assert!(
            lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[wiki/themes/spend]]"],"status":"paid"}"#),
            "extras must be sorted:\n{}",
            lines[1]
        );
    }
1984
    /// With `MD_CAP + 7` files in one type folder, the markdown view lists
    /// exactly `MD_CAP` entries plus a `## More` footer, while the records and
    /// the JSONL keep every file.
    #[test]
    fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
        let (_d, store) = mk_store();
        let total = MD_CAP + 7;
        for i in 0..total {
            // Spread `updated` over days 1..=27 and varying seconds so the
            // timestamps are valid and not all identical.
            let day = 1 + (i % 27);
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
            write_doc(
                &store,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
        }
        let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
        assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

        let md = idx.to_markdown();
        let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
        assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");

        assert!(
            md.contains("## More\n\n"),
            "over-cap md needs a More footer"
        );
        assert!(
            md.contains(&format!(
                "This folder has {total} files. The 500 most recent are listed above.\n"
            )),
            "footer count wrong:\n{md}"
        );
        // The footer's query hint names the member type and the layer.
        assert!(
            md.contains(
                "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
            ),
            "footer must infer type=email layer=sources:\n{md}"
        );

        let jsonl = idx.to_jsonl();
        assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
    }
2032
    /// `sort_records` orders by `updated` descending, breaks ties by path
    /// ascending, and places records without `updated` last.
    #[test]
    fn sort_breaks_ties_by_path_and_puts_undated_last() {
        let mut recs = vec![
            // Same timestamp as `a/b.md`: the tie is resolved by path.
            rec("z/a.md", Some("2026-05-01T00:00:00Z")),
            rec("a/b.md", Some("2026-05-01T00:00:00Z")),
            // No `updated`: expected at the end.
            rec("m/c.md", None),
            // Newest: expected first.
            rec("b/d.md", Some("2026-06-01T00:00:00Z")),
        ];
        sort_records(&mut recs);
        let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
        assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
    }
2047
2048 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2049 IndexRecord {
2050 path: PathBuf::from(path),
2051 type_: "t".into(),
2052 summary: "s".into(),
2053 tags: vec![],
2054 links: vec![],
2055 created: None,
2056 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2057 fields: BTreeMap::new(),
2058 }
2059 }
2060
    /// The layer index lists its type folders alphabetically, each with a
    /// capitalized label, a file count and the newest member's summary as a
    /// preview; its own `updated` is the newest `updated` among the members.
    #[test]
    fn layer_index_lists_type_folders_with_counts_and_preview() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Contact A older"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/b.md",
            "contact",
            Some("Contact B newest"),
            Some("2026-05-09T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/x.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        // Write both type-folder indexes before the layer index is written.
        Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
        Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();

        Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
        let md = read(&store, "records/index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
            "layer fm:\n{md}"
        );
        let companies_at = md.find("companies/index").unwrap();
        let contacts_at = md.find("contacts/index").unwrap();
        assert!(
            companies_at < contacts_at,
            "type folders must be alphabetical"
        );
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (2) — Contact B newest\n"),
            "contacts entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/companies/index|Companies]] (1) — Acme Inc\n"),
            "companies entry:\n{md}"
        );
        assert!(
            md.contains("updated: 2026-05-09T00:00:00Z\n"),
            "layer updated must be max child:\n{md}"
        );
    }
2123
    /// The root index has one section per non-empty layer (Sources before
    /// Records) with the layer's total in the heading, and one entry per type
    /// folder showing its count and no preview text.
    #[test]
    fn root_index_groups_layers_with_totals_and_per_type_counts() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "sources/emails/2026/05/a.md",
            "email",
            Some("Mail"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "sources/docs/d.md",
            "doc",
            Some("Doc"),
            Some("2026-05-02T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/contacts/c.md",
            "contact",
            Some("C"),
            Some("2026-05-03T00:00:00Z"),
            "",
        );
        Index::rebuild_all(&store).unwrap();
        let md = read(&store, "index.md");

        assert!(
            md.starts_with("---\ntype: index\nscope: root\n"),
            "root fm:\n{md}"
        );
        assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
        let sources_h = md
            .find("## Sources (2)")
            .expect("sources heading w/ total 2");
        let records_h = md
            .find("## Records (1)")
            .expect("records heading w/ total 1");
        assert!(sources_h < records_h, "Sources must precede Records");
        assert!(!md.contains("## Wiki"), "empty layer gets no section");
        assert!(
            md.contains("- [[sources/docs/index|Docs]] (1)\n"),
            "root docs entry:\n{md}"
        );
        assert!(
            md.contains("- [[sources/emails/index|Emails]] (1)\n"),
            "root emails entry:\n{md}"
        );
        assert!(
            md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root contacts entry:\n{md}"
        );
        // An em dash followed by a space only appears before preview text.
        assert!(!md.contains("— "), "root entries carry no preview text");
    }
2185
    /// Two stores receive the same documents. One is indexed incrementally
    /// with `on_write` after each file, the other once with `rebuild_all`.
    /// Both must end with the same set of index artifacts and identical
    /// contents.
    #[test]
    fn on_write_matches_rebuild_byte_for_byte() {
        // `wt` = write-through store, `rb` = rebuild store.
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        // (path, type, summary, updated, extra frontmatter)
        let docs: &[(&str, &str, &str, &str, &str)] = &[
            (
                "sources/emails/2026/05/e1.md",
                "email",
                "First mail",
                "2026-05-01T10:00:00Z",
                "tags:\n - inbox\n",
            ),
            (
                "sources/emails/2026/06/e2.md",
                "email",
                "Second mail",
                "2026-06-01T10:00:00Z",
                "",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T10:00:00Z",
                "links:\n - wiki/people/sarah\n",
            ),
            (
                "records/contacts/elena.md",
                "contact",
                "Elena",
                "2026-05-20T10:00:00Z",
                "status: active\n",
            ),
            (
                "records/profiles/sarah.md",
                "profile",
                "Sarah bio",
                "2026-05-21T10:00:00Z",
                "",
            ),
        ];

        for (rel, t, sum, upd, extra) in docs {
            write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
            write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
            Index::on_write(&wt, Path::new(rel)).unwrap();
        }
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "same set of index artifacts must exist"
        );
        for (k, v) in &a {
            assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
        }
        // Guard against both snapshots being trivially empty.
        assert!(a.contains_key("index.md"));
        assert!(a.contains_key("sources/emails/index.jsonl"));
        assert!(a.contains_key("records/contacts/index.md"));
    }
2255
    /// An `on_write` for one type folder must build the layer and root
    /// rollups only from folders that already have index artifacts; it must
    /// not pick up files in a sibling folder that was never indexed. Once the
    /// sibling is indexed too, the result must equal a full rebuild.
    #[test]
    fn loop_op_does_not_walk_sibling_content_tree() {
        let (_d, store) = mk_store();

        // Two company files exist on disk but are never indexed at this point.
        write_doc(
            &store,
            "records/companies/acme.md",
            "company",
            Some("Acme Inc"),
            Some("2026-05-05T00:00:00Z"),
            "",
        );
        write_doc(
            &store,
            "records/companies/globex.md",
            "company",
            Some("Globex"),
            Some("2026-05-06T00:00:00Z"),
            "",
        );
        assert!(
            !exists(&store, "records/companies/index.jsonl"),
            "precondition: companies must be un-indexed"
        );

        // Index only the contacts folder through the incremental path.
        write_doc(
            &store,
            "records/contacts/sarah.md",
            "contact",
            Some("Sarah"),
            Some("2026-05-15T00:00:00Z"),
            "",
        );
        Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();

        let layer_md = read(&store, "records/index.md");
        let root_md = read(&store, "index.md");
        assert!(
            layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
            "layer must reflect the written folder:\n{layer_md}"
        );
        assert!(
            root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
            "root must reflect the written folder:\n{root_md}"
        );

        // The un-indexed sibling must be invisible to both rollups.
        assert!(
            !layer_md.contains("companies"),
            "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
        );
        assert!(
            !root_md.contains("companies"),
            "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
        );
        assert!(
            root_md.contains("## Records (1)"),
            "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
        );

        // Second half: index the sibling as well and compare against a store
        // with the same three files built by `rebuild_all`.
        let (_d2, rb) = mk_store();
        for (rel, t, s, u) in [
            (
                "records/companies/acme.md",
                "company",
                "Acme Inc",
                "2026-05-05T00:00:00Z",
            ),
            (
                "records/companies/globex.md",
                "company",
                "Globex",
                "2026-05-06T00:00:00Z",
            ),
            (
                "records/contacts/sarah.md",
                "contact",
                "Sarah",
                "2026-05-15T00:00:00Z",
            ),
        ] {
            write_doc(&rb, rel, t, Some(s), Some(u), "");
        }
        Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
        Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
        Index::rebuild_all(&rb).unwrap();
        let a = snapshot_artifacts(&store);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<BTreeSet<_>>(),
            b.keys().collect::<BTreeSet<_>>(),
            "same artifact set after indexing both folders"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "after indexing the sibling too, loop result must equal rebuild for {k}"
            );
        }
        assert!(
            read(&store, "index.md").contains("## Records (3)"),
            "now that both folders are indexed, the root total is 3"
        );
    }
2389
    /// A path computed by `Store::shard_path_for` for a `wiki-page` document
    /// must be accepted by the index: `type_folder_of` resolves it, `on_write`
    /// succeeds, the page shows up in the type-folder and layer indexes, and
    /// the incremental result matches a full rebuild.
    #[test]
    fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();

        let rel = wt
            .shard_path_for(
                "wiki-page",
                &crate::parser::Frontmatter::default(),
                "renewal-theme",
            )
            .unwrap();
        let rel_str = path_to_unix(&rel);
        assert!(
            type_folder_of(&rel).is_some(),
            "shard_path_for produced a path the index cannot file: {rel_str}"
        );

        write_doc(
            &wt,
            &rel_str,
            "wiki-page",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );
        write_doc(
            &rb,
            &rel_str,
            "wiki-page",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );

        Index::on_write(&wt, &rel)
            .expect("on_write must succeed for a toolkit-computed wiki-page path");
        Index::rebuild_all(&rb).unwrap();

        // The assertions below read from the rebuilt store and expect the
        // page under `records/wiki-page/`.
        let page_link = wiki_target(&rel);
        let tf_md = read(&rb, "records/wiki-page/index.md");
        assert!(
            tf_md.contains(&format!("[[{page_link}]]")),
            "type-folder index must list the page link, got:\n{tf_md}"
        );
        assert!(
            exists(&rb, "records/wiki-page/index.jsonl"),
            "type-folder jsonl must exist"
        );
        assert!(
            read(&rb, "records/wiki-page/index.jsonl").contains(&rel_str),
            "type-folder jsonl must contain the page row"
        );
        let layer_md = read(&rb, "records/index.md");
        assert!(
            layer_md.contains("records/wiki-page/index"),
            "layer index must roll up the records/wiki-page type-folder, got:\n{layer_md}"
        );

        // Incremental (`wt`) and rebuilt (`rb`) stores must agree exactly.
        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(
            a.keys().collect::<Vec<_>>(),
            b.keys().collect::<Vec<_>>(),
            "loop and sweep must produce the same artifact set"
        );
        for (k, v) in &a {
            assert_eq!(
                v, &b[k],
                "wiki-page artifact {k} differs between on_write and rebuild"
            );
        }
    }
2489
    /// With `MD_CAP + 3` files, removing the newest one through `on_remove`
    /// must match a rebuild, keep the markdown view at `MD_CAP` entries by
    /// pulling in the next-most-recent file, and drop the removed file from
    /// both the markdown and the JSONL.
    #[test]
    fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();
        let total = MD_CAP + 3;
        let mut all_rels = Vec::new();
        for i in 0..total {
            let rel = format!("sources/emails/2026/05/m-{i:04}.md");
            // Strictly increasing timestamps: a higher `i` is more recent.
            let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
            write_doc(
                &wt,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            write_doc(
                &rb,
                &rel,
                "email",
                Some(&format!("mail {i}")),
                Some(&updated),
                "",
            );
            all_rels.push(rel);
        }
        Index::rebuild_all(&wt).unwrap();
        // Incremental store: delete the newest file, then notify the index.
        let newest = &all_rels[total - 1];
        fs::remove_file(wt.root.join(newest)).unwrap();
        Index::on_remove(&wt, Path::new(newest)).unwrap();

        // Reference store: delete the same file, then rebuild from scratch.
        fs::remove_file(rb.root.join(newest)).unwrap();
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        for (k, v) in &a {
            assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
        }

        let md = read(&wt, "sources/emails/index.md");
        assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
        assert!(
            !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
            "removed file must not be listed in md"
        );
        // Before the removal the view held indices 3..=MD_CAP+2; afterwards
        // it holds 2..=MD_CAP+1, so index 2 is the newly visible entry.
        let pulled_in = &all_rels[2];
        assert!(
            md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
            "the 501st-most-recent must be pulled into the browse view after a removal"
        );
        assert!(
            md.contains(&format!("This folder has {} files.", total - 1)),
            "footer count must decrement:\n{}",
            md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
        );
        let jsonl = read(&wt, "sources/emails/index.jsonl");
        assert_eq!(
            jsonl.lines().count(),
            total - 1,
            "jsonl loses exactly the removed file"
        );
        assert!(
            !jsonl.contains(&path_to_unix(Path::new(newest))),
            "removed file must be gone from the jsonl too"
        );
    }
2567
    /// Moving a file from one type folder to another and reporting it with
    /// `on_rename` must leave the same artifacts as performing the move and
    /// rebuilding everything.
    #[test]
    fn on_rename_cross_folder_matches_rebuild() {
        let (_d1, wt) = mk_store();
        let (_d2, rb) = mk_store();
        // (path, type, summary, updated)
        let seed: &[(&str, &str, &str, &str)] = &[
            (
                "records/contacts/a.md",
                "contact",
                "A",
                "2026-05-01T00:00:00Z",
            ),
            (
                "records/contacts/b.md",
                "contact",
                "B",
                "2026-05-02T00:00:00Z",
            ),
            (
                "records/companies/x.md",
                "company",
                "X",
                "2026-05-03T00:00:00Z",
            ),
        ];
        for (rel, t, s, u) in seed {
            write_doc(&wt, rel, t, Some(s), Some(u), "");
            write_doc(&rb, rel, t, Some(s), Some(u), "");
        }
        Index::rebuild_all(&wt).unwrap();

        // Incremental store: move on disk first, then report the rename.
        let old = "records/contacts/b.md";
        let new = "records/companies/b.md";
        fs::create_dir_all(wt.root.join("records/companies")).unwrap();
        fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
        Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();

        // Reference store: same move, followed by a full rebuild.
        fs::create_dir_all(rb.root.join("records/companies")).unwrap();
        fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
        Index::rebuild_all(&rb).unwrap();

        let a = snapshot_artifacts(&wt);
        let b = snapshot_artifacts(&rb);
        assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
        for (k, v) in &a {
            assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
        }
        // The entry left the source folder's index and appears in the target's.
        let contacts = read(&wt, "records/contacts/index.md");
        assert!(!contacts.contains("records/contacts/b]]"));
        let companies = read(&wt, "records/companies/index.md");
        assert!(companies.contains("[[records/companies/b]]"));
    }
2626
    /// Calling `on_write` twice for the same path replaces the existing entry
    /// instead of appending a second one, and the index's `updated` follows
    /// the newer content.
    #[test]
    fn on_write_updates_existing_entry_in_place() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Original"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();
        // Overwrite the same file with a new summary and a later timestamp.
        write_doc(
            &store,
            "records/contacts/a.md",
            "contact",
            Some("Revised"),
            Some("2026-05-09T00:00:00Z"),
            "",
        );
        Index::on_write(&store, Path::new("records/contacts/a.md")).unwrap();

        let jsonl = read(&store, "records/contacts/index.jsonl");
        assert_eq!(
            jsonl.lines().count(),
            1,
            "upsert must not duplicate the line"
        );
        assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
        assert!(
            !jsonl.contains("Original"),
            "stale line must be gone (compacted)"
        );
        let md = read(&store, "records/contacts/index.md");
        assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
        assert!(
            md.contains("updated: 2026-05-09T00:00:00Z\n"),
            "index updated must track the newer member"
        );
    }
2668
    /// `render_dry_run` returns the would-be artifacts as one string, each
    /// introduced by a `--- <path> ---` separator line, and leaves the store
    /// without any index files.
    #[test]
    fn dry_run_emits_separators_and_writes_nothing() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "sources/emails/2026/05/a.md",
            "email",
            Some("Mail"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        let out = Index::render_dry_run(&store, &IndexLevel::TypeFolder("sources/emails".into()))
            .unwrap();
        assert!(
            out.contains("--- sources/emails/index.md ---\n"),
            "md separator:\n{out}"
        );
        assert!(
            out.contains("--- sources/emails/index.jsonl ---\n"),
            "jsonl separator:\n{out}"
        );
        assert!(
            out.contains("- [[sources/emails/2026/05/a]] — Mail"),
            "md body present"
        );
        // Nothing may have been written to disk.
        assert!(
            !exists(&store, "sources/emails/index.md"),
            "dry-run must not write"
        );
        assert!(
            !exists(&store, "sources/emails/index.jsonl"),
            "dry-run must not write"
        );
    }
2706
    /// `cleanup` deletes index artifacts that sit in a shard subdirectory and
    /// index artifacts of a type folder with no documents, and leaves content
    /// files alone.
    #[test]
    fn cleanup_removes_noncanonical_and_empty_indexes() {
        let (_d, store) = mk_store();
        write_doc(
            &store,
            "sources/emails/2026/05/a.md",
            "email",
            Some("Mail"),
            Some("2026-05-01T00:00:00Z"),
            "",
        );
        // Stray artifacts inside a shard directory (below the type folder).
        fs::write(
            store.root.join("sources/emails/2026/05/index.md"),
            "stale\n",
        )
        .unwrap();
        fs::write(
            store.root.join("sources/emails/2026/05/index.jsonl"),
            "stale\n",
        )
        .unwrap();
        // An index left behind in a type folder that has no documents.
        fs::create_dir_all(store.root.join("records/empty")).unwrap();
        fs::write(store.root.join("records/empty/index.md"), "stale\n").unwrap();

        Index::cleanup(&store).unwrap();

        assert!(
            !exists(&store, "sources/emails/2026/05/index.md"),
            "shard index must be deleted"
        );
        assert!(
            !exists(&store, "sources/emails/2026/05/index.jsonl"),
            "shard jsonl must be deleted"
        );
        assert!(
            !exists(&store, "records/empty/index.md"),
            "empty-folder index must be deleted"
        );
        // The real document must survive the cleanup.
        assert!(exists(&store, "sources/emails/2026/05/a.md"));
    }
2750
2751 #[test]
2752 fn rebuild_deletes_stale_indexes_for_emptied_folders() {
2753 let (_d, store) = mk_store();
2754 write_doc(
2755 &store,
2756 "records/contacts/a.md",
2757 "contact",
2758 Some("A"),
2759 Some("2026-05-01T00:00:00Z"),
2760 "",
2761 );
2762 Index::rebuild_all(&store).unwrap();
2763 assert!(exists(&store, "records/contacts/index.md"));
2764 assert!(exists(&store, "records/index.md"));
2765 assert!(exists(&store, "index.md"));
2766
2767 fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
2769 Index::rebuild_all(&store).unwrap();
2770 assert!(
2771 !exists(&store, "records/contacts/index.md"),
2772 "emptied type-folder index gone"
2773 );
2774 assert!(
2775 !exists(&store, "records/index.md"),
2776 "now-empty layer index gone"
2777 );
2778 assert!(!exists(&store, "index.md"), "now-empty root index gone");
2779 }
2780
 #[test]
 // Property-style check: a store maintained incrementally through the
 // `on_write` / `on_remove` / `on_rename` hooks must end up with exactly the
 // same artifacts as a store that received the same file operations and was
 // then indexed once with `rebuild_all`.
 //
 // The operation sequence is driven by a fixed-seed generator, so every run
 // replays the same 120 steps.
 fn property_writethrough_equals_rebuild_under_mixed_ops() {
     // `wt` is the write-through store (hooks called after every operation).
     let (_d1, wt) = mk_store();
     // `rb` mirrors the file operations but is only indexed at the very end.
     let (_d2, rb) = mk_store();
     // Linear congruential generator over a u64 state; each call advances the
     // state with wrapping arithmetic and returns its upper bits as a u32.
     let mut seed: u64 = 0x9E3779B97F4A7C15;
     let mut next = || {
         seed = seed
             .wrapping_mul(6364136223846793005)
             .wrapping_add(1442695040888963407);
         (seed >> 33) as u32
     };

     // `folders[i]` is paired with `types[i]` when a document is written.
     let folders = ["sources/emails", "records/contacts", "records/profiles"];
     let types = ["email", "contact", "profile"];
     // Relative paths of the documents that currently exist in both stores.
     let mut live: Vec<String> = Vec::new();
     for step in 0..120u32 {
         let r = next();
         let op = r % 10;
         if op < 6 || live.is_empty() {
             // Write (create or overwrite). Also forced when nothing is live,
             // because remove and rename need an existing document.
             let fi = (next() as usize) % folders.len();
             let folder = folders[fi];
             // Written ids are in 0..40, so repeated draws revisit existing paths.
             let id = next() % 40;
             let rel = if folder == "sources/emails" {
                 // Emails live below a year/month directory; the month is 05 or 06
                 // depending on the parity of the id.
                 let month = 5 + (id % 2);
                 format!("{folder}/2026/{month:02}/f-{id:02}.md")
             } else {
                 format!("{folder}/f-{id:02}.md")
             };
             // Timestamp derived from the step and id: day 01..27, hour 00..23.
             let updated = format!(
                 "2026-05-{:02}T{:02}:{:02}:00Z",
                 1 + (step % 27),
                 step % 24,
                 id % 60
             );
             // Every third id additionally carries a two-element `tags` list.
             let extra = if id % 3 == 0 {
                 "tags:\n - x\n - y\n"
             } else {
                 ""
             };
             // The identical document goes into both stores.
             write_doc(
                 &wt,
                 &rel,
                 types[fi],
                 Some(&format!("sum {step}")),
                 Some(&updated),
                 extra,
             );
             write_doc(
                 &rb,
                 &rel,
                 types[fi],
                 Some(&format!("sum {step}")),
                 Some(&updated),
                 extra,
             );
             // Only the write-through store is told about the change.
             Index::on_write(&wt, Path::new(&rel)).unwrap();
             if !live.contains(&rel) {
                 live.push(rel);
             }
         } else if op < 8 {
             // Remove a randomly chosen live document from both stores.
             let idx = (next() as usize) % live.len();
             let rel = live.remove(idx);
             fs::remove_file(wt.root.join(&rel)).unwrap();
             // A failure to remove the mirror copy is ignored here.
             fs::remove_file(rb.root.join(&rel)).ok();
             Index::on_remove(&wt, Path::new(&rel)).unwrap();
         } else {
             // Rename a randomly chosen live document, possibly into another folder.
             let idx = (next() as usize) % live.len();
             let old = live[idx].clone();
             let fi = (next() as usize) % folders.len();
             let folder = folders[fi];
             // Rename targets use ids in 50..90, disjoint from the 0..40 ids used by writes.
             let id = 50 + (next() % 40);
             let new = if folder == "sources/emails" {
                 format!("{folder}/2026/05/f-{id:02}.md")
             } else {
                 format!("{folder}/f-{id:02}.md")
             };
             // Skip no-op renames and renames onto a document that is already live.
             if new == old || live.contains(&new) {
                 continue;
             }
             // The destination directory may not exist yet in either store.
             fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
             fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
             fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
             fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
             Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
             live[idx] = new;
         }
     }

     // Index the mirror store in one go, then compare both stores' artifacts.
     Index::rebuild_all(&rb).unwrap();
     let a = snapshot_artifacts(&wt);
     let b = snapshot_artifacts(&rb);
     // First the set of artifact keys must match...
     assert_eq!(
         a.keys().collect::<BTreeSet<_>>(),
         b.keys().collect::<BTreeSet<_>>(),
         "write-through and rebuild must produce the same set of artifacts"
     );
     // ...then every artifact's content must match as well.
     for (k, v) in &a {
         assert_eq!(
             v, &b[k],
             "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
             b[k]
         );
     }
     // Guard against a vacuous pass where no artifact was produced at all.
     assert!(
         !a.is_empty(),
         "the run must have produced at least one artifact"
     );
 }
2899
2900 #[test]
2906 fn cleanup_preserves_user_content_named_index_md_in_shard() {
2907 let (_d, store) = mk_store();
2908 write_doc(
2910 &store,
2911 "sources/emails/2026/06/index.md",
2912 "email",
2913 Some("Important imported mail"),
2914 Some("2026-06-11T04:23:25Z"),
2915 "",
2916 );
2917 Index::cleanup(&store).unwrap();
2918 assert!(
2919 exists(&store, "sources/emails/2026/06/index.md"),
2920 "cleanup must not delete a user content file named index.md"
2921 );
2922 Index::rebuild_all(&store).unwrap();
2924 assert!(
2925 exists(&store, "sources/emails/2026/06/index.md"),
2926 "rebuild_all must not delete a user content file named index.md"
2927 );
2928 let kept = read(&store, "sources/emails/2026/06/index.md");
2929 assert!(
2930 kept.contains("Important imported mail"),
2931 "the user's record content must be intact"
2932 );
2933 }
2934
2935 #[test]
2940 fn cleanup_keeps_canonical_type_folder_root_sidecars() {
2941 let (_d, store) = mk_store();
2942 write_doc(
2943 &store,
2944 "records/contacts/alice.md",
2945 "contact",
2946 Some("Alice"),
2947 Some("2026-05-01T00:00:00Z"),
2948 "",
2949 );
2950 Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
2951 assert!(exists(&store, "records/contacts/index.md"));
2952 assert!(exists(&store, "records/contacts/index.jsonl"));
2953 Index::cleanup(&store).unwrap();
2954 assert!(
2955 exists(&store, "records/contacts/index.md"),
2956 "cleanup must keep the canonical type-folder index.md (non-empty folder)"
2957 );
2958 assert!(
2959 exists(&store, "records/contacts/index.jsonl"),
2960 "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)"
2961 );
2962 }
2963
2964 #[test]
2970 fn on_write_ignores_index_artifact_no_phantom_row() {
2971 let (_d, store) = mk_store();
2972 write_doc(
2973 &store,
2974 "records/contacts/alice.md",
2975 "contact",
2976 Some("Alice"),
2977 Some("2026-05-01T00:00:00Z"),
2978 "",
2979 );
2980 Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
2981 let jsonl_before = read(&store, "records/contacts/index.jsonl");
2982 assert_eq!(jsonl_before.lines().count(), 1);
2983
2984 Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();
2986
2987 let jsonl_after = read(&store, "records/contacts/index.jsonl");
2988 assert_eq!(
2989 jsonl_after.lines().count(),
2990 1,
2991 "on_write on index.md must not add a phantom self-row"
2992 );
2993 assert!(
2994 !jsonl_after.contains("\"type\":\"index\""),
2995 "the catalog artifact must never appear as a catalogued row"
2996 );
2997 let root = read(&store, "index.md");
2999 assert!(
3000 root.contains("[[records/contacts/index|Contacts]] (1)"),
3001 "count must not inflate:\n{root}"
3002 );
3003 }
3004
3005 #[test]
3011 fn multiline_summary_is_single_lined_in_index_md() {
3012 let (_d, store) = mk_store();
3013 write_raw(
3015 &store,
3016 "records/notes/evil.md",
3017 "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry",
3018 "\nbody\n",
3019 );
3020 let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
3021 let md = idx.to_markdown();
3022 let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
3024 assert_eq!(
3025 entry_lines, 1,
3026 "a multi-line summary must not produce extra entry lines:\n{md}"
3027 );
3028 assert!(
3029 md.contains(
3030 "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n"
3031 ),
3032 "summary newlines must collapse to spaces inline:\n{md}"
3033 );
3034 }
3035
3036 #[test]
3044 fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
3045 let (_d, store) = mk_store();
3046 write_raw(
3047 &store,
3048 "records/contacts/a.md",
3049 "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
3050 "\nbody\n",
3051 );
3052 let rec = record_from_file(
3053 &store.root.join("records/contacts/a.md"),
3054 PathBuf::from("records/contacts/a.md"),
3055 )
3056 .unwrap();
3057 assert_eq!(rec.summary, "2026");
3060 assert_eq!(rec.type_, "contact");
3061
3062 let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
3064 let md = idx.to_markdown();
3065 assert!(
3066 md.contains("- [[records/contacts/a]] — 2026\n"),
3067 "index entry must hold the coerced scalar, not the placeholder:\n{md}"
3068 );
3069
3070 write_raw(
3072 &store,
3073 "records/contacts/b.md",
3074 "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
3075 "\nbody\n",
3076 );
3077 let rec_b = record_from_file(
3078 &store.root.join("records/contacts/b.md"),
3079 PathBuf::from("records/contacts/b.md"),
3080 )
3081 .unwrap();
3082 assert_eq!(rec_b.type_, "true");
3083 }
3084
3085 #[test]
3093 fn non_utf8_body_does_not_abort_record_projection() {
3094 let (_d, store) = mk_store();
3095 let rel = "sources/emails/2026/06/x.md";
3096 let abs = store.root.join(rel);
3097 fs::create_dir_all(abs.parent().unwrap()).unwrap();
3098 let mut bytes: Vec<u8> =
3100 b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf"
3101 .to_vec();
3102 bytes.push(0xE9);
3103 bytes.extend_from_slice(b" meeting notes\n");
3104 fs::write(&abs, bytes).unwrap();
3105
3106 let rec = record_from_file(&abs, PathBuf::from(rel))
3107 .expect("non-UTF-8 body must not abort the frontmatter read");
3108 assert_eq!(rec.summary, "An imported email");
3109 assert_eq!(rec.type_, "email");
3110
3111 Index::rebuild_all(&store).unwrap();
3113 assert!(
3114 exists(&store, "sources/emails/index.jsonl"),
3115 "rebuild must produce the catalog despite a non-UTF-8 body byte"
3116 );
3117 assert!(
3118 read(&store, "sources/emails/index.jsonl").contains("An imported email"),
3119 "the record must be catalogued"
3120 );
3121 }
3122
3123 #[test]
3132 fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
3133 let (_d, store) = mk_store();
3134 write_doc(
3135 &store,
3136 "records/contacts/alice.md",
3137 "contact",
3138 Some("Alice"),
3139 Some("2026-05-01T00:00:00Z"),
3140 "",
3141 );
3142 write_doc(
3143 &store,
3144 "records/companies/acme.md",
3145 "company",
3146 Some("Acme"),
3147 Some("2026-05-02T00:00:00Z"),
3148 "",
3149 );
3150
3151 Index::rebuild_all(&store).expect("clean rebuild succeeds");
3153 assert!(exists(&store, "records/contacts/index.jsonl"));
3154 assert!(exists(&store, "records/companies/index.jsonl"));
3155
3156 let bad = store.root.join("records/contacts/broken.md");
3158 fs::write(
3159 &bad,
3160 "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n",
3161 )
3162 .unwrap();
3163
3164 Index::rebuild_all(&store)
3167 .expect_err("rebuild must abort, not silently skip, on a malformed file");
3168
3169 assert!(
3173 exists(&store, "records/companies/index.jsonl"),
3174 "an aborted rebuild must not destroy a clean sibling folder's catalog"
3175 );
3176 assert!(
3177 exists(&store, "records/contacts/index.jsonl"),
3178 "an aborted rebuild must not destroy the affected folder's prior catalog"
3179 );
3180 let contacts_jsonl = read(&store, "records/contacts/index.jsonl");
3181 assert!(contacts_jsonl.contains("records/contacts/alice.md"));
3182 }
3183
3184 #[test]
3197 fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
3198 let (_d, store) = mk_store();
3199 write_doc(
3203 &store,
3204 "records/contacts/alice.md",
3205 "contact",
3206 Some("Alice"),
3207 Some("2026-05-01T00:00:00Z"),
3208 "",
3209 );
3210 write_doc(
3211 &store,
3212 "records/contacts/bob.md",
3213 "contact",
3214 Some("Bob"),
3215 Some("2026-05-02T00:00:00Z"),
3216 "",
3217 );
3218 Index::rebuild_all(&store).expect("clean rebuild succeeds");
3219
3220 let jsonl_lines = read(&store, "records/contacts/index.jsonl")
3222 .lines()
3223 .filter(|l| !l.trim().is_empty())
3224 .count();
3225 assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");
3226 let layer_md = read(&store, "records/index.md");
3227 let root_md = read(&store, "index.md");
3228 assert!(
3229 layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
3230 "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
3231 );
3232 assert!(
3233 root_md.contains("- [[records/contacts/index|Contacts]] (2)\n")
3234 && root_md.contains("## Records (2)"),
3235 "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
3236 );
3237
3238 let (_d2, wt) = mk_store();
3245 write_doc(
3246 &wt,
3247 "records/contacts/alice.md",
3248 "contact",
3249 Some("Alice"),
3250 Some("2026-05-01T00:00:00Z"),
3251 "",
3252 );
3253 write_doc(
3254 &wt,
3255 "records/contacts/bob.md",
3256 "contact",
3257 Some("Bob"),
3258 Some("2026-05-02T00:00:00Z"),
3259 "",
3260 );
3261 Index::on_write(&wt, Path::new("records/contacts/alice.md")).unwrap();
3262 Index::on_write(&wt, Path::new("records/contacts/bob.md")).unwrap();
3263
3264 let a = snapshot_artifacts(&wt);
3265 let b = snapshot_artifacts(&store);
3266 assert_eq!(
3267 a.keys().collect::<BTreeSet<_>>(),
3268 b.keys().collect::<BTreeSet<_>>(),
3269 "write-through and rebuild_all must produce the same artifact set"
3270 );
3271 for (k, v) in &a {
3272 assert_eq!(
3273 v, &b[k],
3274 "rollup bytes diverged between write-through and rebuild_all for {k} \
3275 (a skip-version inflates rebuild_all's (N) above the jsonl record \
3276 count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
3277 b[k]
3278 );
3279 }
3280 }
3281
3282 #[cfg(unix)]
3287 #[test]
3288 fn non_utf8_path_component_is_kept_not_dropped() {
3289 use std::ffi::OsStr;
3290 use std::os::unix::ffi::OsStrExt;
3291 let mut leaf = b"caf".to_vec();
3293 leaf.push(0xE9);
3294 leaf.extend_from_slice(b".md");
3295 let p = Path::new("sources/emails").join(OsStr::from_bytes(&leaf));
3296 let unix = path_to_unix(&p);
3297 assert_ne!(
3300 unix, "sources/emails",
3301 "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
3302 );
3303 assert!(
3304 unix.starts_with("sources/emails/caf"),
3305 "the lossy leaf must remain under its folder: {unix}"
3306 );
3307 }
3308}