1use std::collections::{BTreeMap, BTreeSet};
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::store::{Layer, Store};
62
/// Maximum number of entries listed in a type-folder `index.md`. When a folder
/// holds more records than this, the listing stops here and a "More" footer is
/// appended instead.
const MD_CAP: usize = 500;

/// Summary stored for a file whose frontmatter has no string `summary` value.
const MISSING_SUMMARY: &str = "(no summary)";

/// Heading text of the store-root `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
73
/// The level of the store that an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store root; rendered to `index.md` directly under the store root.
    Root,
    /// A single layer directory (`sources`, `records` or `wiki`).
    Layer(Layer),
    /// A single type folder, given as a store-relative path such as
    /// `records/contacts`.
    TypeFolder(PathBuf),
}
84
/// One catalog entry: the frontmatter of a single content file, as stored on
/// one line of a type folder's `index.jsonl`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Store-relative path of the file the record was read from.
    pub path: PathBuf,
    /// Frontmatter `type` (serialized under the key `type`); empty when the
    /// file declares none.
    #[serde(rename = "type")]
    pub type_: String,
    /// Frontmatter `summary`, or the `(no summary)` placeholder when absent.
    pub summary: String,
    /// Frontmatter `tags`; defaults to empty when missing from the JSON line.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Frontmatter `links`; defaults to empty when missing from the JSON line.
    #[serde(default)]
    pub links: Vec<String>,
    /// Frontmatter `created`, when present as an RFC 3339 string.
    pub created: Option<DateTime<FixedOffset>>,
    /// Frontmatter `updated`, when present as an RFC 3339 string; drives the
    /// newest-first ordering of records.
    pub updated: Option<DateTime<FixedOffset>>,
    /// Every other frontmatter key, flattened into the top level of the
    /// serialized object.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
115
/// An in-memory index for one level of the store, ready to be rendered as
/// markdown and (for type folders) JSON Lines.
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// Which level this index describes.
    pub level: IndexLevel,
    /// Per-file records. Filled by the type-folder builder; the layer and root
    /// builders leave it empty.
    pub records: Vec<IndexRecord>,
    /// Entry count per type folder (store-relative path). Filled by the layer
    /// and root builders; the type-folder builder leaves it empty.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
128
129impl Index {
130 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
136 let rel = normalize_rel(type_folder);
137 let abs = store.root.join(&rel);
138 let mut records = Vec::new();
139 for file_abs in walk_type_folder_files(&abs) {
140 let rel_path =
141 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
142 records.push(record_from_file(&file_abs, rel_path)?);
143 }
144 sort_records(&mut records);
145 Ok(Index {
146 level: IndexLevel::TypeFolder(rel),
147 records,
148 child_counts: BTreeMap::new(),
149 })
150 }
151
152 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
155 let mut child_counts = BTreeMap::new();
156 for tf in type_folders_in_layer(store, layer) {
157 let abs = store.root.join(&tf);
158 let n = walk_type_folder_files(&abs).len();
159 if n > 0 {
160 child_counts.insert(tf, n);
161 }
162 }
163 Ok(Index {
164 level: IndexLevel::Layer(layer),
165 records: Vec::new(),
166 child_counts,
167 })
168 }
169
170 pub fn build_root(store: &Store) -> crate::Result<Index> {
173 let mut child_counts = BTreeMap::new();
174 for layer in Layer::all() {
175 for tf in type_folders_in_layer(store, layer) {
176 let abs = store.root.join(&tf);
177 let n = walk_type_folder_files(&abs).len();
178 if n > 0 {
179 child_counts.insert(tf, n);
180 }
181 }
182 }
183 Ok(Index {
184 level: IndexLevel::Root,
185 records: Vec::new(),
186 child_counts,
187 })
188 }
189
190 pub fn to_markdown(&self) -> String {
192 match &self.level {
193 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
194 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
195 IndexLevel::Root => self.render_root_md(),
196 }
197 }
198
199 pub fn to_jsonl(&self) -> String {
203 let mut out = String::new();
204 for rec in &self.records {
205 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
208 out.push_str(&line);
209 out.push('\n');
210 }
211 out
212 }
213
214 fn render_type_folder_md(&self, folder: &Path) -> String {
217 let folder_disp = path_to_unix(folder);
218 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
219 let mut s = String::new();
220 s.push_str("---\n");
221 s.push_str("type: index\n");
222 s.push_str("scope: type-folder\n");
223 s.push_str(&format!("folder: {folder_disp}\n"));
224 if let Some(ts) = updated {
225 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
226 }
227 s.push_str("---\n\n");
228 s.push_str(&format!("# {folder_disp}\n\n"));
229
230 let shown = self.records.len().min(MD_CAP);
231 for rec in self.records.iter().take(shown) {
232 s.push_str(&format_md_entry(rec));
233 s.push('\n');
234 }
235
236 if self.records.len() > MD_CAP {
237 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
238 let layer = folder
239 .components()
240 .next()
241 .and_then(|c| c.as_os_str().to_str())
242 .unwrap_or("");
243 s.push('\n');
244 s.push_str(&more_footer(self.records.len(), type_, layer));
245 }
246 s
247 }
248
249 fn render_layer_md(&self, layer: Layer) -> String {
254 let layer_dir = layer_dir_name(layer);
255 let mut s = String::new();
256 s.push_str("---\n");
257 s.push_str("type: index\n");
258 s.push_str("scope: layer\n");
259 s.push_str(&format!("folder: {layer_dir}\n"));
260 s.push_str("---\n\n");
261 s.push_str(&format!("# {layer_dir}\n\n"));
262 for (tf, n) in &self.child_counts {
263 let tf_unix = path_to_unix(tf);
264 let display = capitalize(folder_basename(tf));
265 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
266 }
267 s
268 }
269
270 fn render_root_md(&self) -> String {
273 let mut s = String::new();
274 s.push_str("---\n");
275 s.push_str("type: index\n");
276 s.push_str("scope: root\n");
277 s.push_str("---\n\n");
278 s.push_str(&format!("# {ROOT_TITLE}\n"));
279 for layer in Layer::all() {
280 let layer_dir = layer_dir_name(layer);
281 let prefix = format!("{layer_dir}/");
282 let children: Vec<(&PathBuf, &usize)> = self
283 .child_counts
284 .iter()
285 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
286 .collect();
287 if children.is_empty() {
288 continue;
289 }
290 let total: usize = children.iter().map(|(_, n)| **n).sum();
291 s.push('\n');
292 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
293 for (tf, n) in children {
294 let tf_unix = path_to_unix(tf);
295 let display = capitalize(folder_basename(tf));
296 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
297 }
298 }
299 s
300 }
301}
302
303impl Index {
308 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
315 let file_rel = normalize_rel(file);
316 let file_abs = store.root.join(&file_rel);
317 let folder = type_folder_of(&file_rel)
318 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
319 let record = record_from_file(&file_abs, file_rel.clone())?;
320
321 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
322 records.retain(|r| r.path != record.path);
323 records.push(record);
324 sort_records(&mut records);
325
326 write_type_folder_artifacts(store, &folder, &records)?;
327 update_parents(store, &folder)?;
328 Ok(())
329 }
330
331 pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
335 let old_rel = normalize_rel(old);
336 let new_rel = normalize_rel(new);
337 let old_folder = type_folder_of(&old_rel)
338 .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
339 let new_folder = type_folder_of(&new_rel)
340 .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
341
342 let mut old_records =
344 read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
345 old_records.retain(|r| r.path != old_rel);
346
347 if old_folder == new_folder {
348 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
350 old_records.retain(|r| r.path != record.path);
351 old_records.push(record);
352 sort_records(&mut old_records);
353 write_type_folder_artifacts(store, &old_folder, &old_records)?;
354 update_parents(store, &old_folder)?;
355 return Ok(());
356 }
357
358 sort_records(&mut old_records);
361 write_type_folder_artifacts(store, &old_folder, &old_records)?;
362
363 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
364 let mut new_records =
365 read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
366 new_records.retain(|r| r.path != record.path);
367 new_records.push(record);
368 sort_records(&mut new_records);
369 write_type_folder_artifacts(store, &new_folder, &new_records)?;
370
371 update_parents(store, &old_folder)?;
372 update_parents(store, &new_folder)?;
373 Ok(())
374 }
375
376 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
381 let file_rel = normalize_rel(file);
382 let folder = type_folder_of(&file_rel)
383 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
384 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
385 let before = records.len();
386 records.retain(|r| r.path != file_rel);
387 if records.len() == before {
388 }
391 sort_records(&mut records);
392 write_type_folder_artifacts(store, &folder, &records)?;
393 update_parents(store, &folder)?;
394 Ok(())
395 }
396
397 pub fn rebuild_all(store: &Store) -> crate::Result<()> {
401 Index::cleanup(store)?;
402 for layer in Layer::all() {
403 for tf in type_folders_in_layer(store, layer) {
404 let idx = Index::build_type_folder(store, &tf)?;
405 if idx.records.is_empty() {
406 continue;
407 }
408 write_type_folder_artifacts(store, &tf, &idx.records)?;
409 }
410 let layer_idx = Index::build_layer(store, layer)?;
411 let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
412 if layer_idx.child_counts.is_empty() {
413 remove_if_exists(&layer_index_md)?;
414 } else {
415 write_atomic(
416 &layer_index_md,
417 render_layer_md_with_store(store, &layer_idx),
418 )?;
419 }
420 }
421 let root_idx = Index::build_root(store)?;
422 let root_index_md = store.root.join("index.md");
423 if root_idx.child_counts.is_empty() {
424 remove_if_exists(&root_index_md)?;
425 } else {
426 write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
427 }
428 Ok(())
429 }
430
431 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
433 match level {
434 IndexLevel::TypeFolder(folder) => {
435 let idx = Index::build_type_folder(store, folder)?;
436 if idx.records.is_empty() {
437 remove_if_exists(&store.root.join(folder).join("index.md"))?;
438 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
439 } else {
440 write_type_folder_artifacts(store, folder, &idx.records)?;
441 }
442 }
443 IndexLevel::Layer(layer) => {
444 let idx = Index::build_layer(store, *layer)?;
445 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
446 if idx.child_counts.is_empty() {
447 remove_if_exists(&p)?;
448 } else {
449 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
450 }
451 }
452 IndexLevel::Root => {
453 let idx = Index::build_root(store)?;
454 let p = store.root.join("index.md");
455 if idx.child_counts.is_empty() {
456 remove_if_exists(&p)?;
457 } else {
458 write_atomic(&p, render_root_md_with_store(store, &idx))?;
459 }
460 }
461 }
462 Ok(())
463 }
464
465 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
468 let mut out = String::new();
469 match level {
470 IndexLevel::TypeFolder(folder) => {
471 let idx = Index::build_type_folder(store, folder)?;
472 let md_path = path_to_unix(&folder.join("index.md"));
473 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
474 out.push_str(&format!("--- {md_path} ---\n"));
475 out.push_str(&idx.to_markdown());
476 out.push_str(&format!("--- {jsonl_path} ---\n"));
477 out.push_str(&idx.to_jsonl());
478 }
479 IndexLevel::Layer(layer) => {
480 let idx = Index::build_layer(store, *layer)?;
481 let md_path = format!("{}/index.md", layer_dir_name(*layer));
482 out.push_str(&format!("--- {md_path} ---\n"));
483 out.push_str(&render_layer_md_with_store(store, &idx));
484 }
485 IndexLevel::Root => {
486 let idx = Index::build_root(store)?;
487 out.push_str("--- index.md ---\n");
488 out.push_str(&render_root_md_with_store(store, &idx));
489 }
490 }
491 Ok(out)
492 }
493
494 pub fn cleanup(store: &Store) -> crate::Result<()> {
498 for layer in Layer::all() {
499 let layer_dir = store.root.join(layer_dir_name(layer));
500 if !layer_dir.is_dir() {
501 continue;
502 }
503 for tf in type_folders_in_layer(store, layer) {
504 let tf_abs = store.root.join(&tf);
505 for entry in walkdir::WalkDir::new(&tf_abs)
508 .min_depth(1)
509 .into_iter()
510 .filter_map(|e| e.ok())
511 {
512 let p = entry.path();
513 if is_index_artifact(p) {
514 remove_if_exists(p)?;
515 }
516 }
517 if walk_type_folder_files(&tf_abs).is_empty() {
519 remove_if_exists(&tf_abs.join("index.md"))?;
520 remove_if_exists(&tf_abs.join("index.jsonl"))?;
521 }
522 }
523 }
524 Ok(())
525 }
526}
527
528fn write_type_folder_artifacts(
536 store: &Store,
537 folder: &Path,
538 records: &[IndexRecord],
539) -> crate::Result<()> {
540 let folder_abs = store.root.join(folder);
541 let md_path = folder_abs.join("index.md");
542 let jsonl_path = folder_abs.join("index.jsonl");
543 if records.is_empty() {
544 remove_if_exists(&md_path)?;
545 remove_if_exists(&jsonl_path)?;
546 return Ok(());
547 }
548 let idx = Index {
549 level: IndexLevel::TypeFolder(folder.to_path_buf()),
550 records: records.to_vec(),
551 child_counts: BTreeMap::new(),
552 };
553 write_atomic(&md_path, idx.to_markdown())?;
554 write_atomic(&jsonl_path, idx.to_jsonl())?;
555 Ok(())
556}
557
558fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
571 let layer = folder
572 .components()
573 .next()
574 .and_then(|c| c.as_os_str().to_str())
575 .and_then(layer_from_dir_name);
576 if let Some(layer) = layer {
577 let idx = Index {
578 level: IndexLevel::Layer(layer),
579 records: Vec::new(),
580 child_counts: child_counts_from_jsonl(store, &[layer])?,
581 };
582 let p = store.root.join(layer_dir_name(layer)).join("index.md");
583 if idx.child_counts.is_empty() {
584 remove_if_exists(&p)?;
585 } else {
586 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
587 }
588 }
589 let root = Index {
590 level: IndexLevel::Root,
591 records: Vec::new(),
592 child_counts: child_counts_from_jsonl(store, &Layer::all())?,
593 };
594 let rp = store.root.join("index.md");
595 if root.child_counts.is_empty() {
596 remove_if_exists(&rp)?;
597 } else {
598 write_atomic(&rp, render_root_md_with_store(store, &root))?;
599 }
600 Ok(())
601}
602
603fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
607 let layer = match idx.level {
608 IndexLevel::Layer(l) => l,
609 _ => unreachable!("render_layer_md_with_store called on non-layer"),
610 };
611 let layer_dir = layer_dir_name(layer);
612 let mut max_upd: Option<DateTime<FixedOffset>> = None;
613 let mut entries = String::new();
614 for (tf, n) in &idx.child_counts {
615 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
616 let newest = recs.first();
617 if let Some(u) = newest.and_then(|r| r.updated) {
618 max_upd = Some(match max_upd {
619 Some(cur) if cur >= u => cur,
620 _ => u,
621 });
622 }
623 let tf_unix = path_to_unix(tf);
624 let display = capitalize(folder_basename(tf));
625 let preview = newest
626 .map(|r| truncate(&r.summary, 80))
627 .filter(|p| !p.is_empty() && p != MISSING_SUMMARY);
628 match preview {
629 Some(p) => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n}) — {p}\n")),
630 None => entries.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n")),
631 }
632 }
633 let mut s = String::new();
634 s.push_str("---\n");
635 s.push_str("type: index\n");
636 s.push_str("scope: layer\n");
637 s.push_str(&format!("folder: {layer_dir}\n"));
638 if let Some(ts) = max_upd {
639 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
640 }
641 s.push_str("---\n\n");
642 s.push_str(&format!("# {layer_dir}\n\n"));
643 s.push_str(&entries);
644 s
645}
646
647fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
650 let mut max_upd: Option<DateTime<FixedOffset>> = None;
651 for tf in idx.child_counts.keys() {
652 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
653 if let Some(u) = recs.first().and_then(|r| r.updated) {
654 max_upd = Some(match max_upd {
655 Some(cur) if cur >= u => cur,
656 _ => u,
657 });
658 }
659 }
660 let mut s = String::new();
661 s.push_str("---\n");
662 s.push_str("type: index\n");
663 s.push_str("scope: root\n");
664 if let Some(ts) = max_upd {
665 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
666 }
667 s.push_str("---\n\n");
668 s.push_str(&format!("# {ROOT_TITLE}\n"));
669 for layer in Layer::all() {
670 let layer_dir = layer_dir_name(layer);
671 let prefix = format!("{layer_dir}/");
672 let children: Vec<(&PathBuf, &usize)> = idx
673 .child_counts
674 .iter()
675 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
676 .collect();
677 if children.is_empty() {
678 continue;
679 }
680 let total: usize = children.iter().map(|(_, n)| **n).sum();
681 s.push('\n');
682 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
683 for (tf, n) in children {
684 let tf_unix = path_to_unix(tf);
685 let display = capitalize(folder_basename(tf));
686 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
687 }
688 }
689 s
690}
691
692fn format_md_entry(rec: &IndexRecord) -> String {
698 let path = wiki_target(&rec.path);
699 let mut line = format!("- [[{path}]] — {}", rec.summary);
700 if !rec.tags.is_empty() {
701 let tags = rec
702 .tags
703 .iter()
704 .map(|t| format!("#{t}"))
705 .collect::<Vec<_>>()
706 .join(" ");
707 line.push_str(&format!(" · {tags}"));
708 }
709 line
710}
711
712fn more_footer(total: usize, type_: &str, layer: &str) -> String {
714 format!(
715 "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
716 )
717}
718
719fn sort_records(records: &mut [IndexRecord]) {
723 records.sort_by(|a, b| {
724 match (b.updated, a.updated) {
725 (Some(bu), Some(au)) => bu.cmp(&au),
726 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
729 }
730 .then_with(|| a.path.cmp(&b.path))
731 });
732}
733
734impl IndexRecord {
735 pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
747 record_from_file(abs, rel)
748 }
749}
750
751fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
754 let meta = read_frontmatter(abs)?;
755 Ok(IndexRecord {
756 path: rel,
757 type_: meta.type_.unwrap_or_default(),
758 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
759 tags: meta.tags,
760 links: meta.links,
761 created: meta.created,
762 updated: meta.updated,
763 fields: meta.fields,
764 })
765}
766
/// Frontmatter values extracted from one content file, before defaults are
/// applied to build an `IndexRecord`.
struct FileMeta {
    /// `type`, when present as a string.
    type_: Option<String>,
    /// `summary`, when present as a string.
    summary: Option<String>,
    /// `tags` as a list of strings (empty when absent).
    tags: Vec<String>,
    /// `links` as a list of strings (empty when absent).
    links: Vec<String>,
    /// `created`, when present as a string that parses as RFC 3339.
    created: Option<DateTime<FixedOffset>>,
    /// `updated`, when present as a string that parses as RFC 3339.
    updated: Option<DateTime<FixedOffset>>,
    /// All remaining string-keyed frontmatter entries (except `path`),
    /// converted to JSON values.
    fields: BTreeMap<String, Value>,
}
777
778fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
782 let text = fs::read_to_string(abs)?;
783 let yaml = extract_frontmatter_block(&text).unwrap_or_default();
784 let map: serde_yml::Mapping = if yaml.trim().is_empty() {
785 serde_yml::Mapping::new()
786 } else {
787 serde_yml::from_str(&yaml).map_err(|e| {
788 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
789 path: abs.to_path_buf(),
790 message: format!("frontmatter YAML: {e}"),
791 })
792 })?
793 };
794
795 let mut type_ = None;
796 let mut summary = None;
797 let mut tags = Vec::new();
798 let mut links = Vec::new();
799 let mut created = None;
800 let mut updated = None;
801 let mut fields = BTreeMap::new();
802
803 for (k, v) in map {
804 let key = match k.as_str() {
805 Some(s) => s.to_string(),
806 None => continue,
807 };
808 match key.as_str() {
809 "type" => type_ = v.as_str().map(str::to_string),
810 "summary" => summary = v.as_str().map(str::to_string),
811 "tags" => tags = yaml_string_list(&v),
812 "links" => links = yaml_string_list(&v),
813 "created" => created = v.as_str().and_then(parse_ts),
814 "updated" => updated = v.as_str().and_then(parse_ts),
815 "path" => {}
819 _ => {
820 if let Ok(jv) = serde_json::to_value(&v) {
821 fields.insert(key, jv);
822 }
823 }
824 }
825 }
826
827 Ok(FileMeta {
828 type_,
829 summary,
830 tags,
831 links,
832 created,
833 updated,
834 fields,
835 })
836}
837
/// Returns the text between the opening and closing `---` fences at the top of
/// `text`, one `\n`-terminated line per frontmatter line.
///
/// A leading byte-order mark is ignored and trailing whitespace on the fence
/// lines is tolerated. Returns `None` when the first line is not a fence or the
/// closing fence is missing.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let content = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = content.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    let rest: Vec<&str> = lines.collect();
    let close = rest.iter().position(|line| line.trim_end() == "---")?;

    let mut block = String::new();
    for line in &rest[..close] {
        block.push_str(line);
        block.push('\n');
    }
    Some(block)
}
857
858fn yaml_string_list(v: &serde_yml::Value) -> Vec<String> {
861 match v {
862 serde_yml::Value::String(s) => vec![s.clone()],
863 serde_yml::Value::Sequence(seq) => seq
864 .iter()
865 .filter_map(|item| item.as_str().map(str::to_string))
866 .collect(),
867 _ => Vec::new(),
868 }
869}
870
871fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
873 DateTime::parse_from_rfc3339(s.trim()).ok()
874}
875
876fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
880 ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
881}
882
883fn max_updated<'a>(
885 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
886) -> Option<DateTime<FixedOffset>> {
887 let mut best: Option<DateTime<FixedOffset>> = None;
888 for ts in it.flatten() {
889 best = Some(match best {
890 Some(cur) if cur >= *ts => cur,
891 _ => *ts,
892 });
893 }
894 best
895}
896
897fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
901 let text = match fs::read_to_string(jsonl) {
902 Ok(t) => t,
903 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
904 Err(e) => return Err(e.into()),
905 };
906 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
908 for (i, line) in text.lines().enumerate() {
909 if line.trim().is_empty() {
910 continue;
911 }
912 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
913 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
914 path: jsonl.to_path_buf(),
915 message: format!("line {}: {e}", i + 1),
916 })
917 })?;
918 by_path.insert(rec.path.clone(), rec);
919 }
920 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
921 sort_records(&mut records);
922 Ok(records)
923}
924
925fn jsonl_record_count(jsonl: &Path) -> crate::Result<usize> {
936 let text = match fs::read_to_string(jsonl) {
937 Ok(t) => t,
938 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(0),
939 Err(e) => return Err(e.into()),
940 };
941 let mut paths: BTreeSet<PathBuf> = BTreeSet::new();
942 for (i, line) in text.lines().enumerate() {
943 if line.trim().is_empty() {
944 continue;
945 }
946 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
947 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
948 path: jsonl.to_path_buf(),
949 message: format!("line {}: {e}", i + 1),
950 })
951 })?;
952 paths.insert(rec.path);
953 }
954 Ok(paths.len())
955}
956
957fn child_counts_from_jsonl(
963 store: &Store,
964 layers: &[Layer],
965) -> crate::Result<BTreeMap<PathBuf, usize>> {
966 let mut child_counts = BTreeMap::new();
967 for &layer in layers {
968 for tf in type_folders_in_layer(store, layer) {
969 let n = jsonl_record_count(&store.root.join(&tf).join("index.jsonl"))?;
970 if n > 0 {
971 child_counts.insert(tf, n);
972 }
973 }
974 }
975 Ok(child_counts)
976}
977
978fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
981 let mut out = Vec::new();
982 if !folder_abs.is_dir() {
983 return out;
984 }
985 for entry in walkdir::WalkDir::new(folder_abs)
986 .into_iter()
987 .filter_entry(|e| !is_hidden(e.file_name()))
988 .filter_map(|e| e.ok())
989 {
990 if !entry.file_type().is_file() {
991 continue;
992 }
993 let p = entry.path();
994 if p.extension().and_then(|e| e.to_str()) != Some("md") {
995 continue;
996 }
997 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
998 continue;
999 }
1000 out.push(p.to_path_buf());
1001 }
1002 out
1003}
1004
1005fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1008 let layer_dir = store.root.join(layer_dir_name(layer));
1009 let mut out = Vec::new();
1010 let rd = match fs::read_dir(&layer_dir) {
1011 Ok(rd) => rd,
1012 Err(_) => return out,
1013 };
1014 for entry in rd.flatten() {
1015 if !entry.path().is_dir() {
1016 continue;
1017 }
1018 let name = entry.file_name();
1019 let name = match name.to_str() {
1020 Some(n) => n,
1021 None => continue,
1022 };
1023 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1024 continue;
1025 }
1026 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1027 }
1028 out.sort();
1029 out
1030}
1031
1032fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1036 let mut comps = file_rel.components();
1037 let layer = comps.next()?.as_os_str().to_str()?;
1038 layer_from_dir_name(layer)?;
1039 let type_seg = comps.next()?.as_os_str().to_str()?;
1040 Some(PathBuf::from(layer).join(type_seg))
1041}
1042
/// Returns `abs` relative to `root`, or `None` when `abs` does not start with
/// `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rest) => Some(rest.to_path_buf()),
        Err(_) => None,
    }
}
1047
1048fn normalize_rel(p: &Path) -> PathBuf {
1051 let s = path_to_unix(p);
1052 let s = s.strip_prefix("./").unwrap_or(&s);
1053 PathBuf::from(s)
1054}
1055
/// Reports whether the final component of `p` is `index.md` or `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let name = p.file_name().and_then(|n| n.to_str());
    name == Some("index.md") || name == Some("index.jsonl")
}
1062
/// Reports whether `name` starts with a dot. Names that are not valid UTF-8
/// are treated as not hidden.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    match name.to_str() {
        Some(s) => s.starts_with('.'),
        None => false,
    }
}
1066
1067fn layer_dir_name(layer: Layer) -> &'static str {
1068 match layer {
1069 Layer::Sources => "sources",
1070 Layer::Records => "records",
1071 Layer::Wiki => "wiki",
1072 }
1073}
1074
1075fn layer_from_dir_name(name: &str) -> Option<Layer> {
1078 match name {
1079 "sources" => Some(Layer::Sources),
1080 "records" => Some(Layer::Records),
1081 "wiki" => Some(Layer::Wiki),
1082 _ => None,
1083 }
1084}
1085
/// Returns the final component of `p` as text, or `""` when the path has no
/// file name or the name is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1090
1091fn wiki_target(p: &Path) -> String {
1095 let unix = path_to_unix(p);
1096 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1097}
1098
/// Joins the components of `p` with `/`. Components that are not valid UTF-8
/// are dropped.
fn path_to_unix(p: &Path) -> String {
    let mut out = String::new();
    let mut first = true;
    for part in p.components().filter_map(|c| c.as_os_str().to_str()) {
        if !first {
            out.push('/');
        }
        first = false;
        out.push_str(part);
    }
    out
}
1107
/// Upper-cases the first character of `s` (using its full Unicode uppercase
/// mapping, which may be several characters) and leaves the rest unchanged.
fn capitalize(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s.chars();
    if let Some(first) = rest.next() {
        out.extend(first.to_uppercase());
        out.push_str(rest.as_str());
    }
    out
}
1116
/// Collapses every run of whitespace in `s` to a single space (trimming both
/// ends) and keeps at most `max` characters of the result. No ellipsis is added.
fn truncate(s: &str, max: usize) -> String {
    let mut one_line = String::new();
    for word in s.split_whitespace() {
        if !one_line.is_empty() {
            one_line.push(' ');
        }
        one_line.push_str(word);
    }

    // Byte offset of the first character past the limit, if there is one.
    let cut = one_line.char_indices().nth(max).map(|(offset, _)| offset);
    if let Some(offset) = cut {
        one_line.truncate(offset);
    }
    one_line
}
1126
1127fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1128 if let Some(parent) = path.parent() {
1129 fs::create_dir_all(parent)?;
1130 }
1131 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1132 let mut tmp = tempfile_in(dir)?;
1133 tmp.write_all(contents.as_bytes())?;
1134 tmp.flush()?;
1135 tmp.persist(path)?;
1136 Ok(())
1137}
1138
1139fn remove_if_exists(path: &Path) -> crate::Result<()> {
1140 match fs::remove_file(path) {
1141 Ok(()) => Ok(()),
1142 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1143 Err(e) => Err(e.into()),
1144 }
1145}
1146
1147fn bad_index(path: &Path, msg: &str) -> crate::Error {
1148 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1149 path: path.to_path_buf(),
1150 message: msg.to_string(),
1151 })
1152}
1153
/// A temporary file that is either renamed into place with `persist` or
/// deleted when dropped.
struct AtomicTemp {
    /// Open handle; taken (and thereby closed) by `persist`.
    file: Option<fs::File>,
    /// Location of the temporary file.
    path: PathBuf,
    /// Set once the rename succeeded, so `Drop` leaves the file alone.
    persisted: bool,
}

impl AtomicTemp {
    /// Writes all of `bytes` to the temporary file.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let file = self.file.as_mut().expect("temp file open");
        file.write_all(bytes)
    }

    /// Flushes the temporary file's writer.
    fn flush(&mut self) -> std::io::Result<()> {
        let file = self.file.as_mut().expect("temp file open");
        file.flush()
    }

    /// Closes the temporary file and renames it to `dest`. When the rename
    /// fails, the error is returned and dropping `self` removes the file.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            // Best-effort sync: a failure here does not abort the rename.
            let _ = handle.sync_all();
        }
        fs::rename(&self.path, dest)?;
        self.persisted = true;
        Ok(())
    }
}

impl Drop for AtomicTemp {
    /// Removes the temporary file unless it was persisted; removal errors are
    /// ignored.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1191
1192fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1193 use std::time::{SystemTime, UNIX_EPOCH};
1194 let nanos = SystemTime::now()
1195 .duration_since(UNIX_EPOCH)
1196 .map(|d| d.as_nanos())
1197 .unwrap_or(0);
1198 let pid = std::process::id();
1199 let counter = next_temp_counter();
1202 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1203 let path = dir.join(name);
1204 let file = fs::OpenOptions::new()
1205 .write(true)
1206 .create_new(true)
1207 .open(&path)?;
1208 Ok(AtomicTemp {
1209 file: Some(file),
1210 path,
1211 persisted: false,
1212 })
1213}
1214
/// Returns the next value of a process-wide counter that starts at 0 and
/// increases by one per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering};

    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Ordering::Relaxed)
}
1220
1221#[cfg(test)]
1222mod tests {
1223 use super::*;
1224 use std::collections::BTreeSet;
1225 use std::fs;
1226 use tempfile::TempDir;
1227
1228 fn mk_store() -> (TempDir, Store) {
1233 let dir = TempDir::new().unwrap();
1234 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1235 let store = Store {
1236 root: dir.path().to_path_buf(),
1237 config: crate::parser::Config::default(),
1238 };
1239 (dir, store)
1240 }
1241
1242 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1245 let abs = store.root.join(rel);
1246 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1247 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1248 }
1249
1250 fn write_doc(
1252 store: &Store,
1253 rel: &str,
1254 type_: &str,
1255 summary: Option<&str>,
1256 updated: Option<&str>,
1257 extra_yaml: &str,
1258 ) {
1259 let mut fm = format!("type: {type_}\n");
1260 if let Some(s) = summary {
1261 fm.push_str(&format!("summary: {s}\n"));
1262 }
1263 if let Some(u) = updated {
1264 fm.push_str(&format!("updated: {u}\n"));
1265 }
1266 fm.push_str(extra_yaml);
1267 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1268 }
1269
1270 fn read(store: &Store, rel: &str) -> String {
1271 fs::read_to_string(store.root.join(rel)).unwrap()
1272 }
1273
1274 fn exists(store: &Store, rel: &str) -> bool {
1275 store.root.join(rel).exists()
1276 }
1277
1278 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1281 let mut out = BTreeMap::new();
1282 for entry in walkdir::WalkDir::new(&store.root)
1283 .into_iter()
1284 .filter_map(|e| e.ok())
1285 {
1286 let p = entry.path();
1287 if is_index_artifact(p) {
1288 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1289 out.insert(rel, fs::read_to_string(p).unwrap());
1290 }
1291 }
1292 out
1293 }
1294
1295 #[test]
1298 fn type_folder_aggregates_across_shards_in_recency_order() {
1299 let (_d, store) = mk_store();
1300 write_doc(
1303 &store,
1304 "sources/emails/2026/05/b-old.md",
1305 "email",
1306 Some("Older mail"),
1307 Some("2026-05-01T09:00:00Z"),
1308 "",
1309 );
1310 write_doc(
1311 &store,
1312 "sources/emails/2026/06/c-new.md",
1313 "email",
1314 Some("Newest mail"),
1315 Some("2026-06-15T12:00:00Z"),
1316 "",
1317 );
1318 write_doc(
1319 &store,
1320 "sources/emails/2026/05/a-mid.md",
1321 "email",
1322 Some("Middle mail"),
1323 Some("2026-05-20T08:00:00Z"),
1324 "",
1325 );
1326
1327 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1328 let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
1329 assert_eq!(
1330 paths,
1331 vec![
1332 "sources/emails/2026/06/c-new.md",
1333 "sources/emails/2026/05/a-mid.md",
1334 "sources/emails/2026/05/b-old.md",
1335 ],
1336 "records must aggregate across shards, newest `updated` first"
1337 );
1338 }
1339
/// Pins the rendered markdown of a type-folder index: the frontmatter (whose `updated`
/// matches the newest member), the heading, the per-record entry line, the `·`-separated
/// `#tag` suffix that appears only for tagged records, and the absence of a `## More`
/// footer when the folder is under the cap.
#[test]
fn type_folder_md_format_entries_tags_and_derived_updated() {
    let (_guard, store) = mk_store();
    let tagged_extra = "tags:\n - renewal\n - acme\n";
    write_doc(
        &store,
        "records/contacts/sarah-chen.md",
        "contact",
        Some("Renewal champion at Acme"),
        Some("2026-05-27T10:00:00Z"),
        tagged_extra,
    );
    write_doc(
        &store,
        "records/contacts/no-tags.md",
        "contact",
        Some("Plain contact"),
        Some("2026-05-26T10:00:00Z"),
        "",
    );

    let md = Index::build_type_folder(&store, Path::new("records/contacts"))
        .unwrap()
        .to_markdown();

    // Frontmatter + heading, byte-exact.
    let expected_head = "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n";
    assert!(
        md.starts_with(expected_head),
        "frontmatter/heading wrong:\n{md}"
    );

    // Tagged record: summary followed by the tag suffix.
    let tagged_line =
        "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n";
    assert!(md.contains(tagged_line), "tagged entry wrong:\n{md}");

    // Untagged record: summary only, no dangling separator.
    let untagged_line = "- [[records/contacts/no-tags]] — Plain contact\n";
    assert!(md.contains(untagged_line), "untagged entry wrong:\n{md}");
    assert!(
        !md.contains("Plain contact ·"),
        "untagged entry must not emit a tag separator"
    );

    assert!(!md.contains("## More"), "no footer expected under the cap");
}
1388
/// A document written without a `summary` must surface `MISSING_SUMMARY` both on the
/// record and in the rendered markdown entry.
#[test]
fn missing_summary_becomes_placeholder_not_invented() {
    let (_guard, store) = mk_store();
    let no_summary: Option<&str> = None;
    write_doc(
        &store,
        "records/notes/x.md",
        "note",
        no_summary,
        Some("2026-05-27T10:00:00Z"),
        "",
    );

    let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
    assert_eq!(idx.records[0].summary, MISSING_SUMMARY);

    let md = idx.to_markdown();
    let expected_entry = "- [[records/notes/x]] — (no summary)\n";
    assert!(
        md.contains(expected_entry),
        "missing summary must render the placeholder, not invent text:\n{md}"
    );
}
1408
/// The JSONL rendering must contain one object per file, deserialize back into the same
/// `IndexRecord`, keep reserved keys out of `fields`, and serialize keys in a stable order
/// (reserved keys first, then the extra fields sorted).
#[test]
fn jsonl_is_complete_structured_and_round_trips() {
    let (_guard, store) = mk_store();
    let rich_extra = "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ntags:\n - food\nlinks:\n - wiki/themes/spend\n";
    write_doc(
        &store,
        "records/expenses/2026/05/e1.md",
        "expense",
        Some("Lunch with vendor"),
        Some("2026-05-10T10:00:00Z"),
        rich_extra,
    );
    write_doc(
        &store,
        "records/expenses/2026/06/e2.md",
        "expense",
        Some("Cloud bill"),
        Some("2026-06-01T10:00:00Z"),
        "amount: 100\n",
    );

    let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
    let jsonl = idx.to_jsonl();
    let lines: Vec<&str> = jsonl.lines().collect();
    assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");

    // First line is the more recently updated file (e2) and round-trips exactly.
    let newest: IndexRecord = serde_json::from_str(lines[0]).unwrap();
    assert_eq!(
        path_to_unix(&newest.path),
        "records/expenses/2026/06/e2.md"
    );
    assert_eq!(
        newest, idx.records[0],
        "jsonl line must round-trip to the record"
    );

    // Second line (e1) carries every structured field.
    let older: IndexRecord = serde_json::from_str(lines[1]).unwrap();
    assert_eq!(older.type_, "expense");
    assert_eq!(older.summary, "Lunch with vendor");
    assert_eq!(older.tags, vec!["food".to_string()]);
    assert_eq!(older.links, vec!["wiki/themes/spend".to_string()]);
    let expected_created = DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap();
    assert_eq!(older.created, Some(expected_created));
    assert_eq!(older.fields.get("status"), Some(&Value::from("paid")));
    assert_eq!(older.fields.get("amount"), Some(&Value::from(42)));

    // Reserved keys live on the struct, never in the flattened extras.
    let reserved_keys = [
        "path", "type", "summary", "tags", "links", "created", "updated",
    ];
    for reserved in reserved_keys {
        assert!(
            !older.fields.contains_key(reserved),
            "reserved key {reserved} must not appear in fields"
        );
    }

    // Serialized key order is fixed: reserved keys first ...
    let stable_prefix = r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["wiki/themes/spend"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#;
    assert!(
        lines[1].starts_with(stable_prefix),
        "jsonl key order not stable:\n{}",
        lines[1]
    );
    // ... then the extras in sorted order.
    let sorted_suffix = r#""amount":42,"status":"paid"}"#;
    assert!(
        lines[1].ends_with(sorted_suffix),
        "extras must be sorted:\n{}",
        lines[1]
    );
}
1482
/// With more than `MD_CAP` files in a folder, the markdown view lists exactly `MD_CAP`
/// entries plus a `## More` footer (total count and a query hint), while the record list
/// and the JSONL output keep every file.
#[test]
fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
    let (_guard, store) = mk_store();
    let total = MD_CAP + 7;

    (0..total).for_each(|i| {
        let day = 1 + (i % 27);
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
        let summary = format!("mail {i}");
        write_doc(
            &store,
            &rel,
            "email",
            Some(summary.as_str()),
            Some(updated.as_str()),
            "",
        );
    });

    let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
    assert_eq!(idx.records.len(), total, "jsonl/records keep every file");

    let md = idx.to_markdown();
    let entry_lines = md
        .lines()
        .filter(|line| line.starts_with("- [["))
        .count();
    assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");

    // Footer: heading, total count, and the query hint with inferred type/layer.
    assert!(
        md.contains("## More\n\n"),
        "over-cap md needs a More footer"
    );
    let count_sentence =
        format!("This folder has {total} files. The 500 most recent are listed above.\n");
    assert!(
        md.contains(count_sentence.as_str()),
        "footer count wrong:\n{md}"
    );
    let query_hint =
        "Use `dbmd index query --type email --in sources` for the complete catalog.\n";
    assert!(
        md.contains(query_hint),
        "footer must infer type=email layer=sources:\n{md}"
    );

    let jsonl = idx.to_jsonl();
    assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
}
1530
/// `sort_records` orders by `updated` descending, breaks equal timestamps by path, and
/// places records without an `updated` value at the end.
#[test]
fn sort_breaks_ties_by_path_and_puts_undated_last() {
    let tied_stamp = "2026-05-01T00:00:00Z";
    let mut records = vec![
        rec("z/a.md", Some(tied_stamp)),
        rec("a/b.md", Some(tied_stamp)),
        rec("m/c.md", None),
        rec("b/d.md", Some("2026-06-01T00:00:00Z")),
    ];

    sort_records(&mut records);

    let order: Vec<String> = records
        .iter()
        .map(|record| path_to_unix(&record.path))
        .collect();
    assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
}
1545
1546 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
1547 IndexRecord {
1548 path: PathBuf::from(path),
1549 type_: "t".into(),
1550 summary: "s".into(),
1551 tags: vec![],
1552 links: vec![],
1553 created: None,
1554 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
1555 fields: BTreeMap::new(),
1556 }
1557 }
1558
/// The layer-level `index.md` must list its type folders alphabetically, each with a
/// title, a file count and the summary of its newest record, and carry an `updated`
/// equal to the newest timestamp among the children.
#[test]
fn layer_index_lists_type_folders_with_counts_and_preview() {
    let (_guard, store) = mk_store();

    let fixtures = [
        (
            "records/contacts/a.md",
            "contact",
            "Contact A older",
            "2026-05-01T00:00:00Z",
        ),
        (
            "records/contacts/b.md",
            "contact",
            "Contact B newest",
            "2026-05-09T00:00:00Z",
        ),
        (
            "records/companies/x.md",
            "company",
            "Acme Inc",
            "2026-05-05T00:00:00Z",
        ),
    ];
    for (rel, kind, summary, stamp) in fixtures {
        write_doc(&store, rel, kind, Some(summary), Some(stamp), "");
    }

    // Type-folder indexes are written first, then the layer that rolls them up.
    for folder in ["records/contacts", "records/companies"] {
        Index::write_level(&store, &IndexLevel::TypeFolder(folder.into())).unwrap();
    }
    Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();

    let md = read(&store, "records/index.md");

    assert!(
        md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
        "layer fm:\n{md}"
    );

    let companies_at = md.find("companies/index").unwrap();
    let contacts_at = md.find("contacts/index").unwrap();
    assert!(
        companies_at < contacts_at,
        "type folders must be alphabetical"
    );

    let contacts_entry = "- [[records/contacts/index|Contacts]] (2) — Contact B newest\n";
    assert!(md.contains(contacts_entry), "contacts entry:\n{md}");
    let companies_entry = "- [[records/companies/index|Companies]] (1) — Acme Inc\n";
    assert!(md.contains(companies_entry), "companies entry:\n{md}");

    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "layer updated must be max child:\n{md}"
    );
}
1621
/// After `rebuild_all`, the root `index.md` must have one section per non-empty layer
/// (with the layer's file total in the heading, Sources before Records), one counted
/// entry per type folder, and no preview text.
#[test]
fn root_index_groups_layers_with_totals_and_per_type_counts() {
    let (_guard, store) = mk_store();

    let fixtures = [
        (
            "sources/emails/2026/05/a.md",
            "email",
            "Mail",
            "2026-05-01T00:00:00Z",
        ),
        ("sources/docs/d.md", "doc", "Doc", "2026-05-02T00:00:00Z"),
        (
            "records/contacts/c.md",
            "contact",
            "C",
            "2026-05-03T00:00:00Z",
        ),
    ];
    for (rel, kind, summary, stamp) in fixtures {
        write_doc(&store, rel, kind, Some(summary), Some(stamp), "");
    }

    Index::rebuild_all(&store).unwrap();
    let md = read(&store, "index.md");

    assert!(
        md.starts_with("---\ntype: index\nscope: root\n"),
        "root fm:\n{md}"
    );
    assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");

    // Layer headings carry totals and appear in a fixed order.
    let sources_h = md
        .find("## Sources (2)")
        .expect("sources heading w/ total 2");
    let records_h = md
        .find("## Records (1)")
        .expect("records heading w/ total 1");
    assert!(sources_h < records_h, "Sources must precede Records");
    assert!(!md.contains("## Wiki"), "empty layer gets no section");

    // One counted entry per type folder.
    let docs_entry = "- [[sources/docs/index|Docs]] (1)\n";
    assert!(md.contains(docs_entry), "root docs entry:\n{md}");
    let emails_entry = "- [[sources/emails/index|Emails]] (1)\n";
    assert!(md.contains(emails_entry), "root emails entry:\n{md}");
    let contacts_entry = "- [[records/contacts/index|Contacts]] (1)\n";
    assert!(md.contains(contacts_entry), "root contacts entry:\n{md}");

    assert!(!md.contains("— "), "root entries carry no preview text");
}
1683
/// Writing documents one at a time with `Index::on_write` must leave exactly the same
/// index artifacts, byte for byte, as writing the same documents and running a single
/// `Index::rebuild_all`.
#[test]
fn on_write_matches_rebuild_byte_for_byte() {
    // `wt` is maintained incrementally; `rb` is rebuilt once at the end.
    let (_wt_guard, wt) = mk_store();
    let (_rb_guard, rb) = mk_store();

    // (path, type, summary, updated, extra frontmatter)
    let docs = [
        (
            "sources/emails/2026/05/e1.md",
            "email",
            "First mail",
            "2026-05-01T10:00:00Z",
            "tags:\n - inbox\n",
        ),
        (
            "sources/emails/2026/06/e2.md",
            "email",
            "Second mail",
            "2026-06-01T10:00:00Z",
            "",
        ),
        (
            "records/contacts/sarah.md",
            "contact",
            "Sarah",
            "2026-05-15T10:00:00Z",
            "links:\n - wiki/people/sarah\n",
        ),
        (
            "records/contacts/elena.md",
            "contact",
            "Elena",
            "2026-05-20T10:00:00Z",
            "status: active\n",
        ),
        (
            "wiki/people/sarah.md",
            "wiki-page",
            "Sarah bio",
            "2026-05-21T10:00:00Z",
            "",
        ),
    ];

    for (rel, kind, summary, stamp, extra) in docs {
        write_doc(&wt, rel, kind, Some(summary), Some(stamp), extra);
        write_doc(&rb, rel, kind, Some(summary), Some(stamp), extra);
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);

    let incremental_keys: Vec<_> = a.keys().collect();
    let rebuilt_keys: Vec<_> = b.keys().collect();
    assert_eq!(
        incremental_keys, rebuilt_keys,
        "same set of index artifacts must exist"
    );
    a.iter().for_each(|(k, v)| {
        assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
    });

    // Spot-check that artifacts at every level were actually produced.
    assert!(a.contains_key("index.md"));
    assert!(a.contains_key("sources/emails/index.jsonl"));
    assert!(a.contains_key("records/contacts/index.md"));
}
1753
/// An `on_write` for one type folder must only roll up folders that already have index
/// sidecars: un-indexed sibling content (`records/companies`) must not appear in the layer
/// or root rollups. Once the siblings are indexed through `on_write` as well, the result
/// must equal a full `rebuild_all` over the same content.
#[test]
fn loop_op_does_not_walk_sibling_content_tree() {
    let (_guard, store) = mk_store();

    // (path, type, summary, updated)
    let companies = [
        (
            "records/companies/acme.md",
            "company",
            "Acme Inc",
            "2026-05-05T00:00:00Z",
        ),
        (
            "records/companies/globex.md",
            "company",
            "Globex",
            "2026-05-06T00:00:00Z",
        ),
    ];
    let contact = (
        "records/contacts/sarah.md",
        "contact",
        "Sarah",
        "2026-05-15T00:00:00Z",
    );

    // Sibling content exists on disk but is never indexed in this phase.
    for (rel, kind, summary, stamp) in companies {
        write_doc(&store, rel, kind, Some(summary), Some(stamp), "");
    }
    assert!(
        !exists(&store, "records/companies/index.jsonl"),
        "precondition: companies must be un-indexed"
    );

    // Only the contacts folder goes through the write loop.
    {
        let (rel, kind, summary, stamp) = contact;
        write_doc(&store, rel, kind, Some(summary), Some(stamp), "");
    }
    Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();

    let layer_md = read(&store, "records/index.md");
    let root_md = read(&store, "index.md");

    // The written folder is reflected at both rollup levels ...
    assert!(
        layer_md.contains("- [[records/contacts/index|Contacts]] (1) — Sarah\n"),
        "layer must reflect the written folder:\n{layer_md}"
    );
    assert!(
        root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
        "root must reflect the written folder:\n{root_md}"
    );

    // ... while the un-indexed sibling is absent from both.
    assert!(
        !layer_md.contains("companies"),
        "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
    );
    assert!(
        !root_md.contains("companies"),
        "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
    );
    assert!(
        root_md.contains("## Records (1)"),
        "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
    );

    // Second phase: index the siblings too and compare against a full rebuild.
    let (_rb_guard, rb) = mk_store();
    for (rel, t, s, u) in companies.into_iter().chain([contact]) {
        write_doc(&rb, rel, t, Some(s), Some(u), "");
    }
    for (rel, _, _, _) in companies {
        Index::on_write(&store, Path::new(rel)).unwrap();
    }
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&store);
    let b = snapshot_artifacts(&rb);
    let loop_keys: BTreeSet<_> = a.keys().collect();
    let rebuild_keys: BTreeSet<_> = b.keys().collect();
    assert_eq!(
        loop_keys, rebuild_keys,
        "same artifact set after indexing both folders"
    );
    a.iter().for_each(|(k, v)| {
        assert_eq!(
            v, &b[k],
            "after indexing the sibling too, loop result must equal rebuild for {k}"
        );
    });

    let final_root = read(&store, "index.md");
    assert!(
        final_root.contains("## Records (3)"),
        "now that both folders are indexed, the root total is 3"
    );
}
1887
/// A `wiki-page` placed at the path computed by `Store::shard_path_for` must be fileable
/// by the index (`type_folder_of` resolves it), must show up in the `wiki/topics`
/// type-folder index and the `wiki` layer rollup, and must yield identical artifacts via
/// `on_write` and via `rebuild_all`.
#[test]
fn wiki_page_at_shard_path_for_is_indexable_end_to_end() {
    let (_wt_guard, wt) = mk_store();
    let (_rb_guard, rb) = mk_store();

    // Let the store decide where the page lives.
    let default_frontmatter = crate::parser::Frontmatter::default();
    let rel = wt
        .shard_path_for("wiki-page", &default_frontmatter, "renewal-theme")
        .unwrap();
    let rel_str = path_to_unix(&rel);
    assert!(
        type_folder_of(&rel).is_some(),
        "shard_path_for produced a path the index cannot file: {rel_str}"
    );

    // Same document in both stores.
    for target in [&wt, &rb] {
        write_doc(
            target,
            &rel_str,
            "wiki-page",
            Some("Renewal theme"),
            Some("2026-05-21T10:00:00Z"),
            "",
        );
    }

    Index::on_write(&wt, &rel)
        .expect("on_write must succeed for a toolkit-computed wiki-page path");
    Index::rebuild_all(&rb).unwrap();

    // Type-folder level: markdown lists the page, jsonl has its row.
    let page_link = wiki_target(&rel);
    let tf_md = read(&rb, "wiki/topics/index.md");
    let expected_link = format!("[[{page_link}]]");
    assert!(
        tf_md.contains(&expected_link),
        "type-folder index must list the page link, got:\n{tf_md}"
    );
    assert!(
        exists(&rb, "wiki/topics/index.jsonl"),
        "type-folder jsonl must exist"
    );
    let tf_jsonl = read(&rb, "wiki/topics/index.jsonl");
    assert!(
        tf_jsonl.contains(&rel_str),
        "type-folder jsonl must contain the page row"
    );

    // Layer level: the type folder is rolled up.
    let layer_md = read(&rb, "wiki/index.md");
    assert!(
        layer_md.contains("wiki/topics/index"),
        "layer index must roll up the wiki/topics type-folder, got:\n{layer_md}"
    );

    // Incremental and full-rebuild paths agree.
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    let loop_keys: Vec<_> = a.keys().collect();
    let sweep_keys: Vec<_> = b.keys().collect();
    assert_eq!(
        loop_keys, sweep_keys,
        "loop and sweep must produce the same artifact set"
    );
    a.iter().for_each(|(k, v)| {
        assert_eq!(
            v, &b[k],
            "wiki-page artifact {k} differs between on_write and rebuild"
        );
    });
}
1983
/// Removing the newest file from an over-cap folder via `Index::on_remove` must match a
/// full `rebuild_all` of the same content, drop the removed file from both the markdown
/// and the JSONL, decrement the footer count, and pull the next-most-recent file into the
/// capped markdown view.
#[test]
fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // Three files over the cap; `updated` strictly increases with `i`, so index
    // `total - 1` is the newest file and indices 0..=2 start outside the capped view.
    let total = MD_CAP + 3;
    let mut all_rels = Vec::new();
    for i in 0..total {
        let rel = format!("sources/emails/2026/05/m-{i:04}.md");
        let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
        write_doc(
            &wt,
            &rel,
            "email",
            Some(&format!("mail {i}")),
            Some(&updated),
            "",
        );
        write_doc(
            &rb,
            &rel,
            "email",
            Some(&format!("mail {i}")),
            Some(&updated),
            "",
        );
        all_rels.push(rel);
    }

    // Write-through store: full build, then an incremental removal.
    Index::rebuild_all(&wt).unwrap();
    let newest = &all_rels[total - 1];
    fs::remove_file(wt.root.join(newest)).unwrap();
    Index::on_remove(&wt, Path::new(newest)).unwrap();

    // Reference store: remove the same file, then rebuild from scratch.
    fs::remove_file(rb.root.join(newest)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    // Compare the key sets first: iterating only `a` would miss artifacts that exist
    // solely in the rebuilt store, and a key missing from `b` would otherwise surface
    // as an uninformative index panic on `b[k]`.
    assert_eq!(
        a.keys().collect::<Vec<_>>(),
        b.keys().collect::<Vec<_>>(),
        "after remove, write-through and rebuild must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
    }

    let md = read(&wt, "sources/emails/index.md");
    assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
    assert!(
        !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
        "removed file must not be listed in md"
    );

    // With the newest file gone, the capped window slides down by one, so index 2
    // (previously the first file outside the view) must now be listed.
    let pulled_in = &all_rels[2];
    assert!(
        md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
        "the 501st-most-recent must be pulled into the browse view after a removal"
    );
    assert!(
        md.contains(&format!("This folder has {} files.", total - 1)),
        "footer count must decrement:\n{}",
        md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
    );

    let jsonl = read(&wt, "sources/emails/index.jsonl");
    assert_eq!(
        jsonl.lines().count(),
        total - 1,
        "jsonl loses exactly the removed file"
    );
    assert!(
        !jsonl.contains(&path_to_unix(Path::new(newest))),
        "removed file must be gone from the jsonl too"
    );
}
2061
/// Moving a file from one type folder to another and reporting it through
/// `Index::on_rename` must leave the same artifacts as performing the move and running
/// `rebuild_all`; the old folder's index drops the entry and the new folder's gains it.
#[test]
fn on_rename_cross_folder_matches_rebuild() {
    let (_wt_guard, wt) = mk_store();
    let (_rb_guard, rb) = mk_store();

    // (path, type, summary, updated)
    let seed = [
        (
            "records/contacts/a.md",
            "contact",
            "A",
            "2026-05-01T00:00:00Z",
        ),
        (
            "records/contacts/b.md",
            "contact",
            "B",
            "2026-05-02T00:00:00Z",
        ),
        (
            "records/companies/x.md",
            "company",
            "X",
            "2026-05-03T00:00:00Z",
        ),
    ];
    for (rel, kind, summary, stamp) in seed {
        write_doc(&wt, rel, kind, Some(summary), Some(stamp), "");
        write_doc(&rb, rel, kind, Some(summary), Some(stamp), "");
    }
    Index::rebuild_all(&wt).unwrap();

    let old = "records/contacts/b.md";
    let new = "records/companies/b.md";
    let destination_dir = "records/companies";

    // Write-through store: move on disk, then notify the index.
    fs::create_dir_all(wt.root.join(destination_dir)).unwrap();
    fs::rename(wt.root.join(old), wt.root.join(new)).unwrap();
    Index::on_rename(&wt, Path::new(old), Path::new(new)).unwrap();

    // Reference store: same move, followed by a full rebuild.
    fs::create_dir_all(rb.root.join(destination_dir)).unwrap();
    fs::rename(rb.root.join(old), rb.root.join(new)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(a.keys().collect::<Vec<_>>(), b.keys().collect::<Vec<_>>());
    a.iter().for_each(|(k, v)| {
        assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
    });

    let contacts = read(&wt, "records/contacts/index.md");
    assert!(!contacts.contains("records/contacts/b]]"));
    let companies = read(&wt, "records/companies/index.md");
    assert!(companies.contains("[[records/companies/b]]"));
}
2120
/// Calling `Index::on_write` twice for the same path must replace the existing entry
/// rather than append a second one, and the folder index's `updated` must follow the
/// newer revision.
#[test]
fn on_write_updates_existing_entry_in_place() {
    let (_guard, store) = mk_store();
    let rel = "records/contacts/a.md";

    // Two successive revisions of the same document, each followed by `on_write`.
    let revisions = [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ];
    for (summary, stamp) in revisions {
        write_doc(&store, rel, "contact", Some(summary), Some(stamp), "");
        Index::on_write(&store, Path::new(rel)).unwrap();
    }

    let jsonl = read(&store, "records/contacts/index.jsonl");
    let line_count = jsonl.lines().count();
    assert_eq!(line_count, 1, "upsert must not duplicate the line");
    assert!(jsonl.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !jsonl.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let md = read(&store, "records/contacts/index.md");
    assert!(md.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        md.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
2162
/// `Index::render_dry_run` must return the would-be artifacts, each introduced by a
/// `--- <path> ---` separator line, without creating any index file on disk.
#[test]
fn dry_run_emits_separators_and_writes_nothing() {
    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder("sources/emails".into());
    let out = Index::render_dry_run(&store, &level).unwrap();

    // Output carries both artifacts and the markdown body.
    let md_separator = "--- sources/emails/index.md ---\n";
    assert!(out.contains(md_separator), "md separator:\n{out}");
    let jsonl_separator = "--- sources/emails/index.jsonl ---\n";
    assert!(out.contains(jsonl_separator), "jsonl separator:\n{out}");
    assert!(
        out.contains("- [[sources/emails/2026/05/a]] — Mail"),
        "md body present"
    );

    // Nothing was written.
    let md_written = exists(&store, "sources/emails/index.md");
    assert!(!md_written, "dry-run must not write");
    let jsonl_written = exists(&store, "sources/emails/index.jsonl");
    assert!(!jsonl_written, "dry-run must not write");
}
2200
/// `Index::cleanup` must delete index files that sit inside a shard directory and index
/// files left in a folder with no content, while leaving real documents untouched.
#[test]
fn cleanup_removes_noncanonical_and_empty_indexes() {
    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // Stale index files planted inside the shard directory.
    let stale_body = "stale\n";
    let shard_md = store.root.join("sources/emails/2026/05/index.md");
    fs::write(shard_md, stale_body).unwrap();
    let shard_jsonl = store.root.join("sources/emails/2026/05/index.jsonl");
    fs::write(shard_jsonl, stale_body).unwrap();

    // A folder whose only file is a stale index.
    let empty_dir = store.root.join("records/empty");
    fs::create_dir_all(empty_dir).unwrap();
    let orphan_md = store.root.join("records/empty/index.md");
    fs::write(orphan_md, stale_body).unwrap();

    Index::cleanup(&store).unwrap();

    let shard_md_left = exists(&store, "sources/emails/2026/05/index.md");
    assert!(!shard_md_left, "shard index must be deleted");
    let shard_jsonl_left = exists(&store, "sources/emails/2026/05/index.jsonl");
    assert!(!shard_jsonl_left, "shard jsonl must be deleted");
    let orphan_left = exists(&store, "records/empty/index.md");
    assert!(!orphan_left, "empty-folder index must be deleted");

    // The real document survives.
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
2244
/// When the last document of the store is deleted, the next `rebuild_all` must remove
/// the type-folder, layer and root `index.md` files it created earlier.
#[test]
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    let (_guard, store) = mk_store();
    let only_doc = "records/contacts/a.md";
    write_doc(
        &store,
        only_doc,
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    // First rebuild creates an index at every level.
    Index::rebuild_all(&store).unwrap();
    assert!(exists(&store, "records/contacts/index.md"));
    assert!(exists(&store, "records/index.md"));
    assert!(exists(&store, "index.md"));

    // Empty the store and rebuild again.
    let doc_abs = store.root.join(only_doc);
    fs::remove_file(doc_abs).unwrap();
    Index::rebuild_all(&store).unwrap();

    let type_folder_left = exists(&store, "records/contacts/index.md");
    assert!(!type_folder_left, "emptied type-folder index gone");
    let layer_left = exists(&store, "records/index.md");
    assert!(!layer_left, "now-empty layer index gone");
    assert!(!exists(&store, "index.md"), "now-empty root index gone");
}
2274
/// Property-style check: after a deterministic pseudo-random mix of writes, removals and
/// renames, a store maintained through `on_write` / `on_remove` / `on_rename` must hold
/// exactly the same index artifacts as a mirror store that receives the same file
/// operations and is indexed once with `rebuild_all` at the end.
#[test]
fn property_writethrough_equals_rebuild_under_mixed_ops() {
    // `wt` is maintained incrementally; `rb` mirrors the file operations only.
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();

    // Fixed-seed linear congruential generator so the run is reproducible.
    // The order of `next()` calls below determines the whole scenario.
    let mut seed: u64 = 0x9E3779B97F4A7C15;
    let mut next = || {
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        (seed >> 33) as u32
    };

    let folders = ["sources/emails", "records/contacts", "wiki/people"];
    let types = ["email", "contact", "wiki-page"];
    // Paths currently present in both stores.
    let mut live: Vec<String> = Vec::new();

    for step in 0..120u32 {
        let r = next();
        let op = r % 10;
        if op < 6 || live.is_empty() {
            // Write (create or overwrite) a document; forced while nothing is live.
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = next() % 40;
            let rel = if folder == "sources/emails" {
                // Emails are spread over two month shards.
                let month = 5 + (id % 2);
                format!("{folder}/2026/{month:02}/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            let updated = format!(
                "2026-05-{:02}T{:02}:{:02}:00Z",
                1 + (step % 27),
                step % 24,
                id % 60
            );
            let extra = if id % 3 == 0 {
                "tags:\n - x\n - y\n"
            } else {
                ""
            };
            write_doc(
                &wt,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            write_doc(
                &rb,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            Index::on_write(&wt, Path::new(&rel)).unwrap();
            if !live.contains(&rel) {
                live.push(rel);
            }
        } else if op < 8 {
            // Remove a live document from both stores.
            let idx = (next() as usize) % live.len();
            let rel = live.remove(idx);
            fs::remove_file(wt.root.join(&rel)).unwrap();
            // The mirror must hold the same files; a failure here means the two stores
            // drifted apart, so fail immediately instead of masking it.
            fs::remove_file(rb.root.join(&rel)).unwrap();
            Index::on_remove(&wt, Path::new(&rel)).unwrap();
        } else {
            // Rename a live document, possibly into a different folder. Target ids
            // start at 50, so they never collide with ids used by the write branch.
            let idx = (next() as usize) % live.len();
            let old = live[idx].clone();
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = 50 + (next() % 40);
            let new = if folder == "sources/emails" {
                format!("{folder}/2026/05/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            if new == old || live.contains(&new) {
                continue;
            }
            fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
            fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
            fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
            fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
            Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
            live[idx] = new;
        }
    }

    // Index the mirror once and compare every artifact.
    Index::rebuild_all(&rb).unwrap();
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild must produce the same set of artifacts"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
    assert!(
        !a.is_empty(),
        "the run must have produced at least one artifact"
    );
}
2393}