1use std::collections::BTreeMap;
53use std::fs;
54use std::io::Write as _;
55use std::path::{Path, PathBuf};
56
57use chrono::{DateTime, FixedOffset, SecondsFormat};
58use serde::{Deserialize, Serialize};
59use serde_json::Value;
60
61use crate::parser::FolderMeta;
62use crate::store::{Layer, Store};
63
/// Maximum number of records listed in a type-folder `index.md`. When a folder
/// holds more records than this, the markdown gets a "More" footer instead of
/// the remaining entries.
const MD_CAP: usize = 500;

/// Summary text stored in a record when the file's frontmatter yields no
/// usable `summary` value.
const MISSING_SUMMARY: &str = "(no summary)";

/// Text of the top-level heading in the root `index.md`.
const ROOT_TITLE: &str = "Knowledge base index";
74
/// Identifies which level of the catalog an [`Index`] describes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexLevel {
    /// The store-wide index (rendered with `scope: root`).
    Root,
    /// The index of a single layer directory (rendered with `scope: layer`).
    Layer(Layer),
    /// The index of one type folder, identified by its path relative to the
    /// store root (rendered with `scope: type-folder`).
    TypeFolder(PathBuf),
}
85
/// One catalog entry describing a single file. Each record is serialized as
/// one JSON object (one line of `index.jsonl`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct IndexRecord {
    /// Path of the described file; (de)serialized through `path_serde`, which
    /// writes it as a forward-slash separated string.
    #[serde(with = "path_serde")]
    pub path: PathBuf,
    /// The file's type; stored under the JSON key `type`.
    #[serde(rename = "type")]
    pub type_: String,
    /// Summary text for the file.
    pub summary: String,
    /// Tags; a missing key deserializes to an empty list.
    #[serde(default)]
    pub tags: Vec<String>,
    /// Links; a missing key deserializes to an empty list.
    #[serde(default)]
    pub links: Vec<String>,
    /// Creation timestamp, if one is known.
    pub created: Option<DateTime<FixedOffset>>,
    /// Last-update timestamp, if one is known; this is the key used when
    /// ordering records by recency.
    pub updated: Option<DateTime<FixedOffset>>,
    /// All remaining keys, flattened into the same JSON object as the named
    /// fields above.
    #[serde(flatten)]
    pub fields: BTreeMap<String, Value>,
}
119
/// In-memory catalog for one [`IndexLevel`].
#[derive(Debug, Clone, PartialEq)]
pub struct Index {
    /// The level this index was built for.
    pub level: IndexLevel,
    /// Per-file records. Filled by `build_type_folder`; `build_layer` and
    /// `build_root` leave it empty.
    pub records: Vec<IndexRecord>,
    /// File count per type folder (keyed by store-relative folder path).
    /// Filled by `build_layer` and `build_root`; `build_type_folder` leaves it
    /// empty.
    pub child_counts: BTreeMap<PathBuf, usize>,
}
132
133impl Index {
134 pub fn build_type_folder(store: &Store, type_folder: &Path) -> crate::Result<Index> {
140 let rel = normalize_rel(type_folder);
141 let abs = store.root.join(&rel);
142 let mut records = Vec::new();
143 for file_abs in walk_type_folder_files(&abs) {
144 let rel_path =
145 rel_to_store(&store.root, &file_abs).expect("walked file is under the store root");
146 records.push(record_from_file(&file_abs, rel_path)?);
158 }
159 sort_records(&mut records);
160 Ok(Index {
161 level: IndexLevel::TypeFolder(rel),
162 records,
163 child_counts: BTreeMap::new(),
164 })
165 }
166
167 pub fn build_layer(store: &Store, layer: Layer) -> crate::Result<Index> {
170 let mut child_counts = BTreeMap::new();
171 for tf in type_folders_in_layer(store, layer) {
172 let abs = store.root.join(&tf);
173 let n = walk_type_folder_files(&abs).len();
174 if n > 0 {
175 child_counts.insert(tf, n);
176 }
177 }
178 Ok(Index {
179 level: IndexLevel::Layer(layer),
180 records: Vec::new(),
181 child_counts,
182 })
183 }
184
185 pub fn build_root(store: &Store) -> crate::Result<Index> {
188 let mut child_counts = BTreeMap::new();
189 for layer in Layer::all() {
190 for tf in type_folders_in_layer(store, layer) {
191 let abs = store.root.join(&tf);
192 let n = walk_type_folder_files(&abs).len();
193 if n > 0 {
194 child_counts.insert(tf, n);
195 }
196 }
197 }
198 Ok(Index {
199 level: IndexLevel::Root,
200 records: Vec::new(),
201 child_counts,
202 })
203 }
204
205 pub fn to_markdown(&self) -> String {
207 match &self.level {
208 IndexLevel::TypeFolder(folder) => self.render_type_folder_md(folder),
209 IndexLevel::Layer(layer) => self.render_layer_md(*layer),
210 IndexLevel::Root => self.render_root_md(),
211 }
212 }
213
214 pub fn to_jsonl(&self) -> String {
218 let mut out = String::new();
219 for rec in &self.records {
220 let line = serde_json::to_string(rec).expect("IndexRecord serializes");
223 out.push_str(&line);
224 out.push('\n');
225 }
226 out
227 }
228
229 fn render_type_folder_md(&self, folder: &Path) -> String {
232 let folder_disp = path_to_unix(folder);
233 let updated = max_updated(self.records.iter().map(|r| r.updated.as_ref()));
234 let mut s = String::new();
235 s.push_str("---\n");
236 s.push_str("type: index\n");
237 s.push_str("scope: type-folder\n");
238 s.push_str(&format!("folder: {folder_disp}\n"));
239 if let Some(ts) = updated {
240 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
241 }
242 s.push_str("---\n\n");
243 s.push_str(&format!("# {folder_disp}\n\n"));
244
245 let shown = self.records.len().min(MD_CAP);
246 for rec in self.records.iter().take(shown) {
247 s.push_str(&format_md_entry(rec));
248 s.push('\n');
249 }
250
251 if self.records.len() > MD_CAP {
252 let type_ = self.records.first().map(|r| r.type_.as_str()).unwrap_or("");
253 let layer = folder
254 .components()
255 .next()
256 .and_then(|c| c.as_os_str().to_str())
257 .unwrap_or("");
258 s.push('\n');
259 s.push_str(&more_footer(self.records.len(), type_, layer));
260 }
261 s
262 }
263
264 fn render_layer_md(&self, layer: Layer) -> String {
269 let layer_dir = layer_dir_name(layer);
270 let mut s = String::new();
271 s.push_str("---\n");
272 s.push_str("type: index\n");
273 s.push_str("scope: layer\n");
274 s.push_str(&format!("folder: {layer_dir}\n"));
275 s.push_str("---\n\n");
276 s.push_str(&format!("# {layer_dir}\n\n"));
277 for (tf, n) in &self.child_counts {
278 let tf_unix = path_to_unix(tf);
279 let display = capitalize(folder_basename(tf));
280 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
281 }
282 s
283 }
284
285 fn render_root_md(&self) -> String {
288 let mut s = String::new();
289 s.push_str("---\n");
290 s.push_str("type: index\n");
291 s.push_str("scope: root\n");
292 s.push_str("---\n\n");
293 s.push_str(&format!("# {ROOT_TITLE}\n"));
294 for layer in Layer::all() {
295 let layer_dir = layer_dir_name(layer);
296 let prefix = format!("{layer_dir}/");
297 let children: Vec<(&PathBuf, &usize)> = self
298 .child_counts
299 .iter()
300 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
301 .collect();
302 if children.is_empty() {
303 continue;
304 }
305 let total: usize = children.iter().map(|(_, n)| **n).sum();
306 s.push('\n');
307 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
308 for (tf, n) in children {
309 let tf_unix = path_to_unix(tf);
310 let display = capitalize(folder_basename(tf));
311 s.push_str(&format!("- [[{tf_unix}/index|{display}]] ({n})\n"));
312 }
313 }
314 s
315 }
316}
317
318impl Index {
323 pub fn on_write(store: &Store, file: &Path) -> crate::Result<()> {
330 let file_rel = normalize_rel(file);
331 if is_index_artifact(&file_rel) {
338 return Ok(());
339 }
340 let file_abs = store.root.join(&file_rel);
341 let folder = type_folder_of(&file_rel)
342 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
343 let record = record_from_file(&file_abs, file_rel.clone())?;
344
345 let _lock = FolderLock::acquire(&store.root.join(&folder));
348 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
349 records.retain(|r| r.path != record.path);
350 records.push(record);
351 sort_records(&mut records);
352
353 write_type_folder_artifacts(store, &folder, &records)?;
354 update_parents(store, &folder)?;
355 Ok(())
356 }
357
358 pub fn on_rename(store: &Store, old: &Path, new: &Path) -> crate::Result<()> {
362 let old_rel = normalize_rel(old);
363 let new_rel = normalize_rel(new);
364 if is_index_artifact(&old_rel) || is_index_artifact(&new_rel) {
368 return Ok(());
369 }
370 let old_folder = type_folder_of(&old_rel)
371 .ok_or_else(|| bad_index(&old_rel, "source is not inside a layer/type-folder"))?;
372 let new_folder = type_folder_of(&new_rel)
373 .ok_or_else(|| bad_index(&new_rel, "target is not inside a layer/type-folder"))?;
374
375 let _locks = lock_folders(store, &old_folder, &new_folder);
379
380 let mut old_records =
382 read_jsonl_records(&store.root.join(&old_folder).join("index.jsonl"))?;
383 old_records.retain(|r| r.path != old_rel);
384
385 if old_folder == new_folder {
386 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
388 old_records.retain(|r| r.path != record.path);
389 old_records.push(record);
390 sort_records(&mut old_records);
391 write_type_folder_artifacts(store, &old_folder, &old_records)?;
392 update_parents(store, &old_folder)?;
393 return Ok(());
394 }
395
396 sort_records(&mut old_records);
399 write_type_folder_artifacts(store, &old_folder, &old_records)?;
400
401 let record = record_from_file(&store.root.join(&new_rel), new_rel.clone())?;
402 let mut new_records =
403 read_jsonl_records(&store.root.join(&new_folder).join("index.jsonl"))?;
404 new_records.retain(|r| r.path != record.path);
405 new_records.push(record);
406 sort_records(&mut new_records);
407 write_type_folder_artifacts(store, &new_folder, &new_records)?;
408
409 update_parents(store, &old_folder)?;
410 update_parents(store, &new_folder)?;
411 Ok(())
412 }
413
414 pub fn on_remove(store: &Store, file: &Path) -> crate::Result<()> {
419 let file_rel = normalize_rel(file);
420 if is_index_artifact(&file_rel) {
423 return Ok(());
424 }
425 let folder = type_folder_of(&file_rel)
426 .ok_or_else(|| bad_index(&file_rel, "file is not inside a layer/type-folder"))?;
427 let _lock = FolderLock::acquire(&store.root.join(&folder));
429 let mut records = read_jsonl_records(&store.root.join(&folder).join("index.jsonl"))?;
430 let before = records.len();
431 records.retain(|r| r.path != file_rel);
432 if records.len() == before {
433 }
436 sort_records(&mut records);
437 write_type_folder_artifacts(store, &folder, &records)?;
438 update_parents(store, &folder)?;
439 Ok(())
440 }
441
442 pub fn rebuild_all(store: &Store) -> crate::Result<()> {
446 Index::cleanup(store)?;
447 for layer in Layer::all() {
448 for tf in type_folders_in_layer(store, layer) {
449 let idx = Index::build_type_folder(store, &tf)?;
450 if idx.records.is_empty() {
451 continue;
452 }
453 write_type_folder_artifacts(store, &tf, &idx.records)?;
454 }
455 let layer_idx = Index::build_layer(store, layer)?;
456 let layer_index_md = store.root.join(layer_dir_name(layer)).join("index.md");
457 if layer_idx.child_counts.is_empty() {
458 remove_if_exists(&layer_index_md)?;
459 } else {
460 write_atomic(
461 &layer_index_md,
462 render_layer_md_with_store(store, &layer_idx),
463 )?;
464 }
465 }
466 let root_idx = Index::build_root(store)?;
467 let root_index_md = store.root.join("index.md");
468 if root_idx.child_counts.is_empty() {
469 remove_if_exists(&root_index_md)?;
470 } else {
471 write_atomic(&root_index_md, render_root_md_with_store(store, &root_idx))?;
472 }
473 Ok(())
474 }
475
476 pub fn rebuild_folder(store: &Store, folder: &Path) -> crate::Result<()> {
483 Self::write_level(store, &IndexLevel::TypeFolder(folder.to_path_buf()))?;
484 update_parents(store, folder)
485 }
486
487 pub fn write_level(store: &Store, level: &IndexLevel) -> crate::Result<()> {
489 match level {
490 IndexLevel::TypeFolder(folder) => {
491 let idx = Index::build_type_folder(store, folder)?;
492 if idx.records.is_empty() {
493 remove_if_exists(&store.root.join(folder).join("index.md"))?;
494 remove_if_exists(&store.root.join(folder).join("index.jsonl"))?;
495 } else {
496 write_type_folder_artifacts(store, folder, &idx.records)?;
497 }
498 }
499 IndexLevel::Layer(layer) => {
500 let idx = Index::build_layer(store, *layer)?;
501 let p = store.root.join(layer_dir_name(*layer)).join("index.md");
502 if idx.child_counts.is_empty() {
503 remove_if_exists(&p)?;
504 } else {
505 write_atomic(&p, render_layer_md_with_store(store, &idx))?;
506 }
507 }
508 IndexLevel::Root => {
509 let idx = Index::build_root(store)?;
510 let p = store.root.join("index.md");
511 if idx.child_counts.is_empty() {
512 remove_if_exists(&p)?;
513 } else {
514 write_atomic(&p, render_root_md_with_store(store, &idx))?;
515 }
516 }
517 }
518 Ok(())
519 }
520
521 pub fn render_dry_run(store: &Store, level: &IndexLevel) -> crate::Result<String> {
524 let mut out = String::new();
525 match level {
526 IndexLevel::TypeFolder(folder) => {
527 let idx = Index::build_type_folder(store, folder)?;
528 let md_path = path_to_unix(&folder.join("index.md"));
529 let jsonl_path = path_to_unix(&folder.join("index.jsonl"));
530 out.push_str(&format!("--- {md_path} ---\n"));
531 out.push_str(&idx.to_markdown());
532 out.push_str(&format!("--- {jsonl_path} ---\n"));
533 out.push_str(&idx.to_jsonl());
534 }
535 IndexLevel::Layer(layer) => {
536 let idx = Index::build_layer(store, *layer)?;
537 let md_path = format!("{}/index.md", layer_dir_name(*layer));
538 out.push_str(&format!("--- {md_path} ---\n"));
539 out.push_str(&render_layer_md_with_store(store, &idx));
540 }
541 IndexLevel::Root => {
542 let idx = Index::build_root(store)?;
543 out.push_str("--- index.md ---\n");
544 out.push_str(&render_root_md_with_store(store, &idx));
545 }
546 }
547 Ok(out)
548 }
549
550 pub fn cleanup(store: &Store) -> crate::Result<()> {
568 for layer in Layer::all() {
569 let layer_dir = store.root.join(layer_dir_name(layer));
570 if !layer_dir.is_dir() {
571 continue;
572 }
573 for tf in type_folders_in_layer(store, layer) {
574 let tf_abs = store.root.join(&tf);
575 for entry in walkdir::WalkDir::new(&tf_abs)
579 .min_depth(2)
580 .into_iter()
581 .filter_map(|e| e.ok())
582 {
583 let p = entry.path();
584 if is_index_artifact(p) && is_deletable_catalog_artifact(p) {
585 remove_if_exists(p)?;
586 }
587 }
588 if walk_type_folder_files(&tf_abs).is_empty() {
592 let md = tf_abs.join("index.md");
593 if is_deletable_catalog_artifact(&md) {
594 remove_if_exists(&md)?;
595 }
596 remove_if_exists(&tf_abs.join("index.jsonl"))?;
597 }
598 }
599 }
600 Ok(())
601 }
602}
603
604fn write_type_folder_artifacts(
612 store: &Store,
613 folder: &Path,
614 records: &[IndexRecord],
615) -> crate::Result<()> {
616 let folder_abs = store.root.join(folder);
617 let md_path = folder_abs.join("index.md");
618 let jsonl_path = folder_abs.join("index.jsonl");
619 if records.is_empty() {
620 remove_if_exists(&md_path)?;
621 remove_if_exists(&jsonl_path)?;
622 return Ok(());
623 }
624 let idx = Index {
625 level: IndexLevel::TypeFolder(folder.to_path_buf()),
626 records: records.to_vec(),
627 child_counts: BTreeMap::new(),
628 };
629 write_atomic(&md_path, idx.to_markdown())?;
630 write_atomic(&jsonl_path, idx.to_jsonl())?;
631 Ok(())
632}
633
634fn update_parents(store: &Store, folder: &Path) -> crate::Result<()> {
647 let stats = collect_child_stats(store, &Layer::all())?;
667
668 let layer = folder
669 .components()
670 .next()
671 .and_then(|c| c.as_os_str().to_str())
672 .and_then(layer_from_dir_name);
673 if let Some(layer) = layer {
674 let p = store.root.join(layer_dir_name(layer)).join("index.md");
675 if layer_has_children(&stats, layer) {
676 write_atomic(
677 &p,
678 render_layer_md_from_stats(layer, &stats, &store.config.folders),
679 )?;
680 } else {
681 remove_if_exists(&p)?;
682 }
683 }
684 let rp = store.root.join("index.md");
685 if stats.values().any(|s| s.count > 0) {
686 write_atomic(
687 &rp,
688 render_root_md_from_stats(&stats, &store.config.folders),
689 )?;
690 } else {
691 remove_if_exists(&rp)?;
692 }
693 Ok(())
694}
695
696fn layer_has_children(stats: &BTreeMap<PathBuf, FolderStat>, layer: Layer) -> bool {
698 let prefix = format!("{}/", layer_dir_name(layer));
699 stats
700 .iter()
701 .any(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
702}
703
704fn render_layer_md_from_stats(
709 layer: Layer,
710 stats: &BTreeMap<PathBuf, FolderStat>,
711 folders: &BTreeMap<String, FolderMeta>,
712) -> String {
713 let layer_dir = layer_dir_name(layer);
714 let prefix = format!("{layer_dir}/");
715 let mut max_upd: Option<DateTime<FixedOffset>> = None;
716 let mut entries = String::new();
717 for (tf, stat) in stats {
718 if stat.count == 0 || !path_to_unix(tf).starts_with(&prefix) {
719 continue;
720 }
721 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
722 max_upd = Some(match max_upd {
723 Some(cur) if cur >= u => cur,
724 _ => u,
725 });
726 }
727 let tf_unix = path_to_unix(tf);
728 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
729 entries.push_str(&folder_entry(&tf_unix, &display, stat.count, description));
730 }
731 let mut s = String::new();
732 s.push_str("---\n");
733 s.push_str("type: index\n");
734 s.push_str("scope: layer\n");
735 s.push_str(&format!("folder: {layer_dir}\n"));
736 if let Some(ts) = max_upd {
737 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
738 }
739 s.push_str("---\n\n");
740 s.push_str(&format!("# {layer_dir}\n\n"));
741 s.push_str(&entries);
742 s
743}
744
745fn render_root_md_from_stats(
747 stats: &BTreeMap<PathBuf, FolderStat>,
748 folders: &BTreeMap<String, FolderMeta>,
749) -> String {
750 let mut max_upd: Option<DateTime<FixedOffset>> = None;
751 for stat in stats.values() {
752 if stat.count == 0 {
753 continue;
754 }
755 if let Some(u) = stat.newest.as_ref().and_then(|r| r.updated) {
756 max_upd = Some(match max_upd {
757 Some(cur) if cur >= u => cur,
758 _ => u,
759 });
760 }
761 }
762 let mut s = String::new();
763 s.push_str("---\n");
764 s.push_str("type: index\n");
765 s.push_str("scope: root\n");
766 if let Some(ts) = max_upd {
767 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
768 }
769 s.push_str("---\n\n");
770 s.push_str(&format!("# {ROOT_TITLE}\n"));
771 for layer in Layer::all() {
772 let layer_dir = layer_dir_name(layer);
773 let prefix = format!("{layer_dir}/");
774 let children: Vec<(&PathBuf, usize)> = stats
775 .iter()
776 .filter(|(tf, s)| s.count > 0 && path_to_unix(tf).starts_with(&prefix))
777 .map(|(tf, s)| (tf, s.count))
778 .collect();
779 if children.is_empty() {
780 continue;
781 }
782 let total: usize = children.iter().map(|(_, n)| *n).sum();
783 s.push('\n');
784 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
785 for (tf, n) in children {
786 let tf_unix = path_to_unix(tf);
787 let (display, description) = folder_label(&tf_unix, folder_basename(tf), folders);
788 s.push_str(&folder_entry(&tf_unix, &display, n, description));
789 }
790 }
791 s
792}
793
794fn render_layer_md_with_store(store: &Store, idx: &Index) -> String {
801 let layer = match idx.level {
802 IndexLevel::Layer(l) => l,
803 _ => unreachable!("render_layer_md_with_store called on non-layer"),
804 };
805 let layer_dir = layer_dir_name(layer);
806 let mut max_upd: Option<DateTime<FixedOffset>> = None;
807 let mut entries = String::new();
808 for (tf, n) in &idx.child_counts {
809 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
810 let newest = recs.first();
811 if let Some(u) = newest.and_then(|r| r.updated) {
812 max_upd = Some(match max_upd {
813 Some(cur) if cur >= u => cur,
814 _ => u,
815 });
816 }
817 let tf_unix = path_to_unix(tf);
818 let (display, description) =
819 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
820 entries.push_str(&folder_entry(&tf_unix, &display, *n, description));
821 }
822 let mut s = String::new();
823 s.push_str("---\n");
824 s.push_str("type: index\n");
825 s.push_str("scope: layer\n");
826 s.push_str(&format!("folder: {layer_dir}\n"));
827 if let Some(ts) = max_upd {
828 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
829 }
830 s.push_str("---\n\n");
831 s.push_str(&format!("# {layer_dir}\n\n"));
832 s.push_str(&entries);
833 s
834}
835
836fn render_root_md_with_store(store: &Store, idx: &Index) -> String {
840 let mut max_upd: Option<DateTime<FixedOffset>> = None;
841 for tf in idx.child_counts.keys() {
842 let recs = read_jsonl_records(&store.root.join(tf).join("index.jsonl")).unwrap_or_default();
843 if let Some(u) = recs.first().and_then(|r| r.updated) {
844 max_upd = Some(match max_upd {
845 Some(cur) if cur >= u => cur,
846 _ => u,
847 });
848 }
849 }
850 let mut s = String::new();
851 s.push_str("---\n");
852 s.push_str("type: index\n");
853 s.push_str("scope: root\n");
854 if let Some(ts) = max_upd {
855 s.push_str(&format!("updated: {}\n", fmt_ts(&ts)));
856 }
857 s.push_str("---\n\n");
858 s.push_str(&format!("# {ROOT_TITLE}\n"));
859 for layer in Layer::all() {
860 let layer_dir = layer_dir_name(layer);
861 let prefix = format!("{layer_dir}/");
862 let children: Vec<(&PathBuf, &usize)> = idx
863 .child_counts
864 .iter()
865 .filter(|(tf, _)| path_to_unix(tf).starts_with(&prefix))
866 .collect();
867 if children.is_empty() {
868 continue;
869 }
870 let total: usize = children.iter().map(|(_, n)| **n).sum();
871 s.push('\n');
872 s.push_str(&format!("## {} ({total})\n", capitalize(layer_dir)));
873 for (tf, n) in children {
874 let tf_unix = path_to_unix(tf);
875 let (display, description) =
876 folder_label(&tf_unix, folder_basename(tf), &store.config.folders);
877 s.push_str(&folder_entry(&tf_unix, &display, *n, description));
878 }
879 }
880 s
881}
882
883fn format_md_entry(rec: &IndexRecord) -> String {
889 let path = wiki_target(&rec.path);
890 let summary = collapse_whitespace(&rec.summary);
899 let mut line = format!("- [[{path}]] — {summary}");
900 if !rec.tags.is_empty() {
901 let tags = rec
902 .tags
903 .iter()
904 .map(|t| format!("#{t}"))
905 .collect::<Vec<_>>()
906 .join(" ");
907 line.push_str(&format!(" · {tags}"));
908 }
909 line
910}
911
/// Builds the "More" footer appended to a type-folder `index.md`.
///
/// `total` is the number of files reported in the text, and `type_` and
/// `layer` are interpolated into the suggested `dbmd index query` command.
/// `MD_CAP` is reported as the number of entries listed above the footer.
fn more_footer(total: usize, type_: &str, layer: &str) -> String {
    format!(
        "## More\n\nThis folder has {total} files. The {MD_CAP} most recent are listed above.\nUse `dbmd index query --type {type_} --in {layer}` for the complete catalog.\n"
    )
}
918
/// Sorts `records` in place using `record_recency_cmp` (most recently updated
/// first, ties broken by path).
fn sort_records(records: &mut [IndexRecord]) {
    records.sort_by(record_recency_cmp);
}
925
impl IndexRecord {
    /// Builds the record that the file at `abs` should have in the catalog,
    /// using `rel` as the record's path. Crate-visible wrapper around
    /// `record_from_file`.
    pub(crate) fn expected_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
        record_from_file(abs, rel)
    }
}
942
943fn record_from_file(abs: &Path, rel: PathBuf) -> crate::Result<IndexRecord> {
946 let mut meta = read_frontmatter(abs)?;
947 if rel.starts_with("records") {
952 meta.fields
953 .entry("meta-type".to_string())
954 .or_insert_with(|| Value::String("fact".to_string()));
955 }
956 Ok(IndexRecord {
957 path: rel,
958 type_: meta.type_.unwrap_or_default(),
959 summary: meta.summary.unwrap_or_else(|| MISSING_SUMMARY.to_string()),
960 tags: meta.tags,
961 links: meta.links,
962 created: meta.created,
963 updated: meta.updated,
964 fields: meta.fields,
965 })
966}
967
/// Frontmatter values extracted from a single file by `read_frontmatter`.
struct FileMeta {
    /// Value of the `type` key, if it was a string, number, or boolean.
    type_: Option<String>,
    /// Value of the `summary` key, if it was a string, number, or boolean.
    summary: Option<String>,
    /// Entries of the `tags` key (empty when absent).
    tags: Vec<String>,
    /// Entries of the `links` key (empty when absent).
    links: Vec<String>,
    /// `created` key parsed as an RFC 3339 timestamp, if it parsed.
    created: Option<DateTime<FixedOffset>>,
    /// `updated` key parsed as an RFC 3339 timestamp, if it parsed.
    updated: Option<DateTime<FixedOffset>>,
    /// All other string-keyed entries, converted to JSON values. The `path`
    /// key is dropped and never stored here.
    fields: BTreeMap<String, Value>,
}
978
979fn read_frontmatter(abs: &Path) -> crate::Result<FileMeta> {
993 let bytes = fs::read(abs)?;
994 let yaml = extract_frontmatter_block_lossy(&bytes).unwrap_or_default();
995 let map: serde_norway::Mapping = if yaml.trim().is_empty() {
996 serde_norway::Mapping::new()
997 } else {
998 serde_norway::from_str(&yaml).map_err(|e| {
999 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1000 path: abs.to_path_buf(),
1001 message: format!("frontmatter YAML: {e}"),
1002 })
1003 })?
1004 };
1005
1006 let mut type_ = None;
1007 let mut summary = None;
1008 let mut tags = Vec::new();
1009 let mut links = Vec::new();
1010 let mut created = None;
1011 let mut updated = None;
1012 let mut fields = BTreeMap::new();
1013
1014 for (k, v) in map {
1015 let key = match k.as_str() {
1016 Some(s) => s.to_string(),
1017 None => continue,
1018 };
1019 match key.as_str() {
1020 "type" => type_ = scalar_string(&v),
1030 "summary" => summary = scalar_string(&v),
1031 "tags" => tags = yaml_string_list(&v),
1032 "links" => links = yaml_string_list(&v),
1033 "created" => created = v.as_str().and_then(parse_ts),
1034 "updated" => updated = v.as_str().and_then(parse_ts),
1035 "path" => {}
1039 _ => {
1040 fields.insert(key, yaml_to_json_value(&v));
1041 }
1042 }
1043 }
1044
1045 Ok(FileMeta {
1046 type_,
1047 summary,
1048 tags,
1049 links,
1050 created,
1051 updated,
1052 fields,
1053 })
1054}
1055
1056fn scalar_string(v: &serde_norway::Value) -> Option<String> {
1062 match v {
1063 serde_norway::Value::String(s) => Some(s.clone()),
1064 serde_norway::Value::Number(n) => Some(n.to_string()),
1065 serde_norway::Value::Bool(b) => Some(b.to_string()),
1066 _ => None,
1067 }
1068}
1069
1070fn extract_frontmatter_block_lossy(bytes: &[u8]) -> Option<String> {
1076 let text = String::from_utf8_lossy(bytes);
1081 extract_frontmatter_block(&text)
1082}
1083
/// Returns the text between the opening and closing `---` fence lines of a
/// document, with each contained line terminated by `\n`.
///
/// A leading byte-order mark is skipped and trailing whitespace on the fence
/// lines is tolerated. Returns `None` when the first line is not a fence or
/// when no closing fence follows.
fn extract_frontmatter_block(text: &str) -> Option<String> {
    let body = text.strip_prefix('\u{feff}').unwrap_or(text);
    let mut lines = body.lines();
    if lines.next()?.trim_end() != "---" {
        return None;
    }

    let mut block = String::new();
    let mut closed = false;
    for line in lines {
        if line.trim_end() == "---" {
            closed = true;
            break;
        }
        block.push_str(line);
        block.push('\n');
    }
    // An unterminated block is treated as "no frontmatter".
    closed.then_some(block)
}
1103
1104fn yaml_string_list(v: &serde_norway::Value) -> Vec<String> {
1107 match v {
1108 serde_norway::Value::String(s) => vec![s.clone()],
1109 serde_norway::Value::Sequence(seq) => seq
1110 .iter()
1111 .filter_map(yaml_string_or_wiki_link_literal)
1112 .collect(),
1113 _ => Vec::new(),
1114 }
1115}
1116
1117fn yaml_string_or_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1118 v.as_str()
1119 .map(str::to_string)
1120 .or_else(|| unquoted_wiki_link_literal(v))
1121}
1122
1123fn yaml_to_json_value(v: &serde_norway::Value) -> Value {
1124 if let Some(link) = unquoted_wiki_link_literal(v) {
1125 return Value::String(link);
1126 }
1127 match v {
1128 serde_norway::Value::String(s) => Value::String(s.clone()),
1129 serde_norway::Value::Bool(b) => Value::Bool(*b),
1130 serde_norway::Value::Number(n) => {
1131 serde_json::to_value(n).unwrap_or_else(|_| Value::String(n.to_string()))
1132 }
1133 serde_norway::Value::Sequence(seq) => {
1134 Value::Array(seq.iter().map(yaml_to_json_value).collect())
1135 }
1136 serde_norway::Value::Mapping(_) | serde_norway::Value::Tagged(_) => {
1137 serde_json::to_value(v).unwrap_or(Value::Null)
1138 }
1139 serde_norway::Value::Null => Value::Null,
1140 }
1141}
1142
1143fn unquoted_wiki_link_literal(v: &serde_norway::Value) -> Option<String> {
1144 let serde_norway::Value::Sequence(outer) = v else {
1145 return None;
1146 };
1147 if outer.len() != 1 {
1148 return None;
1149 }
1150 let serde_norway::Value::Sequence(inner) = &outer[0] else {
1151 return None;
1152 };
1153 let [serde_norway::Value::String(target)] = inner.as_slice() else {
1154 return None;
1155 };
1156 Some(format!("[[{target}]]"))
1157}
1158
/// Parses `s` (after trimming surrounding whitespace) as an RFC 3339
/// timestamp; returns `None` when it does not parse.
fn parse_ts(s: &str) -> Option<DateTime<FixedOffset>> {
    DateTime::parse_from_rfc3339(s.trim()).ok()
}
1163
/// Formats `ts` as RFC 3339 with `SecondsFormat::AutoSi` and the `use_z` flag
/// set (so a zero offset is written as `Z`).
fn fmt_ts(ts: &DateTime<FixedOffset>) -> String {
    ts.to_rfc3339_opts(SecondsFormat::AutoSi, true)
}
1170
1171fn max_updated<'a>(
1173 it: impl Iterator<Item = Option<&'a DateTime<FixedOffset>>>,
1174) -> Option<DateTime<FixedOffset>> {
1175 let mut best: Option<DateTime<FixedOffset>> = None;
1176 for ts in it.flatten() {
1177 best = Some(match best {
1178 Some(cur) if cur >= *ts => cur,
1179 _ => *ts,
1180 });
1181 }
1182 best
1183}
1184
1185fn read_jsonl_records(jsonl: &Path) -> crate::Result<Vec<IndexRecord>> {
1189 let text = match fs::read_to_string(jsonl) {
1190 Ok(t) => t,
1191 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
1192 Err(e) => return Err(e.into()),
1193 };
1194 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1196 for (i, line) in text.lines().enumerate() {
1197 if line.trim().is_empty() {
1198 continue;
1199 }
1200 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1201 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1202 path: jsonl.to_path_buf(),
1203 message: format!("line {}: {e}", i + 1),
1204 })
1205 })?;
1206 by_path.insert(rec.path.clone(), rec);
1207 }
1208 let mut records: Vec<IndexRecord> = by_path.into_values().collect();
1209 sort_records(&mut records);
1210 Ok(records)
1211}
1212
/// Summary of one type folder's `index.jsonl`, as produced by
/// `read_folder_stat`. The default value describes a folder with no catalog.
#[derive(Debug, Clone, Default, PartialEq)]
struct FolderStat {
    /// Number of distinct record paths in the catalog.
    count: usize,
    /// The record that orders first under `record_recency_cmp`, if any.
    newest: Option<IndexRecord>,
}
1224
1225fn read_folder_stat(jsonl: &Path) -> crate::Result<FolderStat> {
1235 let text = match fs::read_to_string(jsonl) {
1236 Ok(t) => t,
1237 Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(FolderStat::default()),
1238 Err(e) => return Err(e.into()),
1239 };
1240 let mut by_path: BTreeMap<PathBuf, IndexRecord> = BTreeMap::new();
1243 for (i, line) in text.lines().enumerate() {
1244 if line.trim().is_empty() {
1245 continue;
1246 }
1247 let rec: IndexRecord = serde_json::from_str(line).map_err(|e| {
1248 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1249 path: jsonl.to_path_buf(),
1250 message: format!("line {}: {e}", i + 1),
1251 })
1252 })?;
1253 by_path.insert(rec.path.clone(), rec);
1254 }
1255 let count = by_path.len();
1256 let newest = by_path.into_values().min_by(record_recency_cmp);
1260 Ok(FolderStat { count, newest })
1261}
1262
1263fn record_recency_cmp(a: &IndexRecord, b: &IndexRecord) -> std::cmp::Ordering {
1268 match (b.updated, a.updated) {
1269 (Some(bu), Some(au)) => bu.cmp(&au),
1270 (Some(_), None) => std::cmp::Ordering::Greater, (None, Some(_)) => std::cmp::Ordering::Less, (None, None) => std::cmp::Ordering::Equal,
1273 }
1274 .then_with(|| a.path.cmp(&b.path))
1275}
1276
1277fn collect_child_stats(
1290 store: &Store,
1291 layers: &[Layer],
1292) -> crate::Result<BTreeMap<PathBuf, FolderStat>> {
1293 let mut stats = BTreeMap::new();
1294 for &layer in layers {
1295 for tf in type_folders_in_layer(store, layer) {
1296 let stat = read_folder_stat(&store.root.join(&tf).join("index.jsonl"))?;
1297 if stat.count > 0 {
1298 stats.insert(tf, stat);
1299 }
1300 }
1301 }
1302 Ok(stats)
1303}
1304
1305fn walk_type_folder_files(folder_abs: &Path) -> Vec<PathBuf> {
1308 let mut out = Vec::new();
1309 if !folder_abs.is_dir() {
1310 return out;
1311 }
1312 for entry in walkdir::WalkDir::new(folder_abs)
1313 .into_iter()
1314 .filter_entry(|e| !is_hidden(e.file_name()))
1315 .filter_map(|e| e.ok())
1316 {
1317 if !entry.file_type().is_file() {
1318 continue;
1319 }
1320 let p = entry.path();
1321 if p.extension().and_then(|e| e.to_str()) != Some("md") {
1322 continue;
1323 }
1324 if p.file_name().and_then(|n| n.to_str()) == Some("index.md") {
1325 continue;
1326 }
1327 out.push(p.to_path_buf());
1328 }
1329 out
1330}
1331
1332fn type_folders_in_layer(store: &Store, layer: Layer) -> Vec<PathBuf> {
1335 let layer_dir = store.root.join(layer_dir_name(layer));
1336 let mut out = Vec::new();
1337 let rd = match fs::read_dir(&layer_dir) {
1338 Ok(rd) => rd,
1339 Err(_) => return out,
1340 };
1341 for entry in rd.flatten() {
1342 if !entry.path().is_dir() {
1343 continue;
1344 }
1345 let name = entry.file_name();
1346 let name = match name.to_str() {
1347 Some(n) => n,
1348 None => continue,
1349 };
1350 if is_hidden(entry.file_name().as_os_str()) || name == "log" {
1351 continue;
1352 }
1353 out.push(PathBuf::from(layer_dir_name(layer)).join(name));
1354 }
1355 out.sort();
1356 out
1357}
1358
1359fn type_folder_of(file_rel: &Path) -> Option<PathBuf> {
1363 let mut comps = file_rel.components();
1364 let layer = comps.next()?.as_os_str().to_str()?;
1365 layer_from_dir_name(layer)?;
1366 let type_seg = comps.next()?.as_os_str().to_str()?;
1367 Some(PathBuf::from(layer).join(type_seg))
1368}
1369
/// Returns `abs` relative to `root`, or `None` when `abs` is not located under
/// `root`.
fn rel_to_store(root: &Path, abs: &Path) -> Option<PathBuf> {
    match abs.strip_prefix(root) {
        Ok(rel) => Some(rel.to_path_buf()),
        Err(_) => None,
    }
}
1374
1375fn normalize_rel(p: &Path) -> PathBuf {
1378 let s = path_to_unix(p);
1379 let s = s.strip_prefix("./").unwrap_or(&s);
1380 PathBuf::from(s)
1381}
1382
/// Returns `true` when the final component of `p` is exactly `index.md` or
/// `index.jsonl`.
fn is_index_artifact(p: &Path) -> bool {
    let Some(name) = p.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
1389
1390fn is_deletable_catalog_artifact(p: &Path) -> bool {
1404 match p.file_name().and_then(|n| n.to_str()) {
1405 Some("index.jsonl") => true,
1406 Some("index.md") => match read_frontmatter(p) {
1407 Ok(meta) => meta.type_.as_deref().is_none_or(|t| t == "index"),
1409 Err(_) => true,
1411 },
1412 _ => false,
1413 }
1414}
1415
/// Returns `true` when `name` is valid UTF-8 and starts with a dot.
fn is_hidden(name: &std::ffi::OsStr) -> bool {
    matches!(name.to_str(), Some(text) if text.starts_with('.'))
}
1419
/// Returns the directory name used for `layer` under the store root.
/// Inverse of `layer_from_dir_name`.
fn layer_dir_name(layer: Layer) -> &'static str {
    match layer {
        Layer::Sources => "sources",
        Layer::Records => "records",
    }
}
1426
/// Maps a directory name back to its layer; returns `None` for any name that
/// is not exactly `sources` or `records`.
fn layer_from_dir_name(name: &str) -> Option<Layer> {
    match name {
        "sources" => Some(Layer::Sources),
        "records" => Some(Layer::Records),
        _ => None,
    }
}
1436
/// Returns the final component of `p` as a string slice.
///
/// Yields `""` when the path has no file name or the name is not valid UTF-8.
fn folder_basename(p: &Path) -> &str {
    match p.file_name() {
        Some(name) => name.to_str().unwrap_or(""),
        None => "",
    }
}
1441
1442fn wiki_target(p: &Path) -> String {
1446 let unix = path_to_unix(p);
1447 unix.strip_suffix(".md").unwrap_or(&unix).to_string()
1448}
1449
/// Renders a path as its components joined with `/`.
///
/// Components that are not valid UTF-8 are converted lossily.
fn path_to_unix(p: &Path) -> String {
    let mut unix = String::new();
    for (position, component) in p.components().enumerate() {
        if position > 0 {
            unix.push('/');
        }
        unix.push_str(&component.as_os_str().to_string_lossy());
    }
    unix
}
1467
1468mod path_serde {
1474 use super::path_to_unix;
1475 use serde::{Deserialize, Deserializer, Serializer};
1476 use std::path::{Path, PathBuf};
1477
1478 pub fn serialize<S: Serializer>(p: &Path, s: S) -> Result<S::Ok, S::Error> {
1479 s.serialize_str(&path_to_unix(p))
1480 }
1481
1482 pub fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<PathBuf, D::Error> {
1483 Ok(PathBuf::from(String::deserialize(d)?))
1484 }
1485}
1486
/// Upper-cases the first character of `s` and leaves the rest untouched.
///
/// An empty input yields an empty string. The first character may expand to
/// several characters when upper-cased.
fn capitalize(s: &str) -> String {
    let Some(first) = s.chars().next() else {
        return String::new();
    };
    let mut out: String = first.to_uppercase().collect();
    out.push_str(&s[first.len_utf8()..]);
    out
}
1495
/// Trims `s` and reduces every internal run of whitespace to a single space.
fn collapse_whitespace(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for word in s.split_whitespace() {
        // Words are never empty, so a non-empty buffer means a word precedes.
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
1503
1504fn default_display(basename: &str) -> String {
1510 let spaced: String = basename
1511 .chars()
1512 .map(|c| if c == '-' || c == '_' { ' ' } else { c })
1513 .collect();
1514 capitalize(&spaced)
1515}
1516
1517fn folder_label<'a>(
1524 tf_unix: &str,
1525 basename: &str,
1526 folders: &'a BTreeMap<String, FolderMeta>,
1527) -> (String, Option<&'a str>) {
1528 let meta = folders.get(tf_unix);
1529 let display = meta
1530 .and_then(|m| m.display.as_deref())
1531 .map(str::to_string)
1532 .unwrap_or_else(|| default_display(basename));
1533 (display, meta.and_then(|m| m.description.as_deref()))
1534}
1535
/// Formats one newline-terminated bullet linking to a type folder's index:
/// `- [[<tf_unix>/index|<display>]] (<count>)`, followed by ` — <description>`
/// when a description is supplied.
fn folder_entry(tf_unix: &str, display: &str, count: usize, description: Option<&str>) -> String {
    let mut line = format!("- [[{tf_unix}/index|{display}]] ({count})");
    if let Some(text) = description {
        line.push_str(" — ");
        line.push_str(text);
    }
    line.push('\n');
    line
}
1544
1545fn write_atomic(path: &Path, contents: String) -> crate::Result<()> {
1552 if let Some(parent) = path.parent() {
1553 fs::create_dir_all(parent)?;
1554 }
1555 let dir = path.parent().unwrap_or_else(|| Path::new("."));
1556 let mut tmp = tempfile_in(dir)?;
1557 tmp.write_all(contents.as_bytes())?;
1558 tmp.flush()?;
1559 tmp.persist(path)?;
1560 Ok(())
1561}
1562
1563fn remove_if_exists(path: &Path) -> crate::Result<()> {
1564 match fs::remove_file(path) {
1565 Ok(()) => Ok(()),
1566 Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(()),
1567 Err(e) => Err(e.into()),
1568 }
1569}
1570
1571fn bad_index(path: &Path, msg: &str) -> crate::Error {
1572 crate::Error::Store(crate::store::StoreError::BadTypeIndex {
1573 path: path.to_path_buf(),
1574 message: msg.to_string(),
1575 })
1576}
1577
/// Lock-file guard for one folder.
///
/// The lock is the file `.index.lock` inside the folder. When `held` is true
/// this guard created the file and removes it again on drop.
struct FolderLock {
    path: PathBuf,
    held: bool,
}

impl FolderLock {
    /// Tries to take the lock for `folder_abs`, creating the folder if needed.
    ///
    /// Makes up to 600 attempts to create the lock file exclusively, sleeping
    /// 10 ms after each attempt that finds a fresh lock. An existing lock file
    /// whose modification time is more than 30 s old is removed and the next
    /// attempt follows without sleeping. Never fails: if the lock cannot be
    /// obtained (attempts exhausted, or an error other than "already exists"),
    /// the returned guard has `held == false`.
    fn acquire(folder_abs: &Path) -> Self {
        use std::io::ErrorKind;
        use std::time::{Duration, SystemTime};

        const MAX_ATTEMPTS: u32 = 600;
        const SPIN: Duration = Duration::from_millis(10);
        const STALE_AFTER: Duration = Duration::from_secs(30);

        let path = folder_abs.join(".index.lock");
        let _ = fs::create_dir_all(folder_abs);

        for _attempt in 0..MAX_ATTEMPTS {
            let created = fs::OpenOptions::new()
                .write(true)
                .create_new(true)
                .open(&path);
            let err = match created {
                Ok(_) => return FolderLock { path, held: true },
                Err(err) => err,
            };
            if err.kind() != ErrorKind::AlreadyExists {
                return FolderLock { path, held: false };
            }

            // Someone else holds the lock; decide whether it looks abandoned.
            // Any failure to read the timestamp counts as "not stale".
            let stale = fs::metadata(&path)
                .and_then(|meta| meta.modified())
                .ok()
                .and_then(|modified| SystemTime::now().duration_since(modified).ok())
                .is_some_and(|age| age > STALE_AFTER);
            if stale {
                let _ = fs::remove_file(&path);
            } else {
                std::thread::sleep(SPIN);
            }
        }
        FolderLock { path, held: false }
    }
}

impl Drop for FolderLock {
    /// Removes the lock file, but only if this guard created it.
    fn drop(&mut self) {
        if !self.held {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1660
1661fn lock_folders(store: &Store, a: &Path, b: &Path) -> Vec<FolderLock> {
1667 if a == b {
1668 return vec![FolderLock::acquire(&store.root.join(a))];
1669 }
1670 let (first, second) = if a < b { (a, b) } else { (b, a) };
1671 vec![
1672 FolderLock::acquire(&store.root.join(first)),
1673 FolderLock::acquire(&store.root.join(second)),
1674 ]
1675}
1676
/// A temporary file that is either renamed onto its destination via
/// `persist` or deleted when dropped.
struct AtomicTemp {
    file: Option<fs::File>,
    path: PathBuf,
    persisted: bool,
}

impl AtomicTemp {
    /// Writes all of `bytes` to the temp file.
    ///
    /// # Panics
    /// Panics if the file handle has already been taken.
    fn write_all(&mut self, bytes: &[u8]) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.write_all(bytes)
    }

    /// Flushes the temp file handle.
    ///
    /// # Panics
    /// Panics if the file handle has already been taken.
    fn flush(&mut self) -> std::io::Result<()> {
        let handle = self.file.as_mut().expect("temp file open");
        handle.flush()
    }

    /// Syncs and closes the handle, then renames the temp file to `dest`.
    ///
    /// A failed sync is ignored. If the rename fails, the error is returned
    /// and the temp file is removed when `self` drops.
    fn persist(mut self, dest: &Path) -> std::io::Result<()> {
        if let Some(handle) = self.file.take() {
            let _ = handle.sync_all();
        }
        let renamed = fs::rename(&self.path, dest);
        if renamed.is_ok() {
            self.persisted = true;
        }
        renamed
    }
}

impl Drop for AtomicTemp {
    /// Removes the temp file unless it was successfully persisted.
    fn drop(&mut self) {
        if self.persisted {
            return;
        }
        let _ = fs::remove_file(&self.path);
    }
}
1714
1715fn tempfile_in(dir: &Path) -> std::io::Result<AtomicTemp> {
1716 use std::time::{SystemTime, UNIX_EPOCH};
1717 let nanos = SystemTime::now()
1718 .duration_since(UNIX_EPOCH)
1719 .map(|d| d.as_nanos())
1720 .unwrap_or(0);
1721 let pid = std::process::id();
1722 let counter = next_temp_counter();
1725 let name = format!(".dbmd-index-{pid}-{nanos}-{counter}.tmp");
1726 let path = dir.join(name);
1727 let file = fs::OpenOptions::new()
1728 .write(true)
1729 .create_new(true)
1730 .open(&path)?;
1731 Ok(AtomicTemp {
1732 file: Some(file),
1733 path,
1734 persisted: false,
1735 })
1736}
1737
/// Returns the next value of a process-wide counter that starts at 0 and
/// increases by one per call.
fn next_temp_counter() -> u64 {
    use std::sync::atomic::{AtomicU64, Ordering::Relaxed};
    static NEXT: AtomicU64 = AtomicU64::new(0);
    NEXT.fetch_add(1, Relaxed)
}
1743
1744#[cfg(test)]
1745mod tests {
1746 use super::*;
1747 use std::collections::BTreeSet;
1748 use std::fs;
1749 use tempfile::TempDir;
1750
1751 fn mk_store() -> (TempDir, Store) {
1756 let dir = TempDir::new().unwrap();
1757 fs::write(dir.path().join("DB.md"), "# test store\n").unwrap();
1758 let store = Store {
1759 root: dir.path().to_path_buf(),
1760 config: crate::parser::Config::default(),
1761 };
1762 (dir, store)
1763 }
1764
1765 fn write_raw(store: &Store, rel: &str, fm: &str, body: &str) {
1768 let abs = store.root.join(rel);
1769 fs::create_dir_all(abs.parent().unwrap()).unwrap();
1770 fs::write(&abs, format!("---\n{fm}\n---\n{body}")).unwrap();
1771 }
1772
1773 fn write_doc(
1775 store: &Store,
1776 rel: &str,
1777 type_: &str,
1778 summary: Option<&str>,
1779 updated: Option<&str>,
1780 extra_yaml: &str,
1781 ) {
1782 let mut fm = format!("type: {type_}\n");
1783 if let Some(s) = summary {
1784 fm.push_str(&format!("summary: {s}\n"));
1785 }
1786 if let Some(u) = updated {
1787 fm.push_str(&format!("updated: {u}\n"));
1788 }
1789 fm.push_str(extra_yaml);
1790 write_raw(store, rel, fm.trim_end(), "\nbody text\n");
1791 }
1792
1793 fn read(store: &Store, rel: &str) -> String {
1794 fs::read_to_string(store.root.join(rel)).unwrap()
1795 }
1796
1797 fn exists(store: &Store, rel: &str) -> bool {
1798 store.root.join(rel).exists()
1799 }
1800
1801 fn snapshot_artifacts(store: &Store) -> BTreeMap<String, String> {
1804 let mut out = BTreeMap::new();
1805 for entry in walkdir::WalkDir::new(&store.root)
1806 .into_iter()
1807 .filter_map(|e| e.ok())
1808 {
1809 let p = entry.path();
1810 if is_index_artifact(p) {
1811 let rel = path_to_unix(&rel_to_store(&store.root, p).unwrap());
1812 out.insert(rel, fs::read_to_string(p).unwrap());
1813 }
1814 }
1815 out
1816 }
1817
1818 #[test]
1821 fn type_folder_aggregates_across_shards_in_recency_order() {
1822 let (_d, store) = mk_store();
1823 write_doc(
1826 &store,
1827 "sources/emails/2026/05/b-old.md",
1828 "email",
1829 Some("Older mail"),
1830 Some("2026-05-01T09:00:00Z"),
1831 "",
1832 );
1833 write_doc(
1834 &store,
1835 "sources/emails/2026/06/c-new.md",
1836 "email",
1837 Some("Newest mail"),
1838 Some("2026-06-15T12:00:00Z"),
1839 "",
1840 );
1841 write_doc(
1842 &store,
1843 "sources/emails/2026/05/a-mid.md",
1844 "email",
1845 Some("Middle mail"),
1846 Some("2026-05-20T08:00:00Z"),
1847 "",
1848 );
1849
1850 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
1851 let paths: Vec<String> = idx.records.iter().map(|r| path_to_unix(&r.path)).collect();
1852 assert_eq!(
1853 paths,
1854 vec![
1855 "sources/emails/2026/06/c-new.md",
1856 "sources/emails/2026/05/a-mid.md",
1857 "sources/emails/2026/05/b-old.md",
1858 ],
1859 "records must aggregate across shards, newest `updated` first"
1860 );
1861 }
1862
1863 #[test]
1864 fn type_folder_md_format_entries_tags_and_derived_updated() {
1865 let (_d, store) = mk_store();
1866 write_doc(
1867 &store,
1868 "records/contacts/sarah-chen.md",
1869 "contact",
1870 Some("Renewal champion at Acme"),
1871 Some("2026-05-27T10:00:00Z"),
1872 "tags:\n - renewal\n - acme\n",
1873 );
1874 write_doc(
1875 &store,
1876 "records/contacts/no-tags.md",
1877 "contact",
1878 Some("Plain contact"),
1879 Some("2026-05-26T10:00:00Z"),
1880 "",
1881 );
1882
1883 let idx = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
1884 let md = idx.to_markdown();
1885
1886 assert!(md.starts_with(
1889 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\nupdated: 2026-05-27T10:00:00Z\n---\n\n# records/contacts\n"
1890 ), "frontmatter/heading wrong:\n{md}");
1891
1892 assert!(
1894 md.contains(
1895 "- [[records/contacts/sarah-chen]] — Renewal champion at Acme · #renewal #acme\n"
1896 ),
1897 "tagged entry wrong:\n{md}"
1898 );
1899 assert!(
1901 md.contains("- [[records/contacts/no-tags]] — Plain contact\n"),
1902 "untagged entry wrong:\n{md}"
1903 );
1904 assert!(
1905 !md.contains("Plain contact ·"),
1906 "untagged entry must not emit a tag separator"
1907 );
1908 assert!(!md.contains("## More"), "no footer expected under the cap");
1910 }
1911
1912 #[test]
1913 fn missing_summary_becomes_placeholder_not_invented() {
1914 let (_d, store) = mk_store();
1915 write_doc(
1916 &store,
1917 "records/notes/x.md",
1918 "note",
1919 None,
1920 Some("2026-05-27T10:00:00Z"),
1921 "",
1922 );
1923 let idx = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
1924 assert_eq!(idx.records[0].summary, MISSING_SUMMARY);
1925 let md = idx.to_markdown();
1926 assert!(
1927 md.contains("- [[records/notes/x]] — (no summary)\n"),
1928 "missing summary must render the placeholder, not invent text:\n{md}"
1929 );
1930 }
1931
1932 #[test]
1935 fn jsonl_is_complete_structured_and_round_trips() {
1936 let (_d, store) = mk_store();
1937 write_doc(
1938 &store,
1939 "records/expenses/2026/05/e1.md",
1940 "expense",
1941 Some("Lunch with vendor"),
1942 Some("2026-05-10T10:00:00Z"),
1943 "created: 2026-05-10T09:00:00Z\nstatus: paid\namount: 42\ncompany: [[records/companies/acme]]\nrelated:\n - [[records/concepts/spend]]\ntags:\n - food\nlinks:\n - records/concepts/spend\n - [[records/concepts/renewal]]\n",
1944 );
1945 write_doc(
1946 &store,
1947 "records/expenses/2026/06/e2.md",
1948 "expense",
1949 Some("Cloud bill"),
1950 Some("2026-06-01T10:00:00Z"),
1951 "amount: 100\n",
1952 );
1953
1954 let idx = Index::build_type_folder(&store, Path::new("records/expenses")).unwrap();
1955 let jsonl = idx.to_jsonl();
1956 let lines: Vec<&str> = jsonl.lines().collect();
1957 assert_eq!(lines.len(), 2, "one JSON object per file, uncapped");
1958
1959 let r0: IndexRecord = serde_json::from_str(lines[0]).unwrap();
1961 assert_eq!(path_to_unix(&r0.path), "records/expenses/2026/06/e2.md");
1962 assert_eq!(
1963 r0, idx.records[0],
1964 "jsonl line must round-trip to the record"
1965 );
1966
1967 let r1: IndexRecord = serde_json::from_str(lines[1]).unwrap();
1970 assert_eq!(r1.type_, "expense");
1971 assert_eq!(r1.summary, "Lunch with vendor");
1972 assert_eq!(r1.tags, vec!["food".to_string()]);
1973 assert_eq!(
1974 r1.links,
1975 vec![
1976 "records/concepts/spend".to_string(),
1977 "[[records/concepts/renewal]]".to_string()
1978 ]
1979 );
1980 assert_eq!(
1981 r1.created,
1982 Some(DateTime::parse_from_rfc3339("2026-05-10T09:00:00Z").unwrap())
1983 );
1984 assert_eq!(r1.fields.get("status"), Some(&Value::from("paid")));
1985 assert_eq!(r1.fields.get("amount"), Some(&Value::from(42)));
1986 assert_eq!(
1987 r1.fields.get("company"),
1988 Some(&Value::from("[[records/companies/acme]]"))
1989 );
1990 assert_eq!(
1991 r1.fields.get("related"),
1992 Some(&serde_json::json!(["[[records/concepts/spend]]"]))
1993 );
1994 for reserved in [
1996 "path", "type", "summary", "tags", "links", "created", "updated",
1997 ] {
1998 assert!(
1999 !r1.fields.contains_key(reserved),
2000 "reserved key {reserved} must not appear in fields"
2001 );
2002 }
2003
2004 assert!(
2006 lines[1].starts_with(
2007 r#"{"path":"records/expenses/2026/05/e1.md","type":"expense","summary":"Lunch with vendor","tags":["food"],"links":["records/concepts/spend","[[records/concepts/renewal]]"],"created":"2026-05-10T09:00:00Z","updated":"2026-05-10T10:00:00Z","#
2008 ),
2009 "jsonl key order not stable:\n{}",
2010 lines[1]
2011 );
2012 assert!(
2017 lines[1].ends_with(r#""amount":42,"company":"[[records/companies/acme]]","meta-type":"fact","related":["[[records/concepts/spend]]"],"status":"paid"}"#),
2018 "extras must be sorted:\n{}",
2019 lines[1]
2020 );
2021 }
2022
2023 #[test]
2026 fn over_cap_md_shows_500_plus_footer_jsonl_holds_all() {
2027 let (_d, store) = mk_store();
2028 let total = MD_CAP + 7;
2029 for i in 0..total {
2030 let day = 1 + (i % 27);
2032 let rel = format!("sources/emails/2026/05/m-{i:04}.md");
2033 let updated = format!("2026-05-{day:02}T00:00:{:02}Z", i % 60);
2034 write_doc(
2035 &store,
2036 &rel,
2037 "email",
2038 Some(&format!("mail {i}")),
2039 Some(&updated),
2040 "",
2041 );
2042 }
2043 let idx = Index::build_type_folder(&store, Path::new("sources/emails")).unwrap();
2044 assert_eq!(idx.records.len(), total, "jsonl/records keep every file");
2045
2046 let md = idx.to_markdown();
2047 let entry_lines = md.lines().filter(|l| l.starts_with("- [[")).count();
2048 assert_eq!(entry_lines, MD_CAP, "md browse view is capped at 500");
2049
2050 assert!(
2051 md.contains("## More\n\n"),
2052 "over-cap md needs a More footer"
2053 );
2054 assert!(
2055 md.contains(&format!(
2056 "This folder has {total} files. The 500 most recent are listed above.\n"
2057 )),
2058 "footer count wrong:\n{md}"
2059 );
2060 assert!(
2061 md.contains(
2062 "Use `dbmd index query --type email --in sources` for the complete catalog.\n"
2063 ),
2064 "footer must infer type=email layer=sources:\n{md}"
2065 );
2066
2067 let jsonl = idx.to_jsonl();
2068 assert_eq!(jsonl.lines().count(), total, "jsonl is uncapped");
2069 }
2070
2071 #[test]
2074 fn sort_breaks_ties_by_path_and_puts_undated_last() {
2075 let mut recs = vec![
2076 rec("z/a.md", Some("2026-05-01T00:00:00Z")),
2077 rec("a/b.md", Some("2026-05-01T00:00:00Z")), rec("m/c.md", None), rec("b/d.md", Some("2026-06-01T00:00:00Z")), ];
2081 sort_records(&mut recs);
2082 let order: Vec<String> = recs.iter().map(|r| path_to_unix(&r.path)).collect();
2083 assert_eq!(order, vec!["b/d.md", "a/b.md", "z/a.md", "m/c.md"]);
2084 }
2085
2086 fn rec(path: &str, updated: Option<&str>) -> IndexRecord {
2087 IndexRecord {
2088 path: PathBuf::from(path),
2089 type_: "t".into(),
2090 summary: "s".into(),
2091 tags: vec![],
2092 links: vec![],
2093 created: None,
2094 updated: updated.map(|u| DateTime::parse_from_rfc3339(u).unwrap()),
2095 fields: BTreeMap::new(),
2096 }
2097 }
2098
2099 #[test]
2102 fn layer_index_lists_type_folders_with_counts() {
2103 let (_d, store) = mk_store();
2104 write_doc(
2105 &store,
2106 "records/contacts/a.md",
2107 "contact",
2108 Some("Contact A older"),
2109 Some("2026-05-01T00:00:00Z"),
2110 "",
2111 );
2112 write_doc(
2113 &store,
2114 "records/contacts/b.md",
2115 "contact",
2116 Some("Contact B newest"),
2117 Some("2026-05-09T00:00:00Z"),
2118 "",
2119 );
2120 write_doc(
2121 &store,
2122 "records/companies/x.md",
2123 "company",
2124 Some("Acme Inc"),
2125 Some("2026-05-05T00:00:00Z"),
2126 "",
2127 );
2128 Index::write_level(&store, &IndexLevel::TypeFolder("records/contacts".into())).unwrap();
2130 Index::write_level(&store, &IndexLevel::TypeFolder("records/companies".into())).unwrap();
2131
2132 Index::write_level(&store, &IndexLevel::Layer(Layer::Records)).unwrap();
2133 let md = read(&store, "records/index.md");
2134
2135 assert!(
2136 md.starts_with("---\ntype: index\nscope: layer\nfolder: records\n"),
2137 "layer fm:\n{md}"
2138 );
2139 let companies_at = md.find("companies/index").unwrap();
2141 let contacts_at = md.find("contacts/index").unwrap();
2142 assert!(
2143 companies_at < contacts_at,
2144 "type folders must be alphabetical"
2145 );
2146 assert!(
2149 md.contains("- [[records/contacts/index|Contacts]] (2)\n"),
2150 "contacts entry:\n{md}"
2151 );
2152 assert!(
2153 md.contains("- [[records/companies/index|Companies]] (1)\n"),
2154 "companies entry:\n{md}"
2155 );
2156 assert!(
2158 !md.contains("Contact B newest") && !md.contains("Acme Inc"),
2159 "layer rollup must not quote a member summary:\n{md}"
2160 );
2161 assert!(
2163 md.contains("updated: 2026-05-09T00:00:00Z\n"),
2164 "layer updated must be max child:\n{md}"
2165 );
2166 }
2167
2168 #[test]
2169 fn folders_section_supplies_authored_display_and_description() {
2170 let (_d, mut store) = mk_store();
2174 store.config.folders.insert(
2175 "records/contacts".into(),
2176 crate::parser::FolderMeta {
2177 display: None,
2178 description: Some("people across customer + prospect accounts".into()),
2179 },
2180 );
2181 store.config.folders.insert(
2182 "sources/hubspot-exports".into(),
2183 crate::parser::FolderMeta {
2184 display: Some("HubSpot exports".into()),
2185 description: Some("deal + pipeline exports".into()),
2186 },
2187 );
2188 write_doc(
2189 &store,
2190 "records/contacts/a.md",
2191 "contact",
2192 Some("Contact A"),
2193 Some("2026-05-01T00:00:00Z"),
2194 "",
2195 );
2196 write_doc(
2198 &store,
2199 "records/companies/x.md",
2200 "company",
2201 Some("Acme Inc"),
2202 Some("2026-05-05T00:00:00Z"),
2203 "",
2204 );
2205 write_doc(
2206 &store,
2207 "sources/hubspot-exports/d.md",
2208 "hubspot-export",
2209 Some("a single deal export"),
2210 Some("2026-05-03T00:00:00Z"),
2211 "",
2212 );
2213
2214 Index::rebuild_all(&store).unwrap();
2215
2216 let records_layer = read(&store, "records/index.md");
2218 assert!(
2219 records_layer.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
2220 "authored description must surface:\n{records_layer}"
2221 );
2222 assert!(
2224 records_layer.contains("- [[records/companies/index|Companies]] (1)\n")
2225 && !records_layer.contains("Acme Inc"),
2226 "un-described folder is counts-only:\n{records_layer}"
2227 );
2228
2229 let sources_layer = read(&store, "sources/index.md");
2231 assert!(
2232 sources_layer.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
2233 "display override + description must surface:\n{sources_layer}"
2234 );
2235
2236 let root = read(&store, "index.md");
2238 assert!(
2239 root.contains("- [[records/contacts/index|Contacts]] (1) — people across customer + prospect accounts\n"),
2240 "root surfaces authored description:\n{root}"
2241 );
2242 assert!(
2243 root.contains("- [[sources/hubspot-exports/index|HubSpot exports]] (1) — deal + pipeline exports\n"),
2244 "root surfaces display override:\n{root}"
2245 );
2246 }
2247
2248 #[test]
2249 fn default_display_turns_separators_to_spaces_and_caps() {
2250 assert_eq!(default_display("contacts"), "Contacts");
2251 assert_eq!(default_display("hubspot-exports"), "Hubspot exports");
2252 assert_eq!(default_display("usage_exports"), "Usage exports");
2253 }
2254
2255 #[test]
2256 fn root_index_groups_layers_with_totals_and_per_type_counts() {
2257 let (_d, store) = mk_store();
2258 write_doc(
2259 &store,
2260 "sources/emails/2026/05/a.md",
2261 "email",
2262 Some("Mail"),
2263 Some("2026-05-01T00:00:00Z"),
2264 "",
2265 );
2266 write_doc(
2267 &store,
2268 "sources/docs/d.md",
2269 "doc",
2270 Some("Doc"),
2271 Some("2026-05-02T00:00:00Z"),
2272 "",
2273 );
2274 write_doc(
2275 &store,
2276 "records/contacts/c.md",
2277 "contact",
2278 Some("C"),
2279 Some("2026-05-03T00:00:00Z"),
2280 "",
2281 );
2282 Index::rebuild_all(&store).unwrap();
2285 let md = read(&store, "index.md");
2286
2287 assert!(
2288 md.starts_with("---\ntype: index\nscope: root\n"),
2289 "root fm:\n{md}"
2290 );
2291 assert!(md.contains("# Knowledge base index\n"), "root title:\n{md}");
2292 let sources_h = md
2294 .find("## Sources (2)")
2295 .expect("sources heading w/ total 2");
2296 let records_h = md
2297 .find("## Records (1)")
2298 .expect("records heading w/ total 1");
2299 assert!(sources_h < records_h, "Sources must precede Records");
2300 assert!(!md.contains("## Wiki"), "empty layer gets no section");
2301 assert!(
2303 md.contains("- [[sources/docs/index|Docs]] (1)\n"),
2304 "root docs entry:\n{md}"
2305 );
2306 assert!(
2307 md.contains("- [[sources/emails/index|Emails]] (1)\n"),
2308 "root emails entry:\n{md}"
2309 );
2310 assert!(
2311 md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
2312 "root contacts entry:\n{md}"
2313 );
2314 assert!(!md.contains("— "), "root entries carry no preview text");
2315 }
2316
2317 #[test]
2320 fn on_write_matches_rebuild_byte_for_byte() {
2321 let (_d1, wt) = mk_store();
2324 let (_d2, rb) = mk_store();
2325
2326 let docs: &[(&str, &str, &str, &str, &str)] = &[
2327 (
2328 "sources/emails/2026/05/e1.md",
2329 "email",
2330 "First mail",
2331 "2026-05-01T10:00:00Z",
2332 "tags:\n - inbox\n",
2333 ),
2334 (
2335 "sources/emails/2026/06/e2.md",
2336 "email",
2337 "Second mail",
2338 "2026-06-01T10:00:00Z",
2339 "",
2340 ),
2341 (
2342 "records/contacts/sarah.md",
2343 "contact",
2344 "Sarah",
2345 "2026-05-15T10:00:00Z",
2346 "links:\n - records/profiles/sarah\n",
2347 ),
2348 (
2349 "records/contacts/elena.md",
2350 "contact",
2351 "Elena",
2352 "2026-05-20T10:00:00Z",
2353 "status: active\n",
2354 ),
2355 (
2356 "records/profiles/sarah.md",
2357 "profile",
2358 "Sarah bio",
2359 "2026-05-21T10:00:00Z",
2360 "",
2361 ),
2362 ];
2363
2364 for (rel, t, sum, upd, extra) in docs {
2365 write_doc(&wt, rel, t, Some(sum), Some(upd), extra);
2366 write_doc(&rb, rel, t, Some(sum), Some(upd), extra);
2367 Index::on_write(&wt, Path::new(rel)).unwrap();
2368 }
2369 Index::rebuild_all(&rb).unwrap();
2370
2371 let a = snapshot_artifacts(&wt);
2372 let b = snapshot_artifacts(&rb);
2373 assert_eq!(
2374 a.keys().collect::<Vec<_>>(),
2375 b.keys().collect::<Vec<_>>(),
2376 "same set of index artifacts must exist"
2377 );
2378 for (k, v) in &a {
2379 assert_eq!(v, &b[k], "artifact {k} differs between write-through and rebuild:\n--- write-through ---\n{v}\n--- rebuild ---\n{}", b[k]);
2380 }
2381 assert!(a.contains_key("index.md"));
2383 assert!(a.contains_key("sources/emails/index.jsonl"));
2384 assert!(a.contains_key("records/contacts/index.md"));
2385 }
2386
2387 #[test]
2404 fn loop_op_does_not_walk_sibling_content_tree() {
2405 let (_d, store) = mk_store();
2406
2407 write_doc(
2411 &store,
2412 "records/companies/acme.md",
2413 "company",
2414 Some("Acme Inc"),
2415 Some("2026-05-05T00:00:00Z"),
2416 "",
2417 );
2418 write_doc(
2419 &store,
2420 "records/companies/globex.md",
2421 "company",
2422 Some("Globex"),
2423 Some("2026-05-06T00:00:00Z"),
2424 "",
2425 );
2426 assert!(
2427 !exists(&store, "records/companies/index.jsonl"),
2428 "precondition: companies must be un-indexed"
2429 );
2430
2431 write_doc(
2433 &store,
2434 "records/contacts/sarah.md",
2435 "contact",
2436 Some("Sarah"),
2437 Some("2026-05-15T00:00:00Z"),
2438 "",
2439 );
2440 Index::on_write(&store, Path::new("records/contacts/sarah.md")).unwrap();
2441
2442 let layer_md = read(&store, "records/index.md");
2444 let root_md = read(&store, "index.md");
2445 assert!(
2447 layer_md.contains("- [[records/contacts/index|Contacts]] (1)\n")
2448 && !layer_md.contains("Sarah"),
2449 "layer must reflect the written folder, counts only:\n{layer_md}"
2450 );
2451 assert!(
2452 root_md.contains("- [[records/contacts/index|Contacts]] (1)\n"),
2453 "root must reflect the written folder:\n{root_md}"
2454 );
2455
2456 assert!(
2460 !layer_md.contains("companies"),
2461 "loop op walked the sibling content tree: layer rollup counts un-indexed records/companies\n{layer_md}"
2462 );
2463 assert!(
2464 !root_md.contains("companies"),
2465 "loop op walked the sibling content tree: root rollup counts un-indexed records/companies\n{root_md}"
2466 );
2467 assert!(
2469 root_md.contains("## Records (1)"),
2470 "root layer total must count only the sidecar-indexed folder (1), not walked siblings (would be 3):\n{root_md}"
2471 );
2472
2473 let (_d2, rb) = mk_store();
2478 for (rel, t, s, u) in [
2479 (
2480 "records/companies/acme.md",
2481 "company",
2482 "Acme Inc",
2483 "2026-05-05T00:00:00Z",
2484 ),
2485 (
2486 "records/companies/globex.md",
2487 "company",
2488 "Globex",
2489 "2026-05-06T00:00:00Z",
2490 ),
2491 (
2492 "records/contacts/sarah.md",
2493 "contact",
2494 "Sarah",
2495 "2026-05-15T00:00:00Z",
2496 ),
2497 ] {
2498 write_doc(&rb, rel, t, Some(s), Some(u), "");
2499 }
2500 Index::on_write(&store, Path::new("records/companies/acme.md")).unwrap();
2501 Index::on_write(&store, Path::new("records/companies/globex.md")).unwrap();
2502 Index::rebuild_all(&rb).unwrap();
2503 let a = snapshot_artifacts(&store);
2504 let b = snapshot_artifacts(&rb);
2505 assert_eq!(
2506 a.keys().collect::<BTreeSet<_>>(),
2507 b.keys().collect::<BTreeSet<_>>(),
2508 "same artifact set after indexing both folders"
2509 );
2510 for (k, v) in &a {
2511 assert_eq!(
2512 v, &b[k],
2513 "after indexing the sibling too, loop result must equal rebuild for {k}"
2514 );
2515 }
2516 assert!(
2517 read(&store, "index.md").contains("## Records (3)"),
2518 "now that both folders are indexed, the root total is 3"
2519 );
2520 }
2521
2522 #[test]
2535 fn custom_type_at_shard_path_for_is_indexable_end_to_end() {
2536 let (_d1, wt) = mk_store();
2537 let (_d2, rb) = mk_store();
2538
2539 let rel = wt
2541 .shard_path_for(
2542 "profile",
2543 &crate::parser::Frontmatter::default(),
2544 "renewal-theme",
2545 )
2546 .unwrap();
2547 let rel_str = path_to_unix(&rel);
2548 assert!(
2551 type_folder_of(&rel).is_some(),
2552 "shard_path_for produced a path the index cannot file: {rel_str}"
2553 );
2554
2555 write_doc(
2556 &wt,
2557 &rel_str,
2558 "profile",
2559 Some("Renewal theme"),
2560 Some("2026-05-21T10:00:00Z"),
2561 "",
2562 );
2563 write_doc(
2564 &rb,
2565 &rel_str,
2566 "profile",
2567 Some("Renewal theme"),
2568 Some("2026-05-21T10:00:00Z"),
2569 "",
2570 );
2571
2572 Index::on_write(&wt, &rel)
2575 .expect("on_write must succeed for a toolkit-computed custom-type path");
2576 Index::rebuild_all(&rb).unwrap();
2577
2578 let page_link = wiki_target(&rel); let tf_md = read(&rb, "records/profile/index.md");
2585 assert!(
2586 tf_md.contains(&format!("[[{page_link}]]")),
2587 "type-folder index must list the page link, got:\n{tf_md}"
2588 );
2589 assert!(
2590 exists(&rb, "records/profile/index.jsonl"),
2591 "type-folder jsonl must exist"
2592 );
2593 assert!(
2594 read(&rb, "records/profile/index.jsonl").contains(&rel_str),
2595 "type-folder jsonl must contain the page row"
2596 );
2597 let layer_md = read(&rb, "records/index.md");
2600 assert!(
2601 layer_md.contains("records/profile/index"),
2602 "layer index must roll up the records/profile type-folder, got:\n{layer_md}"
2603 );
2604
2605 let a = snapshot_artifacts(&wt);
2607 let b = snapshot_artifacts(&rb);
2608 assert_eq!(
2609 a.keys().collect::<Vec<_>>(),
2610 b.keys().collect::<Vec<_>>(),
2611 "loop and sweep must produce the same artifact set"
2612 );
2613 for (k, v) in &a {
2614 assert_eq!(
2615 v, &b[k],
2616 "custom-type artifact {k} differs between on_write and rebuild"
2617 );
2618 }
2619 }
2620
2621 #[test]
2622 fn on_remove_then_rebuild_match_and_pull_in_next_over_cap() {
2623 let (_d1, wt) = mk_store();
2624 let (_d2, rb) = mk_store();
2625 let total = MD_CAP + 3; let mut all_rels = Vec::new();
2627 for i in 0..total {
2628 let rel = format!("sources/emails/2026/05/m-{i:04}.md");
2629 let updated = format!("2026-05-10T00:{:02}:{:02}Z", i / 60, i % 60);
2631 write_doc(
2632 &wt,
2633 &rel,
2634 "email",
2635 Some(&format!("mail {i}")),
2636 Some(&updated),
2637 "",
2638 );
2639 write_doc(
2640 &rb,
2641 &rel,
2642 "email",
2643 Some(&format!("mail {i}")),
2644 Some(&updated),
2645 "",
2646 );
2647 all_rels.push(rel);
2648 }
2649 Index::rebuild_all(&wt).unwrap();
2651 let newest = &all_rels[total - 1]; fs::remove_file(wt.root.join(newest)).unwrap();
2653 Index::on_remove(&wt, Path::new(newest)).unwrap();
2654
2655 fs::remove_file(rb.root.join(newest)).unwrap();
2657 Index::rebuild_all(&rb).unwrap();
2658
2659 let a = snapshot_artifacts(&wt);
2660 let b = snapshot_artifacts(&rb);
2661 for (k, v) in &a {
2662 assert_eq!(v, &b[k], "after remove, artifact {k} drifted from rebuild");
2663 }
2664
2665 let md = read(&wt, "sources/emails/index.md");
2668 assert_eq!(md.lines().filter(|l| l.starts_with("- [[")).count(), MD_CAP);
2669 assert!(
2671 !md.contains(&format!("[[{}]]", wiki_target(Path::new(newest)))),
2672 "removed file must not be listed in md"
2673 );
2674 let pulled_in = &all_rels[2];
2678 assert!(
2679 md.contains(&format!("[[{}]]", wiki_target(Path::new(pulled_in)))),
2680 "the 501st-most-recent must be pulled into the browse view after a removal"
2681 );
2682 assert!(
2683 md.contains(&format!("This folder has {} files.", total - 1)),
2684 "footer count must decrement:\n{}",
2685 md.lines().rev().take(4).collect::<Vec<_>>().join("\n")
2686 );
2687 let jsonl = read(&wt, "sources/emails/index.jsonl");
2688 assert_eq!(
2689 jsonl.lines().count(),
2690 total - 1,
2691 "jsonl loses exactly the removed file"
2692 );
2693 assert!(
2694 !jsonl.contains(&path_to_unix(Path::new(newest))),
2695 "removed file must be gone from the jsonl too"
2696 );
2697 }
2698
/// Moving a document from one type folder to another through
/// `Index::on_rename` must leave the same artifacts, byte for byte, as a
/// from-scratch `Index::rebuild_all` over an identically arranged store.
#[test]
fn on_rename_cross_folder_matches_rebuild() {
    // Seed documents as (path, type, summary, updated).
    const SEED: [(&str, &str, &str, &str); 3] = [
        (
            "records/contacts/a.md",
            "contact",
            "A",
            "2026-05-01T00:00:00Z",
        ),
        (
            "records/contacts/b.md",
            "contact",
            "B",
            "2026-05-02T00:00:00Z",
        ),
        (
            "records/companies/x.md",
            "company",
            "X",
            "2026-05-03T00:00:00Z",
        ),
    ];
    const OLD: &str = "records/contacts/b.md";
    const NEW: &str = "records/companies/b.md";

    // `wt` is maintained by write-through; `rb` is only ever rebuilt.
    let (_wt_dir, wt) = mk_store();
    let (_rb_dir, rb) = mk_store();
    for store in [&wt, &rb] {
        for &(rel, kind, summary, updated) in &SEED {
            write_doc(store, rel, kind, Some(summary), Some(updated), "");
        }
    }
    Index::rebuild_all(&wt).unwrap();

    // Apply the same on-disk move to both stores.
    for store in [&wt, &rb] {
        fs::create_dir_all(store.root.join("records/companies")).unwrap();
        fs::rename(store.root.join(OLD), store.root.join(NEW)).unwrap();
    }
    Index::on_rename(&wt, Path::new(OLD), Path::new(NEW)).unwrap();
    Index::rebuild_all(&rb).unwrap();

    // Same artifact names, then same artifact contents.
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    let wt_names: Vec<_> = a.keys().collect();
    let rb_names: Vec<_> = b.keys().collect();
    assert_eq!(wt_names, rb_names);
    for (k, v) in &a {
        assert_eq!(v, &b[k], "rename: artifact {k} drifted from rebuild");
    }

    // The entry left the source folder's index and appears in the target's.
    let source_index = read(&wt, "records/contacts/index.md");
    assert!(!source_index.contains("records/contacts/b]]"));
    let target_index = read(&wt, "records/companies/index.md");
    assert!(target_index.contains("[[records/companies/b]]"));
}
2757
/// Calling `Index::on_write` twice for the same path must replace the
/// existing catalog entry rather than append a second one.
#[test]
fn on_write_updates_existing_entry_in_place() {
    const DOC: &str = "records/contacts/a.md";
    // Successive revisions of the same document as (summary, updated).
    const REVISIONS: [(&str, &str); 2] = [
        ("Original", "2026-05-01T00:00:00Z"),
        ("Revised", "2026-05-09T00:00:00Z"),
    ];

    let (_guard, store) = mk_store();
    for (summary, updated) in REVISIONS {
        write_doc(&store, DOC, "contact", Some(summary), Some(updated), "");
        Index::on_write(&store, Path::new(DOC)).unwrap();
    }

    let catalog = read(&store, "records/contacts/index.jsonl");
    let line_count = catalog.lines().count();
    assert_eq!(line_count, 1, "upsert must not duplicate the line");
    assert!(catalog.contains("Revised"), "jsonl must reflect the update");
    assert!(
        !catalog.contains("Original"),
        "stale line must be gone (compacted)"
    );

    let listing = read(&store, "records/contacts/index.md");
    assert!(listing.contains("- [[records/contacts/a]] — Revised\n"));
    assert!(
        listing.contains("updated: 2026-05-09T00:00:00Z\n"),
        "index updated must track the newer member"
    );
}
2799
/// `Index::render_dry_run` must return both rendered artifacts, each
/// introduced by a `--- <path> ---` separator, and must leave the store
/// without any index file on disk.
#[test]
fn dry_run_emits_separators_and_writes_nothing() {
    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );

    let level = IndexLevel::TypeFolder("sources/emails".into());
    let out = Index::render_dry_run(&store, &level).unwrap();

    // The rendered text carries both separators and the markdown entry.
    let has_md_separator = out.contains("--- sources/emails/index.md ---\n");
    assert!(has_md_separator, "md separator:\n{out}");
    let has_jsonl_separator = out.contains("--- sources/emails/index.jsonl ---\n");
    assert!(has_jsonl_separator, "jsonl separator:\n{out}");
    let has_entry = out.contains("- [[sources/emails/2026/05/a]] — Mail");
    assert!(has_entry, "md body present");

    // Nothing may have been written to disk.
    for artifact in ["sources/emails/index.md", "sources/emails/index.jsonl"] {
        assert!(!exists(&store, artifact), "dry-run must not write");
    }
}
2837
/// `Index::cleanup` must delete index sidecars sitting inside a shard
/// directory and the index of a folder that holds no documents, while
/// leaving the real document untouched.
#[test]
fn cleanup_removes_noncanonical_and_empty_indexes() {
    // Planted stale artifacts, each paired with the failure message used if
    // it survives cleanup.
    const STALE: [(&str, &str); 3] = [
        (
            "sources/emails/2026/05/index.md",
            "shard index must be deleted",
        ),
        (
            "sources/emails/2026/05/index.jsonl",
            "shard jsonl must be deleted",
        ),
        (
            "records/empty/index.md",
            "empty-folder index must be deleted",
        ),
    ];

    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "sources/emails/2026/05/a.md",
        "email",
        Some("Mail"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    fs::create_dir_all(store.root.join("records/empty")).unwrap();
    for (rel, _) in STALE {
        fs::write(store.root.join(rel), "stale\n").unwrap();
    }

    Index::cleanup(&store).unwrap();

    for (rel, why) in STALE {
        assert!(!exists(&store, rel), "{why}");
    }
    // The document itself is content, not an artifact, and must survive.
    assert!(exists(&store, "sources/emails/2026/05/a.md"));
}
2881
/// After the only document is removed, a second `Index::rebuild_all` must
/// delete the type-folder, layer and root indexes the first rebuild wrote.
#[test]
fn rebuild_deletes_stale_indexes_for_emptied_folders() {
    // Index artifacts from innermost to outermost, with the message reported
    // if one is still present after the store has been emptied.
    const INDEXES: [(&str, &str); 3] = [
        ("records/contacts/index.md", "emptied type-folder index gone"),
        ("records/index.md", "now-empty layer index gone"),
        ("index.md", "now-empty root index gone"),
    ];

    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/a.md",
        "contact",
        Some("A"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::rebuild_all(&store).unwrap();
    for (rel, _) in INDEXES {
        assert!(exists(&store, rel));
    }

    fs::remove_file(store.root.join("records/contacts/a.md")).unwrap();
    Index::rebuild_all(&store).unwrap();
    for (rel, why) in INDEXES {
        assert!(!exists(&store, rel), "{why}");
    }
}
2911
/// Property-style check: after 120 pseudo-random write / remove / rename
/// operations, the store maintained incrementally through `Index::on_write`,
/// `Index::on_remove` and `Index::on_rename` (`wt`) must hold exactly the
/// same artifacts as a mirror store (`rb`) that received the same file
/// operations and was indexed once at the end with `Index::rebuild_all`.
#[test]
fn property_writethrough_equals_rebuild_under_mixed_ops() {
    let (_d1, wt) = mk_store();
    let (_d2, rb) = mk_store();
    // Fixed-seed multiply-and-add generator (wrapping arithmetic), so every
    // run replays the same operation sequence. The order of `next()` calls
    // below therefore determines the whole scenario.
    let mut seed: u64 = 0x9E3779B97F4A7C15;
    let mut next = || {
        seed = seed
            .wrapping_mul(6364136223846793005)
            .wrapping_add(1442695040888963407);
        // Only the upper bits of the state are handed out.
        (seed >> 33) as u32
    };

    // `folders[i]` is paired with `types[i]`.
    let folders = ["sources/emails", "records/contacts", "records/profiles"];
    let types = ["email", "contact", "profile"];
    // Relative paths of the documents that currently exist in both stores.
    let mut live: Vec<String> = Vec::new();
    for step in 0..120u32 {
        let r = next();
        let op = r % 10;
        if op < 6 || live.is_empty() {
            // Write (create or overwrite). Also forced whenever nothing is
            // live, because remove and rename need an existing document.
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            let id = next() % 40;
            let rel = if folder == "sources/emails" {
                // Emails are placed in a month sub-directory (05 or 06).
                let month = 5 + (id % 2);
                format!("{folder}/2026/{month:02}/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            // Timestamp derived from `step` and `id`, so it is deterministic.
            let updated = format!(
                "2026-05-{:02}T{:02}:{:02}:00Z",
                1 + (step % 27),
                step % 24,
                id % 60
            );
            // Every third id additionally carries a `tags` list.
            let extra = if id % 3 == 0 {
                "tags:\n - x\n - y\n"
            } else {
                ""
            };
            write_doc(
                &wt,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            write_doc(
                &rb,
                &rel,
                types[fi],
                Some(&format!("sum {step}")),
                Some(&updated),
                extra,
            );
            // Only the write-through store is indexed incrementally.
            Index::on_write(&wt, Path::new(&rel)).unwrap();
            if !live.contains(&rel) {
                live.push(rel);
            }
        } else if op < 8 {
            // Remove a randomly chosen live document from both stores.
            let idx = (next() as usize) % live.len();
            let rel = live.remove(idx);
            fs::remove_file(wt.root.join(&rel)).unwrap();
            // NOTE(review): errors on the mirror store are ignored here while
            // the write-through side unwraps — presumably the file always
            // exists in `rb` too; confirm the leniency is intentional.
            fs::remove_file(rb.root.join(&rel)).ok();
            Index::on_remove(&wt, Path::new(&rel)).unwrap();
        } else {
            // Rename a randomly chosen live document, possibly across folders.
            let idx = (next() as usize) % live.len();
            let old = live[idx].clone();
            let fi = (next() as usize) % folders.len();
            let folder = folders[fi];
            // Rename targets use ids from 50 upwards, whereas writes use
            // ids below 40.
            let id = 50 + (next() % 40);
            let new = if folder == "sources/emails" {
                format!("{folder}/2026/05/f-{id:02}.md")
            } else {
                format!("{folder}/f-{id:02}.md")
            };
            // Skip a rename whose target is the source itself or another
            // live document; the random draws above have already been made.
            if new == old || live.contains(&new) {
                continue;
            }
            fs::create_dir_all(wt.root.join(&new).parent().unwrap()).unwrap();
            fs::create_dir_all(rb.root.join(&new).parent().unwrap()).unwrap();
            fs::rename(wt.root.join(&old), wt.root.join(&new)).unwrap();
            fs::rename(rb.root.join(&old), rb.root.join(&new)).unwrap();
            Index::on_rename(&wt, Path::new(&old), Path::new(&new)).unwrap();
            live[idx] = new;
        }
    }

    // Index the mirror store once, from scratch, then compare both stores.
    Index::rebuild_all(&rb).unwrap();
    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&rb);
    assert_eq!(
        a.keys().collect::<BTreeSet<_>>(),
        b.keys().collect::<BTreeSet<_>>(),
        "write-through and rebuild must produce the same set of artifacts"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "INVARIANT VIOLATED: artifact {k} differs after mixed ops\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
    // Guards against the comparison passing trivially on two empty snapshots.
    assert!(
        !a.is_empty(),
        "the run must have produced at least one artifact"
    );
}
3030
/// A real document that happens to be named `index.md` inside a shard
/// directory must survive both `Index::cleanup` and `Index::rebuild_all`
/// with its content intact.
#[test]
fn cleanup_preserves_user_content_named_index_md_in_shard() {
    const USER_DOC: &str = "sources/emails/2026/06/index.md";

    let (_guard, store) = mk_store();
    write_doc(
        &store,
        USER_DOC,
        "email",
        Some("Important imported mail"),
        Some("2026-06-11T04:23:25Z"),
        "",
    );

    Index::cleanup(&store).unwrap();
    let survived_cleanup = exists(&store, USER_DOC);
    assert!(
        survived_cleanup,
        "cleanup must not delete a user content file named index.md"
    );

    Index::rebuild_all(&store).unwrap();
    let survived_rebuild = exists(&store, USER_DOC);
    assert!(
        survived_rebuild,
        "rebuild_all must not delete a user content file named index.md"
    );

    let kept = read(&store, USER_DOC);
    assert!(
        kept.contains("Important imported mail"),
        "the user's record content must be intact"
    );
}
3065
/// The `index.md` / `index.jsonl` pair at the root of a non-empty type
/// folder must be left in place by `Index::cleanup`.
#[test]
fn cleanup_keeps_canonical_type_folder_root_sidecars() {
    // The sidecars under test, each with the message reported if cleanup
    // removes it.
    const SIDECARS: [(&str, &str); 2] = [
        (
            "records/contacts/index.md",
            "cleanup must keep the canonical type-folder index.md (non-empty folder)",
        ),
        (
            "records/contacts/index.jsonl",
            "cleanup must keep the canonical type-folder index.jsonl (non-empty folder)",
        ),
    ];

    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    let level = IndexLevel::TypeFolder("records/contacts".into());
    Index::write_level(&store, &level).unwrap();
    for (rel, _) in SIDECARS {
        assert!(exists(&store, rel));
    }

    Index::cleanup(&store).unwrap();
    for (rel, why) in SIDECARS {
        assert!(exists(&store, rel), "{why}");
    }
}
3094
/// `Index::on_write` invoked on the generated `index.md` itself must be a
/// no-op for the catalog: no extra row, no `index`-typed record, and no
/// inflated count in the root index.
#[test]
fn on_write_ignores_index_artifact_no_phantom_row() {
    const CATALOG: &str = "records/contacts/index.jsonl";

    let (_guard, store) = mk_store();
    write_doc(
        &store,
        "records/contacts/alice.md",
        "contact",
        Some("Alice"),
        Some("2026-05-01T00:00:00Z"),
        "",
    );
    Index::on_write(&store, Path::new("records/contacts/alice.md")).unwrap();
    let rows_before = read(&store, CATALOG).lines().count();
    assert_eq!(rows_before, 1);

    // Feed the artifact's own path back into the write hook.
    Index::on_write(&store, Path::new("records/contacts/index.md")).unwrap();

    let catalog_after = read(&store, CATALOG);
    let rows_after = catalog_after.lines().count();
    assert_eq!(
        rows_after, 1,
        "on_write on index.md must not add a phantom self-row"
    );
    assert!(
        !catalog_after.contains("\"type\":\"index\""),
        "the catalog artifact must never appear as a catalogued row"
    );

    let root = read(&store, "index.md");
    assert!(
        root.contains("[[records/contacts/index|Contacts]] (1)"),
        "count must not inflate:\n{root}"
    );
}
3135
/// A block-scalar summary whose second line looks like an index entry must
/// be rendered as a single entry line, with its newline collapsed to a space.
#[test]
fn multiline_summary_is_single_lined_in_index_md() {
    // Frontmatter whose summary carries a forged `- [[...]]` line.
    const FRONTMATTER: &str = "type: note\nupdated: 2026-06-10T00:00:00Z\nsummary: |-\n legit first line\n - [[records/secrets/fake|Click me]] — injected entry";

    let (_guard, store) = mk_store();
    write_raw(&store, "records/notes/evil.md", FRONTMATTER, "\nbody\n");

    let index = Index::build_type_folder(&store, Path::new("records/notes")).unwrap();
    let md = index.to_markdown();

    let mut entry_lines = 0;
    for line in md.lines() {
        if line.starts_with("- [[") {
            entry_lines += 1;
        }
    }
    assert_eq!(
        entry_lines, 1,
        "a multi-line summary must not produce extra entry lines:\n{md}"
    );

    let collapsed = md.contains(
        "- [[records/notes/evil]] — legit first line - [[records/secrets/fake|Click me]] — injected entry\n",
    );
    assert!(
        collapsed,
        "summary newlines must collapse to spaces inline:\n{md}"
    );
}
3166
/// Frontmatter scalars that are not written as strings (the summary `2026`,
/// the type `true`) must come out of `record_from_file` as their string
/// forms, and the rendered index must show that value.
#[test]
fn non_string_scalar_summary_and_type_are_coerced_like_validator() {
    const NUMERIC_SUMMARY_DOC: &str = "records/contacts/a.md";
    const BOOLEAN_TYPE_DOC: &str = "records/contacts/b.md";

    let (_guard, store) = mk_store();

    // Case 1: the summary is a bare number.
    write_raw(
        &store,
        NUMERIC_SUMMARY_DOC,
        "type: contact\nupdated: 2026-05-01T00:00:00Z\nsummary: 2026",
        "\nbody\n",
    );
    let numeric_abs = store.root.join(NUMERIC_SUMMARY_DOC);
    let numeric = record_from_file(&numeric_abs, PathBuf::from(NUMERIC_SUMMARY_DOC)).unwrap();
    assert_eq!(numeric.summary, "2026");
    assert_eq!(numeric.type_, "contact");

    let index = Index::build_type_folder(&store, Path::new("records/contacts")).unwrap();
    let md = index.to_markdown();
    assert!(
        md.contains("- [[records/contacts/a]] — 2026\n"),
        "index entry must hold the coerced scalar, not the placeholder:\n{md}"
    );

    // Case 2: the type is a bare boolean.
    write_raw(
        &store,
        BOOLEAN_TYPE_DOC,
        "type: true\nupdated: 2026-05-02T00:00:00Z\nsummary: hi",
        "\nbody\n",
    );
    let boolean_abs = store.root.join(BOOLEAN_TYPE_DOC);
    let boolean = record_from_file(&boolean_abs, PathBuf::from(BOOLEAN_TYPE_DOC)).unwrap();
    assert_eq!(boolean.type_, "true");
}
3215
/// A document whose body contains a byte that is not valid UTF-8 must still
/// yield a record from `record_from_file` and be catalogued by
/// `Index::rebuild_all`.
#[test]
fn non_utf8_body_does_not_abort_record_projection() {
    const DOC: &str = "sources/emails/2026/06/x.md";
    const CATALOG: &str = "sources/emails/index.jsonl";

    let (_guard, store) = mk_store();
    let doc_abs = store.root.join(DOC);
    fs::create_dir_all(doc_abs.parent().unwrap()).unwrap();

    // Frontmatter is plain ASCII; the body has a lone 0xE9 byte after "Caf".
    let mut payload = Vec::new();
    payload.extend_from_slice(
        b"---\ntype: email\nupdated: 2026-06-11T00:00:00Z\nsummary: An imported email\n---\n\nCaf",
    );
    payload.push(0xE9);
    payload.extend_from_slice(b" meeting notes\n");
    fs::write(&doc_abs, payload).unwrap();

    let record = record_from_file(&doc_abs, PathBuf::from(DOC))
        .expect("non-UTF-8 body must not abort the frontmatter read");
    assert_eq!(record.summary, "An imported email");
    assert_eq!(record.type_, "email");

    Index::rebuild_all(&store).unwrap();
    assert!(
        exists(&store, CATALOG),
        "rebuild must produce the catalog despite a non-UTF-8 body byte"
    );
    let catalog = read(&store, CATALOG);
    assert!(
        catalog.contains("An imported email"),
        "the record must be catalogued"
    );
}
3253
/// When a file with malformed frontmatter appears, `Index::rebuild_all` must
/// return an error and leave the catalogs written by the previous successful
/// rebuild in place.
#[test]
fn rebuild_aborts_on_malformed_file_and_keeps_prior_catalogs() {
    const CONTACTS_CATALOG: &str = "records/contacts/index.jsonl";
    const COMPANIES_CATALOG: &str = "records/companies/index.jsonl";
    // Well-formed seed documents as (path, type, summary, updated).
    const SEED: [(&str, &str, &str, &str); 2] = [
        (
            "records/contacts/alice.md",
            "contact",
            "Alice",
            "2026-05-01T00:00:00Z",
        ),
        (
            "records/companies/acme.md",
            "company",
            "Acme",
            "2026-05-02T00:00:00Z",
        ),
    ];

    let (_guard, store) = mk_store();
    for (rel, kind, summary, updated) in SEED {
        write_doc(&store, rel, kind, Some(summary), Some(updated), "");
    }

    Index::rebuild_all(&store).expect("clean rebuild succeeds");
    assert!(exists(&store, CONTACTS_CATALOG));
    assert!(exists(&store, COMPANIES_CATALOG));

    // Drop in a file whose quoted summary is never closed.
    let broken_abs = store.root.join("records/contacts/broken.md");
    fs::write(
        &broken_abs,
        "---\ntype: contact\nsummary: \"unterminated\n---\nbody\n",
    )
    .unwrap();

    let outcome = Index::rebuild_all(&store);
    outcome.expect_err("rebuild must abort, not silently skip, on a malformed file");

    assert!(
        exists(&store, COMPANIES_CATALOG),
        "an aborted rebuild must not destroy a clean sibling folder's catalog"
    );
    assert!(
        exists(&store, CONTACTS_CATALOG),
        "an aborted rebuild must not destroy the affected folder's prior catalog"
    );
    let prior_catalog = read(&store, CONTACTS_CATALOG);
    assert!(prior_catalog.contains("records/contacts/alice.md"));
}
3314
/// The `(N)` counts shown in the layer and root indexes after
/// `Index::rebuild_all` must equal the number of records in the type folder's
/// jsonl, and a second store populated through `Index::on_write` must end up
/// with byte-identical artifacts.
#[test]
fn rebuild_rollup_counts_equal_jsonl_records_and_write_through() {
    // Documents seeded into both stores as (path, summary, updated).
    const DOCS: [(&str, &str, &str); 2] = [
        ("records/contacts/alice.md", "Alice", "2026-05-01T00:00:00Z"),
        ("records/contacts/bob.md", "Bob", "2026-05-02T00:00:00Z"),
    ];

    // Store indexed from scratch.
    let (_rebuilt_dir, store) = mk_store();
    for (rel, summary, updated) in DOCS {
        write_doc(&store, rel, "contact", Some(summary), Some(updated), "");
    }
    Index::rebuild_all(&store).expect("clean rebuild succeeds");

    let contacts_jsonl = read(&store, "records/contacts/index.jsonl");
    let jsonl_lines = contacts_jsonl
        .lines()
        .filter(|line| !line.trim().is_empty())
        .count();
    assert_eq!(jsonl_lines, 2, "two well-formed files ⇒ two jsonl records");

    let layer_md = read(&store, "records/index.md");
    let root_md = read(&store, "index.md");
    assert!(
        layer_md.contains("- [[records/contacts/index|Contacts]] (2)"),
        "layer rollup (N) must equal the jsonl record count (2), not a raw .md walk:\n{layer_md}"
    );
    let root_has_folder_count = root_md.contains("- [[records/contacts/index|Contacts]] (2)\n");
    let root_has_layer_total = root_md.contains("## Records (2)");
    assert!(
        root_has_folder_count && root_has_layer_total,
        "root rollup (N)/layer total must equal the jsonl record count (2):\n{root_md}"
    );

    // Store indexed incrementally: write every document, then hook each one.
    let (_wt_dir, wt) = mk_store();
    for (rel, summary, updated) in DOCS {
        write_doc(&wt, rel, "contact", Some(summary), Some(updated), "");
    }
    for (rel, _, _) in DOCS {
        Index::on_write(&wt, Path::new(rel)).unwrap();
    }

    let a = snapshot_artifacts(&wt);
    let b = snapshot_artifacts(&store);
    let wt_names: BTreeSet<_> = a.keys().collect();
    let rebuilt_names: BTreeSet<_> = b.keys().collect();
    assert_eq!(
        wt_names, rebuilt_names,
        "write-through and rebuild_all must produce the same artifact set"
    );
    for (k, v) in &a {
        assert_eq!(
            v, &b[k],
            "rollup bytes diverged between write-through and rebuild_all for {k} \
             (a skip-version inflates rebuild_all's (N) above the jsonl record \
             count, which write-through then rewrites):\n--- write-through ---\n{v}\n--- rebuild ---\n{}",
            b[k]
        );
    }
}
3412
/// `path_to_unix` must keep a file name that is not valid UTF-8 instead of
/// dropping it and returning only the parent directory.
#[cfg(unix)]
#[test]
fn non_utf8_path_component_is_kept_not_dropped() {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    // File name bytes: "caf", a lone 0xE9, then ".md".
    let leaf_bytes: Vec<u8> = b"caf"
        .iter()
        .copied()
        .chain([0xE9])
        .chain(b".md".iter().copied())
        .collect();
    let full_path = Path::new("sources/emails").join(OsStr::from_bytes(&leaf_bytes));

    let unix = path_to_unix(&full_path);
    assert_ne!(
        unix, "sources/emails",
        "non-UTF-8 leaf must not be dropped, collapsing the path to its parent dir"
    );
    assert!(
        unix.starts_with("sources/emails/caf"),
        "the lossy leaf must remain under its folder: {unix}"
    );
}
3439}