1use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a reported [`Issue`] is.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// A hard failure; the only level for which [`Issue::is_error`] returns `true`.
    Error,
    /// A non-fatal finding (used in this file for e.g. `TAGS_MALFORMED` and `SUMMARY_TOO_LONG`).
    Warning,
    /// An informational notice (used in this file for `POLICY_IGNORED_TYPE_PRESENT`).
    Info,
}
61
/// A single finding produced by the validators in this module.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Issue {
    /// Seriousness of the finding.
    pub severity: Severity,
    /// Stable machine-readable identifier; call sites in this file always pass a constant
    /// from [`codes`].
    pub code: &'static str,
    /// File the finding is reported against. Call sites in this file pass store-relative
    /// paths — presumably that holds for every producer; confirm against `push`.
    pub file: PathBuf,
    /// Line number within `file`, when one could be determined.
    pub line: Option<u32>,
    /// Frontmatter key the finding concerns, if it is tied to one.
    pub key: Option<String>,
    /// Human-readable description of the problem.
    pub message: String,
    /// Optional hint describing how to resolve the problem.
    pub suggestion: Option<String>,
    /// Other files involved in the finding (e.g. the other members of a duplicate group).
    pub related: Vec<PathBuf>,
}
84
85impl Issue {
86 pub fn is_error(&self) -> bool {
89 matches!(self.severity, Severity::Error)
90 }
91}
92
/// Stable string identifiers used as [`Issue::code`].
///
/// Every constant's value is identical to its name.
pub mod codes {
    // Store marker / DB.md.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";
    // Frontmatter.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";
    // `summary` field.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    // Wiki-links.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    // Duplicates.
    pub const DUP_ID: &str = "DUP_ID";
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    // Schema conformance.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    // Policies.
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    // Log.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    // Indexes (`index.md` / `index.jsonl`).
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    // Tags.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
    // Assets.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
197
/// Longest `summary` (counted in `char`s) accepted by `check_summary` before it emits a
/// `SUMMARY_TOO_LONG` warning.
const MAX_SUMMARY_LEN: usize = 200;
200
/// Log entry kinds considered valid.
///
/// NOTE(review): the consumer is not visible in this part of the file — presumably the log
/// check compares entries against this list when emitting `LOG_UNKNOWN_KIND`; confirm.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
214
215pub fn validate_working_set(
241 store: &Store,
242 since: Option<DateTime<FixedOffset>>,
243) -> crate::Result<Vec<Issue>> {
244 if !store_marker_present(store) {
245 return Ok(vec![not_a_store_issue(store)]);
246 }
247
248 let cutoff = match since {
249 Some(ts) => Some(ts),
250 None => last_validate_at(store),
251 };
252
253 let changed = changed_objects_since(store, cutoff);
255 if changed.is_empty() && since.is_none() {
256 return validate_content_sweep(store);
257 }
258
259 let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
270 let mut working: BTreeSet<PathBuf> = changed;
271 for linker in store.find_links_to_any(&changed_targets)? {
272 working.insert(linker);
273 }
274
275 let mut issues = Vec::new();
276 for rel in &working {
277 let abs = store.root.join(rel);
278 if !abs.is_file() {
281 continue;
282 }
283 check_content_file(store, rel, &abs, None, &mut issues);
288 }
289 issues.sort_by(issue_order);
290 Ok(issues)
291}
292
293fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
294 let mut issues = Vec::new();
295 for rel in store.walk()? {
296 let abs = store.root.join(&rel);
297 check_content_file(store, &rel, &abs, None, &mut issues);
298 }
299 issues.sort_by(issue_order);
300 Ok(issues)
301}
302
303pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
308 if !store_marker_present(store) {
309 return Ok(vec![not_a_store_issue(store)]);
310 }
311
312 let mut issues = Vec::new();
313
314 check_db_md(store, &mut issues);
318
319 let files = walk_content_files(&store.root);
320
321 let basenames = build_basename_index(&files);
326
327 let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
329 for rel in &files {
330 let abs = store.root.join(rel);
331 if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
332 parsed.push((rel.clone(), p));
333 }
334 }
335
336 check_duplicates(store, &parsed, &mut issues);
338
339 check_indexes(store, &files, &mut issues);
341
342 check_log(store, &mut issues);
344
345 check_assets(store, &parsed, &mut issues);
350
351 issues.sort_by(issue_order);
352 Ok(issues)
353}
354
/// What `check_content_file` extracted from one file's frontmatter block.
struct Parsed {
    /// Frontmatter as a key → value map; `None` when the block was not valid YAML or was not
    /// a mapping.
    fm: Option<BTreeMap<String, Value>>,
    /// Raw text of the frontmatter block, kept for line lookups and link extraction.
    fm_yaml: String,
}
371
372fn check_content_file(
377 store: &Store,
378 rel: &Path,
379 abs: &Path,
380 basenames: Option<&BasenameIndex>,
381 issues: &mut Vec<Issue>,
382) -> Option<Parsed> {
383 let text = match std::fs::read_to_string(abs) {
384 Ok(t) => t,
385 Err(e) => {
386 let detail = if e.kind() == std::io::ErrorKind::InvalidData {
394 "file is not valid UTF-8 text".to_string()
395 } else {
396 format!("file could not be read: {e}")
397 };
398 push(
399 issues,
400 Severity::Error,
401 codes::FM_UNREADABLE,
402 rel,
403 None,
404 None,
405 format!("content file is unreadable: {detail}"),
406 Some(
407 "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
408 .into(),
409 ),
410 vec![],
411 );
412 return None;
413 }
414 };
415
416 let is_content = is_content_file(rel);
417
418 let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
419 Some(split) => split,
420 None => {
421 if is_content {
425 push(
426 issues,
427 Severity::Error,
428 codes::FM_MISSING_TYPE,
429 rel,
430 None,
431 Some("type".into()),
432 "content file has no frontmatter `type:`".into(),
433 Some("add a YAML frontmatter block with `type:`".into()),
434 vec![],
435 );
436 push(
437 issues,
438 Severity::Error,
439 codes::SUMMARY_MISSING,
440 rel,
441 None,
442 Some("summary".into()),
443 "content file has no `summary`".into(),
444 Some("run `dbmd fm init`".into()),
445 vec![],
446 );
447 }
448 return None;
449 }
450 };
451
452 let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
454 Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
455 Ok(Value::Null) => Some(BTreeMap::new()),
457 Ok(_) => {
458 push(
462 issues,
463 Severity::Error,
464 codes::FM_MALFORMED_YAML,
465 rel,
466 Some(1),
467 None,
468 "frontmatter is not a YAML mapping".into(),
469 Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
470 vec![],
471 );
472 None
473 }
474 Err(e) => {
475 push(
478 issues,
479 Severity::Error,
480 codes::FM_MALFORMED_YAML,
481 rel,
482 Some(1),
483 None,
484 format!("frontmatter block isn't valid YAML: {e}"),
485 Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
486 vec![],
487 );
488 None
489 }
490 };
491
492 if let Some(map) = &fm {
493 check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
495 }
496
497 if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
519 check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
520 }
521
522 Some(Parsed { fm, fm_yaml })
523}
524
/// Validates one file's parsed frontmatter and appends findings to `issues`.
///
/// `fm` is the parsed mapping and `fm_yaml` the raw block it came from (used for line
/// lookups and link extraction). `is_content` gates the checks that only apply to content
/// files: `type`, `meta-type`, `summary` and the `created` / `updated` timestamps. The
/// remaining checks (tags, flow-form link lists, wiki-links, ignored-type policy, schema)
/// run for every file.
fn check_frontmatter(
    store: &Store,
    rel: &Path,
    fm: &BTreeMap<String, Value>,
    fm_yaml: &str,
    basenames: Option<&BasenameIndex>,
    issues: &mut Vec<Issue>,
    is_content: bool,
) {
    // `None` both when `type` is absent and when `scalar_string` rejects its value.
    let type_ = fm.get("type").and_then(scalar_string);

    if is_content && type_.is_none() {
        push(
            issues,
            Severity::Error,
            codes::FM_MISSING_TYPE,
            rel,
            fm_key_line_or_top(fm_yaml, "type"),
            Some("type".into()),
            "content file has no `type:`".into(),
            Some("add a `type:` field (e.g. `type: contact`)".into()),
            vec![],
        );
    }

    if is_content {
        // `meta-type` is optional: an absent or null value is accepted without a finding.
        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
            match scalar_string(v) {
                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
                // A scalar, but not one of the three allowed values.
                Some(mt) => push(
                    issues,
                    Severity::Error,
                    codes::FM_BAD_META_TYPE,
                    rel,
                    fm_key_line_or_top(fm_yaml, "meta-type"),
                    Some("meta-type".into()),
                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
                    Some(
                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
                            .into(),
                    ),
                    vec![],
                ),
                // Not a scalar at all.
                None => push(
                    issues,
                    Severity::Error,
                    codes::FM_BAD_META_TYPE,
                    rel,
                    fm_key_line_or_top(fm_yaml, "meta-type"),
                    Some("meta-type".into()),
                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
                     string, found a list or mapping"
                        .to_string(),
                    Some(
                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
                            .into(),
                    ),
                    vec![],
                ),
            }
        }
    }

    if is_content {
        check_summary(rel, fm, fm_yaml, issues);
    }

    if is_content {
        // `created` and `updated` share the same rules; only the "missing" code differs.
        for (key, missing_code) in [
            ("created", codes::FM_MISSING_CREATED),
            ("updated", codes::FM_MISSING_UPDATED),
        ] {
            let value = fm.get(key);
            // An explicit null counts as missing, same as an absent key.
            let missing = value.is_none() || value.is_some_and(Value::is_null);
            if missing {
                push(
                    issues,
                    Severity::Error,
                    missing_code,
                    rel,
                    fm_key_line_or_top(fm_yaml, key),
                    Some(key.into()),
                    format!("content file has no `{key}:` timestamp"),
                    Some(format!(
                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
                    )),
                    vec![],
                );
            } else if let Some(v) = value {
                match scalar_string(v) {
                    Some(s) if is_iso8601(&s) => {}
                    // A scalar that `is_iso8601` rejects.
                    Some(s) => push(
                        issues,
                        Severity::Error,
                        codes::FM_BAD_TIMESTAMP,
                        rel,
                        fm_key_line(fm_yaml, key),
                        Some(key.into()),
                        format!("`{key}` is not ISO-8601: {s:?}"),
                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
                        vec![],
                    ),
                    // Not a scalar at all.
                    None => push(
                        issues,
                        Severity::Error,
                        codes::FM_BAD_TIMESTAMP,
                        rel,
                        fm_key_line(fm_yaml, key),
                        Some(key.into()),
                        format!(
                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
                        ),
                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
                        vec![],
                    ),
                }
            }
        }
    }
    // `tags`, when present, must pass `is_flat_scalar_list`; a violation is only a warning.
    if let Some(tags) = fm.get("tags") {
        if !is_flat_scalar_list(tags) {
            push(
                issues,
                Severity::Warning,
                codes::TAGS_MALFORMED,
                rel,
                fm_key_line(fm_yaml, "tags"),
                Some("tags".into()),
                "`tags` must be a flat YAML list of short scalar labels".into(),
                Some("use block form: one `- <tag>` per line".into()),
                vec![],
            );
        }
    }

    // One finding per key that `detect_flow_form_link_lists` flags in the raw YAML text.
    for key in detect_flow_form_link_lists(fm_yaml) {
        push(
            issues,
            Severity::Error,
            codes::WIKI_LINK_FLOW_FORM_LIST,
            rel,
            fm_key_line(fm_yaml, &key),
            Some(key.clone()),
            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
            vec![],
        );
    }

    // Fields the type's schema declares with a `link_prefix` are skipped in the generic
    // wiki-link pass below; `check_schema` validates those through `check_schema_link`.
    // With no usable `type`, the lookup is done with the empty string.
    let schema_link_keys: BTreeSet<String> =
        effective_schema(store, type_.as_deref().unwrap_or(""))
            .map(|s| {
                s.fields
                    .iter()
                    .filter(|f| f.link_prefix.is_some())
                    .map(|f| f.name.clone())
                    .collect()
            })
            .unwrap_or_default();
    // NOTE(review): the literal `2` is presumably the file line on which the frontmatter
    // text starts — confirm against `frontmatter_link_fields_text`.
    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
        if schema_link_keys.contains(&key) {
            continue;
        }
        check_wiki_link(
            store,
            rel,
            &link,
            Some(link.line),
            Some(&key),
            basenames,
            issues,
        );
    }

    if let Some(t) = &type_ {
        // Informational notice when the file's own type is listed in `ignored_types`.
        if store.config.ignored_types.iter().any(|it| it == t) {
            push(
                issues,
                Severity::Info,
                codes::POLICY_IGNORED_TYPE_PRESENT,
                rel,
                fm_key_line(fm_yaml, "type"),
                Some("type".into()),
                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
                Some(
                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
                        .into(),
                ),
                vec![PathBuf::from("DB.md")],
            );
        }
        // A missing or non-scalar `meta-type` is treated as the default `fact`.
        let meta_type = fm
            .get("meta-type")
            .and_then(scalar_string)
            .unwrap_or_else(|| "fact".to_string());
        // Each `derived_from` link is tested on its own so the finding carries that
        // link's line.
        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
            if let Some(hit) =
                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
            {
                push(
                    issues,
                    Severity::Warning,
                    codes::POLICY_IGNORED_TYPE_DERIVED,
                    rel,
                    Some(link.line),
                    Some("derived_from".into()),
                    format!(
                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
                        hit.target, hit.target_type
                    ),
                    Some(
                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
                            .into(),
                    ),
                    vec![
                        PathBuf::from(format!("{}.md", hit.target)),
                        PathBuf::from("DB.md"),
                    ],
                );
            }
        }
    }

    // Schema conformance runs only when the file has a type and that type has a schema.
    if let Some(t) = &type_ {
        if let Some(schema) = effective_schema(store, t) {
            check_schema(store, rel, fm, fm_yaml, &schema, issues);
        }
    }
}
797
798fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
800 let line = fm_key_line(fm_yaml, "summary");
801 match fm.get("summary") {
802 None => push(
803 issues,
804 Severity::Error,
805 codes::SUMMARY_MISSING,
806 rel,
807 fm_key_line_or_top(fm_yaml, "summary"),
810 Some("summary".into()),
811 "content file has no `summary`".into(),
812 Some("run `dbmd fm init`".into()),
813 vec![],
814 ),
815 Some(v) => {
816 let s = scalar_string(v).unwrap_or_default();
817 if s.trim().is_empty() {
818 push(
819 issues,
820 Severity::Error,
821 codes::SUMMARY_EMPTY,
822 rel,
823 line,
824 Some("summary".into()),
825 "`summary` is present but empty".into(),
826 Some("write a one-line summary, or run `dbmd fm init`".into()),
827 vec![],
828 );
829 } else if s.contains('\n') {
830 push(
831 issues,
832 Severity::Error,
833 codes::SUMMARY_MULTILINE,
834 rel,
835 line,
836 Some("summary".into()),
837 "`summary` must be one line (contains a newline)".into(),
838 Some("collapse the summary to a single line".into()),
839 vec![],
840 );
841 } else if s.chars().count() > MAX_SUMMARY_LEN {
842 push(
843 issues,
844 Severity::Warning,
845 codes::SUMMARY_TOO_LONG,
846 rel,
847 line,
848 Some("summary".into()),
849 format!(
850 "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
851 s.chars().count()
852 ),
853 Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
854 vec![],
855 );
856 }
857 }
858 }
859}
860
861fn check_body_wiki_links(
863 store: &Store,
864 rel: &Path,
865 body: &str,
866 fm_end_line: u32,
867 basenames: Option<&BasenameIndex>,
868 issues: &mut Vec<Issue>,
869) {
870 for link in extract_wiki_links(body) {
871 let abs_line = fm_end_line + link.line;
874 check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
875 }
876}
877
/// Maps a file stem (file name without its final extension) to every path that has it.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Groups `files` by file stem, keeping the paths in input order within each group.
///
/// Paths with no stem, or whose stem is not valid UTF-8, are left out.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    files.iter().fold(BasenameIndex::new(), |mut index, path| {
        if let Some(stem) = path.file_stem().and_then(|s| s.to_str()) {
            index.entry(stem.to_owned()).or_default().push(path.clone());
        }
        index
    })
}
898
899fn check_wiki_link(
904 store: &Store,
905 rel: &Path,
906 link: &Link,
907 line: Option<u32>,
908 key: Option<&str>,
909 basenames: Option<&BasenameIndex>,
910 issues: &mut Vec<Issue>,
911) {
912 let bare = link.target.trim_end_matches(".md");
913
914 if !is_full_store_path(bare) {
917 if !bare.contains('/') {
922 if let Some(idx) = basenames {
923 if let Some(matches) = idx.get(bare) {
924 if matches.len() >= 2 {
925 let mut related = matches.clone();
926 related.sort();
927 push(
928 issues,
929 Severity::Error,
930 codes::WIKI_LINK_AMBIGUOUS,
931 rel,
932 line,
933 key.map(str::to_string),
934 format!(
935 "short-form wiki-link `[[{}]]` matches multiple files",
936 link.target
937 ),
938 Some("use the full store-relative path to disambiguate".into()),
939 related,
940 );
941 return;
942 }
943 }
944 }
945 }
946 push(
947 issues,
948 Severity::Error,
949 codes::WIKI_LINK_SHORT_FORM,
950 rel,
951 line,
952 key.map(str::to_string),
953 format!(
954 "wiki-link `[[{}]]` is not a full store-relative path",
955 link.target
956 ),
957 short_form_suggestion(bare),
958 vec![],
959 );
960 return;
962 }
963
964 if link.target.ends_with(".md") {
966 push(
967 issues,
968 Severity::Warning,
969 codes::WIKI_LINK_HAS_EXTENSION,
970 rel,
971 line,
972 key.map(str::to_string),
973 format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
974 Some(format!("drop the extension: [[{bare}]]")),
975 vec![],
976 );
977 }
978
979 match resolve_wiki_target(store, bare) {
984 TargetResolution::Exists => {}
985 TargetResolution::Missing => push(
986 issues,
987 Severity::Error,
988 codes::WIKI_LINK_BROKEN,
989 rel,
990 line,
991 key.map(str::to_string),
992 format!("wiki-link target `{bare}` doesn't exist"),
993 Some(format!(
994 "create `{bare}.md`, or point the link at an existing file"
995 )),
996 vec![],
997 ),
998 TargetResolution::Unsafe => push(
999 issues,
1000 Severity::Error,
1001 codes::WIKI_LINK_BROKEN,
1002 rel,
1003 line,
1004 key.map(str::to_string),
1005 format!("wiki-link target `{bare}` is not a safe store-relative path"),
1006 Some("use a full store-relative path under sources/ or records/".into()),
1007 vec![],
1008 ),
1009 }
1010}
1011
1012fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1023 store.config.schemas.get(type_).cloned()
1024}
1025
/// Checks the frontmatter against every field spec in `schema`, appending findings to
/// `issues`.
///
/// For each field the checks run in this order, and the first one that applies ends
/// processing of that field:
/// 1. required but absent/empty → `SCHEMA_MISSING_REQUIRED`;
/// 2. absent, or optional and empty → nothing to check;
/// 3. `link_prefix` declared → delegated to `check_schema_link`;
/// 4. `shape` or `enum_values` declared but the value is not a scalar →
///    `SCHEMA_SHAPE_MISMATCH`;
/// 5. `enum_values` declared → `SCHEMA_ENUM_VIOLATION` if the value is not listed;
/// 6. `shape` declared → delegated to `check_schema_shape`.
fn check_schema(
    store: &Store,
    rel: &Path,
    fm: &BTreeMap<String, Value>,
    fm_yaml: &str,
    schema: &Schema,
    issues: &mut Vec<Issue>,
) {
    for spec in &schema.fields {
        let present = fm.get(&spec.name);
        // Line of the key in the raw YAML, if it can be located.
        let line = fm_key_line(fm_yaml, &spec.name);

        // An absent key counts as empty; otherwise defer to `is_empty_value`.
        let is_empty = match present {
            None => true,
            Some(v) => is_empty_value(v),
        };
        if spec.required && is_empty {
            push(
                issues,
                Severity::Error,
                codes::SCHEMA_MISSING_REQUIRED,
                rel,
                fm_key_line_or_top(fm_yaml, &spec.name),
                Some(spec.name.clone()),
                format!("required field `{}` is absent or empty", spec.name),
                Some(format!("set `{}` to a non-empty value", spec.name)),
                vec![],
            );
            continue;
        }
        let Some(value) = present else { continue };

        // An optional field left null or blank is treated as unset and not checked further.
        let value_empty = value.is_null()
            || scalar_string(value)
                .map(|s| s.trim().is_empty())
                .unwrap_or(false);
        if !spec.required && value_empty {
            continue;
        }

        // Link fields are validated from the raw YAML text; shape/enum do not apply to them.
        if let Some(prefix) = &spec.link_prefix {
            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
            continue;
        }

        // Shape and enum checks both need a scalar; report a list/mapping once and move on.
        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
            push(
                issues,
                Severity::Error,
                codes::SCHEMA_SHAPE_MISMATCH,
                rel,
                line,
                Some(spec.name.clone()),
                format!(
                    "`{}` must be a scalar value, found a list or mapping",
                    spec.name
                ),
                Some(format!("set `{}` to a single scalar value", spec.name)),
                vec![],
            );
            continue;
        }

        // When an enum is declared it takes precedence: the shape check is skipped.
        if let Some(allowed) = &spec.enum_values {
            if let Some(s) = scalar_string(value) {
                if !allowed.iter().any(|a| a == &s) {
                    push(
                        issues,
                        Severity::Error,
                        codes::SCHEMA_ENUM_VIOLATION,
                        rel,
                        line,
                        Some(spec.name.clone()),
                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
                        Some(format!("use one of: {}", allowed.join(", "))),
                        vec![],
                    );
                }
            }
            continue;
        }

        if let Some(shape) = spec.shape {
            check_schema_shape(rel, &spec.name, value, shape, line, issues);
        }
    }
}
1138
1139fn check_schema_link(
1144 store: &Store,
1145 rel: &Path,
1146 field: &str,
1147 fm_yaml: &str,
1148 prefix: &Path,
1149 line: Option<u32>,
1150 issues: &mut Vec<Issue>,
1151) {
1152 let prefix_str = prefix.to_string_lossy();
1153 let prefix_str = prefix_str.trim_end_matches('/');
1154 let suggestion = |target_leaf: &str| {
1155 Some(format!(
1156 "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1157 ))
1158 };
1159
1160 let links = frontmatter_links_for_key(fm_yaml, field, 2);
1161 if links.is_empty() {
1162 let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1164 let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1165 let leaf = slugish(raw);
1166 push(
1167 issues,
1168 Severity::Error,
1169 codes::SCHEMA_LINK_PREFIX_MISMATCH,
1170 rel,
1171 line,
1172 Some(field.to_string()),
1173 format!(
1174 "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1175 ),
1176 suggestion(&leaf),
1177 vec![],
1178 );
1179 return;
1180 }
1181
1182 for link in links {
1183 if link.target.ends_with(".md") {
1184 let bare = link.target.trim_end_matches(".md");
1185 push(
1186 issues,
1187 Severity::Warning,
1188 codes::WIKI_LINK_HAS_EXTENSION,
1189 rel,
1190 Some(link.line),
1191 Some(field.to_string()),
1192 format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1193 Some(format!("drop the extension: [[{bare}]]")),
1194 vec![],
1195 );
1196 }
1197 let bare = link.target.trim_end_matches(".md");
1198 if !path_under_prefix(bare, prefix_str) {
1199 let leaf = bare.rsplit('/').next().unwrap_or(bare);
1200 push(
1201 issues,
1202 Severity::Error,
1203 codes::SCHEMA_LINK_PREFIX_MISMATCH,
1204 rel,
1205 line,
1206 Some(field.to_string()),
1207 format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1208 suggestion(leaf),
1209 vec![],
1210 );
1211 } else {
1212 match resolve_wiki_target(store, bare) {
1217 TargetResolution::Exists => {}
1218 TargetResolution::Missing => push(
1219 issues,
1220 Severity::Error,
1221 codes::WIKI_LINK_BROKEN,
1222 rel,
1223 line,
1224 Some(field.to_string()),
1225 format!("wiki-link target `{bare}` doesn't exist"),
1226 Some(format!(
1227 "create `{bare}.md`, or point the link at an existing file"
1228 )),
1229 vec![],
1230 ),
1231 TargetResolution::Unsafe => push(
1232 issues,
1233 Severity::Error,
1234 codes::WIKI_LINK_BROKEN,
1235 rel,
1236 line,
1237 Some(field.to_string()),
1238 format!("wiki-link target `{bare}` is not a safe store-relative path"),
1239 Some("use a full store-relative path under sources/ or records/".into()),
1240 vec![],
1241 ),
1242 }
1243 }
1244 }
1245}
1246
1247fn check_schema_shape(
1249 rel: &Path,
1250 field: &str,
1251 value: &Value,
1252 shape: Shape,
1253 line: Option<u32>,
1254 issues: &mut Vec<Issue>,
1255) {
1256 let s = scalar_string(value).unwrap_or_default();
1257 let ok = match shape {
1258 Shape::String => true, Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1260 Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1261 Shape::Date => is_iso8601_date_or_datetime(&s),
1262 Shape::Email => is_email(&s),
1263 Shape::Currency => is_currency(&s),
1264 Shape::Url => is_url(&s),
1265 };
1266 if !ok {
1267 push(
1268 issues,
1269 Severity::Error,
1270 codes::SCHEMA_SHAPE_MISMATCH,
1271 rel,
1272 line,
1273 Some(field.to_string()),
1274 format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1275 Some(shape_suggestion(shape)),
1276 vec![],
1277 );
1278 }
1279}
1280
1281fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1300 let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1303 .iter()
1304 .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1305 .collect();
1306
1307 let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1309 for (rel, p) in parsed {
1310 if let Some(map) = &p.fm {
1311 if let Some(id) = map.get("id").and_then(scalar_string) {
1312 if !id.trim().is_empty() {
1313 by_id.entry(id).or_default().push(rel.clone());
1314 }
1315 }
1316 }
1317 }
1318 for (id, files) in &by_id {
1319 if files.len() > 1 {
1320 let (reported, related) = canonical_and_related(files);
1321 let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1322 push(
1323 issues,
1324 Severity::Error,
1325 codes::DUP_ID,
1326 &reported,
1327 line,
1328 Some("id".into()),
1329 format!("id {id:?} is declared by more than one file"),
1330 Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1331 related,
1332 );
1333 }
1334 }
1335
1336 for (type_name, schema) in &store.config.schemas {
1341 for key_fields in &schema.unique_keys {
1342 soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1343 }
1344 }
1345}
1346
1347fn soft_dup(
1356 parsed: &[(PathBuf, Parsed)],
1357 issues: &mut Vec<Issue>,
1358 type_: &str,
1359 key_fields: &[String],
1360 fm_yaml_of: &HashMap<&PathBuf, &str>,
1361) {
1362 if key_fields.is_empty() {
1363 return;
1364 }
1365 let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1366 for (rel, p) in parsed {
1367 let is_type =
1368 p.fm.as_ref()
1369 .and_then(|m| m.get("type"))
1370 .and_then(scalar_string)
1371 .map(|t| t == type_)
1372 .unwrap_or(false);
1373 if !is_type {
1374 continue;
1375 }
1376 if let Some(key) = dedup_key(p, key_fields) {
1377 groups.entry(key).or_default().push(rel.clone());
1378 }
1379 }
1380 let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1383 .values()
1384 .filter(|files| files.len() > 1)
1385 .map(|files| canonical_and_related(files))
1386 .collect();
1387 collisions.sort_by(|a, b| a.0.cmp(&b.0));
1388
1389 let fields_disp = key_fields.join(", ");
1390 for (reported, related) in collisions {
1391 let (line, key) = if key_fields.len() == 1 {
1394 (
1395 fm_yaml_of
1396 .get(&reported)
1397 .and_then(|y| fm_key_line(y, &key_fields[0])),
1398 Some(key_fields[0].clone()),
1399 )
1400 } else {
1401 (Some(1), None)
1402 };
1403 let n = related.len();
1404 push(
1405 issues,
1406 Severity::Warning,
1407 codes::DUP_UNIQUE_KEY,
1408 &reported,
1409 line,
1410 key,
1411 format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1412 Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1413 related,
1414 );
1415 }
1416}
1417
1418fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1422 let mut out = Vec::with_capacity(key_fields.len());
1423 for f in key_fields {
1424 out.push(dedup_token(p, f)?);
1425 }
1426 Some(out)
1427}
1428
1429fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1434 let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1437 if !links.is_empty() {
1438 let set: BTreeSet<String> = links
1439 .into_iter()
1440 .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1441 .filter(|t| !t.is_empty())
1442 .collect();
1443 return if set.is_empty() {
1444 None
1445 } else {
1446 Some(set.into_iter().collect::<Vec<_>>().join(","))
1447 };
1448 }
1449 match p.fm.as_ref()?.get(field) {
1450 Some(Value::Sequence(items)) => {
1451 let set: BTreeSet<String> = items
1452 .iter()
1453 .filter_map(scalar_string)
1454 .map(|s| s.trim().to_lowercase())
1455 .filter(|t| !t.is_empty())
1456 .collect();
1457 if set.is_empty() {
1458 None
1459 } else {
1460 Some(set.into_iter().collect::<Vec<_>>().join(","))
1461 }
1462 }
1463 Some(v) => {
1464 let s = scalar_string(v)?.trim().to_lowercase();
1465 if s.is_empty() {
1466 None
1467 } else {
1468 Some(s)
1469 }
1470 }
1471 None => None,
1472 }
1473}
1474
/// Splits a group of colliding files into the one to report on — the lexicographically
/// smallest path — and the remaining paths in sorted order.
///
/// # Panics
///
/// Panics if `files` is empty.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered = Vec::from(files);
    ordered.sort();
    // After `split_off(1)` only the smallest path is left in `ordered`.
    let related = ordered.split_off(1);
    let reported = ordered.swap_remove(0);
    (reported, related)
}
1486
/// Checks that the expected index files exist and delegates their content checks.
///
/// Sections, in order:
/// 1. root `index.md` (required as soon as the store has any file);
/// 2. one `index.md` per layer (`sources`, `records`) that contains files;
/// 3. per type-folder: `index.md` plus its `index.jsonl` twin;
/// 4. `index.jsonl` for layers holding loose content files (those outside any type-folder);
/// 5. index files found in folders that are none of the above (`INDEX_ORPHAN`, warning).
///
/// `files` is the list of store-relative paths being validated.
fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
    // Type-folder → member files, as classified by `type_folder_of`.
    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
    // Which of the two recognized layers actually contain files.
    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
    for rel in files {
        // The first path component decides the layer; anything else is ignored here.
        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
            match layer {
                "sources" => layers_present.insert("sources"),
                "records" => layers_present.insert("records"),
                _ => false,
            };
        }
        if let Some(tf) = type_folder_of(rel) {
            type_folders.entry(tf).or_default().push(rel.clone());
        }
    }

    // 1. Root index.
    if !files.is_empty() {
        let root_index = store.root.join("index.md");
        if !root_index.is_file() {
            push(
                issues,
                Severity::Error,
                codes::INDEX_MISSING,
                Path::new("index.md"),
                None,
                None,
                "store has files but no root `index.md`".into(),
                Some("run `dbmd index rebuild`".into()),
                vec![],
            );
        } else {
            check_index_scope(store, Path::new("index.md"), "root", None, issues);
        }
    }

    // 2. Layer indexes.
    for layer in &layers_present {
        let layer_index_rel = PathBuf::from(layer).join("index.md");
        let abs = store.root.join(&layer_index_rel);
        if !abs.is_file() {
            push(
                issues,
                Severity::Error,
                codes::INDEX_MISSING,
                &layer_index_rel,
                None,
                None,
                format!("layer `{layer}/` has files but no `index.md`"),
                Some("run `dbmd index rebuild`".into()),
                vec![],
            );
        } else {
            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
        }
    }

    // 3. Type-folder indexes.
    for (tf, members) in &type_folders {
        let index_md_rel = tf.join("index.md");
        let index_md_abs = store.root.join(&index_md_rel);
        let index_md_present = index_md_abs.is_file();
        if !index_md_present {
            // Filed on the folder itself; the jsonl twin is not checked in this case.
            push(
                issues,
                Severity::Error,
                codes::INDEX_MISSING,
                tf,
                None,
                None,
                format!("non-empty folder `{}` has no index.md", tf.display()),
                Some(format!(
                    "run `dbmd index rebuild --folder {}`",
                    tf.display()
                )),
                vec![],
            );
            continue;
        }

        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);

        let jsonl_rel = tf.join("index.jsonl");
        let jsonl_abs = store.root.join(&jsonl_rel);
        if !jsonl_abs.is_file() {
            push(
                issues,
                Severity::Error,
                codes::INDEX_JSONL_MISSING,
                &jsonl_rel,
                None,
                None,
                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
                Some("run `dbmd index rebuild`".into()),
                vec![],
            );
        } else {
            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
        }
    }

    // 4. Loose content files: content files outside any type-folder, grouped by the
    //    directory `loose_layer_dir` assigns them to.
    let mut loose_by_layer: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
    for rel in files {
        if !is_content_file(rel) || type_folder_of(rel).is_some() {
            continue;
        }
        if let Some(layer_dir) = loose_layer_dir(rel) {
            loose_by_layer
                .entry(layer_dir)
                .or_default()
                .push(rel.clone());
        }
    }
    for (layer_dir, members) in &loose_by_layer {
        let jsonl_rel = layer_dir.join("index.jsonl");
        if !store.root.join(&jsonl_rel).is_file() {
            // The uncatalogued files are attached as related paths.
            push(
                issues,
                Severity::Error,
                codes::INDEX_JSONL_MISSING,
                &jsonl_rel,
                None,
                None,
                format!(
                    "loose files at `{}/` are not catalogued — the layer has no `index.jsonl`",
                    layer_dir.display()
                ),
                Some("run `dbmd index rebuild`".into()),
                members.clone(),
            );
        } else {
            // Reuses the type-folder jsonl check with the layer directory as the folder.
            check_type_folder_index_jsonl(store, layer_dir, &jsonl_rel, members, issues);
        }
    }

    // 5. Orphans: an index file is canonical only at the root, directly under a layer, or
    //    in a type-folder that has members.
    for rel in walk_index_files(&store.root) {
        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
        let parent_str = parent.to_string_lossy().to_string();
        let is_canonical = parent_str.is_empty()
            || matches!(parent_str.as_str(), "sources" | "records")
            || type_folders.contains_key(&parent);
        if !is_canonical {
            push(
                issues,
                Severity::Warning,
                codes::INDEX_ORPHAN,
                &rel,
                None,
                None,
                format!(
                    "`{}` sits in an empty or non-canonical folder",
                    rel.display()
                ),
                Some("remove it, or run `dbmd index rebuild`".into()),
                vec![],
            );
        }
    }
}
1677
1678fn check_type_folder_index_md(
1682 store: &Store,
1683 tf: &Path,
1684 index_rel: &Path,
1685 members: &[PathBuf],
1686 issues: &mut Vec<Issue>,
1687) {
1688 let abs = store.root.join(index_rel);
1689 let Ok(text) = std::fs::read_to_string(&abs) else {
1690 return;
1691 };
1692 let entries = parse_index_entries(&text);
1693
1694 let listed: BTreeSet<PathBuf> = entries
1695 .iter()
1696 .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1697 .collect();
1698
1699 for entry in &entries {
1701 let bare = entry.target.trim_end_matches(".md");
1702 let target_abs = match resolved_target_abs(store, bare) {
1705 Some(abs) => abs,
1706 None => {
1707 if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1708 push(
1709 issues,
1710 Severity::Error,
1711 codes::INDEX_STALE_ENTRY,
1712 index_rel,
1713 Some(entry.line),
1714 None,
1715 format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1716 Some("run `dbmd index rebuild`".into()),
1717 vec![],
1718 );
1719 } else {
1720 push(
1721 issues,
1722 Severity::Error,
1723 codes::INDEX_STALE_ENTRY,
1724 index_rel,
1725 Some(entry.line),
1726 None,
1727 format!("index entry `[[{bare}]]` points at a missing file"),
1728 Some("run `dbmd index rebuild`".into()),
1729 vec![PathBuf::from(format!("{bare}.md"))],
1733 );
1734 }
1735 continue;
1736 }
1737 };
1738 if let Some(expected) = read_summary(&target_abs) {
1745 match &entry.summary_text {
1746 Some(text_part)
1757 if crate::summary::collapse_whitespace(text_part)
1758 != crate::summary::collapse_whitespace(&expected) =>
1759 {
1760 push(
1761 issues,
1762 Severity::Error,
1763 codes::INDEX_SUMMARY_MISMATCH,
1764 index_rel,
1765 Some(entry.line),
1766 None,
1767 format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1768 Some("run `dbmd index rebuild`".into()),
1769 vec![PathBuf::from(format!("{bare}.md"))],
1770 );
1771 }
1772 None if !expected.trim().is_empty() => {
1773 push(
1774 issues,
1775 Severity::Error,
1776 codes::INDEX_SUMMARY_MISMATCH,
1777 index_rel,
1778 Some(entry.line),
1779 None,
1780 format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1781 Some("run `dbmd index rebuild`".into()),
1782 vec![PathBuf::from(format!("{bare}.md"))],
1783 );
1784 }
1785 _ => {}
1786 }
1787 }
1788 }
1789
1790 let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1794 if content_members.len() <= 500 {
1795 for m in content_members {
1796 let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1797 if !listed.contains(&bare) {
1798 push(
1799 issues,
1800 Severity::Error,
1801 codes::INDEX_MISSING_ENTRY,
1802 index_rel,
1803 None,
1804 None,
1805 format!(
1806 "file `{}` is not listed in its folder's `index.md`",
1807 m.display()
1808 ),
1809 Some("run `dbmd index rebuild`".into()),
1810 vec![(*m).clone()],
1811 );
1812 }
1813 }
1814 }
1815 let _ = tf;
1816}
1817
1818fn check_type_folder_index_jsonl(
1822 store: &Store,
1823 tf: &Path,
1824 jsonl_rel: &Path,
1825 members: &[PathBuf],
1826 issues: &mut Vec<Issue>,
1827) {
1828 let abs = store.root.join(jsonl_rel);
1829 let Ok(text) = std::fs::read_to_string(&abs) else {
1830 return;
1831 };
1832
1833 let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1835 for (i, line) in text.lines().enumerate() {
1836 let line = line.trim();
1837 if line.is_empty() {
1838 continue;
1839 }
1840 let rec: serde_json::Value = match serde_json::from_str(line) {
1841 Ok(v) => v,
1842 Err(e) => {
1843 push(
1844 issues,
1845 Severity::Error,
1846 codes::INDEX_JSONL_DESYNC,
1847 jsonl_rel,
1848 Some((i + 1) as u32),
1849 None,
1850 format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1851 Some("run `dbmd index rebuild`".into()),
1852 vec![],
1853 );
1854 continue;
1855 }
1856 };
1857 if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1858 if !is_safe_store_relative_path(Path::new(path)) {
1859 push(
1860 issues,
1861 Severity::Error,
1862 codes::INDEX_JSONL_DESYNC,
1863 jsonl_rel,
1864 Some((i + 1) as u32),
1865 None,
1866 format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1867 Some("run `dbmd index rebuild`".into()),
1868 vec![],
1869 );
1870 continue;
1871 }
1872 records.insert(PathBuf::from(path), rec);
1873 }
1874 }
1875
1876 let member_set: BTreeSet<PathBuf> = members
1877 .iter()
1878 .filter(|m| is_content_file(m))
1879 .cloned()
1880 .collect();
1881
1882 for path in records.keys() {
1884 let target_abs = store.root.join(path);
1885 if !target_abs.is_file() {
1886 push(
1887 issues,
1888 Severity::Error,
1889 codes::INDEX_JSONL_DESYNC,
1890 jsonl_rel,
1891 None,
1892 None,
1893 format!(
1894 "`index.jsonl` record points at missing file `{}`",
1895 path.display()
1896 ),
1897 Some("run `dbmd index rebuild`".into()),
1898 vec![],
1899 );
1900 }
1901 }
1902
1903 for m in &member_set {
1905 if !records.contains_key(m) {
1906 push(
1907 issues,
1908 Severity::Error,
1909 codes::INDEX_JSONL_DESYNC,
1910 jsonl_rel,
1911 None,
1912 None,
1913 format!(
1914 "file `{}` is missing from the complete `index.jsonl`",
1915 m.display()
1916 ),
1917 Some("run `dbmd index rebuild`".into()),
1918 vec![m.clone()],
1919 );
1920 }
1921 }
1922
1923 for (path, rec) in &records {
1937 let target_abs = store.root.join(path);
1938 if !target_abs.is_file() {
1939 continue;
1940 }
1941 let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1942 else {
1943 continue; };
1945 let Ok(expected_json) = serde_json::to_value(&expected) else {
1946 continue;
1947 };
1948 let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1949 continue;
1950 };
1951
1952 let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1955 for key in have.keys().chain(want.keys()) {
1956 if key == "path" {
1957 continue;
1958 }
1959 if have.get(key) != want.get(key) {
1960 mismatched_keys.insert(key);
1961 }
1962 }
1963
1964 if !mismatched_keys.is_empty() {
1965 let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1966 push(
1967 issues,
1968 Severity::Error,
1969 codes::INDEX_JSONL_STALE,
1970 jsonl_rel,
1971 None,
1972 Some(keys.join(",")),
1973 format!(
1974 "`index.jsonl` record for `{}` is stale ({})",
1975 path.display(),
1976 keys.join(", ")
1977 ),
1978 Some("run `dbmd index rebuild`".into()),
1979 vec![path.clone()],
1980 );
1981 }
1982 }
1983 let _ = tf;
1984}
1985
1986fn check_index_scope(
1988 store: &Store,
1989 index_rel: &Path,
1990 expected_scope: &str,
1991 expected_folder: Option<&str>,
1992 issues: &mut Vec<Issue>,
1993) {
1994 let abs = store.root.join(index_rel);
1995 let Ok(text) = std::fs::read_to_string(&abs) else {
1996 return;
1997 };
1998 let Some((yaml, _, _)) = split_frontmatter(&text) else {
1999 return;
2000 };
2001 let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
2002 return;
2003 };
2004 let fm = yaml_map_to_btree(&map);
2005
2006 if let Some(scope) = fm.get("scope").and_then(scalar_string) {
2007 let scope_ok =
2009 scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
2010 if !scope_ok {
2011 push(
2012 issues,
2013 Severity::Warning,
2014 codes::INDEX_WRONG_SCOPE,
2015 index_rel,
2016 fm_key_line(&yaml, "scope"),
2017 Some("scope".into()),
2018 format!(
2019 "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
2020 ),
2021 Some(format!("set `scope: {expected_scope}`")),
2022 vec![],
2023 );
2024 }
2025 }
2026 if let Some(expected) = expected_folder {
2028 if let Some(folder) = fm.get("folder").and_then(scalar_string) {
2029 if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
2030 push(
2031 issues,
2032 Severity::Warning,
2033 codes::INDEX_WRONG_SCOPE,
2034 index_rel,
2035 fm_key_line(&yaml, "folder"),
2036 Some("folder".into()),
2037 format!("index `folder: {folder}` doesn't match location `{expected}`"),
2038 Some(format!("set `folder: {expected}`")),
2039 vec![],
2040 );
2041 }
2042 }
2043 }
2044}
2045
2046fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2065 let mut prev: Option<DateTime<FixedOffset>> = None;
2066 for rel in log_files_chronological(store) {
2067 check_log_file(store, &rel, &mut prev, issues);
2068 }
2069}
2070
2071fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2075 let mut files: Vec<PathBuf> = Vec::new();
2076 let archive_dir = store.root.join("log");
2077 if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2078 let mut archives: Vec<PathBuf> = entries
2079 .flatten()
2080 .map(|e| e.path())
2081 .filter(|p| {
2082 p.is_file()
2083 && p.file_name()
2084 .and_then(|s| s.to_str())
2085 .and_then(|n| n.strip_suffix(".md"))
2086 .is_some_and(is_year_month_archive)
2087 })
2088 .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2089 .collect();
2090 archives.sort();
2092 files.extend(archives);
2093 }
2094 if store.root.join("log.md").is_file() {
2096 files.push(PathBuf::from("log.md"));
2097 }
2098 files
2099}
2100
2101fn check_log_file(
2105 store: &Store,
2106 log_rel: &Path,
2107 prev: &mut Option<DateTime<FixedOffset>>,
2108 issues: &mut Vec<Issue>,
2109) {
2110 let abs = store.root.join(log_rel);
2111 let Ok(text) = std::fs::read_to_string(&abs) else {
2112 return;
2113 };
2114
2115 for (i, line) in text.lines().enumerate() {
2116 if !line.starts_with("## [") {
2117 continue;
2118 }
2119 let line_no = (i + 1) as u32;
2120 match parse_log_header(line) {
2121 None => push(
2122 issues,
2123 Severity::Error,
2124 codes::LOG_BAD_TIMESTAMP,
2125 log_rel,
2126 Some(line_no),
2127 None,
2128 format!("log entry header has an unparseable timestamp: {line:?}"),
2129 Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2130 vec![],
2131 ),
2132 Some((ts, kind, _object)) => {
2133 if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2134 push(
2135 issues,
2136 Severity::Warning,
2137 codes::LOG_UNKNOWN_KIND,
2138 log_rel,
2139 Some(line_no),
2140 None,
2141 format!("log entry kind `{kind}` is not recognized"),
2142 Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2143 vec![],
2144 );
2145 }
2146 if let Some(p) = *prev {
2147 if ts < p {
2148 push(
2149 issues,
2150 Severity::Warning,
2151 codes::LOG_OUT_OF_ORDER,
2152 log_rel,
2153 Some(line_no),
2154 None,
2155 "log entry is older than the entry above it (possible rewrite)".into(),
2156 Some("append corrective entries; never reorder past ones".into()),
2157 vec![],
2158 );
2159 }
2160 }
2161 *prev = Some(ts);
2162 }
2163 }
2164 }
2165}
2166
#[derive(Debug)]
/// One `[[...]]` wiki-link occurrence found by the link collectors in this
/// file (`collect_line_links`, `extract_wiki_links`).
struct Link {
    /// Text inside the brackets, up to the first `|`, trimmed.
    target: String,
    /// Line number the collector attributed to the link.
    line: u32,
}
2177
2178fn store_marker_present(store: &Store) -> bool {
2182 let want = store.root.join("DB.md");
2183 if !want.is_file() {
2184 return false;
2185 }
2186 match std::fs::read_dir(&store.root) {
2188 Ok(entries) => entries
2189 .flatten()
2190 .any(|e| e.file_name().to_str() == Some("DB.md")),
2191 Err(_) => true, }
2193}
2194
2195fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2206 let rel = Path::new("DB.md");
2207 let abs = store.root.join("DB.md");
2208 let Ok(text) = std::fs::read_to_string(&abs) else {
2209 return; };
2211
2212 let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2213 push(
2217 issues,
2218 Severity::Error,
2219 codes::DB_MD_BAD_TYPE,
2220 rel,
2221 Some(1),
2222 Some("type".into()),
2223 "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2224 Some("add a `---` frontmatter block with `type: db-md`".into()),
2225 vec![],
2226 );
2227 for field in ["scope", "owner"] {
2228 push(
2229 issues,
2230 Severity::Error,
2231 codes::DB_MD_MISSING_FIELD,
2232 rel,
2233 Some(1),
2234 Some(field.into()),
2235 format!("DB.md frontmatter is missing required field `{field}`"),
2236 Some(format!("add `{field}:` to the DB.md frontmatter")),
2237 vec![],
2238 );
2239 }
2240 return;
2241 };
2242
2243 let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2246 Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2247 Ok(Value::Null) => Some(BTreeMap::new()),
2248 _ => None,
2249 };
2250
2251 match &fm {
2252 Some(map) => {
2253 let type_ = map.get("type").and_then(scalar_string);
2255 if type_.as_deref() != Some("db-md") {
2256 let (line, msg) = match &type_ {
2257 Some(t) => (
2258 fm_key_line(&fm_yaml, "type"),
2259 format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2260 ),
2261 None => (
2262 Some(1),
2263 "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2264 ),
2265 };
2266 push(
2267 issues,
2268 Severity::Error,
2269 codes::DB_MD_BAD_TYPE,
2270 rel,
2271 line,
2272 Some("type".into()),
2273 msg,
2274 Some("set `type: db-md` in the DB.md frontmatter".into()),
2275 vec![],
2276 );
2277 }
2278
2279 for field in ["scope", "owner"] {
2281 let present = map
2282 .get(field)
2283 .and_then(scalar_string)
2284 .map(|s| !s.trim().is_empty())
2285 .unwrap_or(false);
2286 if !present {
2287 push(
2288 issues,
2289 Severity::Error,
2290 codes::DB_MD_MISSING_FIELD,
2291 rel,
2292 fm_key_line_or_top(&fm_yaml, field),
2295 Some(field.into()),
2296 format!("DB.md frontmatter is missing required field `{field}`"),
2297 Some(format!("add `{field}:` to the DB.md frontmatter")),
2298 vec![],
2299 );
2300 }
2301 }
2302 }
2303 None => {
2304 push(
2307 issues,
2308 Severity::Error,
2309 codes::DB_MD_BAD_TYPE,
2310 rel,
2311 Some(1),
2312 Some("type".into()),
2313 "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2314 Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2315 vec![],
2316 );
2317 for field in ["scope", "owner"] {
2318 push(
2319 issues,
2320 Severity::Error,
2321 codes::DB_MD_MISSING_FIELD,
2322 rel,
2323 Some(1),
2324 Some(field.into()),
2325 format!("DB.md frontmatter is missing required field `{field}`"),
2326 Some(format!("add `{field}:` to the DB.md frontmatter")),
2327 vec![],
2328 );
2329 }
2330 }
2331 }
2332
2333 for section in crate::parser::extract_sections(&body) {
2347 if section.level != 2 {
2348 continue;
2349 }
2350 let name = section.heading.trim().to_ascii_lowercase();
2351 if matches!(
2352 name.as_str(),
2353 "agent instructions" | "policies" | "schemas" | "folders"
2354 ) {
2355 continue;
2356 }
2357 let file_line = fm_end_line + section.line;
2360 push(
2361 issues,
2362 Severity::Warning,
2363 codes::DB_MD_UNKNOWN_SECTION,
2364 rel,
2365 Some(file_line),
2366 None,
2367 format!(
2368 "DB.md has an unrecognized `## {}` section",
2369 section.heading.trim()
2370 ),
2371 Some(
2372 "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2373 `## Folders` — remove or rename this heading"
2374 .into(),
2375 ),
2376 vec![],
2377 );
2378 }
2379
2380 check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2385}
2386
2387fn check_db_md_schemas(
2394 store: &Store,
2395 rel: &Path,
2396 body: &str,
2397 fm_end_line: u32,
2398 issues: &mut Vec<Issue>,
2399) {
2400 if store.config.schemas.is_empty() {
2401 return;
2402 }
2403
2404 let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2409 let mut current_h2: Option<String> = None;
2410 for section in crate::parser::extract_sections(body) {
2411 match section.level {
2412 2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2413 3 if current_h2.as_deref() == Some("schemas") => {
2414 type_line
2417 .entry(section.heading.trim().to_string())
2418 .or_insert(fm_end_line + section.line);
2419 }
2420 _ => {}
2421 }
2422 }
2423
2424 for (type_name, schema) in &store.config.schemas {
2425 let line = type_line.get(type_name).copied();
2426 let mut seen: BTreeSet<String> = BTreeSet::new();
2427 for field in &schema.fields {
2428 let name = field.name.trim();
2429
2430 if name.is_empty() {
2434 push(
2435 issues,
2436 Severity::Warning,
2437 codes::DB_MD_SCHEMA_FIELD,
2438 rel,
2439 line,
2440 None,
2441 format!("`### {type_name}` has a schema field bullet with no field name"),
2442 Some(
2443 "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2444 .into(),
2445 ),
2446 vec![],
2447 );
2448 continue;
2449 }
2450
2451 if !seen.insert(name.to_string()) {
2455 push(
2456 issues,
2457 Severity::Warning,
2458 codes::DB_MD_SCHEMA_FIELD,
2459 rel,
2460 line,
2461 Some(name.to_string()),
2462 format!("`### {type_name}` declares field `{name}` more than once"),
2463 Some(
2464 "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2465 ),
2466 vec![],
2467 );
2468 }
2469
2470 for modifier in &field.unknown_modifiers {
2475 let modifier = modifier.trim();
2476 if modifier.is_empty() {
2477 continue;
2478 }
2479 push(
2480 issues,
2481 Severity::Info,
2482 codes::DB_MD_SCHEMA_FIELD,
2483 rel,
2484 line,
2485 Some(name.to_string()),
2486 format!(
2487 "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2488 ),
2489 Some(
2490 "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2491 .into(),
2492 ),
2493 vec![],
2494 );
2495 }
2496 }
2497 }
2498}
2499
2500fn not_a_store_issue(store: &Store) -> Issue {
2502 Issue {
2503 severity: Severity::Error,
2504 code: codes::NOT_A_STORE,
2505 file: store.root.clone(),
2506 line: None,
2507 key: None,
2508 message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2509 suggestion: Some("create a `DB.md` at the store root".into()),
2510 related: vec![],
2511 }
2512}
2513
2514fn is_content_file(rel: &Path) -> bool {
2517 if !is_safe_store_relative_path(rel) {
2523 return false;
2524 }
2525 let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2526 return false;
2527 };
2528 if !matches!(first, "sources" | "records") {
2529 return false;
2530 }
2531 let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2532 if matches!(name, "index.md" | "index.jsonl") {
2538 return false;
2539 }
2540 name.ends_with(".md")
2541}
2542
/// Returns `true` when `rel` is exactly `DB.md` or `log.md` — a single
/// normal path component with one of those names.
fn is_root_meta_file(rel: &Path) -> bool {
    let mut comps = rel.components();
    match (comps.next(), comps.next()) {
        // Exactly one component, and it must be a plain name.
        (Some(Component::Normal(only)), None) => {
            matches!(only.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2559
/// Returns `true` when the final component of `rel` is `index.md` or
/// `index.jsonl`, regardless of the directory it sits in.
fn is_index_catalog_file(rel: &Path) -> bool {
    rel.file_name()
        .and_then(|n| n.to_str())
        .is_some_and(|name| name == "index.md" || name == "index.jsonl")
}
2573
/// Splits a document into its YAML frontmatter and body.
///
/// The first line (after an optional UTF-8 BOM) must be `---`, ignoring
/// trailing whitespace; the frontmatter ends at the next such line.
///
/// Returns `(yaml, body, close_line)` where `yaml` holds the lines between
/// the delimiters, each terminated by `\n`; `body` holds the lines after the
/// closing delimiter joined by `\n` (no trailing newline); and `close_line`
/// is the 1-based line number of the closing delimiter. Returns `None` when
/// the opening or closing delimiter is missing.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    let all_lines: Vec<&str> = text.lines().collect();

    if all_lines.first()?.trim_end() != "---" {
        return None;
    }

    // Index (0-based) of the closing delimiter within `all_lines`.
    let close_idx = 1 + all_lines
        .iter()
        .skip(1)
        .position(|line| line.trim_end() == "---")?;

    let mut yaml = String::new();
    for line in &all_lines[1..close_idx] {
        yaml.push_str(line);
        yaml.push('\n');
    }
    let body = all_lines[close_idx + 1..].join("\n");

    Some((yaml, body, (close_idx + 1) as u32))
}
2610
2611fn read_summary(abs: &Path) -> Option<String> {
2613 let text = std::fs::read_to_string(abs).ok()?;
2614 let (yaml, _, _) = split_frontmatter(&text)?;
2615 let value: Value = serde_norway::from_str(&yaml).ok()?;
2616 if let Value::Mapping(m) = value {
2617 m.get(Value::String("summary".into()))
2618 .and_then(scalar_string)
2619 } else {
2620 None
2621 }
2622}
2623
2624fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2627 let mut out = BTreeMap::new();
2628 for (k, v) in map {
2629 if let Value::String(s) = k {
2630 out.insert(s.clone(), v.clone());
2631 }
2632 }
2633 out
2634}
2635
2636fn scalar_string(v: &Value) -> Option<String> {
2639 match v {
2640 Value::String(s) => Some(s.clone()),
2641 Value::Number(n) => Some(n.to_string()),
2642 Value::Bool(b) => Some(b.to_string()),
2643 _ => None,
2644 }
2645}
2646
2647fn is_empty_value(v: &Value) -> bool {
2654 match v {
2655 Value::Null => true,
2656 Value::Sequence(items) => items.is_empty(),
2657 Value::Mapping(map) => map.is_empty(),
2658 other => scalar_string(other)
2659 .map(|s| s.trim().is_empty())
2660 .unwrap_or(true),
2661 }
2662}
2663
2664fn is_flat_scalar_list(v: &Value) -> bool {
2667 match v {
2668 Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2669 _ => false,
2670 }
2671}
2672
2673fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2683 let mut out = Vec::new();
2684 for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2685 for link in links {
2686 out.push((key.clone(), link));
2687 }
2688 }
2689 out
2690}
2691
2692fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2696 for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2697 if k == key {
2698 return links;
2699 }
2700 }
2701 Vec::new()
2702}
2703
2704fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2708 for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2709 if k == key {
2710 return Some(value_text);
2711 }
2712 }
2713 None
2714}
2715
2716fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2723 let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2724 let mut current: Option<(String, String, Vec<Link>)> = None;
2725
2726 for (idx, raw_line) in fm_yaml.lines().enumerate() {
2727 let file_line = fm_start_line + idx as u32;
2728 let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2729 let trimmed = raw_line.trim();
2730
2731 let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2734 top_level_key(raw_line)
2735 } else {
2736 None
2737 };
2738
2739 if let Some((key, after)) = new_key {
2740 if let Some(done) = current.take() {
2741 blocks.push(done);
2742 }
2743 let mut links = Vec::new();
2744 collect_line_links(after, file_line, &mut links);
2745 current = Some((key, after.trim().to_string(), links));
2746 } else if let Some((_k, value_text, links)) = current.as_mut() {
2747 if !value_text.is_empty() {
2749 value_text.push('\n');
2750 }
2751 value_text.push_str(trimmed);
2752 collect_line_links(raw_line, file_line, links);
2753 }
2754 }
2755 if let Some(done) = current.take() {
2756 blocks.push(done);
2757 }
2758 blocks
2759}
2760
/// Splits `line` at its first `:` into a key and the untrimmed remainder.
///
/// The key is trimmed and must be non-empty and consist solely of
/// alphanumerics, `_` or `-`; otherwise (or when there is no `:`) the
/// result is `None`.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let (raw_key, rest) = line.split_once(':')?;
    let key = raw_key.trim();
    let well_formed = !key.is_empty()
        && key
            .chars()
            .all(|c| c.is_alphanumeric() || matches!(c, '_' | '-'));
    well_formed.then(|| (key.to_string(), rest))
}
2775
2776fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2779 let bytes = s.as_bytes();
2780 let mut i = 0;
2781 while i + 1 < bytes.len() {
2782 if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2783 if let Some(close) = s[i + 2..].find("]]") {
2784 let inner = &s[i + 2..i + 2 + close];
2785 let target = inner
2788 .trim_start_matches('[')
2789 .split('|')
2790 .next()
2791 .unwrap_or(inner)
2792 .trim()
2793 .to_string();
2794 if !target.is_empty() {
2795 links.push(Link {
2796 target,
2797 line: file_line,
2798 });
2799 }
2800 i = i + 2 + close + 2;
2801 continue;
2802 }
2803 }
2804 i += 1;
2805 }
2806}
2807
2808fn extract_wiki_links(body: &str) -> Vec<Link> {
2820 let mut out = Vec::new();
2821 let mut fence: Option<(u8, usize)> = None;
2822 for (idx, line) in body.lines().enumerate() {
2823 let content = line.trim_end_matches('\r');
2824 if let Some(f) = fence {
2825 if fence_closes(content, f) {
2829 fence = None;
2830 }
2831 continue;
2832 }
2833 if let Some(opened) = fence_opens(content) {
2834 fence = Some(opened);
2835 continue;
2836 }
2837 let line_no = (idx + 1) as u32;
2838 let bytes = line.as_bytes();
2839 let mut i = 0;
2840 while i + 1 < bytes.len() {
2841 if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2842 if let Some(close) = line[i + 2..].find("]]") {
2843 let inner = &line[i + 2..i + 2 + close];
2844 let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2845 if !target.is_empty() && !target.starts_with('[') {
2853 out.push(Link {
2854 target,
2855 line: line_no,
2856 });
2857 }
2858 i = i + 2 + close + 2;
2859 continue;
2860 }
2861 }
2862 i += 1;
2863 }
2864 }
2865 out
2866}
2867
/// Detects the opening line of a fenced code block.
///
/// The line may be indented by at most three spaces and must start with a
/// run of three or more backticks or tildes. For a backtick fence, the rest
/// of the line must not contain another backtick.
///
/// Returns the fence byte (`` ` `` or `~`) and the length of the run.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }

    let marker = match rest.as_bytes().first() {
        Some(&b) if b == b'`' || b == b'~' => b,
        _ => return None,
    };
    let run = rest.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }
    if marker == b'`' && rest[run..].contains('`') {
        return None;
    }
    Some((marker, run))
}
2893
/// Decides whether `line` closes the fence described by `fence`
/// (`(fence byte, opening run length)`).
///
/// A closing line is indented by at most three spaces, starts with a run of
/// the fence character at least as long as the opening run, and has nothing
/// but whitespace after that run.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    let indent = line.len() - rest.len();

    let after_run = rest.trim_start_matches(marker as char);
    let run = rest.len() - after_run.len();

    indent <= 3 && run >= open_len && after_run.trim().is_empty()
}
2911
2912fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
2929 let mut out = Vec::new();
2930 for line in fm_yaml.lines() {
2931 if line.starts_with(' ') || line.starts_with('\t') {
2933 continue;
2934 }
2935 let Some((key, rest)) = line.split_once(':') else {
2936 continue;
2937 };
2938 let key = key.trim();
2939 if key.is_empty()
2940 || key.starts_with('#')
2941 || key.starts_with('-')
2942 || !key
2943 .chars()
2944 .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
2945 {
2946 continue;
2947 }
2948 let rest = rest.trim();
2949 if !rest.starts_with('[') {
2952 continue;
2953 }
2954 if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
2959 let nested = items.iter().any(|item| match item {
2960 Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
2961 _ => false,
2962 });
2963 if nested {
2964 out.push(key.to_string());
2965 }
2966 }
2967 }
2968 out
2969}
2970
/// Returns `true` when `bare` starts with `sources/` or `records/` and has
/// at least one character after that first slash.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, rest)) => matches!(layer, "sources" | "records") && !rest.is_empty(),
        None => false,
    }
}
2979
/// Returns `true` when `path` can be joined onto the store root without
/// escaping it: no `..`, root or prefix components, and at least one normal
/// component. `.` components are tolerated but do not count as content.
fn is_safe_store_relative_path(path: &Path) -> bool {
    path.components()
        // The accumulator records whether a normal component has been seen;
        // `None` aborts the fold on the first forbidden component.
        .try_fold(false, |seen_normal, component| match component {
            Component::Normal(_) => Some(true),
            Component::CurDir => Some(seen_normal),
            Component::ParentDir | Component::RootDir | Component::Prefix(_) => None,
        })
        .unwrap_or(false)
}
2994
2995fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2996 let path = Path::new(bare);
2997 if !is_safe_store_relative_path(path) {
2998 return None;
2999 }
3000 Some(PathBuf::from(format!("{bare}.md")))
3001}
3002
/// Outcome of resolving a link target against the store, as produced by
/// `resolve_wiki_target`.
enum TargetResolution {
    /// The target is a safe path and a matching file was found.
    Exists,
    /// The target is a safe path but no matching file was found.
    Missing,
    /// The target is not a safe store-relative path; no lookup was attempted.
    Unsafe,
}
3012
3013fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
3022 if !is_safe_store_relative_path(Path::new(bare)) {
3026 return TargetResolution::Unsafe;
3027 }
3028 match resolved_target_abs(store, bare) {
3029 Some(_) => TargetResolution::Exists,
3030 None => TargetResolution::Missing,
3031 }
3032}
3033
3034fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
3040 if !is_safe_store_relative_path(Path::new(bare)) {
3041 return None;
3042 }
3043 let literal = store.root.join(bare);
3046 if literal.is_file() {
3047 return Some(literal);
3048 }
3049 let with_md = store.root.join(format!("{bare}.md"));
3051 if with_md.is_file() {
3052 return Some(with_md);
3053 }
3054 None
3055}
3056
/// Returns `true` when `bare` equals `prefix` or lies beneath it, treating
/// `/` as the separator. Trailing slashes on `prefix` are ignored, and the
/// match respects component boundaries (`a/bc` is not under `a/b`).
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    bare.strip_prefix(prefix.trim_end_matches('/'))
        .is_some_and(|tail| tail.is_empty() || tail.starts_with('/'))
}
3062
/// Returns the type folder (`<layer>/<folder>`) that contains `rel`.
///
/// `rel` needs at least three UTF-8 components and its first must be
/// `sources` or `records`; otherwise the result is `None`. Components that
/// are not valid UTF-8 are skipped before counting.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|s| s.to_str());
    let layer = parts.next()?;
    let folder = parts.next()?;
    // A third component must exist: the file (or deeper path) inside the folder.
    parts.next()?;

    if layer != "sources" && layer != "records" {
        return None;
    }
    Some(Path::new(layer).join(folder))
}
3076
/// Returns the layer directory (`sources` or `records`) for a file that sits
/// directly inside it, i.e. a path of exactly two UTF-8 components. Any
/// other shape yields `None`. Components that are not valid UTF-8 are
/// skipped before counting.
fn loose_layer_dir(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|s| s.to_str());
    match (parts.next(), parts.next(), parts.next()) {
        (Some(layer @ ("sources" | "records")), Some(_), None) => Some(PathBuf::from(layer)),
        _ => None,
    }
}
3088
3089fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3104 let mut out = Vec::new();
3105 for layer in ["sources", "records"] {
3106 let base = root.join(layer);
3107 if !base.is_dir() {
3108 continue;
3109 }
3110 for entry in walkdir::WalkDir::new(&base)
3111 .follow_links(true)
3122 .into_iter()
3123 .filter_entry(|e| {
3124 let name = e.file_name().to_str().unwrap_or("");
3125 !name.starts_with('.')
3126 })
3127 .flatten()
3128 {
3129 if !entry.file_type().is_file() {
3130 continue;
3131 }
3132 let name = entry.file_name().to_str().unwrap_or("");
3133 if name.ends_with(".md") && name != "index.md" {
3134 if let Ok(rel) = entry.path().strip_prefix(root) {
3135 out.push(rel.to_path_buf());
3136 }
3137 }
3138 }
3139 }
3140 out.sort();
3141 out
3142}
3143
3144fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3151 let mut out = Vec::new();
3152 if root.join("index.md").is_file() {
3153 out.push(PathBuf::from("index.md"));
3154 }
3155 for layer in ["sources", "records"] {
3156 let base = root.join(layer);
3157 if !base.is_dir() {
3158 continue;
3159 }
3160 for entry in walkdir::WalkDir::new(&base)
3161 .follow_links(true)
3172 .into_iter()
3173 .filter_entry(|e| {
3174 let name = e.file_name().to_str().unwrap_or("");
3175 !name.starts_with('.')
3176 })
3177 .flatten()
3178 {
3179 if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3180 if let Ok(rel) = entry.path().strip_prefix(root) {
3181 out.push(rel.to_path_buf());
3182 }
3183 }
3184 }
3185 }
3186 out.sort();
3187 out
3188}
3189
/// One bullet parsed from an `index.md` by `parse_index_entries`.
struct IndexEntry {
    /// Text inside the bullet's first `[[...]]`, cut at the first `|` and trimmed.
    target: String,
    /// Summary text following the link, as extracted by
    /// `extract_index_entry_summary`; `None` when the bullet carries none.
    summary_text: Option<String>,
    /// 1-based line number of the bullet within the index text.
    line: u32,
}
3197
3198fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3203 let mut out = Vec::new();
3204 let mut in_more = false;
3205 for (idx, line) in text.lines().enumerate() {
3206 let trimmed = line.trim_start();
3207 if trimmed.starts_with("## More") {
3208 in_more = true;
3209 continue;
3210 }
3211 if in_more {
3212 continue;
3213 }
3214 if !trimmed.starts_with("- ") {
3215 continue;
3216 }
3217 let Some(open) = trimmed.find("[[") else {
3219 continue;
3220 };
3221 let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3222 continue;
3223 };
3224 let inner = &trimmed[open + 2..open + 2 + close_rel];
3225 let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3226
3227 let after = &trimmed[open + 2 + close_rel + 2..];
3229 let summary_text = extract_index_entry_summary(after);
3230
3231 out.push(IndexEntry {
3232 target,
3233 summary_text,
3234 line: (idx + 1) as u32,
3235 });
3236 }
3237 out
3238}
3239
3240fn extract_index_entry_summary(after: &str) -> Option<String> {
3246 let mut s = after.trim();
3247 if s.starts_with('(') {
3249 if let Some(close) = s.find(')') {
3250 s = s[close + 1..].trim_start();
3251 }
3252 }
3253 let s = if let Some(rest) = s.strip_prefix('—') {
3255 rest.trim()
3256 } else if let Some(rest) = s.strip_prefix('-') {
3257 rest.trim()
3258 } else {
3259 return None;
3260 };
3261 if s.is_empty() {
3262 return None;
3263 }
3264 let s = match s.rsplit_once(" · ") {
3279 Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3280 _ => s,
3281 };
3282 Some(s.to_string())
3283}
3284
/// True when `s` holds at least one whitespace-separated token and every
/// token is a hashtag: it starts with `#` and is at least two bytes long.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3299
3300fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3304 let rest = line.strip_prefix("## [")?;
3305 let close = rest.find(']')?;
3306 let ts_str = &rest[..close];
3307 let tail = rest[close + 1..].trim();
3308
3309 let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3312 let offset = FixedOffset::east_opt(0)?;
3313 let ts = naive.and_local_timezone(offset).single()?;
3314
3315 let (kind, object) = match tail.split_once('|') {
3317 Some((k, o)) => {
3318 let o = o.trim();
3319 (
3320 k.trim().to_string(),
3321 if o.is_empty() {
3322 None
3323 } else {
3324 Some(o.to_string())
3325 },
3326 )
3327 }
3328 None => (tail.to_string(), None),
3329 };
3330 if kind.is_empty() {
3331 return None;
3332 }
3333 Some((ts, kind, object))
3334}
3335
3336fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3346 let mut files = vec![store.root.join("log.md")];
3347 let archive_dir = store.root.join("log");
3348 if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3349 let mut archives: Vec<PathBuf> = entries
3350 .flatten()
3351 .map(|e| e.path())
3352 .filter(|p| {
3353 p.is_file()
3354 && p.file_name()
3355 .and_then(|s| s.to_str())
3356 .and_then(|n| n.strip_suffix(".md"))
3357 .is_some_and(is_year_month_archive)
3358 })
3359 .collect();
3360 archives.sort();
3364 files.extend(archives);
3365 }
3366 files
3367}
3368
/// True when `s` is exactly seven bytes shaped `DDDD-DD`: four ASCII digits,
/// a hyphen, then two ASCII digits. The digit values are not range-checked.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|b| b.is_ascii_digit()),
        _ => false,
    }
}
3378
3379fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3385 let mut latest: Option<DateTime<FixedOffset>> = None;
3386 for file in log_files_for_working_set(store) {
3387 let Ok(text) = std::fs::read_to_string(&file) else {
3388 continue;
3389 };
3390 for line in text.lines() {
3391 if !line.starts_with("## [") {
3392 continue;
3393 }
3394 if let Some((ts, kind, _)) = parse_log_header(line) {
3395 if kind == "validate" {
3396 latest = Some(match latest {
3397 Some(p) if p >= ts => p,
3398 _ => ts,
3399 });
3400 }
3401 }
3402 }
3403 }
3404 latest
3405}
3406
3407fn changed_objects_since(
3418 store: &Store,
3419 cutoff: Option<DateTime<FixedOffset>>,
3420) -> BTreeSet<PathBuf> {
3421 let mut out = BTreeSet::new();
3422 for file in log_files_for_working_set(store) {
3423 let Ok(text) = std::fs::read_to_string(&file) else {
3424 continue;
3425 };
3426 for line in text.lines() {
3427 if !line.starts_with("## [") {
3428 continue;
3429 }
3430 let Some((ts, kind, object)) = parse_log_header(line) else {
3431 continue;
3432 };
3433 if let Some(c) = cutoff {
3434 if ts < c {
3435 continue;
3436 }
3437 }
3438 if !matches!(
3439 kind.as_str(),
3440 "create" | "update" | "ingest" | "rename" | "delete" | "link"
3441 ) {
3442 continue;
3443 }
3444 if let Some(obj) = object {
3445 let bare = obj
3447 .trim()
3448 .trim_start_matches("[[")
3449 .trim_end_matches("]]")
3450 .split('|')
3451 .next()
3452 .unwrap_or("")
3453 .trim()
3454 .trim_end_matches(".md")
3455 .to_string();
3456 if bare.is_empty() {
3457 continue;
3458 }
3459 if let Some(rel) = safe_md_target_rel(&bare) {
3469 out.insert(rel);
3470 }
3471 }
3472 }
3473 }
3474 out
3475}
3476
/// A `derived_from` target whose type is on the store's ignore list, as
/// reported by `derived_from_ignored_type`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The offending target, exactly as supplied by the caller.
    pub target: String,
    /// The `type` read from the target's frontmatter; it is a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3490
3491pub fn derived_from_ignored_type<I, S>(
3505 store: &Store,
3506 meta_type: &str,
3507 derived_from_targets: I,
3508) -> Option<DerivedFromIgnored>
3509where
3510 I: IntoIterator<Item = S>,
3511 S: AsRef<str>,
3512{
3513 if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3514 return None;
3515 }
3516 for target in derived_from_targets {
3517 let target = target.as_ref();
3518 if let Some(target_type) = link_target_type(store, target) {
3519 if store.config.ignored_types.contains(&target_type) {
3520 return Some(DerivedFromIgnored {
3521 target: target.to_string(),
3522 target_type,
3523 });
3524 }
3525 }
3526 }
3527 None
3528}
3529
3530fn link_target_type(store: &Store, target: &str) -> Option<String> {
3532 let bare = target.trim_end_matches(".md");
3533 let abs = store.root.join(safe_md_target_rel(bare)?);
3534 let text = std::fs::read_to_string(&abs).ok()?;
3535 let (yaml, _, _) = split_frontmatter(&text)?;
3536 let value: Value = serde_norway::from_str(&yaml).ok()?;
3537 if let Value::Mapping(m) = value {
3538 m.get(Value::String("type".into())).and_then(scalar_string)
3539 } else {
3540 None
3541 }
3542}
3543
3544fn is_iso8601(s: &str) -> bool {
3549 DateTime::parse_from_rfc3339(s.trim()).is_ok()
3550}
3551
3552fn is_iso8601_date_or_datetime(s: &str) -> bool {
3556 let s = s.trim();
3557 if DateTime::parse_from_rfc3339(s).is_ok() {
3558 return true;
3559 }
3560 chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3561}
3562
/// Loose shape check for an e-mail address (`<local>@<domain>`).
///
/// After trimming, the value must contain exactly one `@`, a non-empty
/// local part, and a domain that contains a `.` but neither starts nor ends
/// with one. No whitespace of any kind is allowed inside the value, so a
/// multi-line or tab-separated scalar is never mistaken for an address.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    !local.is_empty()
        && !domain.contains('@')
        && domain.contains('.')
        && !domain.starts_with('.')
        && !domain.ends_with('.')
        // Covers both halves; rejects tabs and newlines as well as spaces.
        && !s.contains(char::is_whitespace)
}
3580
3581fn is_currency(s: &str) -> bool {
3588 let mut t = s.trim();
3589 for sym in ["$", "€", "£", "¥"] {
3591 if let Some(rest) = t.strip_prefix(sym) {
3592 t = rest.trim_start();
3593 break;
3594 }
3595 }
3596 if let Some((head, rest)) = t.split_once(char::is_whitespace) {
3600 if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
3601 t = rest.trim_start();
3602 }
3603 }
3604
3605 let cleaned: String = t.chars().filter(|c| *c != ',').collect();
3606 is_plain_amount(cleaned.trim())
3607}
3608
/// True for an optionally signed decimal number: one or more ASCII digits,
/// optionally followed by `.` and one or two ASCII digits.
fn is_plain_amount(s: &str) -> bool {
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    let unsigned = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (whole, fraction) = match unsigned.split_once('.') {
        Some((whole, fraction)) => (whole, Some(fraction)),
        None => (unsigned, None),
    };
    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    fraction.map_or(true, |f| matches!(f.len(), 1 | 2) && all_digits(f))
}
3625
/// True when `s`, ignoring surrounding whitespace, starts with `http://` or
/// `https://` (lowercase) and has at least one character after the scheme.
fn is_url(s: &str) -> bool {
    let candidate = s.trim();
    ["http://", "https://"].iter().any(|scheme| {
        candidate
            .strip_prefix(*scheme)
            .is_some_and(|rest| !rest.is_empty())
    })
}
3640
3641fn shape_suggestion(shape: Shape) -> String {
3643 match shape {
3644 Shape::String => "use a scalar string".into(),
3645 Shape::Int => "use an integer".into(),
3646 Shape::Bool => "use `true` or `false`".into(),
3647 Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3648 Shape::Email => "use a `<local>@<domain>` address".into(),
3649 Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3650 Shape::Url => "use an http(s) URL".into(),
3651 }
3652}
3653
3654fn short_form_suggestion(bare: &str) -> Option<String> {
3657 Some(format!(
3658 "use a full store-relative path, e.g. [[records/contacts/{}]]",
3659 slugish(bare)
3660 ))
3661}
3662
/// Produces a rough slug: trims and lowercases `s`, turns each whitespace
/// character into `-`, and drops everything that is not alphanumeric,
/// `-`, `/`, or `_`.
fn slugish(s: &str) -> String {
    let lowered = s.trim().to_lowercase();
    let mut slug = String::with_capacity(lowered.len());
    for ch in lowered.chars() {
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3672
3673fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3679 use crate::assets;
3680
3681 let manifest_rel = Path::new(assets::MANIFEST_FILE);
3682 let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3683
3684 let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3686 if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3687 for (i, line) in text.lines().enumerate() {
3688 if line.trim().is_empty() {
3689 continue;
3690 }
3691 match serde_json::from_str::<assets::AssetRecord>(line) {
3692 Ok(rec) => {
3693 manifest.insert(rec.path.clone(), rec);
3694 }
3695 Err(e) => push(
3696 issues,
3697 Severity::Error,
3698 codes::ASSET_MANIFEST_MALFORMED,
3699 manifest_rel,
3700 Some((i as u32) + 1),
3701 None,
3702 format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3703 Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3704 vec![],
3705 ),
3706 }
3707 }
3708 }
3709
3710 let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3713 for (rel, p) in parsed {
3714 let Some(map) = &p.fm else {
3715 continue;
3716 };
3717 for decl in assets::declarations_from_yaml_map(map) {
3718 let norm = match assets::normalize_asset_path(&decl.path) {
3719 Ok(n) => n,
3720 Err(_) => continue, };
3722 declared.insert(norm.clone());
3723 let is_md = Path::new(&norm)
3724 .extension()
3725 .and_then(|e| e.to_str())
3726 .map(|e| e.eq_ignore_ascii_case("md"))
3727 .unwrap_or(false);
3728 if is_md {
3729 push(
3730 issues,
3731 Severity::Warning,
3732 codes::ASSET_PATH_IS_CONTENT,
3733 rel,
3734 None,
3735 Some("asset".to_string()),
3736 format!("asset path `{norm}` points at a markdown content file"),
3737 Some("assets are raw binaries; reference a non-markdown path".to_string()),
3738 vec![PathBuf::from(&norm)],
3739 );
3740 }
3741 if !manifest.contains_key(&norm) {
3742 push(
3743 issues,
3744 Severity::Error,
3745 codes::ASSET_UNDECLARED,
3746 rel,
3747 None,
3748 Some("asset".to_string()),
3749 format!(
3750 "references asset `{norm}` with no record in {}",
3751 assets::MANIFEST_FILE
3752 ),
3753 Some("run `dbmd assets scan` to catalog it".to_string()),
3754 vec![PathBuf::from(&norm)],
3755 );
3756 }
3757 }
3758 }
3759
3760 for (path, rec) in &manifest {
3762 for w in &rec.wrappers {
3763 if !store.root.join(w).is_file() {
3764 push(
3765 issues,
3766 Severity::Error,
3767 codes::ASSET_WRAPPER_BROKEN,
3768 Path::new(path),
3769 None,
3770 None,
3771 format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3772 Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3773 vec![PathBuf::from(w)],
3774 );
3775 }
3776 }
3777 if !declared.contains(path) {
3778 push(
3779 issues,
3780 Severity::Warning,
3781 codes::ASSET_MANIFEST_ORPHAN,
3782 Path::new(path),
3783 None,
3784 None,
3785 format!(
3786 "`{path}` is in {} but no wrapper references it",
3787 assets::MANIFEST_FILE
3788 ),
3789 Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
3790 vec![],
3791 );
3792 }
3793 }
3794}
3795
3796#[allow(clippy::too_many_arguments)]
3798fn push(
3799 issues: &mut Vec<Issue>,
3800 severity: Severity,
3801 code: &'static str,
3802 file: &Path,
3803 line: Option<u32>,
3804 key: Option<String>,
3805 message: String,
3806 suggestion: Option<String>,
3807 related: Vec<PathBuf>,
3808) {
3809 issues.push(Issue {
3810 severity,
3811 code,
3812 file: file.to_path_buf(),
3813 line,
3814 key,
3815 message,
3816 suggestion,
3817 related,
3818 });
3819}
3820
/// Finds the file line on which top-level frontmatter key `key` appears.
///
/// Only an unindented line of the form `key:...` matches. The result is the
/// index within `fm_yaml` plus two, i.e. 1-based with one extra line of
/// offset (presumably the opening `---` fence — confirm against callers).
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml
        .lines()
        .position(|line| {
            line.starts_with(key)
                && line
                    .trim_start()
                    .strip_prefix(key)
                    .is_some_and(|rest| rest.starts_with(':'))
        })
        .map(|i| (i as u32) + 2)
}
3836
3837fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3843 fm_key_line(fm_yaml, key).or(Some(1))
3844}
3845
3846fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3849 a.file
3850 .cmp(&b.file)
3851 .then(a.line.cmp(&b.line))
3852 .then(a.code.cmp(b.code))
3853 .then(a.key.cmp(&b.key))
3854}
3855
3856#[cfg(test)]
3861mod tests {
3862 use super::*;
3863 use crate::parser::{Config, FieldSpec};
3864 use std::fs;
3865 use tempfile::TempDir;
3866
    /// A U+FEFF byte-order mark before the opening `---` must not stop
    /// `split_frontmatter` from recognising the block; the YAML, body and
    /// closing-fence line are the same as they would be without the BOM.
    #[test]
    fn split_frontmatter_tolerates_leading_bom() {
        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
        let parsed = split_frontmatter(text);
        assert!(
            parsed.is_some(),
            "a leading BOM must not hide frontmatter from validate"
        );
        let (yaml, body, close_line) = parsed.unwrap();
        assert_eq!(yaml, "type: contact\nsummary: hi\n");
        assert_eq!(body, "body");
        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
    }
3884
    /// Throwaway on-disk store used by the tests: a temporary directory plus
    /// the `Config` given to every `Store` built from it.
    struct Fixture {
        /// Temporary directory that serves as the store root.
        dir: TempDir,
        /// Configuration cloned into each `Store` returned by `store()`.
        config: Config,
    }
3891
3892 impl Fixture {
3893 fn new() -> Self {
3898 let dir = TempDir::new().unwrap();
3899 fs::write(
3900 dir.path().join("DB.md"),
3901 "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3902 )
3903 .unwrap();
3904 for layer in ["sources", "records"] {
3905 fs::create_dir_all(dir.path().join(layer)).unwrap();
3906 }
3907 Fixture {
3908 dir,
3909 config: Config::default(),
3910 }
3911 }
3912
3913 fn bare() -> Self {
3915 let dir = TempDir::new().unwrap();
3916 Fixture {
3917 dir,
3918 config: Config::default(),
3919 }
3920 }
3921
3922 fn write(&self, rel: &str, contents: &str) {
3924 let abs = self.dir.path().join(rel);
3925 fs::create_dir_all(abs.parent().unwrap()).unwrap();
3926 fs::write(abs, contents).unwrap();
3927 }
3928
3929 fn store(&self) -> Store {
3930 Store {
3931 root: self.dir.path().to_path_buf(),
3932 config: self.config.clone(),
3933 }
3934 }
3935
3936 fn store_all(&self) -> Vec<Issue> {
3937 validate_all(&self.store()).unwrap()
3938 }
3939
3940 fn rebuild_indexes(&self) {
3947 crate::index::Index::rebuild_all(&self.store()).unwrap();
3948 }
3949 }
3950
3951 fn has(issues: &[Issue], code: &str) -> bool {
3953 issues.iter().any(|i| i.code == code)
3954 }
3955
3956 fn count(issues: &[Issue], code: &str) -> usize {
3958 issues.iter().filter(|i| i.code == code).count()
3959 }
3960
3961 fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3963 issues
3964 .iter()
3965 .find(|i| i.code == code)
3966 .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3967 }
3968
    /// Renders a `contact` document with `type`, `created`, `updated`,
    /// a quoted `summary` set to the given text, and `name`, followed by a
    /// `# A` heading. `summary` is interpolated verbatim inside double
    /// quotes, so it must not itself contain a double quote.
    fn valid_contact(summary: &str) -> String {
        format!(
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
        )
    }
3975
    /// A directory without `DB.md` yields exactly one issue from
    /// `validate_all`: an error-severity `NOT_A_STORE`.
    #[test]
    fn not_a_store_when_db_md_absent() {
        let fx = Fixture::bare();
        let issues = fx.store_all();
        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
        assert_eq!(issues[0].code, codes::NOT_A_STORE);
        assert!(issues[0].is_error());
    }
3986
    /// `validate_working_set` reports `NOT_A_STORE` for a directory without
    /// `DB.md`, just as the full validation does.
    #[test]
    fn working_set_also_reports_not_a_store() {
        let fx = Fixture::bare();
        let issues = validate_working_set(&fx.store(), None).unwrap();
        assert!(has(&issues, codes::NOT_A_STORE));
    }
3993
    /// A store holding one valid contact, with its indexes rebuilt, produces
    /// no issues at all.
    #[test]
    fn clean_store_has_no_issues() {
        let fx = Fixture::new();
        fx.write("records/contacts/a.md", &valid_contact("A contact"));
        // Indexes are regenerated before validating the store.
        fx.rebuild_indexes();
        let issues = fx.store_all();
        assert!(
            issues.is_empty(),
            "expected a clean store, got: {issues:#?}"
        );
    }
4008
    /// `meta-type` accepts only `fact`, `operational` and `conclusion` (or
    /// being absent). Any other value — an unknown scalar, a differently
    /// cased one, a list, or a mapping — raises `FM_BAD_META_TYPE`.
    #[test]
    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
        let fx = Fixture::new();
        // Renders a profile whose `meta-type` is the raw YAML fragment `mt`.
        let body = |mt: &str| {
            format!(
                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
            )
        };

        // Accepted values: the same file is overwritten for each one.
        for ok in ["fact", "operational", "conclusion"] {
            fx.write("records/profiles/ok.md", &body(ok));
            let issues = validate_working_set(&fx.store(), None).unwrap();
            assert!(
                !has(&issues, codes::FM_BAD_META_TYPE),
                "`meta-type: {ok}` must be accepted; got {issues:#?}"
            );
        }
        fx.write(
            "records/profiles/absent.md",
            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
        );
        assert!(
            !has(
                &validate_working_set(&fx.store(), None).unwrap(),
                codes::FM_BAD_META_TYPE
            ),
            "an absent meta-type is the default `fact` and must be accepted"
        );

        // Rejected values: each gets a fresh fixture so results don't mix.
        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
            let fx2 = Fixture::new();
            fx2.write("records/profiles/bad.md", &body(bad));
            let issues = validate_working_set(&fx2.store(), None).unwrap();
            assert!(
                has(&issues, codes::FM_BAD_META_TYPE),
                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
            );
        }
    }
4057
    /// The default fixture `DB.md` raises none of the `DB_MD_BAD_TYPE`,
    /// `DB_MD_MISSING_FIELD` or `DB_MD_UNKNOWN_SECTION` issues.
    #[test]
    fn valid_db_md_emits_no_structure_issue() {
        let fx = Fixture::new();
        let issues = fx.store_all();
        assert!(
            !has(&issues, codes::DB_MD_BAD_TYPE)
                && !has(&issues, codes::DB_MD_MISSING_FIELD)
                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
        );
    }
4074
    /// A `DB.md` whose `type` is not `db-md` raises an error-severity
    /// `DB_MD_BAD_TYPE` keyed on `type` and anchored to that key's line.
    #[test]
    fn db_md_wrong_type_is_error() {
        let fx = Fixture::new();
        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
        let issues = fx.store_all();
        let i = find(&issues, codes::DB_MD_BAD_TYPE);
        assert!(i.is_error());
        assert_eq!(i.file, PathBuf::from("DB.md"));
        assert_eq!(i.key.as_deref(), Some("type"));
        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
    }
4089
    /// When `DB.md` lacks both `scope` and `owner`, each one gets its own
    /// error-severity `DB_MD_MISSING_FIELD`, anchored to line 1.
    #[test]
    fn db_md_missing_scope_and_owner_each_report() {
        let fx = Fixture::new();
        fx.write("DB.md", "---\ntype: db-md\n---\n");
        let issues = fx.store_all();
        assert_eq!(
            count(&issues, codes::DB_MD_MISSING_FIELD),
            2,
            "both scope and owner absent → two issues: {issues:#?}"
        );
        // The two issues must be keyed on the two distinct missing fields.
        let keys: BTreeSet<Option<String>> = issues
            .iter()
            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
            .map(|i| i.key.clone())
            .collect();
        assert_eq!(
            keys,
            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
            "one issue keyed on each missing field"
        );
        for i in issues
            .iter()
            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
        {
            assert!(i.is_error());
            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
        }
    }
4120
    /// A required `DB.md` field that is present but set to an empty string
    /// counts as missing, and the issue points at the field's own line.
    #[test]
    fn db_md_blank_required_field_is_missing() {
        let fx = Fixture::new();
        fx.write(
            "DB.md",
            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
        );
        let issues = fx.store_all();
        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
        assert_eq!(i.key.as_deref(), Some("owner"));
        assert_eq!(
            i.line,
            Some(4),
            "a present-but-empty field anchors to its line"
        );
        assert!(
            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
            "scope is present and non-empty → only owner reported"
        );
    }
4144
    /// An unrecognized `##` section in `DB.md` (here `Glossary`) raises one
    /// warning-severity `DB_MD_UNKNOWN_SECTION` on the heading's line, while
    /// the `Agent instructions` section next to it is not flagged.
    #[test]
    fn db_md_unknown_section_is_warning() {
        let fx = Fixture::new();
        fx.write(
            "DB.md",
            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
        );
        let issues = fx.store_all();
        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
        assert!(!i.is_error(), "unknown section is a warning, not an error");
        assert_eq!(i.severity, Severity::Warning);
        assert_eq!(
            i.line,
            Some(11),
            "anchors to the `## Glossary` heading line"
        );
        assert!(
            i.message.contains("Glossary"),
            "the message names the offending section: {}",
            i.message
        );
        assert_eq!(
            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
            1,
            "only the unrecognized section is flagged: {issues:#?}"
        );
    }
4178
    /// A `DB.md` with no frontmatter block raises `DB_MD_BAD_TYPE` and two
    /// `DB_MD_MISSING_FIELD` issues.
    #[test]
    fn db_md_no_frontmatter_reports_type_and_both_fields() {
        let fx = Fixture::new();
        fx.write("DB.md", "# just a heading, no frontmatter\n");
        let issues = fx.store_all();
        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
    }
4189
    /// A content file whose frontmatter has no `type` key raises an
    /// error-severity `FM_MISSING_TYPE`.
    #[test]
    fn missing_type_is_error() {
        let fx = Fixture::new();
        fx.write(
            "records/contacts/a.md",
            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
        );
        let issues = fx.store_all();
        assert!(has(&issues, codes::FM_MISSING_TYPE));
        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
    }
4203
    /// A content file without `created` and `updated` raises error-severity
    /// `FM_MISSING_CREATED` and `FM_MISSING_UPDATED`, each keyed on the
    /// field it concerns.
    #[test]
    fn missing_universal_timestamps_are_errors_on_content_files() {
        let fx = Fixture::new();
        fx.write(
            "records/contacts/a.md",
            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
        );
        let issues = fx.store_all();

        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
        assert_eq!(missing_created.key.as_deref(), Some("created"));
        assert!(missing_created.is_error());

        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
        assert!(missing_updated.is_error());
    }
4221
    /// The default fixture, whose `DB.md` has no `created`/`updated`, raises
    /// neither `FM_MISSING_CREATED` nor `FM_MISSING_UPDATED`.
    #[test]
    fn meta_files_do_not_require_universal_timestamps() {
        let fx = Fixture::new();
        let issues = fx.store_all();

        assert!(
            !has(&issues, codes::FM_MISSING_CREATED),
            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
        );
        assert!(
            !has(&issues, codes::FM_MISSING_UPDATED),
            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
        );
    }
4236
    /// A content file with no frontmatter block at all still raises both
    /// `FM_MISSING_TYPE` and `SUMMARY_MISSING`.
    #[test]
    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
        let fx = Fixture::new();
        fx.write(
            "records/profiles/a.md",
            "# Just a heading\n\nNo frontmatter here.\n",
        );
        let issues = fx.store_all();
        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
    }
4248
    /// A content file whose frontmatter block is present but empty raises
    /// both `FM_MISSING_TYPE` and `SUMMARY_MISSING`.
    #[test]
    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
        let fx = Fixture::new();
        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
        let issues = fx.store_all();
        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
    }
4257
    /// Frontmatter that is not valid YAML raises an error-severity
    /// `FM_MALFORMED_YAML` with a non-empty suggestion, and no
    /// `SUMMARY_MISSING` is reported for the same file.
    #[test]
    fn malformed_yaml_is_error_and_suppresses_field_checks() {
        let fx = Fixture::new();
        fx.write(
            "records/contacts/a.md",
            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
        );
        let issues = fx.store_all();
        let issue = find(&issues, codes::FM_MALFORMED_YAML);
        assert!(issue.is_error());
        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
        assert!(
            !has(&issues, codes::SUMMARY_MISSING),
            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
        );
    }
4277
    /// A `created` value that is not a timestamp raises an error-severity
    /// `FM_BAD_TIMESTAMP` keyed on `created`.
    #[test]
    fn bad_created_timestamp_is_error() {
        let fx = Fixture::new();
        fx.write(
            "records/contacts/a.md",
            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
        );
        let issues = fx.store_all();
        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
        assert_eq!(issue.key.as_deref(), Some("created"));
        assert!(issue.is_error());
    }
4290
    /// A date-only `created` raises exactly one `FM_BAD_TIMESTAMP` keyed on
    /// `created`, while a date-only `last_touch` on the same contact raises
    /// none.
    #[test]
    fn date_only_created_is_rejected_but_type_date_field_accepted() {
        let fx = Fixture::new();
        fx.write(
            "records/contacts/a.md",
            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
        );
        let issues = fx.store_all();
        let created_issues: Vec<_> = issues
            .iter()
            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
            .collect();
        assert_eq!(
            created_issues.len(),
            1,
            "date-only `created` must fail: {issues:#?}"
        );
        assert!(
            !issues.iter().any(
                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
            ),
            "date-only `last_touch` is valid: {issues:#?}"
        );
    }
4317
    /// Three profiles exercise the summary checks in one store: an absent
    /// `summary` (`SUMMARY_MISSING`, reported on that file), a
    /// whitespace-only one (`SUMMARY_EMPTY`), and a 201-character one
    /// (`SUMMARY_TOO_LONG`, at warning severity).
    #[test]
    fn summary_missing_empty_multiline_toolong() {
        let fx = Fixture::new();
        fx.write(
            "records/profiles/missing.md",
            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
        );
        fx.write(
            "records/profiles/empty.md",
            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
        );
        // One character past the 200-character bound.
        let long = "x".repeat(201);
        fx.write(
            "records/profiles/long.md",
            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
        );
        let issues = fx.store_all();
        assert!(has(&issues, codes::SUMMARY_MISSING));
        assert_eq!(
            find(&issues, codes::SUMMARY_MISSING).file,
            PathBuf::from("records/profiles/missing.md")
        );
        assert!(has(&issues, codes::SUMMARY_EMPTY));
        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
        assert_eq!(
            find(&issues, codes::SUMMARY_TOO_LONG).severity,
            Severity::Warning
        );
    }
4349
    /// A `summary` written as a YAML literal block scalar (`|`) spanning two
    /// lines raises `SUMMARY_MULTILINE`.
    #[test]
    fn summary_multiline_via_yaml_block_scalar() {
        let fx = Fixture::new();
        fx.write(
            "records/profiles/a.md",
            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
        );
        let issues = fx.store_all();
        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
    }
4361
    /// A summary of exactly 200 characters does not raise
    /// `SUMMARY_TOO_LONG`; together with the 201-character case this pins
    /// the bound as inclusive.
    #[test]
    fn summary_exactly_200_chars_is_ok() {
        let fx = Fixture::new();
        let s = "y".repeat(200);
        fx.write(
            "records/profiles/a.md",
            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
        );
        let issues = fx.store_all();
        assert!(
            !has(&issues, codes::SUMMARY_TOO_LONG),
            "200 is the bound, inclusive: {issues:#?}"
        );
    }
4376
    /// Hand-written meta files — root, layer and type-folder `index.md`, the
    /// `index.jsonl` sidecar, and `log.md` — carry no `summary`, yet no
    /// `SUMMARY_MISSING` is raised for the store.
    #[test]
    fn meta_files_need_no_summary() {
        let fx = Fixture::new();
        fx.write("records/contacts/a.md", &valid_contact("A contact"));
        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
        fx.write(
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        );
        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
        fx.write(
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
        );
        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
        let issues = fx.store_all();
        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
    }
4397
    /// A `tags` list containing a nested list raises one warning-severity
    /// `TAGS_MALFORMED` on that file; a flat `tags` list on another file
    /// raises nothing.
    #[test]
    fn nested_tags_warns_flat_tags_ok() {
        let fx = Fixture::new();
        fx.write(
            "records/contacts/nested.md",
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
        );
        fx.write(
            "records/contacts/flat.md",
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
        );
        let issues = fx.store_all();
        let tag_issues: Vec<_> = issues
            .iter()
            .filter(|i| i.code == codes::TAGS_MALFORMED)
            .collect();
        assert_eq!(
            tag_issues.len(),
            1,
            "only the nested-tags file should warn: {issues:#?}"
        );
        assert_eq!(
            tag_issues[0].file,
            PathBuf::from("records/contacts/nested.md")
        );
        assert_eq!(tag_issues[0].severity, Severity::Warning);
    }
4427
    /// A body link without a store-relative path (`[[sarah-chen]]`) raises
    /// an error-severity `WIKI_LINK_SHORT_FORM` naming the target, and is
    /// not additionally reported as `WIKI_LINK_BROKEN`.
    #[test]
    fn short_form_wiki_link_is_error() {
        let fx = Fixture::new();
        let mut body = valid_contact("links to a short form");
        body.push_str("\nSee [[sarah-chen]] for details.\n");
        fx.write("records/contacts/a.md", &body);
        let issues = fx.store_all();
        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
        assert!(issue.is_error());
        assert!(issue.message.contains("sarah-chen"));
        assert!(
            !issues
                .iter()
                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
            "short-form should suppress broken: {issues:#?}"
        );
    }
4448
    /// A full-path link to a file that does not exist raises an
    /// error-severity `WIKI_LINK_BROKEN` that names the target and carries a
    /// non-empty suggestion.
    #[test]
    fn broken_full_path_wiki_link_is_error() {
        let fx = Fixture::new();
        let mut body = valid_contact("links to a missing file");
        body.push_str("\nSee [[records/contacts/ghost]].\n");
        fx.write("records/contacts/a.md", &body);
        let issues = fx.store_all();
        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
        assert!(issue.is_error());
        assert!(issue.message.contains("records/contacts/ghost"));
        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
    }
4461
    /// A link containing `..` components raises `WIKI_LINK_BROKEN` whose
    /// message says the target is not a safe store-relative path, with a
    /// non-empty suggestion.
    #[test]
    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
        let fx = Fixture::new();
        let mut body = valid_contact("links with traversal");
        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
        fx.write("records/contacts/a.md", &body);
        let issues = fx.store_all();
        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
        assert!(issue.message.contains("not a safe store-relative path"));
        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
    }
4473
    /// A full-path link to an existing file raises neither
    /// `WIKI_LINK_BROKEN` nor `WIKI_LINK_SHORT_FORM`.
    #[test]
    fn valid_full_path_wiki_link_passes() {
        let fx = Fixture::new();
        fx.write("records/contacts/target.md", &valid_contact("target"));
        let mut body = valid_contact("links to target");
        body.push_str("\nSee [[records/contacts/target]].\n");
        fx.write("records/contacts/a.md", &body);
        let issues = fx.store_all();
        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
    }
4485
    /// A link that spells out the `.md` extension raises a warning-severity
    /// `WIKI_LINK_HAS_EXTENSION` suggesting the extension-less form, and the
    /// link still resolves (no `WIKI_LINK_BROKEN`).
    #[test]
    fn md_extension_wiki_link_warns_and_resolves() {
        let fx = Fixture::new();
        fx.write("records/contacts/target.md", &valid_contact("target"));
        let mut body = valid_contact("links with extension");
        body.push_str("\nSee [[records/contacts/target.md]].\n");
        fx.write("records/contacts/a.md", &body);
        let issues = fx.store_all();
        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
        assert_eq!(issue.severity, Severity::Warning);
        assert_eq!(
            issue.suggestion.as_deref(),
            Some("drop the extension: [[records/contacts/target]]")
        );
        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
    }
4503
4504 #[test]
4505 fn wiki_links_in_code_fences_are_ignored() {
4506 let fx = Fixture::new();
4507 let mut body = valid_contact("has a fenced example");
4508 body.push_str("\n```\n[[sarah-chen]]\n```\n");
4509 fx.write("records/contacts/a.md", &body);
4510 let issues = fx.store_all();
4511 assert!(
4512 !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4513 "fenced wiki-links must be ignored: {issues:#?}"
4514 );
4515 }
4516
4517 #[test]
4518 fn flow_form_link_list_in_frontmatter_is_error() {
4519 let fx = Fixture::new();
4520 fx.write(
4521 "records/meetings/m.md",
4522 "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4523 );
4524 let issues = fx.store_all();
4525 let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4526 assert!(issue.is_error());
4527 assert_eq!(issue.key.as_deref(), Some("attendees"));
4528 }
4529
4530 #[test]
4531 fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4532 let fx = Fixture::new();
4533 fx.write("records/contacts/a.md", &valid_contact("a"));
4534 fx.write("records/contacts/b.md", &valid_contact("b"));
4535 fx.write(
4536 "records/meetings/m.md",
4537 "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n - [[records/contacts/a]]\n - [[records/contacts/b]]\n---\n\n# M\n",
4538 );
4539 let issues = fx.store_all();
4540 assert!(
4541 !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4542 "{issues:#?}"
4543 );
4544 assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4546 }
4547
4548 #[test]
4549 fn frontmatter_short_form_link_field_is_error() {
4550 let fx = Fixture::new();
4551 fx.write(
4554 "records/synthesis/a.md",
4555 "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4556 );
4557 let issues = fx.store_all();
4558 let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4559 assert!(issue.is_error());
4560 assert_eq!(issue.key.as_deref(), Some("related"));
4561 }
4562
4563 #[test]
4564 fn unquoted_frontmatter_link_is_recognized() {
4565 let fx = Fixture::new();
4570 fx.write(
4571 "records/synthesis/short.md",
4572 "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4573 );
4574 fx.write(
4575 "records/synthesis/broken.md",
4576 "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4577 );
4578 let issues = fx.store_all();
4579 assert!(
4580 issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4581 && i.file == Path::new("records/synthesis/short.md")
4582 && i.key.as_deref() == Some("related")),
4583 "unquoted short-form frontmatter link must be caught: {issues:#?}"
4584 );
4585 assert!(
4586 issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4587 && i.file == Path::new("records/synthesis/broken.md")),
4588 "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4589 );
4590 }
4591
4592 #[test]
4593 fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4594 let mut fx = Fixture::new();
4599 fx.config.schemas.insert(
4600 "contact".into(),
4601 Schema {
4602 fields: vec![FieldSpec {
4603 name: "company".into(),
4604 link_prefix: Some(PathBuf::from("records/companies")),
4605 ..Default::default()
4606 }],
4607 ..Default::default()
4608 },
4609 );
4610 fx.write(
4611 "records/contacts/a.md",
4612 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4613 );
4614 let issues = fx.store_all();
4615 let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4616 assert_eq!(issue.key.as_deref(), Some("company"));
4617 assert!(
4619 !issues
4620 .iter()
4621 .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4622 && i.key.as_deref() == Some("company")),
4623 "schema link fields are checked once, by the schema path: {issues:#?}"
4624 );
4625 }
4626
4627 #[test]
4628 fn schema_link_field_with_md_extension_still_warns() {
4629 let mut fx = Fixture::new();
4630 fx.config.schemas.insert(
4631 "contact".into(),
4632 Schema {
4633 fields: vec![FieldSpec {
4634 name: "company".into(),
4635 link_prefix: Some(PathBuf::from("records/companies")),
4636 ..Default::default()
4637 }],
4638 ..Default::default()
4639 },
4640 );
4641 fx.write(
4642 "records/companies/acme.md",
4643 "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4644 );
4645 fx.write(
4646 "records/contacts/a.md",
4647 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4648 );
4649 let issues = fx.store_all();
4650 let issue = issues
4651 .iter()
4652 .find(|i| {
4653 i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4654 })
4655 .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4656 assert_eq!(issue.severity, Severity::Warning);
4657 assert!(
4658 !issues
4659 .iter()
4660 .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4661 "extensionless existence check should still find acme.md: {issues:#?}"
4662 );
4663 }
4664
4665 #[test]
4668 fn explicit_schema_required_shape_enum() {
4669 let fx = {
4670 let mut fx = Fixture::new();
4671 let schema = Schema {
4674 fields: vec![
4675 FieldSpec {
4676 name: "name".into(),
4677 required: true,
4678 ..Default::default()
4679 },
4680 FieldSpec {
4681 name: "email".into(),
4682 required: true,
4683 shape: Some(Shape::Email),
4684 ..Default::default()
4685 },
4686 FieldSpec {
4687 name: "status".into(),
4688 enum_values: Some(vec!["active".into(), "inactive".into()]),
4689 ..Default::default()
4690 },
4691 ],
4692 ..Default::default()
4693 };
4694 fx.config.schemas.insert("contact".into(), schema);
4695 fx
4696 };
4697 fx.write(
4698 "records/contacts/a.md",
4699 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
4700 );
4701 let issues = fx.store_all();
4702 assert!(
4704 issues
4705 .iter()
4706 .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
4707 && i.key.as_deref() == Some("name")),
4708 "{issues:#?}"
4709 );
4710 assert!(
4712 issues.iter().any(
4713 |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
4714 ),
4715 "{issues:#?}"
4716 );
4717 assert!(
4719 issues
4720 .iter()
4721 .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
4722 && i.key.as_deref() == Some("status")),
4723 "{issues:#?}"
4724 );
4725 }
4726
4727 #[test]
4728 fn schema_without_link_field_allows_plain_value() {
4729 let mut fx = Fixture::new();
4733 fx.config.schemas.insert(
4734 "contact".into(),
4735 Schema {
4736 fields: vec![FieldSpec {
4737 name: "name".into(),
4738 required: true,
4739 ..Default::default()
4740 }],
4741 ..Default::default()
4742 },
4743 );
4744 fx.write(
4745 "records/contacts/a.md",
4746 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4747 );
4748 let issues = fx.store_all();
4749 assert!(
4750 !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
4751 "no declared link field for `company` → a plain value is fine: {issues:#?}"
4752 );
4753 }
4754
4755 #[test]
4756 fn schema_link_field_plain_value_is_prefix_mismatch() {
4757 let mut fx = Fixture::new();
4760 fx.config.schemas.insert(
4761 "contact".into(),
4762 Schema {
4763 fields: vec![FieldSpec {
4764 name: "company".into(),
4765 link_prefix: Some(PathBuf::from("records/companies")),
4766 ..Default::default()
4767 }],
4768 ..Default::default()
4769 },
4770 );
4771 fx.write(
4772 "records/contacts/a.md",
4773 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
4774 );
4775 let issues = fx.store_all();
4776 let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4777 assert_eq!(issue.key.as_deref(), Some("company"));
4778 assert!(issue
4779 .suggestion
4780 .as_deref()
4781 .unwrap()
4782 .contains("records/companies/"));
4783 }
4784
4785 #[test]
4786 fn schema_shape_int_and_url_and_currency() {
4787 let mut fx = Fixture::new();
4788 fx.config.schemas.insert(
4789 "widget".into(),
4790 Schema {
4791 fields: vec![
4792 FieldSpec {
4793 name: "qty".into(),
4794 shape: Some(Shape::Int),
4795 ..Default::default()
4796 },
4797 FieldSpec {
4798 name: "site".into(),
4799 shape: Some(Shape::Url),
4800 ..Default::default()
4801 },
4802 FieldSpec {
4803 name: "price".into(),
4804 shape: Some(Shape::Currency),
4805 ..Default::default()
4806 },
4807 ],
4808 ..Default::default()
4809 },
4810 );
4811 fx.write(
4814 "records/widgets/ok.md",
4815 "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
4816 );
4817 fx.write(
4821 "records/widgets/bad.md",
4822 "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
4823 );
4824 let issues = fx.store_all();
4825 let bad_shape: Vec<_> = issues
4826 .iter()
4827 .filter(|i| {
4828 i.code == codes::SCHEMA_SHAPE_MISMATCH
4829 && i.file == Path::new("records/widgets/bad.md")
4830 })
4831 .map(|i| i.key.clone().unwrap_or_default())
4832 .collect();
4833 assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
4834 assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
4835 assert!(
4836 bad_shape.contains(&"price".to_string()),
4837 "inf must be rejected as currency: {issues:#?}"
4838 );
4839 assert!(
4840 !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
4841 && i.file == Path::new("records/widgets/ok.md")),
4842 "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
4843 );
4844 }
4845
4846 #[test]
4847 fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
4848 let mut fx = Fixture::new();
4849 fx.config.schemas.insert(
4850 "contact".into(),
4851 Schema {
4852 fields: vec![
4853 FieldSpec {
4854 name: "email".into(),
4855 required: true,
4856 shape: Some(Shape::Email),
4857 ..Default::default()
4858 },
4859 FieldSpec {
4860 name: "status".into(),
4861 enum_values: Some(vec!["active".into(), "inactive".into()]),
4862 ..Default::default()
4863 },
4864 ],
4865 ..Default::default()
4866 },
4867 );
4868 fx.write(
4872 "records/contacts/bad.md",
4873 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n - a@b.com\n - c@d.com\nstatus:\n - active\n---\n\n# bad\n",
4874 );
4875 let issues = fx.store_all();
4876 let mismatched: Vec<_> = issues
4877 .iter()
4878 .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
4879 .map(|i| i.key.clone().unwrap_or_default())
4880 .collect();
4881 assert!(
4882 mismatched.contains(&"email".to_string()),
4883 "list-valued required email must flag: {issues:#?}"
4884 );
4885 assert!(
4886 mismatched.contains(&"status".to_string()),
4887 "list-valued enum must flag: {issues:#?}"
4888 );
4889 }
4890
4891 #[test]
4892 fn is_currency_accepts_codes_and_rejects_non_numeric() {
4893 for ok in [
4895 "100",
4896 "1234.56",
4897 "$1,234.50",
4898 "USD 100", "usd 100", "EUR 9.50",
4901 "£12",
4902 "¥1000",
4903 "-5.00", "+5",
4905 "1,000,000",
4906 ] {
4907 assert!(is_currency(ok), "expected currency: {ok:?}");
4908 }
4909 for bad in [
4912 "inf", "-inf", "infinity", "NaN", "nan", "12.999", "1.2345", "USD", "$", "free", "", " ", "1e3", "1.", ".5", "1 000", "USDD 100", ] {
4923 assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
4924 }
4925 }
4926
4927 #[test]
4930 fn ignored_type_present_is_info() {
4931 let mut fx = Fixture::new();
4932 fx.config.ignored_types.push("temp".into());
4933 fx.write(
4934 "records/temps/x.md",
4935 "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4936 );
4937 let issues = fx.store_all();
4938 let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
4939 assert_eq!(issue.severity, Severity::Info);
4940 assert!(!issue.is_error());
4941 assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4942 }
4943
4944 #[test]
4945 fn conclusion_record_derived_from_ignored_type_warns() {
4946 let mut fx = Fixture::new();
4947 fx.config.ignored_types.push("temp".into());
4948 fx.write(
4949 "records/temps/x.md",
4950 "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
4951 );
4952 fx.write(
4956 "records/synthesis/t.md",
4957 "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
4958 );
4959 let issues = fx.store_all();
4960 let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
4961 assert_eq!(issue.severity, Severity::Warning);
4962 assert_eq!(issue.key.as_deref(), Some("derived_from"));
4963 assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4964 }
4965
4966 #[test]
4974 fn derived_from_ignored_type_is_the_shared_policy_decision() {
4975 let mut fx = Fixture::new();
4976 fx.config.ignored_types.push("secret".into());
4977 fx.write(
4979 "records/secrets/s.md",
4980 "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
4981 );
4982 fx.write(
4984 "records/contacts/c.md",
4985 "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
4986 );
4987 let store = fx.store();
4988
4989 let hit =
4993 derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
4994 .expect("conclusion → ignored-type record must match");
4995 assert_eq!(hit.target, "records/secrets/s");
4996 assert_eq!(hit.target_type, "secret");
4997
4998 assert_eq!(
5001 derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
5002 None,
5003 "only conclusion derivation is policed"
5004 );
5005
5006 assert_eq!(
5008 derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
5009 None,
5010 "deriving from a non-ignored type is allowed"
5011 );
5012
5013 let hit = derived_from_ignored_type(
5015 &store,
5016 "conclusion",
5017 ["records/contacts/c", "records/secrets/s"],
5018 )
5019 .expect("a later ignored-type target must still be found");
5020 assert_eq!(hit.target, "records/secrets/s");
5021
5022 fx.config.ignored_types.clear();
5024 let store = fx.store();
5025 assert_eq!(
5026 derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
5027 None,
5028 "an empty ignored-types policy short-circuits"
5029 );
5030 }
5031
5032 #[test]
5035 fn dup_id_is_hard_error_with_related() {
5036 let fx = Fixture::new();
5037 fx.write(
5038 "records/contacts/a.md",
5039 "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5040 );
5041 fx.write(
5042 "records/contacts/b.md",
5043 "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5044 );
5045 let issues = fx.store_all();
5046 assert_eq!(
5049 count(&issues, codes::DUP_ID),
5050 1,
5051 "one issue per group: {issues:#?}"
5052 );
5053 let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
5054 assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
5055 assert!(a.is_error());
5056 assert_eq!(a.key.as_deref(), Some("id"));
5057 assert_eq!(
5058 a.line,
5059 Some(3),
5060 "anchors to the `id` line on the reported file"
5061 );
5062 assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5063 }
5064
5065 #[test]
5066 fn dup_id_not_fired_in_working_set() {
5067 let fx = Fixture::new();
5069 fx.write(
5070 "records/contacts/a.md",
5071 "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5072 );
5073 fx.write(
5074 "records/contacts/b.md",
5075 "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5076 );
5077 fx.write(
5079 "log.md",
5080 "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5081 );
5082 let issues = validate_working_set(&fx.store(), None).unwrap();
5083 assert!(
5084 !has(&issues, codes::DUP_ID),
5085 "DUP_ID is --all only: {issues:#?}"
5086 );
5087 }
5088
5089 #[test]
5090 fn dup_unique_key_single_field_is_warning() {
5091 let mut fx = Fixture::new();
5092 fx.config.schemas.insert(
5094 "contact".into(),
5095 Schema {
5096 unique_keys: vec![vec!["email".into()]],
5097 ..Default::default()
5098 },
5099 );
5100 for (f, name) in [("a", "A"), ("b", "B")] {
5101 fx.write(
5102 &format!("records/contacts/{f}.md"),
5103 &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5104 );
5105 }
5106 let issues = fx.store_all();
5107 assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5110 let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5111 assert_eq!(dup.severity, Severity::Warning);
5112 assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5113 assert_eq!(dup.key.as_deref(), Some("email"));
5114 assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5115 }
5116
5117 #[test]
5118 fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5119 let mut fx = Fixture::new();
5120 fx.config.schemas.insert(
5122 "expense".into(),
5123 Schema {
5124 unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5125 ..Default::default()
5126 },
5127 );
5128 fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5129 let exp = |f: &str, amount: &str| {
5130 format!(
5131 "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5132 )
5133 };
5134 fx.write("records/expenses/e1.md", &exp("e1", "100"));
5135 fx.write("records/expenses/e2.md", &exp("e2", "100"));
5136 fx.write("records/expenses/e3.md", &exp("e3", "200")); let issues = fx.store_all();
5138 assert_eq!(
5141 count(&issues, codes::DUP_UNIQUE_KEY),
5142 1,
5143 "only e1+e2 collide, one issue: {issues:#?}"
5144 );
5145 let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5146 assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5147 assert_eq!(
5148 dup.line,
5149 Some(1),
5150 "compound-key collision anchors to line 1"
5151 );
5152 assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5153 assert!(
5154 !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5155 && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5156 "e3 differs on amount and must not collide: {issues:#?}"
5157 );
5158 }
5159
5160 #[test]
5161 fn dup_unique_key_list_field_is_order_independent() {
5162 let mut fx = Fixture::new();
5163 fx.config.schemas.insert(
5165 "meeting".into(),
5166 Schema {
5167 unique_keys: vec![vec!["date".into(), "attendees".into()]],
5168 ..Default::default()
5169 },
5170 );
5171 fx.write("records/contacts/a.md", &valid_contact("a"));
5172 fx.write("records/contacts/b.md", &valid_contact("b"));
5173 let m = |f: &str, order: &str| {
5174 let attendees = if order == "ab" {
5175 " - [[records/contacts/a]]\n - [[records/contacts/b]]"
5176 } else {
5177 " - [[records/contacts/b]]\n - [[records/contacts/a]]"
5178 };
5179 format!(
5180 "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5181 )
5182 };
5183 fx.write("records/meetings/m1.md", &m("m1", "ab"));
5184 fx.write("records/meetings/m2.md", &m("m2", "ba"));
5185 let issues = fx.store_all();
5186 assert_eq!(
5189 count(&issues, codes::DUP_UNIQUE_KEY),
5190 1,
5191 "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5192 );
5193 let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5194 assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5195 assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5196 }
5197
5198 #[test]
5201 fn missing_indexes_at_all_three_levels() {
5202 let fx = Fixture::new();
5203 fx.write("records/contacts/a.md", &valid_contact("a"));
5204 let issues = fx.store_all();
5205 let missing_files: BTreeSet<PathBuf> = issues
5209 .iter()
5210 .filter(|i| i.code == codes::INDEX_MISSING)
5211 .map(|i| i.file.clone())
5212 .collect();
5213 assert!(
5214 missing_files.contains(&PathBuf::from("index.md")),
5215 "{issues:#?}"
5216 );
5217 assert!(
5218 missing_files.contains(&PathBuf::from("records/index.md")),
5219 "{issues:#?}"
5220 );
5221 assert!(
5222 missing_files.contains(&PathBuf::from("records/contacts")),
5223 "{issues:#?}"
5224 );
5225 assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5228 }
5229
5230 #[test]
5231 fn index_stale_entry_and_missing_entry() {
5232 let fx = Fixture::new();
5233 fx.write(
5234 "records/contacts/present.md",
5235 &valid_contact("present contact"),
5236 );
5237 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5239 fx.write(
5240 "records/index.md",
5241 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5242 );
5243 fx.write(
5245 "records/contacts/index.md",
5246 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5247 );
5248 fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5249 let issues = fx.store_all();
5250 let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5251 assert!(stale.message.contains("ghost"));
5252 assert!(stale.is_error());
5253 let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5254 assert!(
5255 missing.message.contains("present.md"),
5256 "{}",
5257 missing.message
5258 );
5259 }
5260
5261 #[test]
5262 fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5263 let fx = Fixture::new();
5264 fx.write("records/contacts/a.md", &valid_contact("a"));
5265 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5266 fx.write(
5267 "records/index.md",
5268 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5269 );
5270 fx.write(
5271 "records/contacts/index.md",
5272 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5273 );
5274 fx.write(
5275 "records/contacts/index.jsonl",
5276 "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5277 );
5278 let issues = fx.store_all();
5279 let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5280 assert!(stale.message.contains("not a safe store-relative path"));
5281 }
5282
5283 #[test]
5284 fn index_summary_mismatch() {
5285 let fx = Fixture::new();
5286 fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5287 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5288 fx.write(
5289 "records/index.md",
5290 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5291 );
5292 fx.write(
5293 "records/contacts/index.md",
5294 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5295 );
5296 fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5297 let issues = fx.store_all();
5298 let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5299 assert!(issue.is_error());
5300 assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5301 }
5302
5303 #[test]
5304 fn index_summary_match_passes() {
5305 let fx = Fixture::new();
5306 fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5307 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5308 fx.write(
5309 "records/index.md",
5310 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5311 );
5312 fx.write(
5313 "records/contacts/index.md",
5314 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5315 );
5316 fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5317 let issues = fx.store_all();
5318 assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5319 }
5320
5321 #[test]
5322 fn index_entry_with_tag_suffix_matches_summary() {
5323 let fx = Fixture::new();
5324 fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5325 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5326 fx.write(
5327 "records/index.md",
5328 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5329 );
5330 fx.write(
5334 "records/contacts/index.md",
5335 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary · #customer\n",
5336 );
5337 fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5338 let issues = fx.store_all();
5339 assert!(
5340 !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5341 "tag suffix should be stripped: {issues:#?}"
5342 );
5343 }
5344
5345 #[test]
5346 fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5347 let fx = Fixture::new();
5354 fx.write(
5355 "records/contacts/a.md",
5356 &valid_contact("Standup notes · #standup"),
5357 );
5358 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5359 fx.write(
5360 "records/index.md",
5361 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5362 );
5363 fx.write(
5364 "records/contacts/index.md",
5365 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5366 );
5367 fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5368 let issues = fx.store_all();
5369 assert!(
5370 !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5371 "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5372 );
5373 }
5374
5375 #[test]
5376 fn index_jsonl_desync_missing_file_in_jsonl() {
5377 let fx = Fixture::new();
5378 fx.write("records/contacts/a.md", &valid_contact("a"));
5379 fx.write("records/contacts/b.md", &valid_contact("b"));
5380 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5381 fx.write(
5382 "records/index.md",
5383 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5384 );
5385 fx.write(
5386 "records/contacts/index.md",
5387 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5388 );
5389 fx.write(
5391 "records/contacts/index.jsonl",
5392 "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5393 );
5394 let issues = fx.store_all();
5395 let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5396 assert!(desync.message.contains("b.md"), "{}", desync.message);
5397 }
5398
5399 #[test]
5400 fn index_jsonl_desync_record_points_at_missing_file() {
5401 let fx = Fixture::new();
5402 fx.write("records/contacts/a.md", &valid_contact("a"));
5403 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5404 fx.write(
5405 "records/index.md",
5406 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5407 );
5408 fx.write(
5409 "records/contacts/index.md",
5410 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5411 );
5412 fx.write(
5413 "records/contacts/index.jsonl",
5414 "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5415 );
5416 let issues = fx.store_all();
5417 assert!(
5418 issues
5419 .iter()
5420 .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5421 "{issues:#?}"
5422 );
5423 }
5424
5425 #[test]
5426 fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5427 let fx = Fixture::new();
5428 fx.write("records/contacts/a.md", &valid_contact("a"));
5429 fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5430 fx.write(
5431 "records/index.md",
5432 "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5433 );
5434 fx.write(
5435 "records/contacts/index.md",
5436 "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5437 );
5438 fx.write(
5439 "records/contacts/index.jsonl",
5440 "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5441 );
5442 let issues = fx.store_all();
5443 assert!(
5444 issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5445 && i.message.contains("not a safe store-relative path")),
5446 "{issues:#?}"
5447 );
5448 }
5449
#[test]
/// A sidecar row whose `summary` differs from the record's frontmatter must
/// yield `INDEX_JSONL_STALE`, related to the record and keyed on `summary`.
fn index_jsonl_stale_summary() {
    let fixture = Fixture::new();
    let contact = valid_contact("real summary");
    // The markdown index agrees with the record; only the sidecar summary
    // ("OUTDATED") is out of date.
    let files = [
        ("records/contacts/a.md", contact.as_str()),
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        (
            "records/contacts/index.md",
            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
        ),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let stale = find(&issues, codes::INDEX_JSONL_STALE);
    let expected_related = vec![PathBuf::from("records/contacts/a.md")];
    assert_eq!(stale.related, expected_related);
    assert!(stale.key.as_deref().unwrap().contains("summary"));
}
5473
#[test]
/// After rebuilding the indexes, rewriting only the projected `email` value in
/// the sidecar must raise `INDEX_JSONL_STALE` whose key names `email` but
/// neither `summary` nor `type`.
fn index_jsonl_stale_queryable_field_email() {
    let real_email = "real@correct.com";
    let fixture = Fixture::new();
    fixture.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n",
    );
    fixture.rebuild_indexes();

    // Baseline: the sidecar that was just rebuilt is in sync and carries the
    // email.
    let good = fs::read_to_string(fixture.dir.path().join("records/contacts/index.jsonl")).unwrap();
    let baseline = fixture.store_all();
    assert!(
        !has(&baseline, codes::INDEX_JSONL_STALE),
        "freshly-rebuilt sidecar must not be stale"
    );
    assert!(
        good.contains(real_email),
        "sidecar projects email: {good}"
    );

    // Tamper with the email only, leaving every other projected field intact.
    let tampered = good.replace(real_email, "STALE-WRONG@evil.com");
    fixture.write("records/contacts/index.jsonl", &tampered);

    let issues = fixture.store_all();
    let stale = find(&issues, codes::INDEX_JSONL_STALE);
    let expected_related = vec![PathBuf::from("records/contacts/a.md")];
    assert_eq!(stale.related, expected_related);

    let key = stale.key.as_deref().unwrap();
    assert!(
        key.contains("email"),
        "expected `email` in stale key, got {key:?}"
    );
    assert!(!key.contains("summary"), "summary still matches: {key:?}");
    assert!(!key.contains("type"), "type still matches: {key:?}");
}
5518
#[test]
/// Tampering with a list entry (`tags`), a timestamp (`updated`) and a number
/// (`amount`) in a rebuilt sidecar must produce an `INDEX_JSONL_STALE` issue
/// whose key mentions all three fields.
fn index_jsonl_stale_typed_and_list_fields() {
    let fixture = Fixture::new();
    fixture.write(
        "records/expenses/e.md",
        "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n",
    );
    fixture.rebuild_indexes();

    let good = fs::read_to_string(fixture.dir.path().join("records/expenses/index.jsonl")).unwrap();
    let baseline = fixture.store_all();
    assert!(
        !has(&baseline, codes::INDEX_JSONL_STALE),
        "freshly-rebuilt sidecar must not be stale"
    );

    // Apply the substitutions one after another, in this order.
    let substitutions = [
        ("\"q2\"", "\"WRONG-TAG\""),
        ("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00"),
        ("1299", "9999"),
    ];
    let mut tampered = good;
    for (from, to) in substitutions {
        tampered = tampered.replace(from, to);
    }
    fixture.write("records/expenses/index.jsonl", &tampered);

    let issues = fixture.store_all();
    let stale = find(&issues, codes::INDEX_JSONL_STALE);
    let key = stale.key.as_deref().unwrap();
    ["amount", "tags", "updated"].into_iter().for_each(|expected| {
        assert!(
            key.contains(expected),
            "expected `{expected}` in stale key, got {key:?}"
        );
    });
}
5551
#[test]
/// An `index.md` placed in `records/contacts/subfolder/` must be reported as
/// an `INDEX_ORPHAN` warning located at that file.
fn index_orphan_in_noncanonical_folder() {
    let fixture = Fixture::new();
    let contact = valid_contact("a");
    // The last entry is the stray index under a nested subfolder.
    let files = [
        ("records/contacts/a.md", contact.as_str()),
        ("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        ("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n"),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
        (
            "records/contacts/subfolder/index.md",
            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let orphan = find(&issues, codes::INDEX_ORPHAN);
    let expected_file = PathBuf::from("records/contacts/subfolder/index.md");
    assert_eq!(orphan.severity, Severity::Warning);
    assert_eq!(orphan.file, expected_file);
}
5580
#[test]
/// A root `index.md` that declares `scope: layer` must be reported as an
/// `INDEX_WRONG_SCOPE` warning on `index.md`.
fn index_wrong_scope() {
    let fixture = Fixture::new();
    let contact = valid_contact("a");
    // Only the root index is wrong: it says `layer` instead of `root`.
    let files = [
        ("records/contacts/a.md", contact.as_str()),
        ("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n"),
        (
            "records/index.md",
            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
        ),
        ("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n"),
        (
            "records/contacts/index.jsonl",
            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
    assert_eq!(issue.severity, Severity::Warning);
    assert_eq!(issue.file, PathBuf::from("index.md"));
}
5601
#[test]
/// With 501 contacts, a type-folder `index.md` that lists a single entry plus
/// a "More" section, next to a sidecar listing all 501 records, must raise
/// neither `INDEX_MISSING_ENTRY` nor `INDEX_JSONL_DESYNC`.
fn capped_type_folder_index_does_not_flag_missing_entries() {
    let fixture = Fixture::new();
    (0..501).for_each(|i| {
        let path = format!("records/contacts/c{i:04}.md");
        let page = valid_contact(&format!("contact {i}"));
        fixture.write(&path, &page);
    });
    fixture.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
    fixture.write(
        "records/index.md",
        "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
    );
    fixture.write(
        "records/contacts/index.md",
        "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
    );

    // The sidecar has one row per record, in the same order as the files above.
    let jsonl: String = (0..501)
        .map(|i| {
            format!(
                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
            )
        })
        .collect();
    fixture.write("records/contacts/index.jsonl", &jsonl);

    let issues = fixture.store_all();
    let missing_entry = has(&issues, codes::INDEX_MISSING_ENTRY);
    assert!(
        !missing_entry,
        "over the cap, missing browse entries are expected: {issues:#?}"
    );
    assert!(
        !has(&issues, codes::INDEX_JSONL_DESYNC),
        "{:#?}",
        issues
            .iter()
            .filter(|issue| issue.code == codes::INDEX_JSONL_DESYNC)
            .collect::<Vec<_>>()
    );
}
5645
#[test]
/// A single `log.md` with a backwards timestamp, an unrecognised kind
/// (`frobnicate`) and an unparseable date must produce `LOG_OUT_OF_ORDER`
/// (warning), `LOG_UNKNOWN_KIND` (warning, suggestion mentions `create`) and
/// `LOG_BAD_TIMESTAMP` (error).
fn log_bad_timestamp_unknown_kind_out_of_order() {
    let log = concat!(
        "---\ntype: log\n---\n\n# Log\n\n",
        "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
        "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n",
        "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n",
        "## [not-a-date] create | records/contacts/d\nx\n",
    );
    let fixture = Fixture::new();
    fixture.write("log.md", log);
    let issues = fixture.store_all();

    // Ordering violation.
    assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
    let out_of_order = find(&issues, codes::LOG_OUT_OF_ORDER);
    assert_eq!(out_of_order.severity, Severity::Warning);

    // Unknown entry kind.
    let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
    assert_eq!(unknown.severity, Severity::Warning);
    assert!(unknown.message.contains("frobnicate"));
    assert!(unknown
        .suggestion
        .as_deref()
        .is_some_and(|s| s.contains("create")));

    // Unparseable heading timestamp.
    let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
    assert!(bad.is_error());
}
5677
#[test]
/// A `validate` log heading with no `| object` part must trigger neither
/// `LOG_BAD_TIMESTAMP` nor `LOG_UNKNOWN_KIND`.
fn log_validate_entry_without_object_is_well_formed() {
    let log = "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n";
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = fixture.store_all();
    for code in [codes::LOG_BAD_TIMESTAMP, codes::LOG_UNKNOWN_KIND] {
        assert!(!has(&issues, code), "{issues:#?}");
    }
}
5689
#[test]
/// Two log entries with ascending timestamps must not raise
/// `LOG_OUT_OF_ORDER`.
fn log_in_order_is_clean() {
    let log = concat!(
        "---\ntype: log\n---\n\n",
        "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
        "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
    );
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = fixture.store_all();
    let out_of_order = has(&issues, codes::LOG_OUT_OF_ORDER);
    assert!(!out_of_order, "{issues:#?}");
}
5704
#[test]
/// Working-set validation must not report `LOG_OUT_OF_ORDER`, even when the
/// log's second entry is timestamped earlier than its first.
fn log_not_checked_in_working_set() {
    let log = concat!(
        "---\ntype: log\n---\n\n",
        "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
        "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
    );
    let fixture = Fixture::new();
    fixture.write("log.md", log);

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let out_of_order = has(&issues, codes::LOG_OUT_OF_ORDER);
    assert!(
        !out_of_order,
        "log ordering is --all only: {issues:#?}"
    );
}
5723
#[test]
/// Of two equally broken contacts, only the one named in `log.md` may appear
/// in working-set results; the unlogged one must produce no issue at all.
fn working_set_validates_only_changed_files() {
    let fixture = Fixture::new();
    // Both records carry an invalid `created`; the log mentions only `dirty`.
    let files = [
        (
            "records/contacts/dirty.md",
            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
        ),
        (
            "records/contacts/unlogged.md",
            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
        ),
        (
            "log.md",
            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = validate_working_set(&fixture.store(), None).unwrap();

    let dirty_flagged = issues.iter().any(|issue| {
        issue.code == codes::FM_BAD_TIMESTAMP
            && issue.file == Path::new("records/contacts/dirty.md")
    });
    assert!(dirty_flagged, "{issues:#?}");

    let unlogged_untouched = issues
        .iter()
        .all(|issue| issue.file != Path::new("records/contacts/unlogged.md"));
    assert!(
        unlogged_untouched,
        "unlogged file must not be in the working set: {issues:#?}"
    );
}
5756
#[test]
/// A page that links to a path the log records as deleted must be validated
/// by the working-set pass and flagged `WIKI_LINK_BROKEN`.
fn working_set_includes_incoming_linkers_to_changed_path() {
    let linker_page = "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n";
    let log = "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n";

    let fixture = Fixture::new();
    fixture.write("records/profiles/linker.md", linker_page);
    fixture.write("log.md", log);

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let linker_flagged = issues.iter().any(|issue| {
        issue.code == codes::WIKI_LINK_BROKEN
            && issue.file == Path::new("records/profiles/linker.md")
    });
    assert!(
        linker_flagged,
        "incoming linker to a removed path must be validated: {issues:#?}"
    );
}
5778
#[test]
/// With an explicit `since` cutoff, only the file whose log entry is dated
/// after the cutoff may contribute issues.
fn working_set_respects_explicit_since_cutoff() {
    let fixture = Fixture::new();
    // Both contacts are invalid; the log dates `old` before and `new` after
    // the cutoff used below.
    let files = [
        (
            "records/contacts/old.md",
            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
        ),
        (
            "records/contacts/new.md",
            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
        ),
        (
            "log.md",
            concat!(
                "---\ntype: log\n---\n\n",
                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
            ),
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
    let issues = validate_working_set(&fixture.store(), Some(since)).unwrap();
    let mentions = |path: &str| issues.iter().any(|issue| issue.file == Path::new(path));

    assert!(mentions("records/contacts/new.md"), "{issues:#?}");
    assert!(
        !mentions("records/contacts/old.md"),
        "old change is before the cutoff: {issues:#?}"
    );
}
5814
#[test]
/// With no explicit cutoff, a change logged before the most recent `validate`
/// entry must be left out, while a change logged after it is validated.
fn working_set_default_since_is_last_validate_entry() {
    let fixture = Fixture::new();
    // The log is: update `before`, then `validate`, then update `after`.
    let files = [
        (
            "records/contacts/before.md",
            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
        ),
        (
            "records/contacts/after.md",
            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
        ),
        (
            "log.md",
            concat!(
                "---\ntype: log\n---\n\n",
                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
                "## [2026-05-21 10:00] validate\nPASS\n\n",
                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
            ),
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let mentions = |path: &str| issues.iter().any(|issue| issue.file == Path::new(path));

    assert!(mentions("records/contacts/after.md"), "{issues:#?}");
    assert!(
        !mentions("records/contacts/before.md"),
        "change before the last validate entry is outside the default window: {issues:#?}"
    );
}
5850
#[test]
/// Writes `z.md` before `a.md` and checks that the issue list's file column
/// is already in sorted order. Only the file ordering is asserted here.
fn issues_are_sorted_by_file_then_line() {
    let broken_profile = "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n";
    let fixture = Fixture::new();
    for path in ["records/profiles/z.md", "records/profiles/a.md"] {
        fixture.write(path, broken_profile);
    }

    let issues = fixture.store_all();
    let emitted: Vec<&PathBuf> = issues.iter().map(|issue| &issue.file).collect();
    let expected = {
        let mut ordered = emitted.clone();
        ordered.sort();
        ordered
    };
    assert_eq!(
        emitted, expected,
        "issues must be emitted in a stable file order"
    );
}
5867
#[test]
/// A page listed in `config.frozen_pages` must not cause validation to emit
/// `POLICY_FROZEN_PAGE`.
fn frozen_page_is_not_a_validate_error() {
    let frozen_path = "records/decisions/d.md";
    let mut fixture = Fixture::new();
    fixture.config.frozen_pages.push(PathBuf::from(frozen_path));
    fixture.write(
        frozen_path,
        "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
    );

    let issues = fixture.store_all();
    let flagged = has(&issues, codes::POLICY_FROZEN_PAGE);
    assert!(
        !flagged,
        "frozen pages are enforced at write-time, not by validate: {issues:#?}"
    );
}
5888
#[test]
/// A full-path wiki-link to an existing contact must not produce
/// `WIKI_LINK_AMBIGUOUS`.
fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
    let fixture = Fixture::new();
    let target = valid_contact("sarah");
    fixture.write("records/contacts/sarah-chen.md", &target);

    // The linking page is a valid contact with one full-path link appended.
    let body = valid_contact("links to sarah") + "\nSee [[records/contacts/sarah-chen]].\n";
    fixture.write("records/contacts/p.md", &body);

    let issues = fixture.store_all();
    let ambiguous = has(&issues, codes::WIKI_LINK_AMBIGUOUS);
    assert!(!ambiguous, "{issues:#?}");
}
5901
#[test]
/// A record of a type with no schema (`proposal`) and extra fields must raise
/// no missing-type or schema issue, and no issue keyed on the extra fields.
fn unknown_type_passes_through() {
    let fixture = Fixture::new();
    fixture.write(
        "records/proposals/x.md",
        "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
    );
    let issues = fixture.store_all();

    let forbidden = [
        codes::FM_MISSING_TYPE,
        codes::SCHEMA_MISSING_REQUIRED,
        codes::SCHEMA_SHAPE_MISMATCH,
    ];
    for code in forbidden {
        assert!(!has(&issues, code), "{issues:#?}");
    }

    let extra_field_flagged = issues
        .iter()
        .any(|issue| matches!(issue.key.as_deref(), Some("custom_field" | "budget")));
    assert!(
        !extra_field_flagged,
        "unknown fields are ambient context: {issues:#?}"
    );
}
5927
#[test]
/// Deleting `records/contacts/sarah` must not pull in a page that links to
/// `records/contacts/sarah-chen`, which merely shares the prefix.
fn incoming_linker_scan_does_not_prefix_match() {
    let linker_path = "records/profiles/only-sarah-chen.md";
    let fixture = Fixture::new();
    fixture.write(
        linker_path,
        "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
    );
    fixture.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
    );

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let linker_untouched = issues
        .iter()
        .all(|issue| issue.file != Path::new(linker_path));
    assert!(
        linker_untouched,
        "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
    );
}
5952
#[test]
/// A catalog `index.md` that still lists a record the log says was deleted
/// must not get a `WIKI_LINK_BROKEN` issue from working-set validation.
fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
    let index_path = "records/contacts/index.md";
    let fixture = Fixture::new();
    fixture.write(
        index_path,
        "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
    );
    fixture.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
    );

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let index_flagged_broken = issues.iter().any(|issue| {
        issue.file == Path::new(index_path) && issue.code == codes::WIKI_LINK_BROKEN
    });
    assert!(
        !index_flagged_broken,
        "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
         working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
    );
}
5990
#[test]
/// When the log records two deletions, pages linking to either deleted target
/// (one via a body link, one via a frontmatter field) must both be flagged
/// `WIKI_LINK_BROKEN` by working-set validation.
fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
    let fixture = Fixture::new();
    let files = [
        // Body link to the first deleted target.
        (
            "records/profiles/refers-sarah.md",
            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
        ),
        // Frontmatter `company` link to the second deleted target.
        (
            "records/meetings/2026/05/kickoff.md",
            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
        ),
        (
            "log.md",
            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let broken_in = |path: &str| {
        issues
            .iter()
            .any(|issue| issue.file == Path::new(path) && issue.code == codes::WIKI_LINK_BROKEN)
    };

    assert!(
        broken_in("records/profiles/refers-sarah.md"),
        "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
    );
    assert!(
        broken_in("records/meetings/2026/05/kickoff.md"),
        "linker to the SECOND deleted target (typed-field edge) must also be \
         pulled in and flagged — proves the scan covers the whole changed set, \
         not just one object: {issues:#?}"
    );
}
6038
#[test]
/// Two broken links in a frontmatter block sequence must be reported with two
/// distinct `line` values.
fn frontmatter_block_sequence_links_each_get_their_own_line() {
    let fixture = Fixture::new();
    fixture.write(
        "records/meetings/m.md",
        "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n - [[records/contacts/ghost1]]\n - [[records/contacts/ghost2]]\n---\n\n# M\n",
    );

    let issues = fixture.store_all();
    // The set collapses issues that share a line, so its size counts distinct
    // `line` values.
    let broken_lines: BTreeSet<Option<u32>> = issues
        .iter()
        .filter_map(|issue| (issue.code == codes::WIKI_LINK_BROKEN).then_some(issue.line))
        .collect();
    let distinct = broken_lines.len();
    assert_eq!(distinct, 2, "two distinct broken-link lines: {issues:#?}");
}
6060
#[test]
/// A `created:` key with no value must be reported as `FM_MISSING_CREATED`.
fn null_created_is_missing_not_silently_passed() {
    let page = "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", page);

    let issues = fixture.store_all();
    let reported_missing = has(&issues, codes::FM_MISSING_CREATED);
    assert!(
        reported_missing,
        "null `created:` must read as missing: {issues:#?}"
    );
}
6079
#[test]
/// A `created:` value that is a YAML sequence must be reported as
/// `FM_BAD_TIMESTAMP` keyed on `created`.
fn sequence_created_is_bad_timestamp() {
    let page = "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n";
    let fixture = Fixture::new();
    fixture.write("records/contacts/a.md", page);

    let issues = fixture.store_all();
    let created_rejected = issues.iter().any(|issue| {
        issue.code == codes::FM_BAD_TIMESTAMP && issue.key.as_deref() == Some("created")
    });
    assert!(
        created_rejected,
        "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
    );
}
6096
#[test]
/// For a required `name` field, an empty value, an empty sequence and an empty
/// mapping must each be reported as `SCHEMA_MISSING_REQUIRED` keyed on `name`.
fn required_field_null_or_empty_collection_is_missing() {
    // Each entry is spliced directly after `name:` in the frontmatter.
    let empty_forms = ["", " []", " {}"];
    for value in empty_forms {
        let mut fixture = Fixture::new();
        let required_name = FieldSpec {
            name: "name".into(),
            required: true,
            ..Default::default()
        };
        let schema = Schema {
            fields: vec![required_name],
            ..Default::default()
        };
        fixture.config.schemas.insert("contact".into(), schema);

        let page = format!(
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
        );
        fixture.write("records/contacts/a.md", &page);

        let issues = fixture.store_all();
        let name_missing = issues.iter().any(|issue| {
            issue.code == codes::SCHEMA_MISSING_REQUIRED && issue.key.as_deref() == Some("name")
        });
        assert!(
            name_missing,
            "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
        );
    }
}
6134
#[test]
/// A wiki-link that names an existing non-markdown file under `sources/`
/// (extension included) must not be reported as `WIKI_LINK_BROKEN`.
fn wiki_link_to_raw_source_file_resolves() {
    let fixture = Fixture::new();
    let files = [
        ("sources/emails/2026-05-22-elena.eml", "raw email bytes\n"),
        (
            "records/contacts/a.md",
            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
        ),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }

    let issues = fixture.store_all();
    let no_broken_links = issues
        .iter()
        .all(|issue| issue.code != codes::WIKI_LINK_BROKEN);
    assert!(
        no_broken_links,
        "a link to an existing raw source file must not be broken: {issues:#?}"
    );
}
6154
#[test]
/// A content file holding bytes that are not valid UTF-8 must be reported as
/// `FM_UNREADABLE` by working-set validation.
fn non_utf8_content_file_is_reported() {
    // 0xFF can never occur in well-formed UTF-8.
    let invalid_utf8: [u8; 4] = [0xFF, 0xFE, 0x00, 0x01];
    let fixture = Fixture::new();

    // Written with `fs` directly so the raw bytes land on disk unchanged.
    let corrupt_path = fixture.dir.path().join("records/notes/corrupt.md");
    let parent_dir = corrupt_path.parent().unwrap();
    fs::create_dir_all(parent_dir).unwrap();
    fs::write(&corrupt_path, invalid_utf8).unwrap();

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let reported = has(&issues, codes::FM_UNREADABLE);
    assert!(
        reported,
        "an unreadable content file must be reported, not silently skipped: {issues:#?}"
    );
}
6172
#[test]
/// A `[[...]]` that sits inside a backtick fence, itself nested in a tilde
/// fence, must not be extracted as a wiki-link.
fn tilde_fence_containing_backtick_fence_does_not_invert() {
    let body = concat!(
        "~~~markdown\n",
        "```\n",
        "[[fake-link]]\n",
        "```\n",
        "~~~\n",
    );
    let links = extract_wiki_links(body);
    let nothing_extracted = links.is_empty();
    assert!(
        nothing_extracted,
        "wiki-link inside a nested code fence must be skipped: {links:?}"
    );
}
6188
#[test]
/// A frontmatter-less file under `records/log/` must still be validated by
/// the full sweep and reported as `FM_MISSING_TYPE`.
fn all_sweep_visits_in_layer_log_folder() {
    let fixture = Fixture::new();
    fixture.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");

    let issues = fixture.store_all();
    let visited = has(&issues, codes::FM_MISSING_TYPE);
    assert!(
        visited,
        "--all must validate files under an in-layer `log/` folder: {issues:#?}"
    );
}
6205
#[test]
/// A flow-form list with spaces between its brackets and the wiki-link must
/// still be detected, under the key `attendees`.
fn flow_form_link_list_with_spaces_is_flagged() {
    let frontmatter = "attendees: [ [[records/contacts/elena]] ]\n";
    let keys = detect_flow_form_link_lists(frontmatter);
    let attendees_detected = keys.iter().any(|k| k == "attendees");
    assert!(
        attendees_detected,
        "spaced flow-form list must be detected: {keys:?}"
    );
}
6219
#[test]
/// Checks `extract_index_entry_summary` on two index-entry tails: one whose
/// middot/hashtag tail must be kept as part of the summary, and one whose tag
/// suffix must be stripped.
fn middot_hashtag_summary_tail_round_trips() {
    let kept_tail = extract_index_entry_summary("— Standup notes · #standup");
    assert_eq!(
        kept_tail.as_deref(),
        Some("Standup notes · #standup"),
        "a single-spaced middot tail is part of the summary, not a tag block"
    );

    let stripped_tail = extract_index_entry_summary("— Renewal champion · #renewal #acme");
    assert_eq!(
        stripped_tail.as_deref(),
        Some("Renewal champion"),
        "the renderer's double-spaced ` · #tag` suffix is stripped"
    );
}
6241
#[test]
/// `is_url` must accept the shortest `http://`/`https://` URLs that have a
/// one-character host and reject the bare schemes.
fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
    // (input, expected verdict, failure message)
    let cases = [
        ("http://x", true, "an 8-char http URL is valid"),
        ("https://x", true, "a 9-char https URL is valid"),
        ("http://", false, "a bare scheme with no host is rejected"),
        ("https://", false, "a bare https scheme is rejected"),
    ];
    for (input, expected, why) in cases {
        assert!(is_url(input) == expected, "{why}");
    }
}
6251
#[test]
/// `is_email` must reject addresses with more than one `@` and keep accepting
/// an ordinary address.
fn email_shape_rejects_double_at() {
    // (input, expected verdict, failure message)
    let cases = [
        ("sarah@@acme.com", false, "double-@ domain is rejected"),
        ("a@b@c.com", false, "two @ signs are rejected"),
        ("sarah@acme.com", true, "a normal address still passes"),
    ];
    for (input, expected, why) in cases {
        assert!(is_email(input) == expected, "{why}");
    }
}
6258
#[test]
/// A wiki-link to a deleted record inside the body of `log.md` must not be
/// reported as `WIKI_LINK_BROKEN` against `log.md`.
fn working_set_does_not_flag_log_md_body_links() {
    let fixture = Fixture::new();
    let contact = valid_contact("A");
    fixture.write("records/contacts/a.md", &contact);
    fixture.write(
        "log.md",
        "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
    );

    let issues = validate_working_set(&fixture.store(), None).unwrap();
    let log_flagged = issues.iter().any(|issue| {
        issue.code == codes::WIKI_LINK_BROKEN && issue.file == Path::new("log.md")
    });
    assert!(
        !log_flagged,
        "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
    );
}
6283
#[test]
/// A schema that declares the field `name` twice must produce a
/// `DB_MD_SCHEMA_FIELD` issue keyed on `name`.
fn schema_duplicate_field_name_is_flagged() {
    let mut fixture = Fixture::new();
    let first_name = FieldSpec {
        name: "name".into(),
        required: true,
        ..Default::default()
    };
    let second_name = FieldSpec {
        name: "name".into(),
        ..Default::default()
    };
    let schema = Schema {
        fields: vec![first_name, second_name],
        ..Default::default()
    };
    fixture.config.schemas.insert("contact".into(), schema);

    let issues = fixture.store_all();
    let duplicate_flagged = issues.iter().any(|issue| {
        issue.code == codes::DB_MD_SCHEMA_FIELD && issue.key.as_deref() == Some("name")
    });
    assert!(
        duplicate_flagged,
        "a duplicate schema field name must be flagged: {issues:#?}"
    );
}
6314
#[test]
/// A schema field carrying an unrecognised modifier (`requierd`) must produce
/// a `DB_MD_SCHEMA_FIELD` issue of `Info` severity keyed on that field.
fn schema_unknown_modifier_is_info() {
    let mut fixture = Fixture::new();
    let misspelled = FieldSpec {
        name: "name".into(),
        unknown_modifiers: vec!["requierd".into()],
        ..Default::default()
    };
    let schema = Schema {
        fields: vec![misspelled],
        ..Default::default()
    };
    fixture.config.schemas.insert("contact".into(), schema);

    let issues = fixture.store_all();
    let surfaced_as_info = issues.iter().any(|issue| {
        issue.code == codes::DB_MD_SCHEMA_FIELD
            && issue.severity == Severity::Info
            && issue.key.as_deref() == Some("name")
    });
    assert!(
        surfaced_as_info,
        "an unrecognized schema modifier must surface as Info: {issues:#?}"
    );
}
6337
#[test]
/// Scrapes the string value of every `pub const` inside `pub mod codes` from
/// this file's own source text and checks that `SPEC.md` contains a table cell
/// of the form ``| `CODE` |`` for each one.
///
/// Panics if a `pub const` line cannot be parsed, if fewer than 36 codes are
/// found, or if `SPEC.md` cannot be read.
fn every_code_constant_is_documented_in_spec() {
    let this_src = include_str!("validate.rs");

    // Skip to the `pub mod codes` header, drop the header itself, and stop at
    // the first line that is exactly `}`.
    let codes_in_module: Vec<String> = this_src
        .lines()
        .skip_while(|line| !line.trim().starts_with("pub mod codes"))
        .skip(1)
        .take_while(|line| *line != "}")
        .filter_map(|line| {
            let rest = line.trim().strip_prefix("pub const ")?;
            // Expected shape after the prefix: `NAME: &str = "VALUE";`.
            let value = rest
                .split_once('=')
                .map(|(_, v)| v.trim())
                .and_then(|v| v.strip_prefix('"'))
                .and_then(|v| v.strip_suffix("\";"))
                .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
            Some(value.to_string())
        })
        .collect();
    assert!(
        codes_in_module.len() >= 36,
        "parsed only {} code constants from `mod codes`; the parser likely \
         broke against a source-format change",
        codes_in_module.len()
    );

    let spec_path = Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
    let spec = match fs::read_to_string(&spec_path) {
        Ok(text) => text,
        Err(e) => panic!("cannot read {}: {e}", spec_path.display()),
    };

    let missing: Vec<&String> = codes_in_module
        .iter()
        .filter(|code| {
            let table_cell = format!("| `{code}` |");
            !spec.contains(&table_cell)
        })
        .collect();
    assert!(
        missing.is_empty(),
        "validation codes emitted by the engine but absent from SPEC.md \
         § Validation (the declared complete vocabulary): {missing:?}"
    );
}
6397
/// Contact record with `id: alice`, used by the loose-file tests below.
const LOOSE_ALICE: &str = concat!(
    "---\n",
    "type: contact\n",
    "id: alice\n",
    "created: 2026-06-01T08:00:00-07:00\n",
    "updated: 2026-06-01T08:00:00-07:00\n",
    "summary: Alice\n",
    "---\n",
    "body\n",
);
/// Contact record with `id: bob`; the tests below write it directly under
/// `records/` rather than in a type folder.
const LOOSE_BOB: &str = concat!(
    "---\n",
    "type: contact\n",
    "id: bob\n",
    "created: 2026-06-01T08:00:00-07:00\n",
    "updated: 2026-06-01T08:00:00-07:00\n",
    "summary: Bob loose\n",
    "---\n",
    "body\n",
);
6402
#[test]
/// After `rebuild_indexes`, a store holding one type-folder contact and one
/// loose file directly under `records/` must validate with no issues.
fn loose_file_catalogued_in_layer_jsonl_validates_clean() {
    let fixture = Fixture::new();
    let files = [
        ("records/contacts/alice.md", LOOSE_ALICE),
        ("records/bob.md", LOOSE_BOB),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }
    fixture.rebuild_indexes();

    let issues = fixture.store_all();
    let clean = issues.is_empty();
    assert!(
        clean,
        "a rebuilt store with a catalogued loose file must validate clean, got: {issues:?}"
    );
}
6415
#[test]
/// Removing `records/index.jsonl` after a rebuild, while a loose file still
/// sits directly under `records/`, must raise `INDEX_JSONL_MISSING`.
fn loose_file_with_missing_layer_jsonl_is_index_jsonl_missing() {
    let fixture = Fixture::new();
    let files = [
        ("records/contacts/alice.md", LOOSE_ALICE),
        ("records/bob.md", LOOSE_BOB),
    ];
    for (path, content) in files {
        fixture.write(path, content);
    }
    fixture.rebuild_indexes();

    // Delete the layer sidecar that the rebuild just produced.
    let layer_sidecar = fixture.dir.path().join("records/index.jsonl");
    fs::remove_file(layer_sidecar).unwrap();

    let issues = fixture.store_all();
    let reported = has(&issues, codes::INDEX_JSONL_MISSING);
    assert!(
        reported,
        "a loose file with no layer index.jsonl must raise INDEX_JSONL_MISSING, got: {issues:?}"
    );
}
6430}