// dbmd_core/validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
//! **Two scopes.** [`validate_working_set`] is the loop default: content files
//! changed since `since`, plus any file whose wiki-links target a changed path.
//! The changed set and the per-file checks are O(changed); the incoming linkers
//! are found by a *single* embedded-ripgrep pass over the store for the whole
//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
//! per changed object, and not the parse-the-tree walk `--all` does). On that
//! path it never calls [`Store::walk`] and never builds the global cross-file
//! state. The one exception is a default call (no explicit `since`) that finds
//! no logged changes: it falls back to a per-file content sweep over
//! [`Store::walk`], still without the cross-file passes.
//! [`validate_all`] is the full SWEEP: it adds the checks that need that global
//! state — entity-dedup `DUP_*`, every-index sync, and `log.md` ordering.
18//!
//! ## Why this module is self-contained
//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and `--all` file walk here, reading mainly the two public,
//! caller-populated fields of a [`Store`]: [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The deliberate exceptions are the store-wide scans the working-set path
//! delegates to the store ([`Store::find_links_to_any`], and [`Store::walk`]
//! for its fallback sweep) and the schema types imported from
//! [`crate::parser`].
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
30
31use std::collections::{BTreeMap, BTreeSet, HashMap};
32use std::path::{Component, Path, PathBuf};
33
34use chrono::{DateTime, FixedOffset, NaiveDateTime};
35use serde_norway::Value;
36
37use crate::parser::{Schema, Shape};
38use crate::store::Store;
39
/// Severity of a validation [`Issue`].
///
/// Only [`Severity::Error`] fails validation (non-zero exit); warnings and
/// info findings are reported but do not.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Severity {
    /// Blocks: a hard violation of the format or doctrine.
    Error,
    /// A decision point the agent resolves at its discretion.
    Warning,
    /// Visibility only; never affects exit status.
    Info,
}
51
/// A single structured validation finding.
///
/// Agent-primary and machine-parseable via `--json`. `suggestion` is a
/// deterministic remediation hint the agent applies without guessing.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Issue {
    /// The severity; only [`Severity::Error`] fails validation.
    pub severity: Severity,
    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
    pub code: &'static str,
    /// The file the issue is about.
    pub file: PathBuf,
    /// The 1-based line, when applicable; `None` when the finding has no
    /// line of its own.
    pub line: Option<u32>,
    /// The frontmatter key, when the issue is about a specific field.
    pub key: Option<String>,
    /// A human-readable message.
    pub message: String,
    /// A deterministic remediation hint, when one exists.
    pub suggestion: Option<String>,
    /// Other files involved (e.g. the duplicate partner in a collision, or
    /// `DB.md` when a policy declared there is the source of the finding).
    pub related: Vec<PathBuf>,
}
74
75impl Issue {
76    /// True if this issue fails validation (i.e. its severity is
77    /// [`Severity::Error`]).
78    pub fn is_error(&self) -> bool {
79        matches!(self.severity, Severity::Error)
80    }
81}
82
/// The canonical validation issue codes — one constant per row of the SPEC.md
/// § Validation table. Call sites reference these instead of bare strings so
/// the code and the SPEC table can never silently drift.
///
/// Each constant's value is its own name spelled as a string; that string is
/// what an [`Issue`](super::Issue) carries in its `code` field.
pub mod codes {
    /// The path has no `DB.md`; it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// The frontmatter block isn't valid YAML (or isn't a YAML mapping).
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars (emitted as a warning).
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";
    /// A wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension — drop it.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]` — use block form.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";
    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";
    /// A `DB.md` schema requires a field that's absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";
    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file with an `### Ignored types` type exists (emitted as info).
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `wiki-page` derives from an ignored-type record (emitted as a
    /// warning).
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";
    /// A `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (possible rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";
    /// A non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file isn't in the `index.jsonl`, or a jsonl record points at a missing
    /// file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";
    /// `tags` isn't a flat YAML list of short scalar labels (emitted as a
    /// warning).
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";
}
169
/// The SPEC's `summary` length bound, counted in `char`s (not bytes). A
/// summary longer than this → `SUMMARY_TOO_LONG`.
const MAX_SUMMARY_LEN: usize = 200;
172
/// Recognized `log.md` entry kinds (SPEC § `log.md`). Anything else →
/// `LOG_UNKNOWN_KIND` (warning, not error).
///
/// Keep this list in step with the SPEC section it mirrors.
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
186
187// ─────────────────────────────────────────────────────────────────────────────
188//  Public entrypoints
189// ─────────────────────────────────────────────────────────────────────────────
190
191/// **Loop default.** Validate the working set: content files changed since
192/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
193/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
194/// none of the cross-file global passes (entity-dedup, every-index sync,
195/// `log.md` ordering) that `--all` adds. If the default call finds no logged
196/// changed objects, it falls back to a per-file content sweep so an externally
197/// edited or freshly copied store cannot pass vacuously.
198///
199/// **Cost.** The changed set is read from `log.md` — O(changed): every
200/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
201/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
202/// over that set plus its incoming linkers — also O(changed). The one part that
203/// is *not* O(changed) is discovering those incoming linkers: a link to a
204/// changed path can live in the body or a typed frontmatter field of any file,
205/// so it is found by a **single** embedded-ripgrep pass over the store
206/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
207/// scan, flat in the changed-set size. (It was previously a full store read
208/// *per* changed object — `O(changed × store)`; that is the blow-up this path
209/// no longer pays.) The unavoidable single content scan is the same shape as
210/// free-text `dbmd search`; the sidecar `links` projection can't replace it
211/// because it omits body/typed-field edges.
212pub fn validate_working_set(
213    store: &Store,
214    since: Option<DateTime<FixedOffset>>,
215) -> crate::Result<Vec<Issue>> {
216    if !store_marker_present(store) {
217        return Ok(vec![not_a_store_issue(store)]);
218    }
219
220    let cutoff = match since {
221        Some(ts) => Some(ts),
222        None => last_validate_at(store),
223    };
224
225    // 1. Changed objects, straight from the log (O(changed) — never a walk).
226    let changed = changed_objects_since(store, cutoff);
227    if changed.is_empty() && since.is_none() {
228        return validate_content_sweep(store);
229    }
230
231    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
232    //    path (the linker may now be stale even though it didn't change). The
233    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
234    //    pass over the store for the WHOLE changed set (one `.md` walk, one
235    //    presence-only/early-exit scan per file), not one walk per object. This
236    //    is the fix for the `O(changed × store)` blow-up that calling
237    //    `find_links_to` in a loop produced (a full store read per changed
238    //    object); the cost is now a single store scan regardless of how many
239    //    objects changed. A returned self-link is harmlessly deduped by the set
240    //    (the object is already inserted below).
241    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
242    let mut working: BTreeSet<PathBuf> = changed;
243    for linker in store.find_links_to_any(&changed_targets)? {
244        working.insert(linker);
245    }
246
247    let mut issues = Vec::new();
248    for rel in &working {
249        let abs = store.root.join(rel);
250        // A changed path can be a *deletion* — skip files that no longer exist;
251        // the incoming-linker scan above already flagged links into them.
252        if !abs.is_file() {
253            continue;
254        }
255        // `None` basename index: the working-set pass does not build the
256        // store-wide basename map (that is a `--all`-only structure), so a bare
257        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
258        // `--all` sweep does the ambiguity upgrade.
259        check_content_file(store, rel, &abs, None, &mut issues);
260    }
261    issues.sort_by(issue_order);
262    Ok(issues)
263}
264
265fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
266    let mut issues = Vec::new();
267    for rel in store.walk()? {
268        let abs = store.root.join(&rel);
269        check_content_file(store, &rel, &abs, None, &mut issues);
270    }
271    issues.sort_by(issue_order);
272    Ok(issues)
273}
274
275/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
276/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
277/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
278/// loop.
279pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
280    if !store_marker_present(store) {
281        return Ok(vec![not_a_store_issue(store)]);
282    }
283
284    let mut issues = Vec::new();
285
286    // Store-identity file: `DB.md` shape (type / required fields / section
287    // headers). A single root file, checked once in the sweep — not a content
288    // file (it carries no `summary`), so it is not part of `walk_content_files`.
289    check_db_md(store, &mut issues);
290
291    let files = walk_content_files(&store.root);
292
293    // The basename index makes the short-form wiki-link check able to upgrade a
294    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
295    // Built once from the already-gathered sweep list (no extra walk); only the
296    // `--all` path has it (the working-set path stays O(changed)).
297    let basenames = build_basename_index(&files);
298
299    // Per-file checks over the whole store.
300    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
301    for rel in &files {
302        let abs = store.root.join(rel);
303        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
304            parsed.push((rel.clone(), p));
305        }
306    }
307
308    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
309    check_duplicates(store, &parsed, &mut issues);
310
311    // Cross-file: hierarchical index.md + index.jsonl sync.
312    check_indexes(store, &files, &mut issues);
313
314    // Cross-file: log.md well-formedness + ordering.
315    check_log(store, &mut issues);
316
317    issues.sort_by(issue_order);
318    Ok(issues)
319}
320
321// ─────────────────────────────────────────────────────────────────────────────
322//  Per-file content checks (shared by both scopes)
323// ─────────────────────────────────────────────────────────────────────────────
324
/// What `validate_all`'s cross-file pass needs from a per-file parse: the
/// parsed YAML mapping (for dedup keys) and the raw frontmatter text (for
/// text-based wiki-link extraction). The body and fence-line are consumed
/// inline during the per-file pass and not carried here.
struct Parsed {
    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ malformed
    /// YAML (a `FM_MALFORMED_YAML` was already emitted). An empty frontmatter
    /// block is stored as `Some` of an empty map, not as `None`.
    fm: Option<BTreeMap<String, Value>>,
    /// The raw frontmatter YAML text (between the fences) — the source for
    /// text-based wiki-link extraction in dedup.
    fm_yaml: String,
}
337
338/// Run every per-file check on one content file, pushing issues. Returns the
339/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
340/// `None` only when the file is unreadable or has no frontmatter block at all
341/// (which for a content file is itself reported).
342fn check_content_file(
343    store: &Store,
344    rel: &Path,
345    abs: &Path,
346    basenames: Option<&BasenameIndex>,
347    issues: &mut Vec<Issue>,
348) -> Option<Parsed> {
349    let text = match std::fs::read_to_string(abs) {
350        Ok(t) => t,
351        Err(_) => return None,
352    };
353
354    let is_content = is_content_file(rel);
355
356    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
357        Some(split) => split,
358        None => {
359            // No frontmatter at all. For a content file that means there's no
360            // `type:` and no `summary:` — report both the way a parsed-but-empty
361            // file would, so the agent gets the same actionable codes.
362            if is_content {
363                push(
364                    issues,
365                    Severity::Error,
366                    codes::FM_MISSING_TYPE,
367                    rel,
368                    None,
369                    Some("type".into()),
370                    "content file has no frontmatter `type:`".into(),
371                    Some("add a YAML frontmatter block with `type:`".into()),
372                    vec![],
373                );
374                push(
375                    issues,
376                    Severity::Error,
377                    codes::SUMMARY_MISSING,
378                    rel,
379                    None,
380                    Some("summary".into()),
381                    "content file has no `summary`".into(),
382                    Some("run `dbmd fm init`".into()),
383                    vec![],
384                );
385            }
386            return None;
387        }
388    };
389
390    // Parse the YAML block.
391    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
392        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
393        // An empty frontmatter block parses as Null; treat as an empty mapping.
394        Ok(Value::Null) => Some(BTreeMap::new()),
395        Ok(_) => {
396            // A scalar / sequence at the top level isn't a frontmatter mapping.
397            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
398            // block is opaque, so there is no single offending field line.
399            push(
400                issues,
401                Severity::Error,
402                codes::FM_MALFORMED_YAML,
403                rel,
404                Some(1),
405                None,
406                "frontmatter is not a YAML mapping".into(),
407                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
408                vec![],
409            );
410            None
411        }
412        Err(e) => {
413            // Anchor to line 1 (the opening `---`): an unparseable block has no
414            // single offending field line; the agent re-reads the whole block.
415            push(
416                issues,
417                Severity::Error,
418                codes::FM_MALFORMED_YAML,
419                rel,
420                Some(1),
421                None,
422                format!("frontmatter block isn't valid YAML: {e}"),
423                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
424                vec![],
425            );
426            None
427        }
428    };
429
430    if let Some(map) = &fm {
431        // The detailed frontmatter checks only run when the YAML parsed.
432        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
433    }
434
435    // Wiki-link doctrine checks run on the body of every content file (and
436    // also on index/log meta files, whose entries are wiki-links too).
437    check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
438
439    Some(Parsed { fm, fm_yaml })
440}
441
442/// All frontmatter-level checks for a content file with valid YAML.
443fn check_frontmatter(
444    store: &Store,
445    rel: &Path,
446    fm: &BTreeMap<String, Value>,
447    fm_yaml: &str,
448    basenames: Option<&BasenameIndex>,
449    issues: &mut Vec<Issue>,
450    is_content: bool,
451) {
452    let type_ = fm.get("type").and_then(scalar_string);
453
454    // ── type ────────────────────────────────────────────────────────────────
455    if is_content && type_.is_none() {
456        push(
457            issues,
458            Severity::Error,
459            codes::FM_MISSING_TYPE,
460            rel,
461            fm_key_line_or_top(fm_yaml, "type"),
462            Some("type".into()),
463            "content file has no `type:`".into(),
464            Some("add a `type:` field (e.g. `type: contact`)".into()),
465            vec![],
466        );
467    }
468
469    // ── summary (universal on content files) ──────────────────────────────────
470    if is_content {
471        check_summary(rel, fm, fm_yaml, issues);
472    }
473
474    // ── timestamps: created / updated ─────────────────────────────────────────
475    for (key, missing_code) in [
476        ("created", codes::FM_MISSING_CREATED),
477        ("updated", codes::FM_MISSING_UPDATED),
478    ] {
479        if is_content && !fm.contains_key(key) {
480            push(
481                issues,
482                Severity::Error,
483                missing_code,
484                rel,
485                fm_key_line_or_top(fm_yaml, key),
486                Some(key.into()),
487                format!("content file has no `{key}:` timestamp"),
488                Some(format!(
489                    "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
490                )),
491                vec![],
492            );
493        } else if let Some(v) = fm.get(key) {
494            if let Some(s) = scalar_string(v) {
495                if !is_iso8601(&s) {
496                    push(
497                        issues,
498                        Severity::Error,
499                        codes::FM_BAD_TIMESTAMP,
500                        rel,
501                        fm_key_line(fm_yaml, key),
502                        Some(key.into()),
503                        format!("`{key}` is not ISO-8601: {s:?}"),
504                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
505                        vec![],
506                    );
507                }
508            }
509        }
510    }
511    // ── tags shape ────────────────────────────────────────────────────────────
512    if let Some(tags) = fm.get("tags") {
513        if !is_flat_scalar_list(tags) {
514            push(
515                issues,
516                Severity::Warning,
517                codes::TAGS_MALFORMED,
518                rel,
519                fm_key_line(fm_yaml, "tags"),
520                Some("tags".into()),
521                "`tags` must be a flat YAML list of short scalar labels".into(),
522                Some("use block form: one `- <tag>` per line".into()),
523                vec![],
524            );
525        }
526    }
527
528    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
529    for key in detect_flow_form_link_lists(fm_yaml) {
530        push(
531            issues,
532            Severity::Error,
533            codes::WIKI_LINK_FLOW_FORM_LIST,
534            rel,
535            fm_key_line(fm_yaml, &key),
536            Some(key.clone()),
537            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
538            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
539            vec![],
540        );
541    }
542
543    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
544    // Skip keys that have an explicit `link to` schema spec — those are checked
545    // (with prefix enforcement) in `check_schema`, and double-reporting the same
546    // link via two paths would be noise.
547    let schema_link_keys: BTreeSet<String> =
548        effective_schema(store, type_.as_deref().unwrap_or(""))
549            .map(|s| {
550                s.fields
551                    .iter()
552                    .filter(|f| f.link_prefix.is_some())
553                    .map(|f| f.name.clone())
554                    .collect()
555            })
556            .unwrap_or_default();
557    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
558        if schema_link_keys.contains(&key) {
559            continue;
560        }
561        check_wiki_link(
562            store,
563            rel,
564            &link,
565            Some(link.line),
566            Some(&key),
567            basenames,
568            issues,
569        );
570    }
571
572    // ── policies: ignored types ──────────────────────────────────────────────
573    if let Some(t) = &type_ {
574        if store.config.ignored_types.iter().any(|it| it == t) {
575            push(
576                issues,
577                Severity::Info,
578                codes::POLICY_IGNORED_TYPE_PRESENT,
579                rel,
580                fm_key_line(fm_yaml, "type"),
581                Some("type".into()),
582                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
583                Some(
584                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
585                        .into(),
586                ),
587                // The policy source: `DB.md` declares the ignored type.
588                vec![PathBuf::from("DB.md")],
589            );
590        }
591        // A wiki-page deriving from an ignored-type record → warning. The
592        // decision lives in the shared `derived_from_ignored_type` entry point;
593        // this side only supplies the `derived_from` targets (with their line,
594        // which the issue carries) and renders the finding.
595        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
596            if let Some(hit) =
597                derived_from_ignored_type(store, t, std::iter::once(link.target.as_str()))
598            {
599                push(
600                    issues,
601                    Severity::Warning,
602                    codes::POLICY_IGNORED_TYPE_DERIVED,
603                    rel,
604                    Some(link.line),
605                    Some("derived_from".into()),
606                    format!(
607                        "wiki-page derives from ignored-type record `{}` (type `{}`)",
608                        hit.target, hit.target_type
609                    ),
610                    Some(
611                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
612                            .into(),
613                    ),
614                    // The ignored-type source record, plus `DB.md` (the policy
615                    // source that lists the ignored type).
616                    vec![
617                        PathBuf::from(format!("{}.md", hit.target)),
618                        PathBuf::from("DB.md"),
619                    ],
620                );
621            }
622        }
623    }
624
625    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
626    if let Some(t) = &type_ {
627        if let Some(schema) = effective_schema(store, t) {
628            check_schema(store, rel, fm, fm_yaml, &schema, issues);
629        }
630    }
631}
632
633/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
634fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
635    let line = fm_key_line(fm_yaml, "summary");
636    match fm.get("summary") {
637        None => push(
638            issues,
639            Severity::Error,
640            codes::SUMMARY_MISSING,
641            rel,
642            // A missing `summary` key has no line of its own → anchor to the
643            // frontmatter block top (line 1), the EXPECTED field-absence rule.
644            fm_key_line_or_top(fm_yaml, "summary"),
645            Some("summary".into()),
646            "content file has no `summary`".into(),
647            Some("run `dbmd fm init`".into()),
648            vec![],
649        ),
650        Some(v) => {
651            let s = scalar_string(v).unwrap_or_default();
652            if s.trim().is_empty() {
653                push(
654                    issues,
655                    Severity::Error,
656                    codes::SUMMARY_EMPTY,
657                    rel,
658                    line,
659                    Some("summary".into()),
660                    "`summary` is present but empty".into(),
661                    Some("write a one-line summary, or run `dbmd fm init`".into()),
662                    vec![],
663                );
664            } else if s.contains('\n') {
665                push(
666                    issues,
667                    Severity::Error,
668                    codes::SUMMARY_MULTILINE,
669                    rel,
670                    line,
671                    Some("summary".into()),
672                    "`summary` must be one line (contains a newline)".into(),
673                    Some("collapse the summary to a single line".into()),
674                    vec![],
675                );
676            } else if s.chars().count() > MAX_SUMMARY_LEN {
677                push(
678                    issues,
679                    Severity::Warning,
680                    codes::SUMMARY_TOO_LONG,
681                    rel,
682                    line,
683                    Some("summary".into()),
684                    format!(
685                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
686                        s.chars().count()
687                    ),
688                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
689                    vec![],
690                );
691            }
692        }
693    }
694}
695
696/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
697fn check_body_wiki_links(
698    store: &Store,
699    rel: &Path,
700    body: &str,
701    fm_end_line: u32,
702    basenames: Option<&BasenameIndex>,
703    issues: &mut Vec<Issue>,
704) {
705    for link in extract_wiki_links(body) {
706        // Body lines are offset past the frontmatter block. `link.line` is
707        // 1-based within `body`; the body starts at `fm_end_line + 1`.
708        let abs_line = fm_end_line + link.line;
709        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
710    }
711}
712
/// A store-wide map from a file's bare basename (its stem, no `.md`) to every
/// store-relative path carrying that basename. Built once per `validate --all`
/// sweep so the short-form wiki-link check can distinguish a merely short-form
/// target (`WIKI_LINK_SHORT_FORM`) from one that is *ambiguous* because the bare
/// basename matches two or more files (`WIKI_LINK_AMBIGUOUS`, the defensive
/// code). The working-set path passes `None` instead of an index, so it
/// reports the plain short-form error without the ambiguity upgrade.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;
721
722/// Build the [`BasenameIndex`] from the swept file list (already gathered by
723/// `validate_all`; no extra walk).
724fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
725    let mut idx: BasenameIndex = HashMap::new();
726    for rel in files {
727        if let Some(stem) = rel.file_stem().and_then(|s| s.to_str()) {
728            idx.entry(stem.to_string()).or_default().push(rel.clone());
729        }
730    }
731    idx
732}
733
734/// The shared per-wiki-link doctrine + integrity check used by both body links
735/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
736/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
737/// when its bare basename matches ≥2 files.
738fn check_wiki_link(
739    store: &Store,
740    rel: &Path,
741    link: &Link,
742    line: Option<u32>,
743    key: Option<&str>,
744    basenames: Option<&BasenameIndex>,
745    issues: &mut Vec<Issue>,
746) {
747    let bare = link.target.trim_end_matches(".md");
748
749    // Short-form: not a full store-relative path (no `/`, or first segment isn't
750    // a known layer).
751    if !is_full_store_path(bare) {
752        // Ambiguous (defensive) takes precedence over plain short-form when the
753        // target is a bare basename (no `/`) that matches ≥2 files in the store.
754        // Only computable in the sweep (where `basenames` is populated); the
755        // working-set path falls through to the plain short-form error.
756        if !bare.contains('/') {
757            if let Some(idx) = basenames {
758                if let Some(matches) = idx.get(bare) {
759                    if matches.len() >= 2 {
760                        let mut related = matches.clone();
761                        related.sort();
762                        push(
763                            issues,
764                            Severity::Error,
765                            codes::WIKI_LINK_AMBIGUOUS,
766                            rel,
767                            line,
768                            key.map(str::to_string),
769                            format!(
770                                "short-form wiki-link `[[{}]]` matches multiple files",
771                                link.target
772                            ),
773                            Some("use the full store-relative path to disambiguate".into()),
774                            related,
775                        );
776                        return;
777                    }
778                }
779            }
780        }
781        push(
782            issues,
783            Severity::Error,
784            codes::WIKI_LINK_SHORT_FORM,
785            rel,
786            line,
787            key.map(str::to_string),
788            format!(
789                "wiki-link `[[{}]]` is not a full store-relative path",
790                link.target
791            ),
792            short_form_suggestion(bare),
793            vec![],
794        );
795        // Don't also report broken; the agent must fix the form first.
796        return;
797    }
798
799    // `.md` extension → warning, then still check existence.
800    if link.target.ends_with(".md") {
801        push(
802            issues,
803            Severity::Warning,
804            codes::WIKI_LINK_HAS_EXTENSION,
805            rel,
806            line,
807            key.map(str::to_string),
808            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
809            Some(format!("drop the extension: [[{bare}]]")),
810            vec![],
811        );
812    }
813
814    let Some(target_rel) = safe_md_target_rel(bare) else {
815        push(
816            issues,
817            Severity::Error,
818            codes::WIKI_LINK_BROKEN,
819            rel,
820            line,
821            key.map(str::to_string),
822            format!("wiki-link target `{bare}` is not a safe store-relative path"),
823            Some("use a full store-relative path under sources/, records/, or wiki/".into()),
824            vec![],
825        );
826        return;
827    };
828
829    // Broken: target file doesn't exist (O(1) stat).
830    let target_abs = store.root.join(target_rel);
831    if !target_abs.is_file() {
832        push(
833            issues,
834            Severity::Error,
835            codes::WIKI_LINK_BROKEN,
836            rel,
837            line,
838            key.map(str::to_string),
839            format!("wiki-link target `{bare}` doesn't exist"),
840            Some(format!(
841                "create `{bare}.md`, or point the link at an existing file"
842            )),
843            vec![],
844        );
845    }
846}
847
848// ─────────────────────────────────────────────────────────────────────────────
849//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
850// ─────────────────────────────────────────────────────────────────────────────
851
852/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
853/// block, or `None`. This is the **only** source of schema enforcement — the
854/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
855/// store that wants its `contact` / `expense` / etc. fields enforced declares
856/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
857/// copy-in starting point.
858fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
859    store.config.schemas.get(type_).cloned()
860}
861
862/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
863fn check_schema(
864    store: &Store,
865    rel: &Path,
866    fm: &BTreeMap<String, Value>,
867    fm_yaml: &str,
868    schema: &Schema,
869    issues: &mut Vec<Issue>,
870) {
871    for spec in &schema.fields {
872        let present = fm.get(&spec.name);
873        let line = fm_key_line(fm_yaml, &spec.name);
874
875        // Required.
876        let is_empty = match present {
877            None => true,
878            Some(v) => scalar_string(v)
879                .map(|s| s.trim().is_empty())
880                .unwrap_or(false),
881        };
882        if spec.required && is_empty {
883            push(
884                issues,
885                Severity::Error,
886                codes::SCHEMA_MISSING_REQUIRED,
887                rel,
888                // Absent key → anchor to the frontmatter top (line 1); a
889                // present-but-empty value keeps its own line.
890                fm_key_line_or_top(fm_yaml, &spec.name),
891                Some(spec.name.clone()),
892                format!("required field `{}` is absent or empty", spec.name),
893                Some(format!("set `{}` to a non-empty value", spec.name)),
894                vec![],
895            );
896            continue;
897        }
898        let Some(value) = present else { continue };
899
900        // An OPTIONAL field that is `null` or empty is simply unset — there is
901        // no value to shape/enum/link-check. (The required+empty case already
902        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
903        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
904        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
905        let value_empty = value.is_null()
906            || scalar_string(value)
907                .map(|s| s.trim().is_empty())
908                .unwrap_or(false);
909        if !spec.required && value_empty {
910            continue;
911        }
912
913        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
914        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
915        if let Some(prefix) = &spec.link_prefix {
916            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
917            continue; // a link field is never also shape/enum-checked
918        }
919
920        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
921        // or mapping satisfies neither, and would otherwise slip through both
922        // checks (`scalar_string` returns `None` for non-scalars, so the enum
923        // and shape bodies silently no-op). Flag it as a shape mismatch rather
924        // than let a structurally-wrong value validate clean. (Link fields,
925        // which legitimately take block-form sequences, already `continue`d.)
926        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
927            push(
928                issues,
929                Severity::Error,
930                codes::SCHEMA_SHAPE_MISMATCH,
931                rel,
932                line,
933                Some(spec.name.clone()),
934                format!(
935                    "`{}` must be a scalar value, found a list or mapping",
936                    spec.name
937                ),
938                Some(format!("set `{}` to a single scalar value", spec.name)),
939                vec![],
940            );
941            continue;
942        }
943
944        // enum
945        if let Some(allowed) = &spec.enum_values {
946            if let Some(s) = scalar_string(value) {
947                if !allowed.iter().any(|a| a == &s) {
948                    push(
949                        issues,
950                        Severity::Error,
951                        codes::SCHEMA_ENUM_VIOLATION,
952                        rel,
953                        line,
954                        Some(spec.name.clone()),
955                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
956                        Some(format!("use one of: {}", allowed.join(", "))),
957                        vec![],
958                    );
959                }
960            }
961            continue;
962        }
963
964        // shape
965        if let Some(shape) = spec.shape {
966            check_schema_shape(rel, &spec.name, value, shape, line, issues);
967        }
968    }
969}
970
971/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
972/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
973/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
974/// recognized exactly like the quoted form.
975fn check_schema_link(
976    store: &Store,
977    rel: &Path,
978    field: &str,
979    fm_yaml: &str,
980    prefix: &Path,
981    line: Option<u32>,
982    issues: &mut Vec<Issue>,
983) {
984    let prefix_str = prefix.to_string_lossy();
985    let prefix_str = prefix_str.trim_end_matches('/');
986    let suggestion = |target_leaf: &str| {
987        Some(format!(
988            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
989        ))
990    };
991
992    let links = frontmatter_links_for_key(fm_yaml, field, 2);
993    if links.is_empty() {
994        // No wiki-link in the field's value → it's a plain string.
995        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
996        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
997        let leaf = slugish(raw);
998        push(
999            issues,
1000            Severity::Error,
1001            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1002            rel,
1003            line,
1004            Some(field.to_string()),
1005            format!(
1006                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1007            ),
1008            suggestion(&leaf),
1009            vec![],
1010        );
1011        return;
1012    }
1013
1014    for link in links {
1015        if link.target.ends_with(".md") {
1016            let bare = link.target.trim_end_matches(".md");
1017            push(
1018                issues,
1019                Severity::Warning,
1020                codes::WIKI_LINK_HAS_EXTENSION,
1021                rel,
1022                Some(link.line),
1023                Some(field.to_string()),
1024                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1025                Some(format!("drop the extension: [[{bare}]]")),
1026                vec![],
1027            );
1028        }
1029        let bare = link.target.trim_end_matches(".md");
1030        if !path_under_prefix(bare, prefix_str) {
1031            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1032            push(
1033                issues,
1034                Severity::Error,
1035                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1036                rel,
1037                line,
1038                Some(field.to_string()),
1039                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1040                suggestion(leaf),
1041                vec![],
1042            );
1043        } else {
1044            let Some(target_rel) = safe_md_target_rel(bare) else {
1045                push(
1046                    issues,
1047                    Severity::Error,
1048                    codes::WIKI_LINK_BROKEN,
1049                    rel,
1050                    line,
1051                    Some(field.to_string()),
1052                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1053                    Some(
1054                        "use a full store-relative path under sources/, records/, or wiki/".into(),
1055                    ),
1056                    vec![],
1057                );
1058                continue;
1059            };
1060            // Correct prefix — still surface a broken target so the agent sees
1061            // one consistent vocabulary.
1062            let target_abs = store.root.join(target_rel);
1063            if !target_abs.is_file() {
1064                push(
1065                    issues,
1066                    Severity::Error,
1067                    codes::WIKI_LINK_BROKEN,
1068                    rel,
1069                    line,
1070                    Some(field.to_string()),
1071                    format!("wiki-link target `{bare}` doesn't exist"),
1072                    Some(format!(
1073                        "create `{bare}.md`, or point the link at an existing file"
1074                    )),
1075                    vec![],
1076                );
1077            }
1078        }
1079    }
1080}
1081
1082/// Shape enforcement for a non-link, non-enum schema field.
1083fn check_schema_shape(
1084    rel: &Path,
1085    field: &str,
1086    value: &Value,
1087    shape: Shape,
1088    line: Option<u32>,
1089    issues: &mut Vec<Issue>,
1090) {
1091    let s = scalar_string(value).unwrap_or_default();
1092    let ok = match shape {
1093        Shape::String => true, // any scalar string
1094        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1095        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1096        Shape::Date => is_iso8601_date_or_datetime(&s),
1097        Shape::Email => is_email(&s),
1098        Shape::Currency => is_currency(&s),
1099        Shape::Url => is_url(&s),
1100    };
1101    if !ok {
1102        push(
1103            issues,
1104            Severity::Error,
1105            codes::SCHEMA_SHAPE_MISMATCH,
1106            rel,
1107            line,
1108            Some(field.to_string()),
1109            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1110            Some(shape_suggestion(shape)),
1111            vec![],
1112        );
1113    }
1114}
1115
1116// ─────────────────────────────────────────────────────────────────────────────
1117//  Cross-file: entity-dedup collisions (validate_all only)
1118// ─────────────────────────────────────────────────────────────────────────────
1119
1120/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1121///
1122/// `DUP_ID` is universal (two files with the same explicit `id`).
1123/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1124/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1125/// uniqueness constraint, and two records of that type whose declared values
1126/// collide warn. No type carries a built-in dedup key — the store opts in.
1127///
1128/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1129/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1130/// lexicographically smallest store-relative path in the group (a total order →
1131/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1132/// that field's line on the reported file and carries it as `key`; a multi-field
1133/// key anchors to line 1 with a null key.
1134fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1135    // Path → frontmatter YAML, for resolving the anchor field's line on the
1136    // reported (smallest-path) member.
1137    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1138        .iter()
1139        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1140        .collect();
1141
1142    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1143    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1144    for (rel, p) in parsed {
1145        if let Some(map) = &p.fm {
1146            if let Some(id) = map.get("id").and_then(scalar_string) {
1147                if !id.trim().is_empty() {
1148                    by_id.entry(id).or_default().push(rel.clone());
1149                }
1150            }
1151        }
1152    }
1153    for (id, files) in &by_id {
1154        if files.len() > 1 {
1155            let (reported, related) = canonical_and_related(files);
1156            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1157            push(
1158                issues,
1159                Severity::Error,
1160                codes::DUP_ID,
1161                &reported,
1162                line,
1163                Some("id".into()),
1164                format!("id {id:?} is declared by more than one file"),
1165                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1166                related,
1167            );
1168        }
1169    }
1170
1171    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1172    // Every constraint comes from the store's `## Schemas`; a type with no
1173    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1174    // key-ordered, so emitted issues are deterministic across runs.
1175    for (type_name, schema) in &store.config.schemas {
1176        for key_fields in &schema.unique_keys {
1177            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1178        }
1179    }
1180}
1181
1182/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1183/// declared `key_fields` render to the same token tuple. Files missing any key
1184/// field are skipped — an incomplete key is never a collision.
1185///
1186/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1187/// store-relative path; `related` is the rest. A single-field key anchors to
1188/// that field's line on the reported file and carries it as `key`; a multi-field
1189/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1190fn soft_dup(
1191    parsed: &[(PathBuf, Parsed)],
1192    issues: &mut Vec<Issue>,
1193    type_: &str,
1194    key_fields: &[String],
1195    fm_yaml_of: &HashMap<&PathBuf, &str>,
1196) {
1197    if key_fields.is_empty() {
1198        return;
1199    }
1200    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1201    for (rel, p) in parsed {
1202        let is_type =
1203            p.fm.as_ref()
1204                .and_then(|m| m.get("type"))
1205                .and_then(scalar_string)
1206                .map(|t| t == type_)
1207                .unwrap_or(false);
1208        if !is_type {
1209            continue;
1210        }
1211        if let Some(key) = dedup_key(p, key_fields) {
1212            groups.entry(key).or_default().push(rel.clone());
1213        }
1214    }
1215    // HashMap iteration is nondeterministic; sort by reported member so the
1216    // emitted issue order is stable across runs.
1217    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1218        .values()
1219        .filter(|files| files.len() > 1)
1220        .map(|files| canonical_and_related(files))
1221        .collect();
1222    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1223
1224    let fields_disp = key_fields.join(", ");
1225    for (reported, related) in collisions {
1226        // Single-field keys anchor to the field's line + carry the key; multi-
1227        // field keys anchor to line 1 with a null key.
1228        let (line, key) = if key_fields.len() == 1 {
1229            (
1230                fm_yaml_of
1231                    .get(&reported)
1232                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1233                Some(key_fields[0].clone()),
1234            )
1235        } else {
1236            (Some(1), None)
1237        };
1238        let n = related.len();
1239        push(
1240            issues,
1241            Severity::Warning,
1242            codes::DUP_UNIQUE_KEY,
1243            &reported,
1244            line,
1245            key,
1246            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1247            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1248            related,
1249        );
1250    }
1251}
1252
1253/// Render a type's `unique:` key for one file: each field's dedup token in
1254/// order, or `None` if any field is absent/empty (an incomplete key never
1255/// collides).
1256fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1257    let mut out = Vec::with_capacity(key_fields.len());
1258    for f in key_fields {
1259        out.push(dedup_token(p, f)?);
1260    }
1261    Some(out)
1262}
1263
1264/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1265/// values (single or block-sequence list) reduce to their lower-cased target
1266/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1267/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1268fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1269    // Wiki-links first — read from the raw frontmatter text so the unquoted
1270    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1271    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1272    if !links.is_empty() {
1273        let set: BTreeSet<String> = links
1274            .into_iter()
1275            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1276            .filter(|t| !t.is_empty())
1277            .collect();
1278        return if set.is_empty() {
1279            None
1280        } else {
1281            Some(set.into_iter().collect::<Vec<_>>().join(","))
1282        };
1283    }
1284    match p.fm.as_ref()?.get(field) {
1285        Some(Value::Sequence(items)) => {
1286            let set: BTreeSet<String> = items
1287                .iter()
1288                .filter_map(scalar_string)
1289                .map(|s| s.trim().to_lowercase())
1290                .filter(|t| !t.is_empty())
1291                .collect();
1292            if set.is_empty() {
1293                None
1294            } else {
1295                Some(set.into_iter().collect::<Vec<_>>().join(","))
1296            }
1297        }
1298        Some(v) => {
1299            let s = scalar_string(v)?.trim().to_lowercase();
1300            if s.is_empty() {
1301                None
1302            } else {
1303                Some(s)
1304            }
1305        }
1306        None => None,
1307    }
1308}
1309
/// Divide a collision group into its reported member and the remaining ones.
///
/// The reported member is the lexicographically smallest store-relative path;
/// `related` holds every other path in ascending order. Because paths are
/// totally ordered the split is deterministic — the property reporting rule #1
/// relies on.
///
/// # Panics
///
/// Panics when `files` is empty; callers only pass groups of two or more.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = files.iter().cloned().collect();
    ordered.sort();
    let mut remaining = ordered.into_iter();
    let reported = remaining
        .next()
        .expect("collision group must contain at least one path");
    (reported, remaining.collect())
}
1321
1322// ─────────────────────────────────────────────────────────────────────────────
1323//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1324// ─────────────────────────────────────────────────────────────────────────────
1325
1326/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1327fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1328    // Group content files by their immediate parent folder (the type-folder,
1329    // *across date shards* — a sharded file's "type folder" is the folder right
1330    // under the layer). We key on the type-folder so shards roll up correctly.
1331    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1332    let mut layers_present: BTreeSet<&'static str> = BTreeSet::new();
1333    for rel in files {
1334        // The layer is the first path component — recorded independently of the
1335        // type-folder so a layer containing only loose files still requires an
1336        // `index.md`.
1337        if let Some(layer) = rel.iter().next().and_then(|s| s.to_str()) {
1338            match layer {
1339                "sources" => layers_present.insert("sources"),
1340                "records" => layers_present.insert("records"),
1341                "wiki" => layers_present.insert("wiki"),
1342                _ => false,
1343            };
1344        }
1345        if let Some(tf) = type_folder_of(rel) {
1346            type_folders.entry(tf).or_default().push(rel.clone());
1347        }
1348    }
1349
1350    // ── Root index.md ─────────────────────────────────────────────────────────
1351    if !files.is_empty() {
1352        let root_index = store.root.join("index.md");
1353        if !root_index.is_file() {
1354            push(
1355                issues,
1356                Severity::Error,
1357                codes::INDEX_MISSING,
1358                Path::new("index.md"),
1359                None,
1360                None,
1361                "store has files but no root `index.md`".into(),
1362                Some("run `dbmd index rebuild`".into()),
1363                vec![],
1364            );
1365        } else {
1366            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1367        }
1368    }
1369
1370    // ── Layer index.md ────────────────────────────────────────────────────────
1371    for layer in &layers_present {
1372        let layer_index_rel = PathBuf::from(layer).join("index.md");
1373        let abs = store.root.join(&layer_index_rel);
1374        if !abs.is_file() {
1375            push(
1376                issues,
1377                Severity::Error,
1378                codes::INDEX_MISSING,
1379                &layer_index_rel,
1380                None,
1381                None,
1382                format!("layer `{layer}/` has files but no `index.md`"),
1383                Some("run `dbmd index rebuild`".into()),
1384                vec![],
1385            );
1386        } else {
1387            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1388        }
1389    }
1390
1391    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1392    for (tf, members) in &type_folders {
1393        let index_md_rel = tf.join("index.md");
1394        let index_md_abs = store.root.join(&index_md_rel);
1395        let index_md_present = index_md_abs.is_file();
1396        if !index_md_present {
1397            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1398            // on the FOLDER (not the would-be `index.md` path). When the index is
1399            // entirely missing we do NOT additionally evaluate per-entry
1400            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1401            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1402            push(
1403                issues,
1404                Severity::Error,
1405                codes::INDEX_MISSING,
1406                tf,
1407                None,
1408                None,
1409                format!("non-empty folder `{}` has no index.md", tf.display()),
1410                Some(format!(
1411                    "run `dbmd index rebuild --folder {}`",
1412                    tf.display()
1413                )),
1414                vec![],
1415            );
1416            continue;
1417        }
1418
1419        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1420        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1421
1422        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1423        // when the `index.md` is present (above): a folder whose entire index is
1424        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1425        let jsonl_rel = tf.join("index.jsonl");
1426        let jsonl_abs = store.root.join(&jsonl_rel);
1427        if !jsonl_abs.is_file() {
1428            push(
1429                issues,
1430                Severity::Error,
1431                codes::INDEX_JSONL_MISSING,
1432                &jsonl_rel,
1433                None,
1434                None,
1435                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1436                Some("run `dbmd index rebuild`".into()),
1437                vec![],
1438            );
1439        } else {
1440            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1441        }
1442    }
1443
1444    // ── Orphan index.md: an index file in a folder with no content. ──────────
1445    for rel in walk_index_files(&store.root) {
1446        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1447        let parent_str = parent.to_string_lossy().to_string();
1448        let is_canonical = parent_str.is_empty() // root
1449            || matches!(parent_str.as_str(), "sources" | "records" | "wiki")
1450            || type_folders.contains_key(&parent);
1451        if !is_canonical {
1452            push(
1453                issues,
1454                Severity::Warning,
1455                codes::INDEX_ORPHAN,
1456                &rel,
1457                None,
1458                None,
1459                format!(
1460                    "`{}` sits in an empty or non-canonical folder",
1461                    rel.display()
1462                ),
1463                Some("remove it, or run `dbmd index rebuild`".into()),
1464                vec![],
1465            );
1466        }
1467    }
1468}
1469
1470/// Check a type-folder `index.md`'s entries against the folder's actual files:
1471/// stale entries (target gone), missing entries (file not listed), and
1472/// summary mismatches.
1473fn check_type_folder_index_md(
1474    store: &Store,
1475    tf: &Path,
1476    index_rel: &Path,
1477    members: &[PathBuf],
1478    issues: &mut Vec<Issue>,
1479) {
1480    let abs = store.root.join(index_rel);
1481    let Ok(text) = std::fs::read_to_string(&abs) else {
1482        return;
1483    };
1484    let entries = parse_index_entries(&text);
1485
1486    let listed: BTreeSet<PathBuf> = entries
1487        .iter()
1488        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1489        .collect();
1490
1491    // Stale entries + summary mismatch.
1492    for entry in &entries {
1493        let bare = entry.target.trim_end_matches(".md");
1494        let Some(target_rel) = safe_md_target_rel(bare) else {
1495            push(
1496                issues,
1497                Severity::Error,
1498                codes::INDEX_STALE_ENTRY,
1499                index_rel,
1500                Some(entry.line),
1501                None,
1502                format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1503                Some("run `dbmd index rebuild`".into()),
1504                vec![],
1505            );
1506            continue;
1507        };
1508        let target_abs = store.root.join(target_rel);
1509        if !target_abs.is_file() {
1510            push(
1511                issues,
1512                Severity::Error,
1513                codes::INDEX_STALE_ENTRY,
1514                index_rel,
1515                Some(entry.line),
1516                None,
1517                format!("index entry `[[{bare}]]` points at a missing file"),
1518                Some("run `dbmd index rebuild`".into()),
1519                // The stale target the entry names (the file that no longer
1520                // exists) — so the agent can locate the dangling reference.
1521                vec![PathBuf::from(format!("{bare}.md"))],
1522            );
1523            continue;
1524        }
1525        // Summary mismatch: the entry text must equal the file's `summary`. A
1526        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1527        // summary is also a mismatch — the SPEC requires every type-folder index
1528        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1529        // missing quote can't validate clean just because there's nothing to
1530        // compare.
1531        if let Some(expected) = read_summary(&target_abs) {
1532            match &entry.summary_text {
1533                Some(text_part) if text_part.trim() != expected.trim() => {
1534                    push(
1535                        issues,
1536                        Severity::Error,
1537                        codes::INDEX_SUMMARY_MISMATCH,
1538                        index_rel,
1539                        Some(entry.line),
1540                        None,
1541                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1542                        Some("run `dbmd index rebuild`".into()),
1543                        vec![PathBuf::from(format!("{bare}.md"))],
1544                    );
1545                }
1546                None if !expected.trim().is_empty() => {
1547                    push(
1548                        issues,
1549                        Severity::Error,
1550                        codes::INDEX_SUMMARY_MISMATCH,
1551                        index_rel,
1552                        Some(entry.line),
1553                        None,
1554                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1555                        Some("run `dbmd index rebuild`".into()),
1556                        vec![PathBuf::from(format!("{bare}.md"))],
1557                    );
1558                }
1559                _ => {}
1560            }
1561        }
1562    }
1563
1564    // Missing entries: a member file not listed. Skip the index/log meta files.
1565    // The browse view caps at 500; only flag a missing entry when the folder is
1566    // under the cap (a capped folder legitimately omits older files).
1567    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1568    if content_members.len() <= 500 {
1569        for m in content_members {
1570            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1571            if !listed.contains(&bare) {
1572                push(
1573                    issues,
1574                    Severity::Error,
1575                    codes::INDEX_MISSING_ENTRY,
1576                    index_rel,
1577                    None,
1578                    None,
1579                    format!(
1580                        "file `{}` is not listed in its folder's `index.md`",
1581                        m.display()
1582                    ),
1583                    Some("run `dbmd index rebuild`".into()),
1584                    vec![(*m).clone()],
1585                );
1586            }
1587        }
1588    }
1589    let _ = tf;
1590}
1591
1592/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1593/// folder (uncapped), every record must point at a real file, and each record's
1594/// fields must match the file's frontmatter.
1595fn check_type_folder_index_jsonl(
1596    store: &Store,
1597    tf: &Path,
1598    jsonl_rel: &Path,
1599    members: &[PathBuf],
1600    issues: &mut Vec<Issue>,
1601) {
1602    let abs = store.root.join(jsonl_rel);
1603    let Ok(text) = std::fs::read_to_string(&abs) else {
1604        return;
1605    };
1606
1607    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1608    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1609    for (i, line) in text.lines().enumerate() {
1610        let line = line.trim();
1611        if line.is_empty() {
1612            continue;
1613        }
1614        let rec: serde_json::Value = match serde_json::from_str(line) {
1615            Ok(v) => v,
1616            Err(e) => {
1617                push(
1618                    issues,
1619                    Severity::Error,
1620                    codes::INDEX_JSONL_DESYNC,
1621                    jsonl_rel,
1622                    Some((i + 1) as u32),
1623                    None,
1624                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1625                    Some("run `dbmd index rebuild`".into()),
1626                    vec![],
1627                );
1628                continue;
1629            }
1630        };
1631        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1632            if !is_safe_store_relative_path(Path::new(path)) {
1633                push(
1634                    issues,
1635                    Severity::Error,
1636                    codes::INDEX_JSONL_DESYNC,
1637                    jsonl_rel,
1638                    Some((i + 1) as u32),
1639                    None,
1640                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1641                    Some("run `dbmd index rebuild`".into()),
1642                    vec![],
1643                );
1644                continue;
1645            }
1646            records.insert(PathBuf::from(path), rec);
1647        }
1648    }
1649
1650    let member_set: BTreeSet<PathBuf> = members
1651        .iter()
1652        .filter(|m| is_content_file(m))
1653        .cloned()
1654        .collect();
1655
1656    // jsonl record → missing file = desync.
1657    for path in records.keys() {
1658        let target_abs = store.root.join(path);
1659        if !target_abs.is_file() {
1660            push(
1661                issues,
1662                Severity::Error,
1663                codes::INDEX_JSONL_DESYNC,
1664                jsonl_rel,
1665                None,
1666                None,
1667                format!(
1668                    "`index.jsonl` record points at missing file `{}`",
1669                    path.display()
1670                ),
1671                Some("run `dbmd index rebuild`".into()),
1672                vec![],
1673            );
1674        }
1675    }
1676
1677    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1678    for m in &member_set {
1679        if !records.contains_key(m) {
1680            push(
1681                issues,
1682                Severity::Error,
1683                codes::INDEX_JSONL_DESYNC,
1684                jsonl_rel,
1685                None,
1686                None,
1687                format!(
1688                    "file `{}` is missing from the complete `index.jsonl`",
1689                    m.display()
1690                ),
1691                Some("run `dbmd index rebuild`".into()),
1692                vec![m.clone()],
1693            );
1694        }
1695    }
1696
1697    // Record fields stale vs. frontmatter. SPEC § Validation defines
1698    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
1699    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
1700    // search paths read every field straight from these sidecars (`tags`,
1701    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
1702    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
1703    // a stale value answer queries with data that exists in no `.md` file.
1704    //
1705    // Rather than re-list (and drift from) every projected key, rebuild the
1706    // record the canonical projection would write for this file
1707    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
1708    // and diff the two as flat JSON maps. Every key the projection emits is
1709    // covered automatically; `path` is the join key and is skipped.
1710    for (path, rec) in &records {
1711        let target_abs = store.root.join(path);
1712        if !target_abs.is_file() {
1713            continue;
1714        }
1715        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
1716        else {
1717            continue; // unreadable / unparseable frontmatter is reported elsewhere
1718        };
1719        let Ok(expected_json) = serde_json::to_value(&expected) else {
1720            continue;
1721        };
1722        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
1723            continue;
1724        };
1725
1726        // Compare the union of keys present on either side; a key the file
1727        // projects but the sidecar omits is just as stale as a wrong value.
1728        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
1729        for key in have.keys().chain(want.keys()) {
1730            if key == "path" {
1731                continue;
1732            }
1733            if have.get(key) != want.get(key) {
1734                mismatched_keys.insert(key);
1735            }
1736        }
1737
1738        if !mismatched_keys.is_empty() {
1739            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
1740            push(
1741                issues,
1742                Severity::Error,
1743                codes::INDEX_JSONL_STALE,
1744                jsonl_rel,
1745                None,
1746                Some(keys.join(",")),
1747                format!(
1748                    "`index.jsonl` record for `{}` is stale ({})",
1749                    path.display(),
1750                    keys.join(", ")
1751                ),
1752                Some("run `dbmd index rebuild`".into()),
1753                vec![path.clone()],
1754            );
1755        }
1756    }
1757    let _ = tf;
1758}
1759
1760/// Check an index's `scope:` frontmatter against its filesystem location.
1761fn check_index_scope(
1762    store: &Store,
1763    index_rel: &Path,
1764    expected_scope: &str,
1765    expected_folder: Option<&str>,
1766    issues: &mut Vec<Issue>,
1767) {
1768    let abs = store.root.join(index_rel);
1769    let Ok(text) = std::fs::read_to_string(&abs) else {
1770        return;
1771    };
1772    let Some((yaml, _, _)) = split_frontmatter(&text) else {
1773        return;
1774    };
1775    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
1776        return;
1777    };
1778    let fm = yaml_map_to_btree(&map);
1779
1780    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
1781        // Accept "type-folder" and the SPEC example's looser "folder" alias.
1782        let scope_ok =
1783            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
1784        if !scope_ok {
1785            push(
1786                issues,
1787                Severity::Warning,
1788                codes::INDEX_WRONG_SCOPE,
1789                index_rel,
1790                fm_key_line(&yaml, "scope"),
1791                Some("scope".into()),
1792                format!(
1793                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
1794                ),
1795                Some(format!("set `scope: {expected_scope}`")),
1796                vec![],
1797            );
1798        }
1799    }
1800    // folder: must match for layer/type-folder indexes.
1801    if let Some(expected) = expected_folder {
1802        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
1803            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
1804                push(
1805                    issues,
1806                    Severity::Warning,
1807                    codes::INDEX_WRONG_SCOPE,
1808                    index_rel,
1809                    fm_key_line(&yaml, "folder"),
1810                    Some("folder".into()),
1811                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
1812                    Some(format!("set `folder: {expected}`")),
1813                    vec![],
1814                );
1815            }
1816        }
1817    }
1818}
1819
1820// ─────────────────────────────────────────────────────────────────────────────
1821//  Cross-file: log.md well-formedness + ordering (validate_all only)
1822// ─────────────────────────────────────────────────────────────────────────────
1823
1824/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries.
1825fn check_log(store: &Store, issues: &mut Vec<Issue>) {
1826    let log_rel = Path::new("log.md");
1827    let abs = store.root.join(log_rel);
1828    let Ok(text) = std::fs::read_to_string(&abs) else {
1829        return;
1830    };
1831
1832    let mut prev: Option<DateTime<FixedOffset>> = None;
1833    for (i, line) in text.lines().enumerate() {
1834        if !line.starts_with("## [") {
1835            continue;
1836        }
1837        let line_no = (i + 1) as u32;
1838        match parse_log_header(line) {
1839            None => push(
1840                issues,
1841                Severity::Error,
1842                codes::LOG_BAD_TIMESTAMP,
1843                log_rel,
1844                Some(line_no),
1845                None,
1846                format!("log entry header has an unparseable timestamp: {line:?}"),
1847                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
1848                vec![],
1849            ),
1850            Some((ts, kind, _object)) => {
1851                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
1852                    push(
1853                        issues,
1854                        Severity::Warning,
1855                        codes::LOG_UNKNOWN_KIND,
1856                        log_rel,
1857                        Some(line_no),
1858                        None,
1859                        format!("log entry kind `{kind}` is not recognized"),
1860                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
1861                        vec![],
1862                    );
1863                }
1864                if let Some(p) = prev {
1865                    if ts < p {
1866                        push(
1867                            issues,
1868                            Severity::Warning,
1869                            codes::LOG_OUT_OF_ORDER,
1870                            log_rel,
1871                            Some(line_no),
1872                            None,
1873                            "log entry is older than the entry above it (possible rewrite)".into(),
1874                            Some("append corrective entries; never reorder past ones".into()),
1875                            vec![],
1876                        );
1877                    }
1878                }
1879                prev = Some(ts);
1880            }
1881        }
1882    }
1883}
1884
1885// ─────────────────────────────────────────────────────────────────────────────
1886//  Self-contained primitives (collapse onto sibling modules once they land)
1887// ─────────────────────────────────────────────────────────────────────────────
1888
/// A minimal wiki-link occurrence: the link's target and the 1-based line it
/// was found on. The frontmatter scanners in this module return these too, in
/// which case `line` is the absolute file line.
struct Link {
    /// The link's target text.
    target: String,
    /// 1-based line number of the occurrence.
    line: u32,
}
1894
1895/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
1896/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
1897/// exact-cased directory entry to be present.
1898fn store_marker_present(store: &Store) -> bool {
1899    let want = store.root.join("DB.md");
1900    if !want.is_file() {
1901        return false;
1902    }
1903    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
1904    match std::fs::read_dir(&store.root) {
1905        Ok(entries) => entries
1906            .flatten()
1907            .any(|e| e.file_name().to_str() == Some("DB.md")),
1908        Err(_) => true, // can't enumerate; trust the is_file() above
1909    }
1910}
1911
1912/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
1913/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
1914/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
1915/// `Schemas`).
1916///
1917/// `DB.md` is not a content file (no `summary`), so it is checked here rather
1918/// than through `check_content_file`. The marker presence is established by the
1919/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
1920/// as a store (the marker is the filename), so we report its shape rather than
1921/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
1922fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
1923    let rel = Path::new("DB.md");
1924    let abs = store.root.join("DB.md");
1925    let Ok(text) = std::fs::read_to_string(&abs) else {
1926        return; // marker present but unreadable: nothing more to say.
1927    };
1928
1929    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
1930        // No frontmatter block at all → it cannot declare `type: db-md` and has
1931        // neither required field. Report the type and both missing fields,
1932        // anchored to line 1 (the would-be opening fence).
1933        push(
1934            issues,
1935            Severity::Error,
1936            codes::DB_MD_BAD_TYPE,
1937            rel,
1938            Some(1),
1939            Some("type".into()),
1940            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
1941            Some("add a `---` frontmatter block with `type: db-md`".into()),
1942            vec![],
1943        );
1944        for field in ["scope", "owner"] {
1945            push(
1946                issues,
1947                Severity::Error,
1948                codes::DB_MD_MISSING_FIELD,
1949                rel,
1950                Some(1),
1951                Some(field.into()),
1952                format!("DB.md frontmatter is missing required field `{field}`"),
1953                Some(format!("add `{field}:` to the DB.md frontmatter")),
1954                vec![],
1955            );
1956        }
1957        return;
1958    };
1959
1960    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
1961    // identity contract is unmet (no provable `type: db-md`, no provable fields).
1962    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
1963        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
1964        Ok(Value::Null) => Some(BTreeMap::new()),
1965        _ => None,
1966    };
1967
1968    match &fm {
1969        Some(map) => {
1970            // ── type: db-md ──────────────────────────────────────────────────
1971            let type_ = map.get("type").and_then(scalar_string);
1972            if type_.as_deref() != Some("db-md") {
1973                let (line, msg) = match &type_ {
1974                    Some(t) => (
1975                        fm_key_line(&fm_yaml, "type"),
1976                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
1977                    ),
1978                    None => (
1979                        Some(1),
1980                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
1981                    ),
1982                };
1983                push(
1984                    issues,
1985                    Severity::Error,
1986                    codes::DB_MD_BAD_TYPE,
1987                    rel,
1988                    line,
1989                    Some("type".into()),
1990                    msg,
1991                    Some("set `type: db-md` in the DB.md frontmatter".into()),
1992                    vec![],
1993                );
1994            }
1995
1996            // ── required fields: scope + owner ───────────────────────────────
1997            for field in ["scope", "owner"] {
1998                let present = map
1999                    .get(field)
2000                    .and_then(scalar_string)
2001                    .map(|s| !s.trim().is_empty())
2002                    .unwrap_or(false);
2003                if !present {
2004                    push(
2005                        issues,
2006                        Severity::Error,
2007                        codes::DB_MD_MISSING_FIELD,
2008                        rel,
2009                        // A present-but-empty field anchors to its line; a fully
2010                        // absent one to the block top.
2011                        fm_key_line_or_top(&fm_yaml, field),
2012                        Some(field.into()),
2013                        format!("DB.md frontmatter is missing required field `{field}`"),
2014                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2015                        vec![],
2016                    );
2017                }
2018            }
2019        }
2020        None => {
2021            // Unparseable frontmatter: the identity contract is unprovable. Emit
2022            // the type error and both field errors, anchored to the block top.
2023            push(
2024                issues,
2025                Severity::Error,
2026                codes::DB_MD_BAD_TYPE,
2027                rel,
2028                Some(1),
2029                Some("type".into()),
2030                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2031                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2032                vec![],
2033            );
2034            for field in ["scope", "owner"] {
2035                push(
2036                    issues,
2037                    Severity::Error,
2038                    codes::DB_MD_MISSING_FIELD,
2039                    rel,
2040                    Some(1),
2041                    Some(field.into()),
2042                    format!("DB.md frontmatter is missing required field `{field}`"),
2043                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2044                    vec![],
2045                );
2046            }
2047        }
2048    }
2049
2050    // ── recognized `##` section headers only ─────────────────────────────────
2051    // The body's H2 headings must be one of the three the toolkit reads; any
2052    // other is a likely typo / misplacement (warning — the parser ignores it,
2053    // so the config is not corrupted, but the operator wrote a section that will
2054    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2055    // schema blocks) live under their H2 and are not flagged here.
2056    for section in crate::parser::extract_sections(&body) {
2057        if section.level != 2 {
2058            continue;
2059        }
2060        let name = section.heading.trim().to_ascii_lowercase();
2061        if matches!(name.as_str(), "agent instructions" | "policies" | "schemas") {
2062            continue;
2063        }
2064        // `Section::line` is 1-based within the body; the body begins at file
2065        // line `fm_end_line + 1`.
2066        let file_line = fm_end_line + section.line;
2067        push(
2068            issues,
2069            Severity::Warning,
2070            codes::DB_MD_UNKNOWN_SECTION,
2071            rel,
2072            Some(file_line),
2073            None,
2074            format!(
2075                "DB.md has an unrecognized `## {}` section",
2076                section.heading.trim()
2077            ),
2078            Some(
2079                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas` — \
2080                 remove or rename this heading"
2081                    .into(),
2082            ),
2083            vec![],
2084        );
2085    }
2086}
2087
2088/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2089fn not_a_store_issue(store: &Store) -> Issue {
2090    Issue {
2091        severity: Severity::Error,
2092        code: codes::NOT_A_STORE,
2093        file: store.root.clone(),
2094        line: None,
2095        key: None,
2096        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2097        suggestion: Some("create a `DB.md` at the store root".into()),
2098        related: vec![],
2099    }
2100}
2101
/// True if a store-relative path names a content file: a `.md` file whose
/// first path segment is `sources`, `records`, or `wiki`, and which is not a
/// derived catalog file (`index.md` / `index.jsonl`).
///
/// `DB.md` and `log.md` are reserved only at the store root, which the layer
/// test already rules out. A file with one of those names inside a layer
/// (e.g. `records/docs/log.md`) is therefore ordinary content, consistent
/// with `Store::walk`.
fn is_content_file(rel: &Path) -> bool {
    // Must live under one of the three content layers.
    let first_segment = rel.iter().next().and_then(|seg| seg.to_str());
    if !matches!(first_segment, Some("sources" | "records" | "wiki")) {
        return false;
    }

    match rel.file_name().and_then(|name| name.to_str()) {
        // The catalog twins are the only meta files inside a layer.
        Some("index.md") | Some("index.jsonl") => false,
        Some(name) => name.ends_with(".md"),
        // No (UTF-8) file name: not a content file.
        None => false,
    }
}
2122
/// Split a file into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open on the very first line with `---` and runs
/// to the next line that is `---` (trailing whitespace on a fence is ignored).
/// Returns `None` when the first line is not a fence or no closing fence
/// follows.
///
/// * `frontmatter_yaml` — the lines between the fences, each terminated by a
///   single `\n`;
/// * `body` — the lines after the closing fence joined with `\n` (so original
///   line endings and any trailing newline are not preserved);
/// * `closing_fence_line` — the 1-based file line of the closing fence.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // Tolerate a single leading UTF-8 BOM, matching parser/store/index (which
    // already strip it); otherwise a BOM-prefixed file would look like it has
    // no frontmatter here while the rest of the toolkit still reads it.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);
    let is_fence = |line: &str| line.trim_end() == "---";

    let mut remaining = text.lines();
    if !is_fence(remaining.next()?) {
        return None;
    }

    // The opening fence was line 1, so the first YAML line is line 2.
    let mut line_no = 1u32;
    let mut yaml = String::new();
    let mut closed = false;
    for line in remaining.by_ref() {
        line_no += 1;
        if is_fence(line) {
            closed = true;
            break;
        }
        yaml.push_str(line);
        yaml.push('\n');
    }
    if !closed {
        return None;
    }

    // Whatever the iterator has left is the body.
    let body = remaining.collect::<Vec<_>>().join("\n");
    Some((yaml, body, line_no))
}
2159
2160/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2161fn read_summary(abs: &Path) -> Option<String> {
2162    let text = std::fs::read_to_string(abs).ok()?;
2163    let (yaml, _, _) = split_frontmatter(&text)?;
2164    let value: Value = serde_norway::from_str(&yaml).ok()?;
2165    if let Value::Mapping(m) = value {
2166        m.get(Value::String("summary".into()))
2167            .and_then(scalar_string)
2168    } else {
2169        None
2170    }
2171}
2172
2173/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2174/// non-string keys (frontmatter keys are always strings).
2175fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2176    let mut out = BTreeMap::new();
2177    for (k, v) in map {
2178        if let Value::String(s) = k {
2179            out.insert(s.clone(), v.clone());
2180        }
2181    }
2182    out
2183}
2184
2185/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2186/// sequences/mappings/null.
2187fn scalar_string(v: &Value) -> Option<String> {
2188    match v {
2189        Value::String(s) => Some(s.clone()),
2190        Value::Number(n) => Some(n.to_string()),
2191        Value::Bool(b) => Some(b.to_string()),
2192        _ => None,
2193    }
2194}
2195
2196/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2197/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2198fn is_flat_scalar_list(v: &Value) -> bool {
2199    match v {
2200        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2201        _ => false,
2202    }
2203}
2204
2205/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2206/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2207/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2208/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2209/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2210/// both forms uniformly, the way the link textually appears — the doctrine view.
2211///
2212/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2213/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2214fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2215    let mut out = Vec::new();
2216    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2217        for link in links {
2218            out.push((key.clone(), link));
2219        }
2220    }
2221    out
2222}
2223
2224/// The wiki-link targets declared under a single top-level frontmatter key
2225/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2226/// carries no `[[...]]`.
2227fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2228    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2229        if k == key {
2230            return links;
2231        }
2232    }
2233    Vec::new()
2234}
2235
2236/// The raw value text under a single top-level frontmatter key (the remainder of
2237/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2238/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2239fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2240    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2241        if k == key {
2242            return Some(value_text);
2243        }
2244    }
2245    None
2246}
2247
2248/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2249/// each top-level key. A top-level key is a line with no leading indentation in
2250/// `name:` form; its value spans the rest of that line plus any deeper-indented
2251/// continuation lines (block scalars, block sequences) until the next top-level
2252/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2253/// absolute file line.
2254fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2255    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2256    let mut current: Option<(String, String, Vec<Link>)> = None;
2257
2258    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2259        let file_line = fm_start_line + idx as u32;
2260        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2261        let trimmed = raw_line.trim();
2262
2263        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2264        // comment. (Indented or dash lines belong to the current key's value.)
2265        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2266            top_level_key(raw_line)
2267        } else {
2268            None
2269        };
2270
2271        if let Some((key, after)) = new_key {
2272            if let Some(done) = current.take() {
2273                blocks.push(done);
2274            }
2275            let mut links = Vec::new();
2276            collect_line_links(after, file_line, &mut links);
2277            current = Some((key, after.trim().to_string(), links));
2278        } else if let Some((_k, value_text, links)) = current.as_mut() {
2279            // Continuation of the current key's value (indented or dash line).
2280            if !value_text.is_empty() {
2281                value_text.push('\n');
2282            }
2283            value_text.push_str(trimmed);
2284            collect_line_links(raw_line, file_line, links);
2285        }
2286    }
2287    if let Some(done) = current.take() {
2288        blocks.push(done);
2289    }
2290    blocks
2291}
2292
/// Split a `name: value` line at its first `:` into `(name, text_after_colon)`.
/// The name is trimmed and must be non-empty and made only of alphanumerics,
/// `_`, or `-`; otherwise the line is not a mapping entry and `None` is
/// returned. The text after the colon is returned untrimmed.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let well_formed = !name.is_empty()
        && name
            .chars()
            .all(|c| matches!(c, '_' | '-') || c.is_alphanumeric());
    well_formed.then(|| (name.to_string(), &line[colon + 1..]))
}
2307
2308/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2309/// each tagged with `file_line`.
2310fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2311    let bytes = s.as_bytes();
2312    let mut i = 0;
2313    while i + 1 < bytes.len() {
2314        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2315            if let Some(close) = s[i + 2..].find("]]") {
2316                let inner = &s[i + 2..i + 2 + close];
2317                // Guard against `[[[` (nested) double-counting: the inner must
2318                // not itself open another `[[`.
2319                let target = inner
2320                    .trim_start_matches('[')
2321                    .split('|')
2322                    .next()
2323                    .unwrap_or(inner)
2324                    .trim()
2325                    .to_string();
2326                if !target.is_empty() {
2327                    links.push(Link {
2328                        target,
2329                        line: file_line,
2330                    });
2331                }
2332                i = i + 2 + close + 2;
2333                continue;
2334            }
2335        }
2336        i += 1;
2337    }
2338}
2339
2340/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2341/// Skips fenced code blocks (```), so example links in docs don't trip the
2342/// validator.
2343fn extract_wiki_links(body: &str) -> Vec<Link> {
2344    let mut out = Vec::new();
2345    let mut in_fence = false;
2346    for (idx, line) in body.lines().enumerate() {
2347        let trimmed = line.trim_start();
2348        if trimmed.starts_with("```") || trimmed.starts_with("~~~") {
2349            in_fence = !in_fence;
2350            continue;
2351        }
2352        if in_fence {
2353            continue;
2354        }
2355        let line_no = (idx + 1) as u32;
2356        let bytes = line.as_bytes();
2357        let mut i = 0;
2358        while i + 1 < bytes.len() {
2359            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2360                if let Some(close) = line[i + 2..].find("]]") {
2361                    let inner = &line[i + 2..i + 2 + close];
2362                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2363                    // Skip a triple-bracket `[[[…` opening: the inner content
2364                    // starts with `[`, so this is the rejected flow-form list
2365                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
2366                    // legitimate target never starts with `[`. The frontmatter
2367                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
2368                    // extracting a bogus body link here would double-report it as
2369                    // a spurious `WIKI_LINK_SHORT_FORM`.
2370                    if !target.is_empty() && !target.starts_with('[') {
2371                        out.push(Link {
2372                            target,
2373                            line: line_no,
2374                        });
2375                    }
2376                    i = i + 2 + close + 2;
2377                    continue;
2378                }
2379            }
2380            i += 1;
2381        }
2382    }
2383    out
2384}
2385
/// Find frontmatter keys whose value is a wiki-link list written as a nested
/// YAML flow sequence (`attendees: [[[a]], [[b]]]`) and return those keys in
/// document order.
///
/// Each line is split at its first `:`. The trimmed key must be non-empty and
/// must not start with `#` or `-`; the key is reported when the trimmed
/// remainder starts with `[[[`. Block-sequence lists never match.
fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
    fm_yaml
        .lines()
        .filter_map(|line| line.split_once(':'))
        .map(|(key, rest)| (key.trim(), rest.trim()))
        .filter(|(key, _)| !(key.is_empty() || key.starts_with(['#', '-'])))
        // `[[[` = a flow sequence whose first element is itself a flow list.
        .filter(|(_, rest)| rest.starts_with("[[["))
        .map(|(key, _)| key.to_string())
        .collect()
}
2408
/// True when a bare target (no `.md`) is a full store-relative path: the text
/// before its first `/` is one of the layers `sources`, `records`, or `wiki`,
/// and something non-empty follows that `/`.
fn is_full_store_path(bare: &str) -> bool {
    bare.split_once('/').is_some_and(|(layer, rest)| {
        !rest.is_empty() && matches!(layer, "sources" | "records" | "wiki")
    })
}
2417
/// True when `path` is made only of normal components (optionally mixed with
/// `.`) and has at least one normal component. A `..`, a root, or a platform
/// prefix anywhere in the path rejects it. Validator inputs come from
/// user-authored markdown/JSON sidecars, so this is what stops a validation
/// probe from reaching outside the store.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let mut has_normal = false;
    let only_safe_parts = path.components().all(|part| match part {
        Component::Normal(_) => {
            has_normal = true;
            true
        }
        Component::CurDir => true,
        Component::ParentDir | Component::RootDir | Component::Prefix(_) => false,
    });
    only_safe_parts && has_normal
}
2432
2433fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
2434    let path = Path::new(bare);
2435    if !is_safe_store_relative_path(path) {
2436        return None;
2437    }
2438    Some(PathBuf::from(format!("{bare}.md")))
2439}
2440
/// True when `bare` equals `prefix` or lies below it on a `/` boundary.
/// Trailing `/` characters on `prefix` are ignored, and a sibling that merely
/// shares the textual prefix (`a/bc` vs. `a/b`) does not match.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    let root = prefix.trim_end_matches('/');
    bare.strip_prefix(root)
        .is_some_and(|rest| rest.is_empty() || rest.starts_with('/'))
}
2446
/// The `<layer>/<type-folder>` prefix of a store-relative content path, i.e.
/// its first two components (deeper date-shard folders roll up to it).
/// `None` when the path has fewer than three UTF-8 components (a file sitting
/// directly in a layer, or shorter) or when the first component is not
/// `sources`, `records`, or `wiki`.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let mut parts = rel.iter().filter_map(|segment| segment.to_str());
    let layer = parts.next()?;
    let folder = parts.next()?;
    // A third component (the file, or a shard folder) must exist.
    parts.next()?;
    matches!(layer, "sources" | "records" | "wiki").then(|| PathBuf::from(layer).join(folder))
}
2460
2461/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`/`wiki/`,
2462/// returning store-relative paths to be parsed in full. Skips hidden dirs,
2463/// `log/`, and the index twin (`index.jsonl`). Used only by `validate_all`; the
2464/// working-set incoming-linker scan rides the embedded-ripgrep
2465/// `Store::find_links_to_any` (a single presence-only pass), so the loop default
2466/// never walks-and-*parses* the whole content tree.
2467fn walk_content_files(root: &Path) -> Vec<PathBuf> {
2468    let mut out = Vec::new();
2469    for layer in ["sources", "records", "wiki"] {
2470        let base = root.join(layer);
2471        if !base.is_dir() {
2472            continue;
2473        }
2474        for entry in walkdir::WalkDir::new(&base)
2475            .into_iter()
2476            .filter_entry(|e| {
2477                let name = e.file_name().to_str().unwrap_or("");
2478                !name.starts_with('.') && name != "log"
2479            })
2480            .flatten()
2481        {
2482            if !entry.file_type().is_file() {
2483                continue;
2484            }
2485            let name = entry.file_name().to_str().unwrap_or("");
2486            if name.ends_with(".md") && name != "index.md" {
2487                if let Ok(rel) = entry.path().strip_prefix(root) {
2488                    out.push(rel.to_path_buf());
2489                }
2490            }
2491        }
2492    }
2493    out.sort();
2494    out
2495}
2496
2497/// Every `index.md` under the store (root + layers + type-folders), as
2498/// store-relative paths. Used to detect orphan indexes.
2499fn walk_index_files(root: &Path) -> Vec<PathBuf> {
2500    let mut out = Vec::new();
2501    if root.join("index.md").is_file() {
2502        out.push(PathBuf::from("index.md"));
2503    }
2504    for layer in ["sources", "records", "wiki"] {
2505        let base = root.join(layer);
2506        if !base.is_dir() {
2507            continue;
2508        }
2509        for entry in walkdir::WalkDir::new(&base)
2510            .into_iter()
2511            .filter_entry(|e| {
2512                let name = e.file_name().to_str().unwrap_or("");
2513                !name.starts_with('.') && name != "log"
2514            })
2515            .flatten()
2516        {
2517            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
2518                if let Ok(rel) = entry.path().strip_prefix(root) {
2519                    out.push(rel.to_path_buf());
2520                }
2521            }
2522        }
2523    }
2524    out.sort();
2525    out
2526}
2527
/// A parsed `index.md` entry line: the wiki-link target, the optional summary
/// text after the `—`, and the 1-based line number.
struct IndexEntry {
    /// The wiki-link target of the entry: the text inside the first `[[...]]`
    /// on the line, up to any `|display` segment, trimmed.
    target: String,
    /// The summary that follows the link, or `None` when the line carries no
    /// separator or no summary text.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the `index.md` text.
    line: u32,
}
2535
2536/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
2537/// `## More` footer (those lines aren't file entries). Root/layer entries with a
2538/// `|display` segment and a `(N)` count are parsed too — the target is the bare
2539/// path, the summary text is whatever follows the em dash.
2540fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
2541    let mut out = Vec::new();
2542    let mut in_more = false;
2543    for (idx, line) in text.lines().enumerate() {
2544        let trimmed = line.trim_start();
2545        if trimmed.starts_with("## More") {
2546            in_more = true;
2547            continue;
2548        }
2549        if in_more {
2550            continue;
2551        }
2552        if !trimmed.starts_with("- ") {
2553            continue;
2554        }
2555        // Find the first `[[...]]`.
2556        let Some(open) = trimmed.find("[[") else {
2557            continue;
2558        };
2559        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
2560            continue;
2561        };
2562        let inner = &trimmed[open + 2..open + 2 + close_rel];
2563        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
2564
2565        // Summary text: whatever follows the first em dash (`—`) or ` - `.
2566        let after = &trimmed[open + 2 + close_rel + 2..];
2567        let summary_text = extract_index_entry_summary(after);
2568
2569        out.push(IndexEntry {
2570            target,
2571            summary_text,
2572            line: (idx + 1) as u32,
2573        });
2574    }
2575    out
2576}
2577
2578/// Pull the summary portion out of the text trailing an index entry's
2579/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
2580/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
2581/// (so a literal `·` inside the summary text is preserved, not mistaken for the
2582/// renderer's tag separator).
2583fn extract_index_entry_summary(after: &str) -> Option<String> {
2584    let mut s = after.trim();
2585    // Drop a leading "(N ...)" count segment, if present.
2586    if s.starts_with('(') {
2587        if let Some(close) = s.find(')') {
2588            s = s[close + 1..].trim_start();
2589        }
2590    }
2591    // Require an em dash or hyphen separator before the summary.
2592    let s = if let Some(rest) = s.strip_prefix('—') {
2593        rest.trim()
2594    } else if let Some(rest) = s.strip_prefix('-') {
2595        rest.trim()
2596    } else {
2597        return None;
2598    };
2599    if s.is_empty() {
2600        return None;
2601    }
2602    // Strip a trailing `  ·  #tag #tag` tag suffix — but ONLY when the segment
2603    // after the `·` separator is a genuine tag block (whitespace-separated
2604    // `#`-prefixed tokens), the exact shape the renderer emits
2605    // (`crate::index::format_md_entry`: `  ·  #tag #tag`, dot omitted when there
2606    // are no tags). A bare `·` inside the summary text itself (e.g. a free-text
2607    // `Acme · Q2 renewal`) is NOT a tag separator and must be preserved, or the
2608    // index-summary comparison spuriously reports `INDEX_SUMMARY_MISMATCH` on a
2609    // clean store. Match from the right (`rsplit_once`) so only the real trailing
2610    // tag block is considered, and accept either the renderer's double-spaced
2611    // delimiter or a single-spaced one as long as the suffix is all tags.
2612    let s = match s.rsplit_once(" · ") {
2613        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
2614        _ => s,
2615    };
2616    Some(s.to_string())
2617}
2618
/// True when `s` is a non-empty tag block: it has at least one
/// whitespace-separated token, and every token starts with `#` and is at least
/// two bytes long (so a lone `#` does not count). This is how the index
/// renderer's trailing `·  #tag` suffix is told apart from a literal `·`
/// inside summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    tokens.peek().is_some() && tokens.all(|tok| tok.len() >= 2 && tok.starts_with('#'))
}
2633
2634/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
2635/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
2636/// or the header isn't well-formed.
2637fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
2638    let rest = line.strip_prefix("## [")?;
2639    let close = rest.find(']')?;
2640    let ts_str = &rest[..close];
2641    let tail = rest[close + 1..].trim();
2642
2643    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
2644    // attach a zero offset — the log header carries minute precision, no zone.
2645    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
2646    let offset = FixedOffset::east_opt(0)?;
2647    let ts = naive.and_local_timezone(offset).single()?;
2648
2649    // kind | object
2650    let (kind, object) = match tail.split_once('|') {
2651        Some((k, o)) => {
2652            let o = o.trim();
2653            (
2654                k.trim().to_string(),
2655                if o.is_empty() {
2656                    None
2657                } else {
2658                    Some(o.to_string())
2659                },
2660            )
2661        }
2662        None => (tail.to_string(), None),
2663    };
2664    if kind.is_empty() {
2665        return None;
2666    }
2667    Some((ts, kind, object))
2668}
2669
2670/// Every log file that holds entries for the working-set scan: the active
2671/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
2672/// strictly-prior-month entries into the archives, so the active file alone is
2673/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
2674/// unvalidated object can live in an archive after a month rollover. Reading the
2675/// archives here keeps the working-set readers in sync with the rest of the log
2676/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
2677/// prevents `dbmd validate` from silently skipping archived changed files. Reads
2678/// only log headers, never the content store, so the loop budget is preserved.
2679fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
2680    let mut files = vec![store.root.join("log.md")];
2681    let archive_dir = store.root.join("log");
2682    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2683        let mut archives: Vec<PathBuf> = entries
2684            .flatten()
2685            .map(|e| e.path())
2686            .filter(|p| {
2687                p.is_file()
2688                    && p.file_name()
2689                        .and_then(|s| s.to_str())
2690                        .and_then(|n| n.strip_suffix(".md"))
2691                        .is_some_and(is_year_month_archive)
2692            })
2693            .collect();
2694        // Deterministic order (oldest month first); the callers fold across all
2695        // files so order doesn't affect the result, but a stable order keeps the
2696        // scan reproducible.
2697        archives.sort();
2698        files.extend(archives);
2699    }
2700    files
2701}
2702
/// True when `s` has the `YYYY-MM` shape of a log archive stem: exactly seven
/// bytes, four ASCII digits, a `-`, then two ASCII digits. Only the shape is
/// checked, not whether the month is a real one.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [year @ .., b'-', m1, m2] if year.len() == 4 => {
            year.iter().all(u8::is_ascii_digit) && m1.is_ascii_digit() && m2.is_ascii_digit()
        }
        _ => false,
    }
}
2712
2713/// The timestamp of the most recent `validate` entry across the active `log.md`
2714/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
2715/// Reads only headers; never the whole store. Archive-aware so a `validate`
2716/// entry that rotated into an archive after a month rollover still anchors the
2717/// cutoff (without this, the cutoff silently resets to `None`).
2718fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
2719    let mut latest: Option<DateTime<FixedOffset>> = None;
2720    for file in log_files_for_working_set(store) {
2721        let Ok(text) = std::fs::read_to_string(&file) else {
2722            continue;
2723        };
2724        for line in text.lines() {
2725            if !line.starts_with("## [") {
2726                continue;
2727            }
2728            if let Some((ts, kind, _)) = parse_log_header(line) {
2729                if kind == "validate" {
2730                    latest = Some(match latest {
2731                        Some(p) if p >= ts => p,
2732                        _ => ts,
2733                    });
2734                }
2735            }
2736        }
2737    }
2738    latest
2739}
2740
2741/// The set of content objects changed since `cutoff`, read from log entries
2742/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
2743/// counts (no prior validate window). Returns store-relative `.md` paths.
2744///
2745/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
2746/// month rollover [`Log::append`] rotates prior-month entries out of the active
2747/// file, so an object changed-but-never-validated in a prior month lives only in
2748/// an archive. Reading the archives here is what keeps `dbmd validate` from
2749/// silently skipping those files. Reads only log headers, never the content
2750/// store.
2751fn changed_objects_since(
2752    store: &Store,
2753    cutoff: Option<DateTime<FixedOffset>>,
2754) -> BTreeSet<PathBuf> {
2755    let mut out = BTreeSet::new();
2756    for file in log_files_for_working_set(store) {
2757        let Ok(text) = std::fs::read_to_string(&file) else {
2758            continue;
2759        };
2760        for line in text.lines() {
2761            if !line.starts_with("## [") {
2762                continue;
2763            }
2764            let Some((ts, kind, object)) = parse_log_header(line) else {
2765                continue;
2766            };
2767            if let Some(c) = cutoff {
2768                if ts < c {
2769                    continue;
2770                }
2771            }
2772            if !matches!(
2773                kind.as_str(),
2774                "create" | "update" | "ingest" | "rename" | "delete" | "link"
2775            ) {
2776                continue;
2777            }
2778            if let Some(obj) = object {
2779                // The object slot is a store-relative path (or a wiki-link target).
2780                let bare = obj
2781                    .trim()
2782                    .trim_start_matches("[[")
2783                    .trim_end_matches("]]")
2784                    .split('|')
2785                    .next()
2786                    .unwrap_or("")
2787                    .trim()
2788                    .trim_end_matches(".md")
2789                    .to_string();
2790                if bare.is_empty() {
2791                    continue;
2792                }
2793                out.insert(PathBuf::from(format!("{bare}.md")));
2794            }
2795        }
2796    }
2797    out
2798}
2799
/// The result of the [`derived_from_ignored_type`] policy check: the
/// `derived_from` target that resolves to an ignored-type record, plus that
/// record's type. Carries exactly what both the validate finding and the
/// write-time warning need to render their message.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DerivedFromIgnored {
    /// The `derived_from` wiki-link target, copied unchanged from the string
    /// the caller passed in (normally a bare store-relative path, no `.md`).
    pub target: String,
    /// The resolved `type` of that target, which is present in
    /// `store.config.ignored_types`.
    pub target_type: String,
}
2813
2814/// **The single authoritative `### Ignored types` derivation check.** Decides
2815/// whether a `wiki-page` derives from an ignored-type record: the type must be
2816/// `wiki-page`, `### Ignored types` must be non-empty, and some `derived_from`
2817/// target must resolve to a record whose `type` is in `ignored_types`. Returns
2818/// the first such target (and its type), or `None`.
2819///
2820/// Both surfaces call this so the policy lives in exactly one place:
2821/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
2822/// `derived_from` targets it scanned from the raw frontmatter, and the write
2823/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
2824/// The link *extraction* differs per surface (text-scan with line numbers vs.
2825/// the parsed `Frontmatter`); the *decision* — type gate, target-type
2826/// resolution, and `ignored_types` membership — does not.
2827pub fn derived_from_ignored_type<I, S>(
2828    store: &Store,
2829    type_: &str,
2830    derived_from_targets: I,
2831) -> Option<DerivedFromIgnored>
2832where
2833    I: IntoIterator<Item = S>,
2834    S: AsRef<str>,
2835{
2836    if type_ != "wiki-page" || store.config.ignored_types.is_empty() {
2837        return None;
2838    }
2839    for target in derived_from_targets {
2840        let target = target.as_ref();
2841        if let Some(target_type) = link_target_type(store, target) {
2842            if store.config.ignored_types.contains(&target_type) {
2843                return Some(DerivedFromIgnored {
2844                    target: target.to_string(),
2845                    target_type,
2846                });
2847            }
2848        }
2849    }
2850    None
2851}
2852
2853/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
2854fn link_target_type(store: &Store, target: &str) -> Option<String> {
2855    let bare = target.trim_end_matches(".md");
2856    let abs = store.root.join(safe_md_target_rel(bare)?);
2857    let text = std::fs::read_to_string(&abs).ok()?;
2858    let (yaml, _, _) = split_frontmatter(&text)?;
2859    let value: Value = serde_norway::from_str(&yaml).ok()?;
2860    if let Value::Mapping(m) = value {
2861        m.get(Value::String("type".into())).and_then(scalar_string)
2862    } else {
2863        None
2864    }
2865}
2866
2867// ── Shape validators ─────────────────────────────────────────────────────────
2868
2869/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
2870/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
2871fn is_iso8601(s: &str) -> bool {
2872    DateTime::parse_from_rfc3339(s.trim()).is_ok()
2873}
2874
2875/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
2876/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
2877/// accept the date-only form per the SPEC's worked example.
2878fn is_iso8601_date_or_datetime(s: &str) -> bool {
2879    let s = s.trim();
2880    if DateTime::parse_from_rfc3339(s).is_ok() {
2881        return true;
2882    }
2883    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
2884}
2885
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Surrounding whitespace is trimmed first. The address must then contain
/// exactly one `@`, no whitespace of any kind (tabs and other Unicode
/// whitespace included, not only ASCII space), and a domain with at least one
/// `.` whose labels are all non-empty. That last rule rejects a leading dot,
/// a trailing dot, and consecutive dots alike.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    // `split_once` cuts at the first `@`; a second one would land in `domain`.
    if local.is_empty() || domain.contains('@') || s.chars().any(char::is_whitespace) {
        return false;
    }
    domain.contains('.') && domain.split('.').all(|label| !label.is_empty())
}
2899
2900/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
2901/// plain decimal number with optional thousands separators and ≤ 2 decimals.
2902///
2903/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
2904/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
2905/// rejected, and the ≤ 2-decimal rule is actually enforced.
2906fn is_currency(s: &str) -> bool {
2907    let mut t = s.trim();
2908    // Strip a leading currency symbol …
2909    for sym in ["$", "€", "£", "¥"] {
2910        if let Some(rest) = t.strip_prefix(sym) {
2911            t = rest.trim_start();
2912            break;
2913        }
2914    }
2915    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
2916    // code must be exactly three ASCII letters and separated from the number by
2917    // whitespace, so a bare `USD` with no amount still fails.
2918    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
2919        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
2920            t = rest.trim_start();
2921        }
2922    }
2923
2924    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
2925    is_plain_amount(cleaned.trim())
2926}
2927
/// True for a bare decimal amount: one optional leading `+` or `-`, at least
/// one ASCII digit, and optionally a `.` followed by one or two ASCII digits.
/// Exponents, `inf`/`NaN`, a missing integer part, and the empty string are
/// all rejected.
fn is_plain_amount(s: &str) -> bool {
    let all_digits = |part: &str| part.bytes().all(|b| b.is_ascii_digit());

    let unsigned = s
        .strip_prefix('+')
        .or_else(|| s.strip_prefix('-'))
        .unwrap_or(s);

    // Split at the first `.` only; a second `.` ends up in the fraction and
    // fails the digit check.
    let mut pieces = unsigned.splitn(2, '.');
    let whole = pieces.next().unwrap_or("");
    if whole.is_empty() || !all_digits(whole) {
        return false;
    }
    pieces
        .next()
        .map_or(true, |frac| matches!(frac.len(), 1 | 2) && all_digits(frac))
}
2944
/// True for an http(s) URL: after trimming, the string starts with `http://`
/// or `https://` and has at least one character after the scheme.
///
/// The remainder is measured against the scheme that actually matched, so a
/// one-character host such as `http://a` is accepted just like `https://a`.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    s.strip_prefix("https://")
        .or_else(|| s.strip_prefix("http://"))
        .is_some_and(|rest| !rest.is_empty())
}
2950
2951/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
2952fn shape_suggestion(shape: Shape) -> String {
2953    match shape {
2954        Shape::String => "use a scalar string".into(),
2955        Shape::Int => "use an integer".into(),
2956        Shape::Bool => "use `true` or `false`".into(),
2957        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
2958        Shape::Email => "use a `<local>@<domain>` address".into(),
2959        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
2960        Shape::Url => "use an http(s) URL".into(),
2961    }
2962}
2963
2964/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
2965/// can't know the folder, so the suggestion is generic but actionable.
2966fn short_form_suggestion(bare: &str) -> Option<String> {
2967    Some(format!(
2968        "use a full store-relative path, e.g. [[records/contacts/{}]]",
2969        slugish(bare)
2970    ))
2971}
2972
/// Turn a plain string into a filesystem-ish leaf: trim, lowercase, replace
/// each whitespace character with `-`, then keep only alphanumerics, `-`, `/`,
/// and `_`.
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for ch in s.trim().to_lowercase().chars() {
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
2982
2983/// Push a fully-formed [`Issue`].
2984#[allow(clippy::too_many_arguments)]
2985fn push(
2986    issues: &mut Vec<Issue>,
2987    severity: Severity,
2988    code: &'static str,
2989    file: &Path,
2990    line: Option<u32>,
2991    key: Option<String>,
2992    message: String,
2993    suggestion: Option<String>,
2994    related: Vec<PathBuf>,
2995) {
2996    issues.push(Issue {
2997        severity,
2998        code,
2999        file: file.to_path_buf(),
3000        line,
3001        key,
3002        message,
3003        suggestion,
3004        related,
3005    });
3006}
3007
/// Locate a top-level frontmatter key and report its 1-based *file* line.
///
/// `fm_yaml` is the text between the fences, so YAML line 0 is file line 2
/// (file line 1 is the opening `---`). A line matches only when it begins with
/// `key` and the key is immediately followed by `:`, so indented (nested) keys,
/// list items, and longer keys sharing the prefix are skipped. Returns the
/// first match, or `None` when the key never appears at the top level.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    let is_top_level_key = |line: &str| {
        line.starts_with(key)
            && line
                .trim_start()
                .strip_prefix(key)
                .is_some_and(|after| after.starts_with(':'))
    };
    fm_yaml
        .lines()
        .position(is_top_level_key)
        // +2: skip the opening fence and convert the 0-based index to 1-based.
        .map(|idx| (idx as u32) + 2)
}
3023
3024/// The line a *field-absence* issue (a required key that is missing entirely)
3025/// anchors to: the key's line when present, else line `1` — the frontmatter
3026/// block's opening `---`. A missing key has no line of its own; anchoring it to
3027/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
3028/// line to point at instead of an unhelpful `null`.
3029fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
3030    fm_key_line(fm_yaml, key).or(Some(1))
3031}
3032
3033/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
3034/// output deterministic across runs.
3035fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
3036    a.file
3037        .cmp(&b.file)
3038        .then(a.line.cmp(&b.line))
3039        .then(a.code.cmp(b.code))
3040        .then(a.key.cmp(&b.key))
3041}
3042
3043// ═════════════════════════════════════════════════════════════════════════════
3044//  Tests
3045// ═════════════════════════════════════════════════════════════════════════════
3046
3047#[cfg(test)]
3048mod tests {
3049    use super::*;
3050    use crate::parser::{Config, FieldSpec};
3051    use std::fs;
3052    use tempfile::TempDir;
3053
3054    #[test]
3055    fn split_frontmatter_tolerates_leading_bom() {
3056        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
3057        // fence must not make validate treat the file as frontmatter-less while
3058        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
3059        // for `\u{feff}---` and the function returned None.
3060        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
3061        let parsed = split_frontmatter(text);
3062        assert!(
3063            parsed.is_some(),
3064            "a leading BOM must not hide frontmatter from validate"
3065        );
3066        let (yaml, body, close_line) = parsed.unwrap();
3067        assert_eq!(yaml, "type: contact\nsummary: hi\n");
3068        assert_eq!(body, "body");
3069        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
3070    }
3071
    /// A test store builder over a real tempdir. Every helper writes real files
    /// so the assertions exercise real behavior, not mocks.
    struct Fixture {
        // Backing temporary directory; its path serves as the store root for
        // every `Store` this fixture builds.
        dir: TempDir,
        // Config cloned into each `Store` built from this fixture. Tests that
        // need declared schemas mutate it before validating.
        config: Config,
    }
3078
3079    impl Fixture {
3080        /// A fresh store with a **valid** `DB.md` (the identity contract:
3081        /// `type: db-md` + `scope` + `owner`) and the three layer dirs. A valid
3082        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
3083        /// clean; tests that want a broken DB.md write their own via `write`.
3084        fn new() -> Self {
3085            let dir = TempDir::new().unwrap();
3086            fs::write(
3087                dir.path().join("DB.md"),
3088                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
3089            )
3090            .unwrap();
3091            for layer in ["sources", "records", "wiki"] {
3092                fs::create_dir_all(dir.path().join(layer)).unwrap();
3093            }
3094            Fixture {
3095                dir,
3096                config: Config::default(),
3097            }
3098        }
3099
3100        /// A store with no `DB.md` marker.
3101        fn bare() -> Self {
3102            let dir = TempDir::new().unwrap();
3103            Fixture {
3104                dir,
3105                config: Config::default(),
3106            }
3107        }
3108
3109        /// Write a file at a store-relative path, creating parent dirs.
3110        fn write(&self, rel: &str, contents: &str) {
3111            let abs = self.dir.path().join(rel);
3112            fs::create_dir_all(abs.parent().unwrap()).unwrap();
3113            fs::write(abs, contents).unwrap();
3114        }
3115
3116        fn store(&self) -> Store {
3117            Store {
3118                root: self.dir.path().to_path_buf(),
3119                config: self.config.clone(),
3120            }
3121        }
3122
3123        fn store_all(&self) -> Vec<Issue> {
3124            validate_all(&self.store()).unwrap()
3125        }
3126
3127        /// Write the canonical `index.md` + `index.jsonl` at every level via the
3128        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
3129        /// projection a `dbmd index rebuild` produces. Use this (rather than a
3130        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
3131        /// the sidecar carries the COMPLETE per-field projection and the fixture
3132        /// can't silently drift from what the index writer emits.
3133        fn rebuild_indexes(&self) {
3134            crate::index::Index::rebuild_all(&self.store()).unwrap();
3135        }
3136    }
3137
3138    /// True if any issue has this code.
3139    fn has(issues: &[Issue], code: &str) -> bool {
3140        issues.iter().any(|i| i.code == code)
3141    }
3142
3143    /// Count issues with a code.
3144    fn count(issues: &[Issue], code: &str) -> usize {
3145        issues.iter().filter(|i| i.code == code).count()
3146    }
3147
3148    /// The first issue with a code, or panic.
3149    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
3150        issues
3151            .iter()
3152            .find(|i| i.code == code)
3153            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
3154    }
3155
3156    /// A minimal valid `contact` body for reuse.
3157    fn valid_contact(summary: &str) -> String {
3158        format!(
3159            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
3160        )
3161    }
3162
3163    // ── store marker ──────────────────────────────────────────────────────────
3164
3165    #[test]
3166    fn not_a_store_when_db_md_absent() {
3167        let fx = Fixture::bare();
3168        let issues = fx.store_all();
3169        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
3170        assert_eq!(issues[0].code, codes::NOT_A_STORE);
3171        assert!(issues[0].is_error());
3172    }
3173
3174    #[test]
3175    fn working_set_also_reports_not_a_store() {
3176        let fx = Fixture::bare();
3177        let issues = validate_working_set(&fx.store(), None).unwrap();
3178        assert!(has(&issues, codes::NOT_A_STORE));
3179    }
3180
3181    #[test]
3182    fn clean_store_has_no_issues() {
3183        let fx = Fixture::new();
3184        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3185        // Build the canonical indexes (complete per-field jsonl included) the
3186        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
3187        // proven clean across every projected field, not just summary/type.
3188        fx.rebuild_indexes();
3189        let issues = fx.store_all();
3190        assert!(
3191            issues.is_empty(),
3192            "expected a clean store, got: {issues:#?}"
3193        );
3194    }
3195
3196    // ── DB.md structure ───────────────────────────────────────────────────────
3197
3198    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
3199    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
3200    /// would fail here).
3201    #[test]
3202    fn valid_db_md_emits_no_structure_issue() {
3203        let fx = Fixture::new();
3204        let issues = fx.store_all();
3205        assert!(
3206            !has(&issues, codes::DB_MD_BAD_TYPE)
3207                && !has(&issues, codes::DB_MD_MISSING_FIELD)
3208                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
3209            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
3210        );
3211    }
3212
3213    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
3214    /// anchored to the `type:` line (file line 2). Failing to read the type, or
3215    /// accepting a non-`db-md` type, breaks this.
3216    #[test]
3217    fn db_md_wrong_type_is_error() {
3218        let fx = Fixture::new();
3219        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
3220        let issues = fx.store_all();
3221        let i = find(&issues, codes::DB_MD_BAD_TYPE);
3222        assert!(i.is_error());
3223        assert_eq!(i.file, PathBuf::from("DB.md"));
3224        assert_eq!(i.key.as_deref(), Some("type"));
3225        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
3226    }
3227
3228    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
3229    /// absent field, each keyed on its field name, anchored to the block top.
3230    #[test]
3231    fn db_md_missing_scope_and_owner_each_report() {
3232        let fx = Fixture::new();
3233        fx.write("DB.md", "---\ntype: db-md\n---\n");
3234        let issues = fx.store_all();
3235        assert_eq!(
3236            count(&issues, codes::DB_MD_MISSING_FIELD),
3237            2,
3238            "both scope and owner absent → two issues: {issues:#?}"
3239        );
3240        let keys: BTreeSet<Option<String>> = issues
3241            .iter()
3242            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3243            .map(|i| i.key.clone())
3244            .collect();
3245        assert_eq!(
3246            keys,
3247            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
3248            "one issue keyed on each missing field"
3249        );
3250        for i in issues
3251            .iter()
3252            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
3253        {
3254            assert!(i.is_error());
3255            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
3256        }
3257    }
3258
3259    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
3260    /// anchored to its own line — guarding against an "is the key textually
3261    /// present?" shortcut that would miss `owner:` with an empty value.
3262    #[test]
3263    fn db_md_blank_required_field_is_missing() {
3264        let fx = Fixture::new();
3265        fx.write(
3266            "DB.md",
3267            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
3268        );
3269        let issues = fx.store_all();
3270        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
3271        assert_eq!(i.key.as_deref(), Some("owner"));
3272        assert_eq!(
3273            i.line,
3274            Some(4),
3275            "a present-but-empty field anchors to its line"
3276        );
3277        assert!(
3278            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
3279            "scope is present and non-empty → only owner reported"
3280        );
3281    }
3282
3283    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
3284    /// to the heading's file line; the three recognized sections stay silent.
3285    #[test]
3286    fn db_md_unknown_section_is_warning() {
3287        let fx = Fixture::new();
3288        fx.write(
3289            "DB.md",
3290            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
3291            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
3292            // 11 `## Glossary`.
3293            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
3294        );
3295        let issues = fx.store_all();
3296        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
3297        assert!(!i.is_error(), "unknown section is a warning, not an error");
3298        assert_eq!(i.severity, Severity::Warning);
3299        assert_eq!(
3300            i.line,
3301            Some(11),
3302            "anchors to the `## Glossary` heading line"
3303        );
3304        assert!(
3305            i.message.contains("Glossary"),
3306            "the message names the offending section: {}",
3307            i.message
3308        );
3309        // The recognized `## Agent instructions` section did NOT fire.
3310        assert_eq!(
3311            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
3312            1,
3313            "only the unrecognized section is flagged: {issues:#?}"
3314        );
3315    }
3316
3317    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
3318    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
3319    #[test]
3320    fn db_md_no_frontmatter_reports_type_and_both_fields() {
3321        let fx = Fixture::new();
3322        fx.write("DB.md", "# just a heading, no frontmatter\n");
3323        let issues = fx.store_all();
3324        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
3325        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
3326    }
3327
3328    // ── frontmatter ─────────────────────────────────────────────────────────
3329
3330    #[test]
3331    fn missing_type_is_error() {
3332        let fx = Fixture::new();
3333        fx.write(
3334            "records/contacts/a.md",
3335            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
3336        );
3337        let issues = fx.store_all();
3338        assert!(has(&issues, codes::FM_MISSING_TYPE));
3339        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
3340    }
3341
3342    #[test]
3343    fn missing_universal_timestamps_are_errors_on_content_files() {
3344        let fx = Fixture::new();
3345        fx.write(
3346            "records/contacts/a.md",
3347            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
3348        );
3349        let issues = fx.store_all();
3350
3351        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
3352        assert_eq!(missing_created.key.as_deref(), Some("created"));
3353        assert!(missing_created.is_error());
3354
3355        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
3356        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
3357        assert!(missing_updated.is_error());
3358    }
3359
3360    #[test]
3361    fn meta_files_do_not_require_universal_timestamps() {
3362        let fx = Fixture::new();
3363        let issues = fx.store_all();
3364
3365        assert!(
3366            !has(&issues, codes::FM_MISSING_CREATED),
3367            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3368        );
3369        assert!(
3370            !has(&issues, codes::FM_MISSING_UPDATED),
3371            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
3372        );
3373    }
3374
3375    #[test]
3376    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
3377        let fx = Fixture::new();
3378        fx.write(
3379            "wiki/people/a.md",
3380            "# Just a heading\n\nNo frontmatter here.\n",
3381        );
3382        let issues = fx.store_all();
3383        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3384        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3385    }
3386
3387    #[test]
3388    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
3389        let fx = Fixture::new();
3390        fx.write("wiki/people/a.md", "---\n---\n\nbody\n");
3391        let issues = fx.store_all();
3392        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
3393        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3394    }
3395
3396    #[test]
3397    fn malformed_yaml_is_error_and_suppresses_field_checks() {
3398        let fx = Fixture::new();
3399        // A tab inside a mapping value is invalid YAML.
3400        fx.write(
3401            "records/contacts/a.md",
3402            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
3403        );
3404        let issues = fx.store_all();
3405        let issue = find(&issues, codes::FM_MALFORMED_YAML);
3406        assert!(issue.is_error());
3407        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3408        // When YAML doesn't parse we don't *also* claim the summary is missing;
3409        // the agent fixes the YAML first.
3410        assert!(
3411            !has(&issues, codes::SUMMARY_MISSING),
3412            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
3413        );
3414    }
3415
3416    #[test]
3417    fn bad_created_timestamp_is_error() {
3418        let fx = Fixture::new();
3419        fx.write(
3420            "records/contacts/a.md",
3421            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
3422        );
3423        let issues = fx.store_all();
3424        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
3425        assert_eq!(issue.key.as_deref(), Some("created"));
3426        assert!(issue.is_error());
3427    }
3428
3429    #[test]
3430    fn date_only_created_is_rejected_but_type_date_field_accepted() {
3431        let fx = Fixture::new();
3432        // `created` must be a full RFC3339 datetime → a date-only value is bad.
3433        // `last_touch` is a type-specific date field → date-only is fine.
3434        fx.write(
3435            "records/contacts/a.md",
3436            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
3437        );
3438        let issues = fx.store_all();
3439        let created_issues: Vec<_> = issues
3440            .iter()
3441            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
3442            .collect();
3443        assert_eq!(
3444            created_issues.len(),
3445            1,
3446            "date-only `created` must fail: {issues:#?}"
3447        );
3448        assert!(
3449            !issues.iter().any(
3450                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
3451            ),
3452            "date-only `last_touch` is valid: {issues:#?}"
3453        );
3454    }
3455
3456    // ── summary ─────────────────────────────────────────────────────────────
3457
3458    #[test]
3459    fn summary_missing_empty_multiline_toolong() {
3460        let fx = Fixture::new();
3461        fx.write(
3462            "wiki/people/missing.md",
3463            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
3464        );
3465        fx.write(
3466            "wiki/people/empty.md",
3467            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
3468        );
3469        let long = "x".repeat(201);
3470        fx.write(
3471            "wiki/people/long.md",
3472            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
3473        );
3474        let issues = fx.store_all();
3475        assert!(has(&issues, codes::SUMMARY_MISSING));
3476        assert_eq!(
3477            find(&issues, codes::SUMMARY_MISSING).file,
3478            PathBuf::from("wiki/people/missing.md")
3479        );
3480        assert!(has(&issues, codes::SUMMARY_EMPTY));
3481        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
3482        assert_eq!(
3483            find(&issues, codes::SUMMARY_TOO_LONG).severity,
3484            Severity::Warning
3485        );
3486    }
3487
3488    #[test]
3489    fn summary_multiline_via_yaml_block_scalar() {
3490        let fx = Fixture::new();
3491        // A literal block scalar produces a value with a newline.
3492        fx.write(
3493            "wiki/people/a.md",
3494            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
3495        );
3496        let issues = fx.store_all();
3497        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
3498    }
3499
3500    #[test]
3501    fn summary_exactly_200_chars_is_ok() {
3502        let fx = Fixture::new();
3503        let s = "y".repeat(200);
3504        fx.write(
3505            "wiki/people/a.md",
3506            &format!("---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
3507        );
3508        let issues = fx.store_all();
3509        assert!(
3510            !has(&issues, codes::SUMMARY_TOO_LONG),
3511            "200 is the bound, inclusive: {issues:#?}"
3512        );
3513    }
3514
3515    #[test]
3516    fn meta_files_need_no_summary() {
3517        let fx = Fixture::new();
3518        // The root/layer/type indexes + log carry no summary and must not be
3519        // flagged. (A lone DB.md store with one contact and full indexes.)
3520        fx.write("records/contacts/a.md", &valid_contact("A contact"));
3521        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
3522        fx.write(
3523            "records/index.md",
3524            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
3525        );
3526        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
3527        fx.write(
3528            "records/contacts/index.jsonl",
3529            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
3530        );
3531        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
3532        let issues = fx.store_all();
3533        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
3534    }
3535
3536    // ── tags ────────────────────────────────────────────────────────────────
3537
3538    #[test]
3539    fn nested_tags_warns_flat_tags_ok() {
3540        let fx = Fixture::new();
3541        fx.write(
3542            "records/contacts/nested.md",
3543            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
3544        );
3545        fx.write(
3546            "records/contacts/flat.md",
3547            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
3548        );
3549        let issues = fx.store_all();
3550        let tag_issues: Vec<_> = issues
3551            .iter()
3552            .filter(|i| i.code == codes::TAGS_MALFORMED)
3553            .collect();
3554        assert_eq!(
3555            tag_issues.len(),
3556            1,
3557            "only the nested-tags file should warn: {issues:#?}"
3558        );
3559        assert_eq!(
3560            tag_issues[0].file,
3561            PathBuf::from("records/contacts/nested.md")
3562        );
3563        assert_eq!(tag_issues[0].severity, Severity::Warning);
3564    }
3565
3566    // ── wiki-links ────────────────────────────────────────────────────────────
3567
3568    #[test]
3569    fn short_form_wiki_link_is_error() {
3570        let fx = Fixture::new();
3571        let mut body = valid_contact("links to a short form");
3572        body.push_str("\nSee [[sarah-chen]] for details.\n");
3573        fx.write("wiki/people/a.md", &body);
3574        let issues = fx.store_all();
3575        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3576        assert!(issue.is_error());
3577        assert!(issue.message.contains("sarah-chen"));
3578        // A short-form link must NOT also be reported broken — fix the form first.
3579        assert!(
3580            !issues
3581                .iter()
3582                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
3583            "short-form should suppress broken: {issues:#?}"
3584        );
3585    }
3586
3587    #[test]
3588    fn broken_full_path_wiki_link_is_error() {
3589        let fx = Fixture::new();
3590        let mut body = valid_contact("links to a missing file");
3591        body.push_str("\nSee [[records/contacts/ghost]].\n");
3592        fx.write("wiki/people/a.md", &body);
3593        let issues = fx.store_all();
3594        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3595        assert!(issue.is_error());
3596        assert!(issue.message.contains("records/contacts/ghost"));
3597        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3598    }
3599
3600    #[test]
3601    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
3602        let fx = Fixture::new();
3603        let mut body = valid_contact("links with traversal");
3604        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
3605        fx.write("wiki/people/a.md", &body);
3606        let issues = fx.store_all();
3607        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
3608        assert!(issue.message.contains("not a safe store-relative path"));
3609        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
3610    }
3611
3612    #[test]
3613    fn valid_full_path_wiki_link_passes() {
3614        let fx = Fixture::new();
3615        fx.write("records/contacts/target.md", &valid_contact("target"));
3616        let mut body = valid_contact("links to target");
3617        body.push_str("\nSee [[records/contacts/target]].\n");
3618        fx.write("wiki/people/a.md", &body);
3619        let issues = fx.store_all();
3620        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3621        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
3622    }
3623
3624    #[test]
3625    fn md_extension_wiki_link_warns_and_resolves() {
3626        let fx = Fixture::new();
3627        fx.write("records/contacts/target.md", &valid_contact("target"));
3628        let mut body = valid_contact("links with extension");
3629        body.push_str("\nSee [[records/contacts/target.md]].\n");
3630        fx.write("wiki/people/a.md", &body);
3631        let issues = fx.store_all();
3632        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
3633        assert_eq!(issue.severity, Severity::Warning);
3634        assert_eq!(
3635            issue.suggestion.as_deref(),
3636            Some("drop the extension: [[records/contacts/target]]")
3637        );
3638        // The target exists once `.md` is stripped → not broken.
3639        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3640    }
3641
3642    #[test]
3643    fn wiki_links_in_code_fences_are_ignored() {
3644        let fx = Fixture::new();
3645        let mut body = valid_contact("has a fenced example");
3646        body.push_str("\n```\n[[sarah-chen]]\n```\n");
3647        fx.write("wiki/people/a.md", &body);
3648        let issues = fx.store_all();
3649        assert!(
3650            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
3651            "fenced wiki-links must be ignored: {issues:#?}"
3652        );
3653    }
3654
3655    #[test]
3656    fn flow_form_link_list_in_frontmatter_is_error() {
3657        let fx = Fixture::new();
3658        fx.write(
3659            "records/meetings/m.md",
3660            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
3661        );
3662        let issues = fx.store_all();
3663        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
3664        assert!(issue.is_error());
3665        assert_eq!(issue.key.as_deref(), Some("attendees"));
3666    }
3667
3668    #[test]
3669    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
3670        let fx = Fixture::new();
3671        fx.write("records/contacts/a.md", &valid_contact("a"));
3672        fx.write("records/contacts/b.md", &valid_contact("b"));
3673        fx.write(
3674            "records/meetings/m.md",
3675            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
3676        );
3677        let issues = fx.store_all();
3678        assert!(
3679            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
3680            "{issues:#?}"
3681        );
3682        // Block-form link targets are still integrity-checked (both exist here).
3683        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
3684    }
3685
3686    #[test]
3687    fn frontmatter_short_form_link_field_is_error() {
3688        let fx = Fixture::new();
3689        // `related` is a *custom* (non-schema) wiki-link field, so it goes
3690        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
3691        fx.write(
3692            "wiki/people/a.md",
3693            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
3694        );
3695        let issues = fx.store_all();
3696        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
3697        assert!(issue.is_error());
3698        assert_eq!(issue.key.as_deref(), Some("related"));
3699    }
3700
#[test]
fn unquoted_frontmatter_link_is_recognized() {
    // YAML parses an UNQUOTED `[[...]]` as a nested sequence rather than a
    // string, so the validator has to recognise the link from the text. Two
    // fixtures cover both outcomes: a short-form custom field (SHORT_FORM) and
    // a full-path link whose target file does not exist (BROKEN).
    let short_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n";
    let broken_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n";

    let fx = Fixture::new();
    fx.write("wiki/people/short.md", short_page);
    fx.write("wiki/people/broken.md", broken_page);
    let issues = fx.store_all();

    let short_form_caught = issues.iter().any(|i| {
        i.code == codes::WIKI_LINK_SHORT_FORM
            && i.file == Path::new("wiki/people/short.md")
            && i.key.as_deref() == Some("related")
    });
    assert!(
        short_form_caught,
        "unquoted short-form frontmatter link must be caught: {issues:#?}"
    );

    let broken_caught = issues.iter().any(|i| {
        i.code == codes::WIKI_LINK_BROKEN && i.file == Path::new("wiki/people/broken.md")
    });
    assert!(
        broken_caught,
        "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
    );
}
3729
#[test]
fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
    // `company` is a *declared* link field here (a `### contact` schema with
    // `company link to records/companies/`). A short-form value is therefore
    // SCHEMA_LINK_PREFIX_MISMATCH (the target is not under the prefix) and
    // must NOT be reported a second time as a bare WIKI_LINK_SHORT_FORM: the
    // schema path owns that field once.
    let company_link = FieldSpec {
        name: "company".into(),
        link_prefix: Some(PathBuf::from("records/companies")),
        ..Default::default()
    };
    let contact_schema = Schema {
        fields: vec![company_link],
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), contact_schema);
    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
    );
    let issues = fx.store_all();

    let mismatch = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert_eq!(mismatch.key.as_deref(), Some("company"));

    // The generic path must stay silent for the same link.
    let double_reported = issues
        .iter()
        .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM && i.key.as_deref() == Some("company"));
    assert!(
        !double_reported,
        "schema link fields are checked once, by the schema path: {issues:#?}"
    );
}
3764
#[test]
fn schema_link_field_with_md_extension_still_warns() {
    // A declared link field whose target is spelled with a `.md` extension
    // must still get the WIKI_LINK_HAS_EXTENSION warning, and the existing
    // target file must not be reported as broken.
    let contact_schema = Schema {
        fields: vec![FieldSpec {
            name: "company".into(),
            link_prefix: Some(PathBuf::from("records/companies")),
            ..Default::default()
        }],
        ..Default::default()
    };
    let company_page = "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n";
    let contact_page = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n";

    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), contact_schema);
    fx.write("records/companies/acme.md", company_page);
    fx.write("records/contacts/a.md", contact_page);
    let issues = fx.store_all();

    let Some(issue) = issues.iter().find(|i| {
        i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
    }) else {
        panic!("schema link extension warning missing: {issues:#?}")
    };
    assert_eq!(issue.severity, Severity::Warning);

    let reported_broken = issues
        .iter()
        .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company"));
    assert!(
        !reported_broken,
        "extensionless existence check should still find acme.md: {issues:#?}"
    );
}
3802
3803    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
3804
#[test]
fn explicit_schema_required_shape_enum() {
    // contact schema: `name` required, `email` required with the email shape,
    // `status` restricted to the enum active|inactive.
    let name_field = FieldSpec {
        name: "name".into(),
        required: true,
        ..Default::default()
    };
    let email_field = FieldSpec {
        name: "email".into(),
        required: true,
        shape: Some(Shape::Email),
        ..Default::default()
    };
    let status_field = FieldSpec {
        name: "status".into(),
        enum_values: Some(vec!["active".into(), "inactive".into()]),
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "contact".into(),
        Schema {
            fields: vec![name_field, email_field, status_field],
            ..Default::default()
        },
    );
    // The record violates all three rules at once.
    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
    );
    let issues = fx.store_all();

    // `name` is absent → MISSING_REQUIRED.
    let name_missing = issues
        .iter()
        .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED && i.key.as_deref() == Some("name"));
    assert!(name_missing, "{issues:#?}");

    // `email` is malformed → SHAPE_MISMATCH.
    let email_malformed = issues
        .iter()
        .any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email"));
    assert!(email_malformed, "{issues:#?}");

    // `status: archived` is outside the enum → ENUM_VIOLATION.
    let status_rejected = issues
        .iter()
        .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION && i.key.as_deref() == Some("status"));
    assert!(status_rejected, "{issues:#?}");
}
3864
#[test]
fn schema_without_link_field_allows_plain_value() {
    // The `contact` schema below declares no `company` link field, so a plain
    // `company` string is acceptable: schema enforcement covers exactly what
    // the store declares, nothing implicit.
    let name_only = Schema {
        fields: vec![FieldSpec {
            name: "name".into(),
            required: true,
            ..Default::default()
        }],
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), name_only);
    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
    );

    let issues = fx.store_all();
    let prefix_mismatch = has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert!(
        !prefix_mismatch,
        "no declared link field for `company` → a plain value is fine: {issues:#?}"
    );
}
3892
#[test]
fn schema_link_field_plain_value_is_prefix_mismatch() {
    // The surviving link-enforcement path: a declared `link to <prefix>/`
    // field holding a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH, and
    // the issue's suggestion names the declared prefix.
    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "contact".into(),
        Schema {
            fields: vec![FieldSpec {
                name: "company".into(),
                link_prefix: Some(PathBuf::from("records/companies")),
                ..Default::default()
            }],
            ..Default::default()
        },
    );
    fx.write(
        "records/contacts/a.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
    );
    let issues = fx.store_all();
    let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
    assert_eq!(issue.key.as_deref(), Some("company"));
    // `is_some_and` (rather than a bare `unwrap`) so that a missing suggestion
    // fails with the full issue dump instead of an opaque `None` panic.
    assert!(
        issue
            .suggestion
            .as_deref()
            .is_some_and(|s| s.contains("records/companies/")),
        "suggestion must name the declared link prefix: {issues:#?}"
    );
}
3922
#[test]
fn schema_shape_int_and_url_and_currency() {
    // One field per shape under test: int, url, currency.
    let shaped = |name: &str, shape: Shape| FieldSpec {
        name: name.into(),
        shape: Some(shape),
        ..Default::default()
    };
    let widget_schema = Schema {
        fields: vec![
            shaped("qty", Shape::Int),
            shaped("site", Shape::Url),
            shaped("price", Shape::Currency),
        ],
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert("widget".into(), widget_schema);

    // Valid record. The price `USD 1,234.50` is an ISO code followed by a
    // grouped amount (the `expense.currency`-style form); it must pass — it
    // used to spuriously fail.
    fx.write(
        "records/widgets/ok.md",
        "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
    );
    // Invalid record. `price: inf` guards the under-rejection half of the
    // finding: `inf`/`NaN`/3-decimal values used to slip through because the
    // old implementation leaned on `f64::parse`.
    fx.write(
        "records/widgets/bad.md",
        "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
    );
    let issues = fx.store_all();

    // Keys of every shape mismatch reported against the bad record.
    let mut bad_shape: Vec<String> = Vec::new();
    for i in &issues {
        if i.code == codes::SCHEMA_SHAPE_MISMATCH && i.file == Path::new("records/widgets/bad.md")
        {
            bad_shape.push(i.key.clone().unwrap_or_default());
        }
    }
    assert!(bad_shape.iter().any(|k| k == "qty"), "{issues:#?}");
    assert!(bad_shape.iter().any(|k| k == "site"), "{issues:#?}");
    assert!(
        bad_shape.iter().any(|k| k == "price"),
        "inf must be rejected as currency: {issues:#?}"
    );

    // The valid record must not produce a single shape mismatch.
    let ok_flagged = issues.iter().any(|i| {
        i.code == codes::SCHEMA_SHAPE_MISMATCH && i.file == Path::new("records/widgets/ok.md")
    });
    assert!(
        !ok_flagged,
        "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
    );
}
3983
#[test]
fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
    let email_field = FieldSpec {
        name: "email".into(),
        required: true,
        shape: Some(Shape::Email),
        ..Default::default()
    };
    let status_field = FieldSpec {
        name: "status".into(),
        enum_values: Some(vec!["active".into(), "inactive".into()]),
        ..Default::default()
    };

    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "contact".into(),
        Schema {
            fields: vec![email_field, status_field],
            ..Default::default()
        },
    );
    // Both the required EMAIL field and the ENUM field hold a LIST. These used
    // to slip through entirely (`scalar_string` → None → the shape and enum
    // bodies silently no-op); now each flags SCHEMA_SHAPE_MISMATCH.
    fx.write(
        "records/contacts/bad.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
    );
    let issues = fx.store_all();

    // Keys of every shape mismatch in the store.
    let mut mismatched: Vec<String> = Vec::new();
    for i in issues
        .iter()
        .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
    {
        mismatched.push(i.key.clone().unwrap_or_default());
    }

    assert!(
        mismatched.iter().any(|k| k == "email"),
        "list-valued required email must flag: {issues:#?}"
    );
    assert!(
        mismatched.iter().any(|k| k == "status"),
        "list-valued enum must flag: {issues:#?}"
    );
}
4028
#[test]
fn is_currency_accepts_codes_and_rejects_non_numeric() {
    // Accepted: plain numbers, plus amounts prefixed by a currency symbol or a
    // 3-letter ISO code (both are stripped before the numeric check).
    let accepted = [
        "100",
        "1234.56",
        "$1,234.50",
        "USD 100", // the finding's headline probe — used to be false
        "usd 100", // case-insensitive code
        "EUR 9.50",
        "£12",
        "¥1000",
        "-5.00", // signed amounts are real (refunds)
        "+5",
        "1,000,000",
    ];
    // Rejected: the non-numeric floats `f64::parse` would accept, and the
    // > 2-decimal / bare-code / exponent cases the docstring forbids.
    let rejected = [
        // f64 accepts these; we must not
        "inf",
        "-inf",
        "infinity",
        "NaN",
        "nan",
        "12.999", // 3 decimals
        "1.2345", // 4 decimals
        "USD",    // bare code, no amount
        "$",      // bare symbol
        "free",
        "",
        " ",
        "1e3",      // exponent form
        "1.",       // trailing dot, no fractional digits
        ".5",       // leading dot, no integer digits
        "1 000",    // space as separator is not a thousands separator
        "USDD 100", // 4-letter "code" must not strip
    ];

    for ok in accepted {
        assert!(is_currency(ok), "expected currency: {ok:?}");
    }
    for bad in rejected {
        assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
    }
}
4064
4065    // ── policies ───────────────────────────────────────────────────────────
4066
#[test]
fn ignored_type_present_is_info() {
    // A record whose type is listed in `ignored_types` is reported at Info
    // severity (not an error) and carries a non-empty suggestion.
    let temp_record = "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n";

    let mut fx = Fixture::new();
    fx.config.ignored_types.push("temp".into());
    fx.write("records/temps/x.md", temp_record);

    let issues = fx.store_all();
    let present = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
    assert_eq!(present.severity, Severity::Info);
    assert!(!present.is_error());
    let has_suggestion = present.suggestion.as_deref().is_some_and(|s| !s.is_empty());
    assert!(has_suggestion);
}
4081
#[test]
fn wiki_page_derived_from_ignored_type_warns() {
    // A wiki page whose `derived_from` points at an ignored-type record gets a
    // Warning keyed on `derived_from`, with a non-empty suggestion.
    let temp_record = "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n";
    let derived_page = "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n";

    let mut fx = Fixture::new();
    fx.config.ignored_types.push("temp".into());
    fx.write("records/temps/x.md", temp_record);
    fx.write("wiki/themes/t.md", derived_page);

    let issues = fx.store_all();
    let derived = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
    assert_eq!(derived.severity, Severity::Warning);
    assert_eq!(derived.key.as_deref(), Some("derived_from"));
    let has_suggestion = derived.suggestion.as_deref().is_some_and(|s| !s.is_empty());
    assert!(has_suggestion);
}
4100
/// Pins the contract of the shared `derived_from_ignored_type` entry point —
/// the single policy decision that both `dbmd validate` (read) and
/// `dbmd write` (write-time warning) route through, so the two cannot
/// diverge. Covered here: a positive match carrying the resolved target
/// type, the type gate, a non-ignored target being rejected, first-match
/// across several targets, and the empty-ignored-types gate.
#[test]
fn derived_from_ignored_type_is_the_shared_policy_decision() {
    // Targets are referred to by their store-relative, extensionless paths.
    let secret_target = "records/secrets/s";
    let contact_target = "records/contacts/c";

    let mut fx = Fixture::new();
    fx.config.ignored_types.push("secret".into());
    // One record of an ignored type …
    fx.write(
        "records/secrets/s.md",
        "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
    );
    // … and one of a non-ignored type.
    fx.write(
        "records/contacts/c.md",
        "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
    );
    let store = fx.store();

    // Positive: a wiki-page deriving from the ignored-type record matches, and
    // the hit reports the target as written together with its resolved type.
    let positive = derived_from_ignored_type(&store, "wiki-page", std::iter::once(secret_target))
        .expect("wiki-page → ignored-type record must match");
    assert_eq!(positive.target, "records/secrets/s");
    assert_eq!(positive.target_type, "secret");

    // Type gate: any type other than `wiki-page` never triggers, even when the
    // target is the same ignored-type record.
    let non_wiki = derived_from_ignored_type(&store, "contact", std::iter::once(secret_target));
    assert_eq!(non_wiki, None, "only wiki-page derivation is policed");

    // Target gate: deriving a wiki-page from a non-ignored record is fine.
    let allowed = derived_from_ignored_type(&store, "wiki-page", std::iter::once(contact_target));
    assert_eq!(allowed, None, "deriving from a non-ignored type is allowed");

    // With several targets the first ignored one wins (here it is the second).
    let later = derived_from_ignored_type(&store, "wiki-page", [contact_target, secret_target])
        .expect("a later ignored-type target must still be found");
    assert_eq!(later.target, "records/secrets/s");

    // Empty-policy gate: with no `### Ignored types`, nothing is policed.
    fx.config.ignored_types.clear();
    let store = fx.store();
    let unpoliced = derived_from_ignored_type(&store, "wiki-page", std::iter::once(secret_target));
    assert_eq!(
        unpoliced, None,
        "an empty ignored-types policy short-circuits"
    );
}
4164
4165    // ── duplicates ───────────────────────────────────────────────────────────
4166
#[test]
fn dup_id_is_hard_error_with_related() {
    // Two contacts sharing `id: shared`.
    let contact_a = "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n";
    let contact_b = "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n";

    let fx = Fixture::new();
    fx.write("records/contacts/a.md", contact_a);
    fx.write("records/contacts/b.md", contact_b);
    let issues = fx.store_all();

    // Reporting rule #1: ONE issue per collision group, keyed on the
    // lexicographically smallest path (`a.md`), with the partner in `related`.
    let dup_count = count(&issues, codes::DUP_ID);
    assert_eq!(dup_count, 1, "one issue per group: {issues:#?}");

    let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
    assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
    assert!(a.is_error());
    assert_eq!(a.key.as_deref(), Some("id"));
    assert_eq!(
        a.line,
        Some(3),
        "anchors to the `id` line on the reported file"
    );
    let expected_related = vec![PathBuf::from("records/contacts/b.md")];
    assert_eq!(a.related, expected_related);
}
4197
#[test]
fn dup_id_not_fired_in_working_set() {
    // DUP_* is an --all-only cross-file check, so the working-set scope must
    // not run it even when both colliding files are in the working set.
    let contact_a = "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n";
    let contact_b = "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n";
    // The log records both files as created, which puts them in the working set.
    let log = "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n";

    let fx = Fixture::new();
    fx.write("records/contacts/a.md", contact_a);
    fx.write("records/contacts/b.md", contact_b);
    fx.write("log.md", log);

    let issues = validate_working_set(&fx.store(), None).unwrap();
    let dup_reported = has(&issues, codes::DUP_ID);
    assert!(!dup_reported, "DUP_ID is --all only: {issues:#?}");
}
4221
#[test]
fn dup_unique_key_single_field_is_warning() {
    // contact declares `- unique: email`.
    let unique_email = Schema {
        unique_keys: vec![vec!["email".into()]],
        ..Default::default()
    };
    let mut fx = Fixture::new();
    fx.config.schemas.insert("contact".into(), unique_email);

    // Two contacts with different names but the same email.
    let contacts = [("a", "A"), ("b", "B")];
    for (f, name) in contacts {
        let path = format!("records/contacts/{f}.md");
        let body = format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n");
        fx.write(&path, &body);
    }
    let issues = fx.store_all();

    // Rule #1: one issue per group, keyed on the smallest path and anchored to
    // the single `email` field.
    assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
    let collision = find(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collision.severity, Severity::Warning);
    assert_eq!(collision.file, PathBuf::from("records/contacts/a.md"));
    assert_eq!(collision.key.as_deref(), Some("email"));
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/contacts/b.md")]
    );
}
4249
#[test]
fn dup_unique_key_compound_and_clean_when_one_field_differs() {
    /// Renders an expense record titled `f` with the given `amount`; date and
    /// vendor are fixed so only the amount can distinguish two records.
    fn exp(f: &str, amount: &str) -> String {
        format!(
            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
        )
    }

    // expense declares `- unique: date, amount, vendor` (a compound key).
    let compound_key = vec!["date".into(), "amount".into(), "vendor".into()];
    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "expense".into(),
        Schema {
            unique_keys: vec![compound_key],
            ..Default::default()
        },
    );

    // The vendor every expense links to.
    fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
    fx.write("records/expenses/e1.md", &exp("e1", "100"));
    fx.write("records/expenses/e2.md", &exp("e2", "100"));
    // e3 carries a different amount, so it is not part of the collision.
    fx.write("records/expenses/e3.md", &exp("e3", "200"));
    let issues = fx.store_all();

    // Rule #1: a single issue for the e1+e2 group, keyed on the smallest path
    // (e1) with e2 in `related`; e3 never appears.
    let collisions = count(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collisions, 1, "only e1+e2 collide, one issue: {issues:#?}");

    let collision = find(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collision.file, PathBuf::from("records/expenses/e1.md"));
    assert_eq!(
        collision.line,
        Some(1),
        "compound-key collision anchors to line 1"
    );
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/expenses/e2.md")]
    );

    let e3 = PathBuf::from("records/expenses/e3.md");
    let e3_involved = issues
        .iter()
        .any(|i| i.code == codes::DUP_UNIQUE_KEY && i.related.contains(&e3));
    assert!(
        !e3_involved,
        "e3 differs on amount and must not collide: {issues:#?}"
    );
}
4292
#[test]
fn dup_unique_key_list_field_is_order_independent() {
    /// Renders a meeting titled `f` whose attendee list is written a-then-b
    /// when `order` is `"ab"` and b-then-a for any other value.
    fn m(f: &str, order: &str) -> String {
        let attendees = match order {
            "ab" => "  - [[records/contacts/a]]\n  - [[records/contacts/b]]",
            _ => "  - [[records/contacts/b]]\n  - [[records/contacts/a]]",
        };
        format!(
            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
        )
    }

    // meeting declares `- unique: date, attendees`; the list field is a set.
    let date_and_attendees = vec!["date".into(), "attendees".into()];
    let mut fx = Fixture::new();
    fx.config.schemas.insert(
        "meeting".into(),
        Schema {
            unique_keys: vec![date_and_attendees],
            ..Default::default()
        },
    );

    // The two contacts the meetings link to.
    fx.write("records/contacts/a.md", &valid_contact("a"));
    fx.write("records/contacts/b.md", &valid_contact("b"));
    // Same date, same attendees, opposite list order.
    fx.write("records/meetings/m1.md", &m("m1", "ab"));
    fx.write("records/meetings/m2.md", &m("m2", "ba"));
    let issues = fx.store_all();

    // The attendee SET ignores order, so m1 (ab) and m2 (ba) collide → exactly
    // one issue, reported on the smaller path.
    let collisions = count(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(
        collisions, 1,
        "same date + same attendee set (any order) collide as one issue: {issues:#?}"
    );
    let collision = find(&issues, codes::DUP_UNIQUE_KEY);
    assert_eq!(collision.file, PathBuf::from("records/meetings/m1.md"));
    assert_eq!(
        collision.related,
        vec![PathBuf::from("records/meetings/m2.md")]
    );
}
4330
4331    // ── indexes ───────────────────────────────────────────────────────────────
4332
#[test]
fn missing_indexes_at_all_three_levels() {
    // A single record and no index files anywhere.
    let fx = Fixture::new();
    fx.write("records/contacts/a.md", &valid_contact("a"));
    let issues = fx.store_all();

    // Every file path an INDEX_MISSING issue was reported against.
    let mut missing_files: BTreeSet<PathBuf> = BTreeSet::new();
    for i in issues.iter().filter(|i| i.code == codes::INDEX_MISSING) {
        missing_files.insert(i.file.clone());
    }

    // Root, layer (records), and type-folder (records/contacts) are all
    // missing. The type-folder issue is keyed on the FOLDER path (not its
    // would-be index.md), per the field convention `EXPECTED` pins.
    for expected in ["index.md", "records/index.md", "records/contacts"] {
        assert!(
            missing_files.contains(&PathBuf::from(expected)),
            "{issues:#?}"
        );
    }

    // With index.md entirely absent, INDEX_JSONL_MISSING is NOT fired on top —
    // one INDEX_MISSING covers the folder (rule #4).
    let jsonl_missing = has(&issues, codes::INDEX_JSONL_MISSING);
    assert!(!jsonl_missing, "{issues:#?}");
}
4362
#[test]
fn index_stale_entry_and_missing_entry() {
    // Parent indexes (root and layer) exist so only the type-folder index is
    // under test.
    let root_index = "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n";
    let layer_index = "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n";
    // The type-folder index lists a GHOST (stale) and omits `present` (missing).
    let folder_index = "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n";
    let folder_jsonl = "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n";

    let fx = Fixture::new();
    fx.write(
        "records/contacts/present.md",
        &valid_contact("present contact"),
    );
    let index_files = [
        ("index.md", root_index),
        ("records/index.md", layer_index),
        ("records/contacts/index.md", folder_index),
        ("records/contacts/index.jsonl", folder_jsonl),
    ];
    for (path, content) in index_files {
        fx.write(path, content);
    }
    let issues = fx.store_all();

    let stale_entry = find(&issues, codes::INDEX_STALE_ENTRY);
    assert!(stale_entry.message.contains("ghost"));
    assert!(stale_entry.is_error());

    let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
    let names_present = missing.message.contains("present.md");
    assert!(names_present, "{}", missing.message);
}
4393
#[test]
fn index_md_entry_with_traversal_path_is_stale_not_probe() {
    // The type-folder index contains an entry whose path climbs out with
    // `..`; it must be reported as a stale entry with the unsafe-path message.
    let root_index = "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n";
    let layer_index = "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n";
    let folder_index = "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n";
    let folder_jsonl = "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n";

    let fx = Fixture::new();
    fx.write("records/contacts/a.md", &valid_contact("a"));
    let index_files = [
        ("index.md", root_index),
        ("records/index.md", layer_index),
        ("records/contacts/index.md", folder_index),
        ("records/contacts/index.jsonl", folder_jsonl),
    ];
    for (path, content) in index_files {
        fx.write(path, content);
    }

    let issues = fx.store_all();
    let stale_entry = find(&issues, codes::INDEX_STALE_ENTRY);
    let flagged_unsafe = stale_entry
        .message
        .contains("not a safe store-relative path");
    assert!(flagged_unsafe);
}
4415
4416    #[test]
4417    fn index_summary_mismatch() {
4418        let fx = Fixture::new();
4419        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
4420        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4421        fx.write(
4422            "records/index.md",
4423            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4424        );
4425        fx.write(
4426            "records/contacts/index.md",
4427            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
4428        );
4429        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
4430        let issues = fx.store_all();
4431        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
4432        assert!(issue.is_error());
4433        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
4434    }
4435
4436    #[test]
4437    fn index_summary_match_passes() {
4438        let fx = Fixture::new();
4439        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
4440        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4441        fx.write(
4442            "records/index.md",
4443            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4444        );
4445        fx.write(
4446            "records/contacts/index.md",
4447            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
4448        );
4449        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
4450        let issues = fx.store_all();
4451        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
4452    }
4453
4454    #[test]
4455    fn index_entry_with_tag_suffix_matches_summary() {
4456        let fx = Fixture::new();
4457        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
4458        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4459        fx.write(
4460            "records/index.md",
4461            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4462        );
4463        // Entry carries a ` · #tag` suffix which must be stripped before compare.
4464        fx.write(
4465            "records/contacts/index.md",
4466            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary · #customer\n",
4467        );
4468        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
4469        let issues = fx.store_all();
4470        assert!(
4471            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
4472            "tag suffix should be stripped: {issues:#?}"
4473        );
4474    }
4475
4476    #[test]
4477    fn index_jsonl_desync_missing_file_in_jsonl() {
4478        let fx = Fixture::new();
4479        fx.write("records/contacts/a.md", &valid_contact("a"));
4480        fx.write("records/contacts/b.md", &valid_contact("b"));
4481        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
4482        fx.write(
4483            "records/index.md",
4484            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4485        );
4486        fx.write(
4487            "records/contacts/index.md",
4488            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
4489        );
4490        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
4491        fx.write(
4492            "records/contacts/index.jsonl",
4493            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4494        );
4495        let issues = fx.store_all();
4496        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
4497        assert!(desync.message.contains("b.md"), "{}", desync.message);
4498    }
4499
4500    #[test]
4501    fn index_jsonl_desync_record_points_at_missing_file() {
4502        let fx = Fixture::new();
4503        fx.write("records/contacts/a.md", &valid_contact("a"));
4504        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4505        fx.write(
4506            "records/index.md",
4507            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4508        );
4509        fx.write(
4510            "records/contacts/index.md",
4511            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
4512        );
4513        fx.write(
4514            "records/contacts/index.jsonl",
4515            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
4516        );
4517        let issues = fx.store_all();
4518        assert!(
4519            issues
4520                .iter()
4521                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
4522            "{issues:#?}"
4523        );
4524    }
4525
4526    #[test]
4527    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
4528        let fx = Fixture::new();
4529        fx.write("records/contacts/a.md", &valid_contact("a"));
4530        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4531        fx.write(
4532            "records/index.md",
4533            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4534        );
4535        fx.write(
4536            "records/contacts/index.md",
4537            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
4538        );
4539        fx.write(
4540            "records/contacts/index.jsonl",
4541            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
4542        );
4543        let issues = fx.store_all();
4544        assert!(
4545            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
4546                && i.message.contains("not a safe store-relative path")),
4547            "{issues:#?}"
4548        );
4549    }
4550
4551    #[test]
4552    fn index_jsonl_stale_summary() {
4553        let fx = Fixture::new();
4554        fx.write("records/contacts/a.md", &valid_contact("real summary"));
4555        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4556        fx.write(
4557            "records/index.md",
4558            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4559        );
4560        fx.write(
4561            "records/contacts/index.md",
4562            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
4563        );
4564        // jsonl summary disagrees with the file frontmatter.
4565        fx.write(
4566            "records/contacts/index.jsonl",
4567            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
4568        );
4569        let issues = fx.store_all();
4570        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4571        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4572        assert!(stale.key.as_deref().unwrap().contains("summary"));
4573    }
4574
4575    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
4576    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
4577    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
4578    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
4579    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
4580    /// `email` validated clean and answered `--where email=…` with a phantom
4581    /// value present in no file. This is the direct regression guard.
4582    #[test]
4583    fn index_jsonl_stale_queryable_field_email() {
4584        let fx = Fixture::new();
4585        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
4586        fx.write("records/contacts/a.md", contact);
4587        // Start from the canonical, fully-correct sidecar set …
4588        fx.rebuild_indexes();
4589        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
4590        let good = fs::read_to_string(&jsonl_path).unwrap();
4591        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
4592        assert!(
4593            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4594            "freshly-rebuilt sidecar must not be stale"
4595        );
4596        // … then desync ONLY the email so it's the single differing field.
4597        assert!(
4598            good.contains("real@correct.com"),
4599            "sidecar projects email: {good}"
4600        );
4601        fx.write(
4602            "records/contacts/index.jsonl",
4603            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
4604        );
4605
4606        let issues = fx.store_all();
4607        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4608        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
4609        // The mismatch is reported precisely on `email`, and summary/type — which
4610        // still match — are NOT named.
4611        let key = stale.key.as_deref().unwrap();
4612        assert!(
4613            key.contains("email"),
4614            "expected `email` in stale key, got {key:?}"
4615        );
4616        assert!(!key.contains("summary"), "summary still matches: {key:?}");
4617        assert!(!key.contains("type"), "type still matches: {key:?}");
4618    }
4619
4620    /// Broaden the guard across the typed/list/timestamp projections at once:
4621    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
4622    /// caught, with all three named in one issue.
4623    #[test]
4624    fn index_jsonl_stale_typed_and_list_fields() {
4625        let fx = Fixture::new();
4626        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
4627        fx.write("records/expenses/e.md", expense);
4628        fx.rebuild_indexes();
4629        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
4630        let good = fs::read_to_string(&jsonl_path).unwrap();
4631        assert!(
4632            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
4633            "freshly-rebuilt sidecar must not be stale"
4634        );
4635        // Desync a list field (tags), a timestamp (updated), and a number (amount).
4636        let stale_line = good
4637            .replace("\"q2\"", "\"WRONG-TAG\"")
4638            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
4639            .replace("1299", "9999");
4640        fx.write("records/expenses/index.jsonl", &stale_line);
4641
4642        let issues = fx.store_all();
4643        let stale = find(&issues, codes::INDEX_JSONL_STALE);
4644        let key = stale.key.as_deref().unwrap();
4645        for expected in ["amount", "tags", "updated"] {
4646            assert!(
4647                key.contains(expected),
4648                "expected `{expected}` in stale key, got {key:?}"
4649            );
4650        }
4651    }
4652
4653    #[test]
4654    fn index_orphan_in_noncanonical_folder() {
4655        let fx = Fixture::new();
4656        fx.write("records/contacts/a.md", &valid_contact("a"));
4657        // Build the canonical indexes so they aren't reported as orphans.
4658        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4659        fx.write(
4660            "records/index.md",
4661            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4662        );
4663        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4664        fx.write(
4665            "records/contacts/index.jsonl",
4666            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4667        );
4668        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
4669        fx.write(
4670            "records/contacts/subfolder/index.md",
4671            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
4672        );
4673        let issues = fx.store_all();
4674        let orphan = find(&issues, codes::INDEX_ORPHAN);
4675        assert_eq!(orphan.severity, Severity::Warning);
4676        assert_eq!(
4677            orphan.file,
4678            PathBuf::from("records/contacts/subfolder/index.md")
4679        );
4680    }
4681
4682    #[test]
4683    fn index_wrong_scope() {
4684        let fx = Fixture::new();
4685        fx.write("records/contacts/a.md", &valid_contact("a"));
4686        // Root index declares the wrong scope.
4687        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4688        fx.write(
4689            "records/index.md",
4690            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4691        );
4692        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
4693        fx.write(
4694            "records/contacts/index.jsonl",
4695            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
4696        );
4697        let issues = fx.store_all();
4698        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
4699        assert_eq!(issue.severity, Severity::Warning);
4700        assert_eq!(issue.file, PathBuf::from("index.md"));
4701    }
4702
4703    #[test]
4704    fn capped_type_folder_index_does_not_flag_missing_entries() {
4705        // Over the 500-entry cap, omitted entries are expected, not an error.
4706        let fx = Fixture::new();
4707        for i in 0..501 {
4708            fx.write(
4709                &format!("records/contacts/c{i:04}.md"),
4710                &valid_contact(&format!("contact {i}")),
4711            );
4712        }
4713        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
4714        fx.write(
4715            "records/index.md",
4716            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4717        );
4718        // Type-folder index lists only ONE entry + a More footer.
4719        fx.write(
4720            "records/contacts/index.md",
4721            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
4722        );
4723        // jsonl must still be complete — write all 501 lines.
4724        let mut jsonl = String::new();
4725        for i in 0..501 {
4726            jsonl.push_str(&format!(
4727                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
4728            ));
4729        }
4730        fx.write("records/contacts/index.jsonl", &jsonl);
4731        let issues = fx.store_all();
4732        assert!(
4733            !has(&issues, codes::INDEX_MISSING_ENTRY),
4734            "over the cap, missing browse entries are expected: {issues:#?}"
4735        );
4736        // But the jsonl is complete → no desync.
4737        assert!(
4738            !has(&issues, codes::INDEX_JSONL_DESYNC),
4739            "{:#?}",
4740            issues
4741                .iter()
4742                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
4743                .collect::<Vec<_>>()
4744        );
4745    }
4746
4747    // ── log ────────────────────────────────────────────────────────────────
4748
4749    #[test]
4750    fn log_bad_timestamp_unknown_kind_out_of_order() {
4751        let fx = Fixture::new();
4752        fx.write(
4753            "log.md",
4754            concat!(
4755                "---\ntype: log\n---\n\n# Log\n\n",
4756                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4757                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
4758                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
4759                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
4760            ),
4761        );
4762        let issues = fx.store_all();
4763        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4764        assert_eq!(
4765            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
4766            Severity::Warning
4767        );
4768        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
4769        assert_eq!(unknown.severity, Severity::Warning);
4770        assert!(unknown.message.contains("frobnicate"));
4771        assert!(unknown
4772            .suggestion
4773            .as_deref()
4774            .is_some_and(|s| s.contains("create")));
4775        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
4776        assert!(bad.is_error());
4777    }
4778
4779    #[test]
4780    fn log_validate_entry_without_object_is_well_formed() {
4781        let fx = Fixture::new();
4782        fx.write(
4783            "log.md",
4784            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
4785        );
4786        let issues = fx.store_all();
4787        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
4788        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
4789    }
4790
4791    #[test]
4792    fn log_in_order_is_clean() {
4793        let fx = Fixture::new();
4794        fx.write(
4795            "log.md",
4796            concat!(
4797                "---\ntype: log\n---\n\n",
4798                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4799                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
4800            ),
4801        );
4802        let issues = fx.store_all();
4803        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
4804    }
4805
4806    #[test]
4807    fn log_not_checked_in_working_set() {
4808        // log.md ordering is an --all-only check.
4809        let fx = Fixture::new();
4810        fx.write(
4811            "log.md",
4812            concat!(
4813                "---\ntype: log\n---\n\n",
4814                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
4815                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
4816            ),
4817        );
4818        let issues = validate_working_set(&fx.store(), None).unwrap();
4819        assert!(
4820            !has(&issues, codes::LOG_OUT_OF_ORDER),
4821            "log ordering is --all only: {issues:#?}"
4822        );
4823    }
4824
4825    // ── working-set scoping ───────────────────────────────────────────────────
4826
4827    #[test]
4828    fn working_set_validates_only_changed_files() {
4829        let fx = Fixture::new();
4830        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
4831        // in the log → working set must skip it.
4832        fx.write(
4833            "records/contacts/dirty.md",
4834            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4835        );
4836        fx.write(
4837            "records/contacts/unlogged.md",
4838            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4839        );
4840        fx.write(
4841            "log.md",
4842            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
4843        );
4844        let issues = validate_working_set(&fx.store(), None).unwrap();
4845        assert!(
4846            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
4847                && i.file == Path::new("records/contacts/dirty.md")),
4848            "{issues:#?}"
4849        );
4850        assert!(
4851            !issues
4852                .iter()
4853                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
4854            "unlogged file must not be in the working set: {issues:#?}"
4855        );
4856    }
4857
4858    #[test]
4859    fn working_set_includes_incoming_linkers_to_changed_path() {
4860        let fx = Fixture::new();
4861        // `changed` was renamed/removed (logged). `linker` points at it with a
4862        // now-broken link and was NOT itself logged — but must be pulled in.
4863        fx.write(
4864            "wiki/people/linker.md",
4865            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
4866        );
4867        // `changed.md` does NOT exist on disk (removed).
4868        fx.write(
4869            "log.md",
4870            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
4871        );
4872        let issues = validate_working_set(&fx.store(), None).unwrap();
4873        assert!(
4874            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4875                && i.file == Path::new("wiki/people/linker.md")),
4876            "incoming linker to a removed path must be validated: {issues:#?}"
4877        );
4878    }
4879
4880    #[test]
4881    fn working_set_respects_explicit_since_cutoff() {
4882        let fx = Fixture::new();
4883        fx.write(
4884            "records/contacts/old.md",
4885            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4886        );
4887        fx.write(
4888            "records/contacts/new.md",
4889            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4890        );
4891        fx.write(
4892            "log.md",
4893            concat!(
4894                "---\ntype: log\n---\n\n",
4895                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
4896                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
4897            ),
4898        );
4899        // Cutoff after `old` but before `new`.
4900        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
4901        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
4902        assert!(
4903            issues
4904                .iter()
4905                .any(|i| i.file == Path::new("records/contacts/new.md")),
4906            "{issues:#?}"
4907        );
4908        assert!(
4909            !issues
4910                .iter()
4911                .any(|i| i.file == Path::new("records/contacts/old.md")),
4912            "old change is before the cutoff: {issues:#?}"
4913        );
4914    }
4915
4916    #[test]
4917    fn working_set_default_since_is_last_validate_entry() {
4918        let fx = Fixture::new();
4919        // `before` changed before the last validate; `after` changed after.
4920        fx.write(
4921            "records/contacts/before.md",
4922            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4923        );
4924        fx.write(
4925            "records/contacts/after.md",
4926            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
4927        );
4928        fx.write(
4929            "log.md",
4930            concat!(
4931                "---\ntype: log\n---\n\n",
4932                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
4933                "## [2026-05-21 10:00] validate\nPASS\n\n",
4934                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
4935            ),
4936        );
4937        let issues = validate_working_set(&fx.store(), None).unwrap();
4938        assert!(
4939            issues
4940                .iter()
4941                .any(|i| i.file == Path::new("records/contacts/after.md")),
4942            "{issues:#?}"
4943        );
4944        assert!(
4945            !issues
4946                .iter()
4947                .any(|i| i.file == Path::new("records/contacts/before.md")),
4948            "change before the last validate entry is outside the default window: {issues:#?}"
4949        );
4950    }
4951
4952    // ── ordering / determinism ────────────────────────────────────────────────
4953
4954    #[test]
4955    fn issues_are_sorted_by_file_then_line() {
4956        let fx = Fixture::new();
4957        fx.write("wiki/people/z.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4958        fx.write("wiki/people/a.md", "---\ntype: wiki-page\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
4959        let issues = fx.store_all();
4960        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
4961        let mut sorted = files.clone();
4962        sorted.sort();
4963        assert_eq!(
4964            files, sorted,
4965            "issues must be emitted in a stable file order"
4966        );
4967    }
4968
4969    // ── boundaries: codes validate must NOT emit ──────────────────────────────
4970
4971    #[test]
4972    fn frozen_page_is_not_a_validate_error() {
4973        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
4974        // A clean file listed in `### Frozen pages` must validate clean.
4975        let mut fx = Fixture::new();
4976        fx.config
4977            .frozen_pages
4978            .push(PathBuf::from("records/decisions/d.md"));
4979        fx.write(
4980            "records/decisions/d.md",
4981            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
4982        );
4983        let issues = fx.store_all();
4984        assert!(
4985            !has(&issues, codes::POLICY_FROZEN_PAGE),
4986            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
4987        );
4988    }
4989
4990    #[test]
4991    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
4992        // The full-path doctrine makes ambiguity impossible; the defensive code
4993        // must never fire on a normal store.
4994        let fx = Fixture::new();
4995        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
4996        let mut body = valid_contact("links to sarah");
4997        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
4998        fx.write("wiki/people/p.md", &body);
4999        let issues = fx.store_all();
5000        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
5001    }
5002
5003    // ── unknown-type / unknown-field passthrough ──────────────────────────────
5004
5005    #[test]
5006    fn unknown_type_passes_through() {
5007        // A custom type is ambient context: it has a `type`, so no
5008        // FM_MISSING_TYPE, and with no matching schema there are no schema
5009        // errors. Only the universal contract (summary, timestamps) applies.
5010        let fx = Fixture::new();
5011        fx.write(
5012            "records/proposals/x.md",
5013            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
5014        );
5015        let issues = fx.store_all();
5016        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
5017        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
5018        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
5019        // The unknown fields don't trip anything.
5020        assert!(
5021            !issues
5022                .iter()
5023                .any(|i| i.key.as_deref() == Some("custom_field")
5024                    || i.key.as_deref() == Some("budget")),
5025            "unknown fields are ambient context: {issues:#?}"
5026        );
5027    }
5028
5029    // ── find_links_to prefix-collision safety (working set) ───────────────────
5030
5031    #[test]
5032    fn incoming_linker_scan_does_not_prefix_match() {
5033        // A changed `records/contacts/sarah` must NOT pull in a file that only
5034        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
5035        let fx = Fixture::new();
5036        fx.write(
5037            "wiki/people/only-sarah-chen.md",
5038            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5039        );
5040        // The log says `records/contacts/sarah` (the shorter path) changed.
5041        fx.write(
5042            "log.md",
5043            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
5044        );
5045        let issues = validate_working_set(&fx.store(), None).unwrap();
5046        assert!(
5047            !issues
5048                .iter()
5049                .any(|i| i.file == Path::new("wiki/people/only-sarah-chen.md")),
5050            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
5051        );
5052    }
5053
5054    #[test]
5055    fn incoming_linker_scan_pulls_in_catalog_index_md() {
5056        // CONTRACT: the working-set incoming-linker scan rides the embedded-
5057        // ripgrep `Store::find_links_to`, which scans EVERY `.md` (including
5058        // `index.md` catalogs) — NOT the walk-and-read over `walk_content_files`,
5059        // which excludes `index.md`. A type-folder `index.md` that lists a now-
5060        // deleted target must be pulled into the working set so its dangling
5061        // catalog entry is flagged `WIKI_LINK_BROKEN`. The old walk-and-read
5062        // implementation skipped `index.md` and let this broken link survive the
5063        // loop silently; this test fails if anyone reverts to that path.
5064        let fx = Fixture::new();
5065        // A catalog that still lists the deleted contact (a real, common stale
5066        // state after a `delete`). No other file references the target, so the
5067        // catalog is the ONLY incoming linker — if it isn't scanned, nothing is.
5068        fx.write(
5069            "records/contacts/index.md",
5070            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
5071        );
5072        // The log says `records/contacts/sarah-chen` was deleted.
5073        fx.write(
5074            "log.md",
5075            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
5076        );
5077        let issues = validate_working_set(&fx.store(), None).unwrap();
5078        assert!(
5079            issues
5080                .iter()
5081                .any(|i| i.file == Path::new("records/contacts/index.md")
5082                    && i.code == codes::WIKI_LINK_BROKEN),
5083            "the catalog `index.md` linking to the deleted target must be pulled \
5084             into the working set and flagged WIKI_LINK_BROKEN (proves the scan \
5085             uses embedded-ripgrep `Store::find_links_to`, not the index-skipping \
5086             walk-and-read): {issues:#?}"
5087        );
5088    }
5089
5090    #[test]
5091    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
5092        // CONTRACT (the O(changed × store) fix): the working-set scan finds
5093        // incoming linkers for EVERY changed object, and does so via the single
5094        // batch pass `Store::find_links_to_any` — not one full store read per
5095        // changed object. This test pins the behavior that makes the single-pass
5096        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
5097        // into the working set and flagged. A regression that scanned for only the
5098        // first/last changed object, or that dropped the batch union, would leave
5099        // one of the two broken links unreported and fail here.
5100        let fx = Fixture::new();
5101        // Linker A → deleted target #1 (in the body).
5102        fx.write(
5103            "wiki/people/refers-sarah.md",
5104            "---\ntype: wiki-page\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
5105        );
5106        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
5107        // sidecar `links` projection would miss, which is why this must be a
5108        // content scan, not a sidecar read).
5109        fx.write(
5110            "records/meetings/2026/05/kickoff.md",
5111            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
5112        );
5113        // The log says BOTH targets were deleted in this window.
5114        fx.write(
5115            "log.md",
5116            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
5117        );
5118
5119        let issues = validate_working_set(&fx.store(), None).unwrap();
5120        assert!(
5121            issues
5122                .iter()
5123                .any(|i| i.file == Path::new("wiki/people/refers-sarah.md")
5124                    && i.code == codes::WIKI_LINK_BROKEN),
5125            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
5126        );
5127        assert!(
5128            issues.iter().any(
5129                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
5130                    && i.code == codes::WIKI_LINK_BROKEN
5131            ),
5132            "linker to the SECOND deleted target (typed-field edge) must also be \
5133             pulled in and flagged — proves the scan covers the whole changed set, \
5134             not just one object: {issues:#?}"
5135        );
5136    }
5137
5138    #[test]
5139    fn frontmatter_block_sequence_links_each_get_their_own_line() {
5140        // Each block-sequence wiki-link reports on its own source line.
5141        let fx = Fixture::new();
5142        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
5143        fx.write(
5144            "records/meetings/m.md",
5145            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
5146        );
5147        let issues = fx.store_all();
5148        let broken_lines: BTreeSet<Option<u32>> = issues
5149            .iter()
5150            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
5151            .map(|i| i.line)
5152            .collect();
5153        assert_eq!(
5154            broken_lines.len(),
5155            2,
5156            "two distinct broken-link lines: {issues:#?}"
5157        );
5158    }
5159
5160    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
5161    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
5162    /// and the module doc-comment promises this code implements "exactly those
5163    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
5164    /// new validation code is added to the engine but never documented.
5165    #[test]
5166    fn every_code_constant_is_documented_in_spec() {
5167        // Parse the canonical constant *values* straight out of this module's
5168        // source, so a future `pub const X: &str = "X";` is covered with no test
5169        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
5170        let this_src = include_str!("validate.rs");
5171        let mut codes_in_module: Vec<String> = Vec::new();
5172        let mut in_codes_mod = false;
5173        for line in this_src.lines() {
5174            let t = line.trim();
5175            if t.starts_with("pub mod codes") {
5176                in_codes_mod = true;
5177                continue;
5178            }
5179            // The `mod codes` block ends at its closing brace at column 0.
5180            if in_codes_mod && line == "}" {
5181                break;
5182            }
5183            if in_codes_mod {
5184                if let Some(rest) = t.strip_prefix("pub const ") {
5185                    // rest = `NAME: &str = "VALUE";`
5186                    let value = rest
5187                        .split_once('=')
5188                        .map(|(_, v)| v.trim())
5189                        .and_then(|v| v.strip_prefix('"'))
5190                        .and_then(|v| v.strip_suffix("\";"))
5191                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
5192                    codes_in_module.push(value.to_string());
5193                }
5194            }
5195        }
5196        assert!(
5197            codes_in_module.len() >= 36,
5198            "parsed only {} code constants from `mod codes`; the parser likely \
5199             broke against a source-format change",
5200            codes_in_module.len()
5201        );
5202
5203        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
5204        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
5205        let spec = fs::read_to_string(&spec_path)
5206            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
5207
5208        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
5209        let missing: Vec<&String> = codes_in_module
5210            .iter()
5211            .filter(|code| !spec.contains(&format!("| `{code}` |")))
5212            .collect();
5213        assert!(
5214            missing.is_empty(),
5215            "validation codes emitted by the engine but absent from SPEC.md \
5216             § Validation (the declared complete vocabulary): {missing:?}"
5217        );
5218    }
5219}