// dbmd_core/validate.rs

1//! `validate` — the validation engine.
2//!
3//! The canonical issue-code vocabulary is **SPEC.md § Validation** (that table
4//! is the single source of truth). This module implements exactly those codes
5//! — no more, no fewer. If a code is added here it must be added to the SPEC
6//! table in the same change. The codes are exposed as the [`codes`] constants
7//! so call sites never spell a code as a bare string literal.
8//!
9//! **Two scopes.** [`validate_working_set`] is the loop default: content files
10//! changed since `since`, plus any file whose wiki-links target a changed path.
11//! The changed set and the per-file checks are O(changed); the incoming linkers
12//! are found by a *single* embedded-ripgrep pass over the store for the whole
13//! changed set at once ([`Store::find_links_to_any`], one scan — not a full read
14//! per changed object, and not the parse-the-tree walk `--all` does). On this
15//! changed-set path it never builds the global cross-file state.
16//!
17//! The **one** exception is the vacuous-pass guard: when the change log records
18//! no objects since the cutoff and no explicit `--since` was given (a fresh
19//! store, a missing/empty `log.md`, or external edits never logged), the default
20//! call falls back to a single per-file content sweep ([`Store::walk`]) so an
21//! externally edited or freshly copied store cannot pass validation vacuously.
22//! That fallback is O(store) by design; the O(changed) guarantee is about the
23//! normal post-write path, not this safety net.
24//!
25//! [`validate_all`] is the full SWEEP: it adds the checks that need the global
26//! cross-file state — entity-dedup `DUP_*`, every-index sync, and `log.md`
27//! ordering.
28//!
29//! ## Why this module is self-contained
30//!
//! Validation does its own frontmatter split, YAML parse, wiki-link scan,
//! log-header parse, and `--all` file walk here, reading the two public,
//! caller-populated fields of a [`Store`] — [`Store::root`] and
//! [`Store::config`] — rather than routing through the sibling modules
//! ([`crate::parser`], [`crate::store`], [`crate::log`], [`crate::index`]).
//! The exceptions visible in this module are the two store-level scans on the
//! working-set path ([`Store::find_links_to_any`] for incoming linkers and
//! [`Store::walk`] for the vacuous-pass fallback) and the [`Schema`] /
//! [`Shape`] types imported from [`crate::parser`].
//! Keeping the checks local lets the validator report precise, per-issue
//! diagnostics (exact codes, file, and context) without coupling its output to
//! incidental behavior of the shared readers; the public surface and the
//! emitted issue vocabulary are the contract.
40
41use std::collections::{BTreeMap, BTreeSet, HashMap};
42use std::path::{Component, Path, PathBuf};
43
44use chrono::{DateTime, FixedOffset, NaiveDateTime};
45use serde_norway::Value;
46
47use crate::parser::{Schema, Shape};
48use crate::store::Store;
49
/// How serious a validation [`Issue`] is.
///
/// Only [`Severity::Error`] fails validation (non-zero exit);
/// [`Severity::Warning`] and [`Severity::Info`] never do.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Severity {
    /// A hard violation of the format or doctrine; blocks validation.
    Error,
    /// A decision point the agent resolves at its discretion.
    Warning,
    /// Visibility only; never affects exit status.
    Info,
}
61
62/// A single structured validation finding. Agent-primary and machine-parseable
63/// via `--json`; `suggestion` is a deterministic remediation hint the agent
64/// applies without guessing.
65#[derive(Debug, Clone, PartialEq, Eq)]
66pub struct Issue {
67    /// The severity; only [`Severity::Error`] fails validation.
68    pub severity: Severity,
69    /// The structured code, e.g. `"WIKI_LINK_SHORT_FORM"` — one of [`codes`].
70    pub code: &'static str,
71    /// The file the issue is about.
72    pub file: PathBuf,
73    /// The 1-based line, when applicable.
74    pub line: Option<u32>,
75    /// The frontmatter key, when the issue is about a specific field.
76    pub key: Option<String>,
77    /// A human-readable message.
78    pub message: String,
79    /// A deterministic remediation hint, when one exists.
80    pub suggestion: Option<String>,
81    /// Other files involved (e.g. the duplicate partner in a collision).
82    pub related: Vec<PathBuf>,
83}
84
85impl Issue {
86    /// True if this issue fails validation (i.e. its severity is
87    /// [`Severity::Error`]).
88    pub fn is_error(&self) -> bool {
89        matches!(self.severity, Severity::Error)
90    }
91}
92
/// The canonical validation issue codes — one constant per row of the SPEC.md
/// § Validation table. Call sites reference these instead of bare strings so
/// the code and the SPEC table can never silently drift.
///
/// Each constant's value is its own name spelled as a string. The constants
/// are grouped below by code-prefix family purely for readability.
pub mod codes {
    // ── Store identity (`DB.md`) ─────────────────────────────────────────────

    /// The path has no `DB.md`; it is not a db.md store.
    pub const NOT_A_STORE: &str = "NOT_A_STORE";
    /// The store's `DB.md` is not `type: db-md`.
    pub const DB_MD_BAD_TYPE: &str = "DB_MD_BAD_TYPE";
    /// The store's `DB.md` frontmatter lacks `scope` or `owner`.
    pub const DB_MD_MISSING_FIELD: &str = "DB_MD_MISSING_FIELD";
    /// `DB.md` has an `##` section other than the three recognized ones.
    pub const DB_MD_UNKNOWN_SECTION: &str = "DB_MD_UNKNOWN_SECTION";
    /// A `DB.md ## Schemas` field declaration is malformed (empty or duplicate
    /// field name) or carries an unrecognized modifier.
    pub const DB_MD_SCHEMA_FIELD: &str = "DB_MD_SCHEMA_FIELD";

    // ── Frontmatter (`FM_*`) ─────────────────────────────────────────────────

    /// A content file has no `type:`.
    pub const FM_MISSING_TYPE: &str = "FM_MISSING_TYPE";
    /// A content file has no `created:`.
    pub const FM_MISSING_CREATED: &str = "FM_MISSING_CREATED";
    /// A content file has no `updated:`.
    pub const FM_MISSING_UPDATED: &str = "FM_MISSING_UPDATED";
    /// A content file can't be read (not valid UTF-8, or an I/O error).
    pub const FM_UNREADABLE: &str = "FM_UNREADABLE";
    /// The frontmatter block isn't valid YAML.
    pub const FM_MALFORMED_YAML: &str = "FM_MALFORMED_YAML";
    /// `created` or `updated` isn't ISO-8601.
    pub const FM_BAD_TIMESTAMP: &str = "FM_BAD_TIMESTAMP";
    /// `meta-type` is present but not one of fact / operational / conclusion.
    pub const FM_BAD_META_TYPE: &str = "FM_BAD_META_TYPE";
    /// `id` is present but unusable as an identifier (non-scalar, empty, or
    /// contains whitespace). Warning: the recommended lowercase-ULID form is
    /// never enforced — hand-authored opaque ids stay legal (SPEC v0.4).
    pub const FM_BAD_ID: &str = "FM_BAD_ID";
    /// The body of a content file opens with a second `---` frontmatter block
    /// (typically an imported source file's own frontmatter embedded verbatim
    /// as the body). Warning: the file still parses because the real
    /// frontmatter is valid, but the leftover block is body text, not fields.
    pub const FM_IN_BODY: &str = "FM_IN_BODY";

    // ── Summary (`SUMMARY_*`) ────────────────────────────────────────────────

    /// A content file has no `summary`.
    pub const SUMMARY_MISSING: &str = "SUMMARY_MISSING";
    /// `summary` is present but empty.
    pub const SUMMARY_EMPTY: &str = "SUMMARY_EMPTY";
    /// `summary` contains newlines.
    pub const SUMMARY_MULTILINE: &str = "SUMMARY_MULTILINE";
    /// `summary` is longer than 200 chars.
    pub const SUMMARY_TOO_LONG: &str = "SUMMARY_TOO_LONG";

    // ── Wiki-links (`WIKI_LINK_*`) ───────────────────────────────────────────

    /// A wiki-link target isn't a full store-relative path.
    pub const WIKI_LINK_SHORT_FORM: &str = "WIKI_LINK_SHORT_FORM";
    /// A wiki-link target file doesn't exist.
    pub const WIKI_LINK_BROKEN: &str = "WIKI_LINK_BROKEN";
    /// A wiki-link target matches multiple files (defensive).
    pub const WIKI_LINK_AMBIGUOUS: &str = "WIKI_LINK_AMBIGUOUS";
    /// A wiki-link target carries a `.md` extension — drop it.
    pub const WIKI_LINK_HAS_EXTENSION: &str = "WIKI_LINK_HAS_EXTENSION";
    /// A frontmatter list uses inline `[[[a]], [[b]]]` — use block form.
    pub const WIKI_LINK_FLOW_FORM_LIST: &str = "WIKI_LINK_FLOW_FORM_LIST";

    // ── Duplicates (`DUP_*`) ─────────────────────────────────────────────────

    /// Two files declare the same explicit `id`.
    pub const DUP_ID: &str = "DUP_ID";
    /// Two records of a type collide on a `DB.md ## Schemas` `unique:` key.
    pub const DUP_UNIQUE_KEY: &str = "DUP_UNIQUE_KEY";

    // ── Schema conformance (`SCHEMA_*`) ──────────────────────────────────────

    /// A `DB.md` schema requires a field that's absent.
    pub const SCHEMA_MISSING_REQUIRED: &str = "SCHEMA_MISSING_REQUIRED";
    /// A value doesn't match the schema's shape modifier.
    pub const SCHEMA_SHAPE_MISMATCH: &str = "SCHEMA_SHAPE_MISMATCH";
    /// A `link to <prefix>/` field has a plain or wrong-prefix value.
    pub const SCHEMA_LINK_PREFIX_MISMATCH: &str = "SCHEMA_LINK_PREFIX_MISMATCH";
    /// A value isn't in the schema's `enum`.
    pub const SCHEMA_ENUM_VIOLATION: &str = "SCHEMA_ENUM_VIOLATION";

    // ── Policy (`POLICY_*`) ──────────────────────────────────────────────────

    /// A write was attempted on a `### Frozen pages` path (write-time).
    pub const POLICY_FROZEN_PAGE: &str = "POLICY_FROZEN_PAGE";
    /// A file with an `### Ignored types` type exists.
    pub const POLICY_IGNORED_TYPE_PRESENT: &str = "POLICY_IGNORED_TYPE_PRESENT";
    /// A `meta-type: conclusion` record derives from an ignored-type record.
    pub const POLICY_IGNORED_TYPE_DERIVED: &str = "POLICY_IGNORED_TYPE_DERIVED";

    // ── Change log (`LOG_*`) ─────────────────────────────────────────────────

    /// A `log.md` entry header timestamp is unparseable.
    pub const LOG_BAD_TIMESTAMP: &str = "LOG_BAD_TIMESTAMP";
    /// A `log.md` entry kind isn't recognized.
    pub const LOG_UNKNOWN_KIND: &str = "LOG_UNKNOWN_KIND";
    /// `log.md` entries aren't in non-decreasing time order (possible rewrite).
    pub const LOG_OUT_OF_ORDER: &str = "LOG_OUT_OF_ORDER";

    // ── Indexes (`INDEX_*`) ──────────────────────────────────────────────────

    /// A non-empty canonical folder lacks `index.md`.
    pub const INDEX_MISSING: &str = "INDEX_MISSING";
    /// An `index.md` lists a file that no longer exists.
    pub const INDEX_STALE_ENTRY: &str = "INDEX_STALE_ENTRY";
    /// A file isn't listed in its folder's `index.md`.
    pub const INDEX_MISSING_ENTRY: &str = "INDEX_MISSING_ENTRY";
    /// An `index.md` sits in an empty / non-canonical folder.
    pub const INDEX_ORPHAN: &str = "INDEX_ORPHAN";
    /// An index's `scope:` doesn't match its filesystem location.
    pub const INDEX_WRONG_SCOPE: &str = "INDEX_WRONG_SCOPE";
    /// An index entry's text doesn't match the target file's `summary`.
    pub const INDEX_SUMMARY_MISMATCH: &str = "INDEX_SUMMARY_MISMATCH";
    /// A type-folder's `index.jsonl` twin is missing.
    pub const INDEX_JSONL_MISSING: &str = "INDEX_JSONL_MISSING";
    /// A file isn't in the `index.jsonl`, or a jsonl record points at a missing
    /// file.
    pub const INDEX_JSONL_DESYNC: &str = "INDEX_JSONL_DESYNC";
    /// An `index.jsonl` record's fields don't match the file's frontmatter.
    pub const INDEX_JSONL_STALE: &str = "INDEX_JSONL_STALE";

    // ── Tags ─────────────────────────────────────────────────────────────────

    /// `tags` isn't a flat YAML list of short scalar labels.
    pub const TAGS_MALFORMED: &str = "TAGS_MALFORMED";

    // ── Assets (`ASSET_*`) ───────────────────────────────────────────────────

    /// A line in `assets.jsonl` is not a valid asset record.
    pub const ASSET_MANIFEST_MALFORMED: &str = "ASSET_MANIFEST_MALFORMED";
    /// A content file references an `asset`/`assets` path with no record in
    /// `assets.jsonl` (run `dbmd assets scan`).
    pub const ASSET_UNDECLARED: &str = "ASSET_UNDECLARED";
    /// An `assets.jsonl` record names a wrapper file that does not exist.
    pub const ASSET_WRAPPER_BROKEN: &str = "ASSET_WRAPPER_BROKEN";
    /// An `assets.jsonl` record's path is referenced by no wrapper.
    pub const ASSET_MANIFEST_ORPHAN: &str = "ASSET_MANIFEST_ORPHAN";
    /// An `asset`/`assets` path points at a tracked markdown content file.
    pub const ASSET_PATH_IS_CONTENT: &str = "ASSET_PATH_IS_CONTENT";
}
206
/// The SPEC's upper bound on `summary` length, in chars. A summary longer than
/// this is reported as `SUMMARY_TOO_LONG`.
// NOTE(review): the comparison site is outside this view — confirm it counts
// chars (not bytes) so the "chars" unit stated here holds for non-ASCII text.
const MAX_SUMMARY_LEN: usize = 200;
209
/// The `log.md` entry kinds the validator recognizes (SPEC § `log.md`). An
/// entry whose kind is not in this list is reported as `LOG_UNKNOWN_KIND`
/// (a warning, not an error).
const RECOGNIZED_LOG_KINDS: &[&str] = &[
    "ingest",
    "create",
    "update",
    "delete",
    "rename",
    "link",
    "validate",
    "index-rebuild",
    "contradiction",
];
223
224// ─────────────────────────────────────────────────────────────────────────────
225//  Public entrypoints
226// ─────────────────────────────────────────────────────────────────────────────
227
228/// **Loop default.** Validate the working set: content files changed since
229/// `since` (default: the last `validate` entry in `log.md`), plus any file whose
230/// wiki-links target a changed/renamed/removed path. Per-file *checks* only —
231/// none of the cross-file global passes (entity-dedup, every-index sync,
232/// `log.md` ordering) that `--all` adds. If the default call finds no logged
233/// changed objects, it falls back to a per-file content sweep so an externally
234/// edited or freshly copied store cannot pass vacuously.
235///
236/// **Cost.** The changed set is read from `log.md` — O(changed): every
237/// `create`/`update`/`ingest`/`rename`/`delete`/`link` entry newer than the
238/// cutoff names an object. Per-file frontmatter + link-doctrine checks then run
239/// over that set plus its incoming linkers — also O(changed). The one part that
240/// is *not* O(changed) is discovering those incoming linkers: a link to a
241/// changed path can live in the body or a typed frontmatter field of any file,
242/// so it is found by a **single** embedded-ripgrep pass over the store
243/// ([`Store::find_links_to_any`]) for the whole changed set at once — one store
244/// scan, flat in the changed-set size. (It was previously a full store read
245/// *per* changed object — `O(changed × store)`; that is the blow-up this path
246/// no longer pays.) The unavoidable single content scan is the same shape as
247/// free-text `dbmd search`; the sidecar `links` projection can't replace it
248/// because it omits body/typed-field edges.
249pub fn validate_working_set(
250    store: &Store,
251    since: Option<DateTime<FixedOffset>>,
252) -> crate::Result<Vec<Issue>> {
253    if !store_marker_present(store) {
254        return Ok(vec![not_a_store_issue(store)]);
255    }
256
257    let cutoff = match since {
258        Some(ts) => Some(ts),
259        None => last_validate_at(store),
260    };
261
262    // 1. Changed objects, straight from the log (O(changed) — never a walk).
263    let changed = changed_objects_since(store, cutoff);
264    if changed.is_empty() && since.is_none() {
265        return validate_content_sweep(store);
266    }
267
268    // 2. Add every file with an incoming wiki-link to a changed/renamed/removed
269    //    path (the linker may now be stale even though it didn't change). The
270    //    incoming-linker scan is `Store::find_links_to_any` — ONE embedded-ripgrep
271    //    pass over the store for the WHOLE changed set (one `.md` walk, one
272    //    presence-only/early-exit scan per file), not one walk per object. This
273    //    is the fix for the `O(changed × store)` blow-up that calling
274    //    `find_links_to` in a loop produced (a full store read per changed
275    //    object); the cost is now a single store scan regardless of how many
276    //    objects changed. A returned self-link is harmlessly deduped by the set
277    //    (the object is already inserted below).
278    let changed_targets: Vec<PathBuf> = changed.iter().cloned().collect();
279    let mut working: BTreeSet<PathBuf> = changed;
280    for linker in store.find_links_to_any(&changed_targets)? {
281        working.insert(linker);
282    }
283
284    let mut issues = Vec::new();
285    for rel in &working {
286        let abs = store.root.join(rel);
287        // A changed path can be a *deletion* — skip files that no longer exist;
288        // the incoming-linker scan above already flagged links into them.
289        if !abs.is_file() {
290            continue;
291        }
292        // `None` basename index: the working-set pass does not build the
293        // store-wide basename map (that is a `--all`-only structure), so a bare
294        // short-form target is reported as plain `WIKI_LINK_SHORT_FORM` and the
295        // `--all` sweep does the ambiguity upgrade.
296        check_content_file(store, rel, &abs, None, &mut issues);
297    }
298    issues.sort_by(issue_order);
299    Ok(issues)
300}
301
302fn validate_content_sweep(store: &Store) -> crate::Result<Vec<Issue>> {
303    let mut issues = Vec::new();
304    for rel in store.walk()? {
305        let abs = store.root.join(&rel);
306        check_content_file(store, &rel, &abs, None, &mut issues);
307    }
308    issues.sort_by(issue_order);
309    Ok(issues)
310}
311
312/// **Full SWEEP (O(store)).** Validate every file, every link, and every index,
313/// adding the cross-file checks that need global state: entity-dedup `DUP_*`,
314/// every-index sync (md + jsonl), and `log.md` ordering. CI / recovery, not the
315/// loop.
316pub fn validate_all(store: &Store) -> crate::Result<Vec<Issue>> {
317    if !store_marker_present(store) {
318        return Ok(vec![not_a_store_issue(store)]);
319    }
320
321    let mut issues = Vec::new();
322
323    // Store-identity file: `DB.md` shape (type / required fields / section
324    // headers). A single root file, checked once in the sweep — not a content
325    // file (it carries no `summary`), so it is not part of `walk_content_files`.
326    check_db_md(store, &mut issues);
327
328    let files = walk_content_files(&store.root);
329
330    // The basename index makes the short-form wiki-link check able to upgrade a
331    // bare-basename target to `WIKI_LINK_AMBIGUOUS` when it matches ≥2 files.
332    // Built once from the already-gathered sweep list (no extra walk); only the
333    // `--all` path has it (the working-set path stays O(changed)).
334    let basenames = build_basename_index(&files);
335
336    // Per-file checks over the whole store.
337    let mut parsed: Vec<(PathBuf, Parsed)> = Vec::new();
338    for rel in &files {
339        let abs = store.root.join(rel);
340        if let Some(p) = check_content_file(store, rel, &abs, Some(&basenames), &mut issues) {
341            parsed.push((rel.clone(), p));
342        }
343    }
344
345    // Cross-file: hard `id` + soft schema-declared `unique:` dedup collisions.
346    check_duplicates(store, &parsed, &mut issues);
347
348    // Cross-file: hierarchical index.md + index.jsonl sync.
349    check_indexes(store, &files, &mut issues);
350
351    // Cross-file: log.md well-formedness + ordering.
352    check_log(store, &mut issues);
353
354    // Cross-file: asset manifest (assets.jsonl) integrity against wrapper
355    // declarations. Text-only, no hashing, no byte reads — a SWEEP check like
356    // dedup. Byte presence/correctness is `dbmd assets verify`, not validate, so
357    // a fresh clone with no restored bytes still passes here.
358    check_assets(store, &parsed, &mut issues);
359
360    issues.sort_by(issue_order);
361    Ok(issues)
362}
363
364// ─────────────────────────────────────────────────────────────────────────────
365//  Per-file content checks (shared by both scopes)
366// ─────────────────────────────────────────────────────────────────────────────
367
/// The per-file parse result that `check_content_file` hands back so
/// `validate_all` can reuse it for its cross-file passes (`check_duplicates`
/// and `check_assets` both receive the collected list): the parsed YAML mapping
/// (for dedup keys) and the raw frontmatter text (for text-based wiki-link
/// extraction). The body and the fence line are consumed inline during the
/// per-file pass and are not carried here.
struct Parsed {
    /// The parsed top-level YAML mapping, keyed by string. `None` ⇒ the
    /// frontmatter was not a usable YAML mapping (a `FM_MALFORMED_YAML` issue
    /// was already emitted for the file).
    fm: Option<BTreeMap<String, Value>>,
    /// The raw frontmatter YAML text (between the fences) — the source for
    /// text-based wiki-link extraction in dedup.
    fm_yaml: String,
}
380
381/// Run every per-file check on one content file, pushing issues. Returns the
382/// parsed file so `validate_all` can reuse it for cross-file checks. Returns
383/// `None` only when the file is unreadable or has no frontmatter block at all
384/// (which for a content file is itself reported).
385fn check_content_file(
386    store: &Store,
387    rel: &Path,
388    abs: &Path,
389    basenames: Option<&BasenameIndex>,
390    issues: &mut Vec<Issue>,
391) -> Option<Parsed> {
392    let text = match std::fs::read_to_string(abs) {
393        Ok(t) => t,
394        Err(e) => {
395            // The file exists in the walk but can't be read as UTF-8 text
396            // (invalid bytes) or hit an I/O error. Returning `None` silently
397            // here let a store whose only content file was binary garbage pass
398            // `dbmd validate` with exit 0 — the exact vacuous-pass the fallback
399            // sweep exists to prevent. Report it so the agent gets an actionable
400            // diagnostic naming the unreadable file (and `index rebuild`, which
401            // hard-fails on the same file, isn't the only signal).
402            let detail = if e.kind() == std::io::ErrorKind::InvalidData {
403                "file is not valid UTF-8 text".to_string()
404            } else {
405                format!("file could not be read: {e}")
406            };
407            push(
408                issues,
409                Severity::Error,
410                codes::FM_UNREADABLE,
411                rel,
412                None,
413                None,
414                format!("content file is unreadable: {detail}"),
415                Some(
416                    "save the file as UTF-8 text, or remove it if it isn't a db.md content file"
417                        .into(),
418                ),
419                vec![],
420            );
421            return None;
422        }
423    };
424
425    let is_content = is_content_file(rel);
426
427    let (fm_yaml, body, fm_end_line) = match split_frontmatter(&text) {
428        Some(split) => split,
429        None => {
430            // No frontmatter at all. For a content file that means there's no
431            // `type:` and no `summary:` — report both the way a parsed-but-empty
432            // file would, so the agent gets the same actionable codes.
433            if is_content {
434                push(
435                    issues,
436                    Severity::Error,
437                    codes::FM_MISSING_TYPE,
438                    rel,
439                    None,
440                    Some("type".into()),
441                    "content file has no frontmatter `type:`".into(),
442                    Some("add a YAML frontmatter block with `type:`".into()),
443                    vec![],
444                );
445                push(
446                    issues,
447                    Severity::Error,
448                    codes::SUMMARY_MISSING,
449                    rel,
450                    None,
451                    Some("summary".into()),
452                    "content file has no `summary`".into(),
453                    Some("run `dbmd fm init`".into()),
454                    vec![],
455                );
456            }
457            return None;
458        }
459    };
460
461    // Parse the YAML block.
462    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
463        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
464        // An empty frontmatter block parses as Null; treat as an empty mapping.
465        Ok(Value::Null) => Some(BTreeMap::new()),
466        Ok(_) => {
467            // A scalar / sequence at the top level isn't a frontmatter mapping.
468            // Anchor to line 1 — the frontmatter block's opening `---`; the whole
469            // block is opaque, so there is no single offending field line.
470            push(
471                issues,
472                Severity::Error,
473                codes::FM_MALFORMED_YAML,
474                rel,
475                Some(1),
476                None,
477                "frontmatter is not a YAML mapping".into(),
478                Some("repair the frontmatter YAML mapping, then rerun `dbmd validate`".into()),
479                vec![],
480            );
481            None
482        }
483        Err(e) => {
484            // Anchor to line 1 (the opening `---`): an unparseable block has no
485            // single offending field line; the agent re-reads the whole block.
486            push(
487                issues,
488                Severity::Error,
489                codes::FM_MALFORMED_YAML,
490                rel,
491                Some(1),
492                None,
493                format!("frontmatter block isn't valid YAML: {e}"),
494                Some("repair the frontmatter YAML block, then rerun `dbmd validate`".into()),
495                vec![],
496            );
497            None
498        }
499    };
500
501    if let Some(map) = &fm {
502        // The detailed frontmatter checks only run when the YAML parsed.
503        check_frontmatter(store, rel, map, &fm_yaml, basenames, issues, is_content);
504    }
505
506    // Wiki-link doctrine checks run on the body of content files. They are NOT
507    // run on:
508    //   - the root append-only meta files `log.md`/`DB.md` — they reach this
509    //     function only via the working-set incoming-linker scan (`walk_all_md`
510    //     includes them), and `validate --all` never link-checks their bodies. A
511    //     historical `[[deleted-page]]` mention in a `log.md` note, or a `[[…]]`
512    //     in DB.md's `## Agent instructions`, must not be `WIKI_LINK_BROKEN`; the
513    //     log is append-only, so "fix the link" can't even be applied.
514    //   - the derived catalogs `index.md`/`index.jsonl` — their "links" are
515    //     GENERATED catalog entries, not authored body wiki-links. A folder's
516    //     `index.md` is pulled into the working set as an incoming linker (an
517    //     entry `[[records/contacts/a]]` IS a wiki-link to a member, so touching
518    //     or deleting any member drags its folder `index.md` in). Its integrity
519    //     is the job of `check_indexes` under `--all`, which reports a dangling
520    //     entry as `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"). Body-link-
521    //     checking it here instead emitted `WIKI_LINK_BROKEN` ("create the
522    //     target") for the SAME condition — a different code with the OPPOSITE
523    //     remedy across the loop default vs the sweep, steering an agent to
524    //     recreate deleted data. `walk_content_files` skips `index.md` under
525    //     `--all` for exactly this reason; the working-set scope must match.
526    // Without these guards the two scopes disagree on the same store.
527    if !is_root_meta_file(rel) && !is_index_catalog_file(rel) {
528        check_body_wiki_links(store, rel, &body, fm_end_line, basenames, issues);
529    }
530
531    // A second, misplaced frontmatter block opening the body — the classic
532    // import artifact: a source file that carried its own `---…---` frontmatter
533    // was embedded verbatim as the record body (e.g. `dbmd write --body-file`
534    // on an un-stripped Obsidian/Notion note). The file still parses because
535    // the real frontmatter at the top is valid, so nothing else flags it; this
536    // is the honest backstop for a silent-but-malformed import.
537    if is_content && body_opens_with_frontmatter(&body) {
538        push(
539            issues,
540            Severity::Warning,
541            codes::FM_IN_BODY,
542            rel,
543            Some(fm_end_line + 1),
544            None,
545            "the body opens with a second `---` frontmatter block; the record's \
546             frontmatter is the block at the top of the file, so this one is body \
547             text (usually an imported file's own frontmatter left in place)"
548                .into(),
549            Some(
550                "delete the leftover `---…---` block from the body, or move its \
551                 fields into the record's frontmatter"
552                    .into(),
553            ),
554            vec![],
555        );
556    }
557
558    Some(Parsed { fm, fm_yaml })
559}
560
561/// All frontmatter-level checks for a content file with valid YAML.
562fn check_frontmatter(
563    store: &Store,
564    rel: &Path,
565    fm: &BTreeMap<String, Value>,
566    fm_yaml: &str,
567    basenames: Option<&BasenameIndex>,
568    issues: &mut Vec<Issue>,
569    is_content: bool,
570) {
571    let type_ = fm.get("type").and_then(scalar_string);
572
573    // ── type ────────────────────────────────────────────────────────────────
574    if is_content && type_.is_none() {
575        push(
576            issues,
577            Severity::Error,
578            codes::FM_MISSING_TYPE,
579            rel,
580            fm_key_line_or_top(fm_yaml, "type"),
581            Some("type".into()),
582            "content file has no `type:`".into(),
583            Some("add a `type:` field (e.g. `type: contact`)".into()),
584            vec![],
585        );
586    }
587
588    // ── meta-type (records-only epistemic class; closed enum) ─────────────────
589    // Present-but-out-of-enum is an error; absent is fine (effective default
590    // `fact`). Sources don't normally carry one, but validating the value when
591    // present is layer-agnostic and harmless.
592    if is_content {
593        // Branch on the raw value, NOT `and_then(scalar_string)`. Pre-filtering
594        // through `scalar_string` made a list/mapping value (which returns `None`)
595        // short-circuit the whole check, so a structurally-wrong `meta-type`
596        // slipped through clean AND was silently reclassified as the default
597        // `fact` by the rest of the toolkit. Absent or explicit-`null` is fine
598        // (effective default `fact`); a present non-null value must be a scalar in
599        // the closed enum. This mirrors the sibling timestamp check below, which
600        // was already hardened against the same non-scalar escape.
601        if let Some(v) = fm.get("meta-type").filter(|v| !v.is_null()) {
602            match scalar_string(v) {
603                Some(mt) if matches!(mt.as_str(), "fact" | "operational" | "conclusion") => {}
604                Some(mt) => push(
605                    issues,
606                    Severity::Error,
607                    codes::FM_BAD_META_TYPE,
608                    rel,
609                    fm_key_line_or_top(fm_yaml, "meta-type"),
610                    Some("meta-type".into()),
611                    format!("`meta-type: {mt}` is not one of fact / operational / conclusion"),
612                    Some(
613                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
614                            .into(),
615                    ),
616                    vec![],
617                ),
618                None => push(
619                    issues,
620                    Severity::Error,
621                    codes::FM_BAD_META_TYPE,
622                    rel,
623                    fm_key_line_or_top(fm_yaml, "meta-type"),
624                    Some("meta-type".into()),
625                    "`meta-type` is not one of fact / operational / conclusion: expected a scalar \
626                     string, found a list or mapping"
627                        .to_string(),
628                    Some(
629                        "use one of: fact, operational, conclusion (or omit for the default `fact`)"
630                            .into(),
631                    ),
632                    vec![],
633                ),
634            }
635        }
636    }
637
638    // ── id (recommended stable identity; opaque token — v0.4) ────────────────
639    // Absent is fully valid (identity falls back to the path; SPEC § The `id`
640    // field). Present, it must be USABLE as an identifier: a non-empty scalar
641    // with no whitespace. The recommended FORM (lowercase ULID, what
642    // `dbmd write` mints) is deliberately not enforced — a hand-authored
643    // opaque id stays legal, which is what keeps v0.4 additive over v0.3
644    // stores — so this warns only on values that break identifier semantics.
645    // A non-scalar `id` matters doubly: `DUP_ID` reads ids via the scalar
646    // coercion, so a list/mapping value silently opts the file out of
647    // duplicate detection.
648    if is_content {
649        if let Some(v) = fm.get("id").filter(|v| !v.is_null()) {
650            let problem = match scalar_string(v) {
651                Some(id) if id.trim().is_empty() => Some("`id` is empty".to_string()),
652                Some(id) if id.chars().any(char::is_whitespace) => {
653                    Some(format!("`id` {id:?} contains whitespace"))
654                }
655                Some(_) => None,
656                None => Some(
657                    "`id` is not a scalar (found a list or mapping), so duplicate detection \
658                     (DUP_ID) cannot see it"
659                        .to_string(),
660                ),
661            };
662            if let Some(message) = problem {
663                push(
664                    issues,
665                    Severity::Warning,
666                    codes::FM_BAD_ID,
667                    rel,
668                    fm_key_line_or_top(fm_yaml, "id"),
669                    Some("id".into()),
670                    message,
671                    Some(
672                        "use one opaque token with no whitespace — the recommended form is a \
673                         lowercase ULID (`dbmd write` mints one) — or drop `id` to fall back to \
674                         filename identity"
675                            .into(),
676                    ),
677                    vec![],
678                );
679            }
680        }
681    }
682
683    // ── summary (universal on content files) ──────────────────────────────────
684    if is_content {
685        check_summary(rel, fm, fm_yaml, issues);
686    }
687
688    // ── timestamps: created / updated ─────────────────────────────────────────
689    // The `created`/`updated` contract is content-file-only; meta files
690    // (`DB.md`, `log.md`, index twins) legitimately carry no such timestamps.
691    if is_content {
692        for (key, missing_code) in [
693            ("created", codes::FM_MISSING_CREATED),
694            ("updated", codes::FM_MISSING_UPDATED),
695        ] {
696            // A key that is absent, or present-but-`null`, has *no* timestamp →
697            // `FM_MISSING_*`. The toolkit's parser also treats a null value as
698            // "no timestamp", so a null `created:` must read as missing, not
699            // silently pass.
700            let value = fm.get(key);
701            let missing = value.is_none() || value.is_some_and(Value::is_null);
702            if missing {
703                push(
704                    issues,
705                    Severity::Error,
706                    missing_code,
707                    rel,
708                    fm_key_line_or_top(fm_yaml, key),
709                    Some(key.into()),
710                    format!("content file has no `{key}:` timestamp"),
711                    Some(format!(
712                        "set `{key}` to an RFC3339 timestamp, e.g. 2026-05-27T08:00:00-07:00"
713                    )),
714                    vec![],
715                );
716            } else if let Some(v) = value {
717                // Present and non-null. A scalar is checked for ISO-8601; a
718                // sequence/mapping is not a timestamp string at all and so
719                // cannot be ISO-8601 → `FM_BAD_TIMESTAMP` (it must not slip
720                // through the way it did when `scalar_string` returned `None`
721                // and the branch silently no-oped).
722                match scalar_string(v) {
723                    Some(s) if is_iso8601(&s) => {}
724                    Some(s) => push(
725                        issues,
726                        Severity::Error,
727                        codes::FM_BAD_TIMESTAMP,
728                        rel,
729                        fm_key_line(fm_yaml, key),
730                        Some(key.into()),
731                        format!("`{key}` is not ISO-8601: {s:?}"),
732                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
733                        vec![],
734                    ),
735                    None => push(
736                        issues,
737                        Severity::Error,
738                        codes::FM_BAD_TIMESTAMP,
739                        rel,
740                        fm_key_line(fm_yaml, key),
741                        Some(key.into()),
742                        format!(
743                            "`{key}` is not ISO-8601: expected a timestamp string, found a list or mapping"
744                        ),
745                        Some("use RFC3339, e.g. 2026-05-27T08:00:00-07:00".into()),
746                        vec![],
747                    ),
748                }
749            }
750        }
751    }
752    // ── tags shape ────────────────────────────────────────────────────────────
753    if let Some(tags) = fm.get("tags") {
754        if !is_flat_scalar_list(tags) {
755            push(
756                issues,
757                Severity::Warning,
758                codes::TAGS_MALFORMED,
759                rel,
760                fm_key_line(fm_yaml, "tags"),
761                Some("tags".into()),
762                "`tags` must be a flat YAML list of short scalar labels".into(),
763                Some("use block form: one `- <tag>` per line".into()),
764                vec![],
765            );
766        }
767    }
768
769    // ── inline flow-form wiki-link lists in frontmatter ──────────────────────
770    for key in detect_flow_form_link_lists(fm_yaml) {
771        push(
772            issues,
773            Severity::Error,
774            codes::WIKI_LINK_FLOW_FORM_LIST,
775            rel,
776            fm_key_line(fm_yaml, &key),
777            Some(key.clone()),
778            format!("`{key}` uses inline flow form `[[[a]], [[b]]]`"),
779            Some("use YAML block-sequence form: one `- [[...]]` per line".into()),
780            vec![],
781        );
782    }
783
784    // ── frontmatter wiki-link fields: doctrine + integrity ───────────────────
785    // Skip keys that have an explicit `link to` schema spec — those are checked
786    // (with prefix enforcement) in `check_schema`, and double-reporting the same
787    // link via two paths would be noise.
788    let schema_link_keys: BTreeSet<String> =
789        effective_schema(store, type_.as_deref().unwrap_or(""))
790            .map(|s| {
791                s.fields
792                    .iter()
793                    .filter(|f| f.link_prefix.is_some())
794                    .map(|f| f.name.clone())
795                    .collect()
796            })
797            .unwrap_or_default();
798    for (key, link) in frontmatter_link_fields_text(fm_yaml, 2) {
799        if schema_link_keys.contains(&key) {
800            continue;
801        }
802        check_wiki_link(
803            store,
804            rel,
805            &link,
806            Some(link.line),
807            Some(&key),
808            basenames,
809            issues,
810        );
811    }
812
813    // ── policies: ignored types ──────────────────────────────────────────────
814    if let Some(t) = &type_ {
815        if store.config.ignored_types.iter().any(|it| it == t) {
816            push(
817                issues,
818                Severity::Info,
819                codes::POLICY_IGNORED_TYPE_PRESENT,
820                rel,
821                fm_key_line(fm_yaml, "type"),
822                Some("type".into()),
823                format!("file has ignored type `{t}` (per DB.md ## Policies)"),
824                Some(
825                    "change the `type`, or remove it from DB.md `### Ignored types` if it should be managed"
826                        .into(),
827                ),
828                // The policy source: `DB.md` declares the ignored type.
829                vec![PathBuf::from("DB.md")],
830            );
831        }
832        // A conclusion record (`meta-type: conclusion`) deriving from an
833        // ignored-type record → warning. The decision lives in the shared
834        // `derived_from_ignored_type` entry point; this side only supplies the
835        // `derived_from` targets (with their line, which the issue carries) and
836        // renders the finding.
837        let meta_type = fm
838            .get("meta-type")
839            .and_then(scalar_string)
840            .unwrap_or_else(|| "fact".to_string());
841        for link in frontmatter_links_for_key(fm_yaml, "derived_from", 2) {
842            if let Some(hit) =
843                derived_from_ignored_type(store, &meta_type, std::iter::once(link.target.as_str()))
844            {
845                push(
846                    issues,
847                    Severity::Warning,
848                    codes::POLICY_IGNORED_TYPE_DERIVED,
849                    rel,
850                    Some(link.line),
851                    Some("derived_from".into()),
852                    format!(
853                        "conclusion record derives from ignored-type record `{}` (type `{}`)",
854                        hit.target, hit.target_type
855                    ),
856                    Some(
857                        "drop this `derived_from` link, or remove the target type from DB.md `### Ignored types`"
858                            .into(),
859                    ),
860                    // The ignored-type source record, plus `DB.md` (the policy
861                    // source that lists the ignored type).
862                    vec![
863                        PathBuf::from(format!("{}.md", hit.target)),
864                        PathBuf::from("DB.md"),
865                    ],
866                );
867            }
868        }
869    }
870
871    // ── schema enforcement: DB.md ## Schemas (the only schema source) ─────────
872    if let Some(t) = &type_ {
873        if let Some(schema) = effective_schema(store, t) {
874            check_schema(store, rel, fm, fm_yaml, &schema, issues);
875        }
876    }
877}
878
879/// `summary` rules: required, non-empty, single-line, ≤ 200 chars.
880fn check_summary(rel: &Path, fm: &BTreeMap<String, Value>, fm_yaml: &str, issues: &mut Vec<Issue>) {
881    let line = fm_key_line(fm_yaml, "summary");
882    match fm.get("summary") {
883        None => push(
884            issues,
885            Severity::Error,
886            codes::SUMMARY_MISSING,
887            rel,
888            // A missing `summary` key has no line of its own → anchor to the
889            // frontmatter block top (line 1), the EXPECTED field-absence rule.
890            fm_key_line_or_top(fm_yaml, "summary"),
891            Some("summary".into()),
892            "content file has no `summary`".into(),
893            Some("run `dbmd fm init`".into()),
894            vec![],
895        ),
896        Some(v) => {
897            let s = scalar_string(v).unwrap_or_default();
898            if s.trim().is_empty() {
899                push(
900                    issues,
901                    Severity::Error,
902                    codes::SUMMARY_EMPTY,
903                    rel,
904                    line,
905                    Some("summary".into()),
906                    "`summary` is present but empty".into(),
907                    Some("write a one-line summary, or run `dbmd fm init`".into()),
908                    vec![],
909                );
910            } else if s.contains('\n') {
911                push(
912                    issues,
913                    Severity::Error,
914                    codes::SUMMARY_MULTILINE,
915                    rel,
916                    line,
917                    Some("summary".into()),
918                    "`summary` must be one line (contains a newline)".into(),
919                    Some("collapse the summary to a single line".into()),
920                    vec![],
921                );
922            } else if s.chars().count() > MAX_SUMMARY_LEN {
923                push(
924                    issues,
925                    Severity::Warning,
926                    codes::SUMMARY_TOO_LONG,
927                    rel,
928                    line,
929                    Some("summary".into()),
930                    format!(
931                        "`summary` is {} chars (> {MAX_SUMMARY_LEN})",
932                        s.chars().count()
933                    ),
934                    Some(format!("trim the summary to ≤ {MAX_SUMMARY_LEN} chars")),
935                    vec![],
936                );
937            }
938        }
939    }
940}
941
942/// Wiki-link checks for a body. Per-link doctrine (`WIKI_LINK_*`).
943fn check_body_wiki_links(
944    store: &Store,
945    rel: &Path,
946    body: &str,
947    fm_end_line: u32,
948    basenames: Option<&BasenameIndex>,
949    issues: &mut Vec<Issue>,
950) {
951    for link in extract_wiki_links(body) {
952        // Body lines are offset past the frontmatter block. `link.line` is
953        // 1-based within `body`; the body starts at `fm_end_line + 1`.
954        let abs_line = fm_end_line + link.line;
955        check_wiki_link(store, rel, &link, Some(abs_line), None, basenames, issues);
956    }
957}
958
/// Store-wide lookup from a bare basename (the file stem, without `.md`) to
/// every store-relative path that carries it.
///
/// It is built once per `validate --all` sweep. With it, the short-form
/// wiki-link check can tell a target that is merely short-form
/// (`WIKI_LINK_SHORT_FORM`) from one that is *ambiguous* because the basename
/// matches two or more files (`WIKI_LINK_AMBIGUOUS`, the defensive code). The
/// working-set path passes `None` instead: that loop is O(changed) and never
/// walks the store, so it reports the plain short-form error without the scan.
type BasenameIndex = HashMap<String, Vec<PathBuf>>;

/// Group the already-swept file list by basename to form the
/// [`BasenameIndex`]; no additional walk of the store is performed.
///
/// Paths without a file stem, or whose stem is not valid UTF-8, are left out.
/// Within each bucket, paths keep the order they have in `files`.
fn build_basename_index(files: &[PathBuf]) -> BasenameIndex {
    let mut by_stem = BasenameIndex::new();
    let stemmed = files
        .iter()
        .filter_map(|path| Some((path.file_stem()?.to_str()?, path)));
    for (stem, path) in stemmed {
        by_stem
            .entry(stem.to_owned())
            .or_default()
            .push(path.clone());
    }
    by_stem
}
979
980/// The shared per-wiki-link doctrine + integrity check used by both body links
981/// and frontmatter link-fields. `basenames` is `Some` only in the `--all`
982/// sweep, where a no-slash short-form target is upgraded to `WIKI_LINK_AMBIGUOUS`
983/// when its bare basename matches ≥2 files.
984fn check_wiki_link(
985    store: &Store,
986    rel: &Path,
987    link: &Link,
988    line: Option<u32>,
989    key: Option<&str>,
990    basenames: Option<&BasenameIndex>,
991    issues: &mut Vec<Issue>,
992) {
993    let bare = link.target.trim_end_matches(".md");
994
995    // Short-form: not a full store-relative path (no `/`, or first segment isn't
996    // a known layer).
997    if !is_full_store_path(bare) {
998        // Ambiguous (defensive) takes precedence over plain short-form when the
999        // target is a bare basename (no `/`) that matches ≥2 files in the store.
1000        // Only computable in the sweep (where `basenames` is populated); the
1001        // working-set path falls through to the plain short-form error.
1002        if !bare.contains('/') {
1003            if let Some(idx) = basenames {
1004                if let Some(matches) = idx.get(bare) {
1005                    if matches.len() >= 2 {
1006                        let mut related = matches.clone();
1007                        related.sort();
1008                        push(
1009                            issues,
1010                            Severity::Error,
1011                            codes::WIKI_LINK_AMBIGUOUS,
1012                            rel,
1013                            line,
1014                            key.map(str::to_string),
1015                            format!(
1016                                "short-form wiki-link `[[{}]]` matches multiple files",
1017                                link.target
1018                            ),
1019                            Some("use the full store-relative path to disambiguate".into()),
1020                            related,
1021                        );
1022                        return;
1023                    }
1024                }
1025            }
1026        }
1027        push(
1028            issues,
1029            Severity::Error,
1030            codes::WIKI_LINK_SHORT_FORM,
1031            rel,
1032            line,
1033            key.map(str::to_string),
1034            format!(
1035                "wiki-link `[[{}]]` is not a full store-relative path",
1036                link.target
1037            ),
1038            short_form_suggestion(bare),
1039            vec![],
1040        );
1041        // Don't also report broken; the agent must fix the form first.
1042        return;
1043    }
1044
1045    // `.md` extension → warning, then still check existence.
1046    if link.target.ends_with(".md") {
1047        push(
1048            issues,
1049            Severity::Warning,
1050            codes::WIKI_LINK_HAS_EXTENSION,
1051            rel,
1052            line,
1053            key.map(str::to_string),
1054            format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1055            Some(format!("drop the extension: [[{bare}]]")),
1056            vec![],
1057        );
1058    }
1059
1060    // Broken: target file doesn't exist (O(1) stat). Resolve the target the
1061    // same way the graph engine does — the literal path first (so a link to a
1062    // raw `.eml`/`.pdf` source kept verbatim under `sources/` resolves), then
1063    // the `.md`-appended path.
1064    match resolve_wiki_target(store, bare) {
1065        TargetResolution::Exists => {}
1066        TargetResolution::Missing => push(
1067            issues,
1068            Severity::Error,
1069            codes::WIKI_LINK_BROKEN,
1070            rel,
1071            line,
1072            key.map(str::to_string),
1073            format!("wiki-link target `{bare}` doesn't exist"),
1074            Some(format!(
1075                "create `{bare}.md`, or point the link at an existing file"
1076            )),
1077            vec![],
1078        ),
1079        TargetResolution::Unsafe => push(
1080            issues,
1081            Severity::Error,
1082            codes::WIKI_LINK_BROKEN,
1083            rel,
1084            line,
1085            key.map(str::to_string),
1086            format!("wiki-link target `{bare}` is not a safe store-relative path"),
1087            Some("use a full store-relative path under sources/ or records/".into()),
1088            vec![],
1089        ),
1090    }
1091}
1092
1093// ─────────────────────────────────────────────────────────────────────────────
1094//  Schema enforcement (user-declared DB.md ## Schemas — the only source)
1095// ─────────────────────────────────────────────────────────────────────────────
1096
1097/// The effective schema for a type: the store's explicit `DB.md ## Schemas`
1098/// block, or `None`. This is the **only** source of schema enforcement — the
1099/// toolkit ships no implicit or built-in per-type schema (SPEC § Schemas). A
1100/// store that wants its `contact` / `expense` / etc. fields enforced declares
1101/// them in `## Schemas`; the example schema pack in SPEC § Example types is a
1102/// copy-in starting point.
1103fn effective_schema(store: &Store, type_: &str) -> Option<Schema> {
1104    store.config.schemas.get(type_).cloned()
1105}
1106
1107/// Validate a file's frontmatter against a schema's [`FieldSpec`]s.
1108fn check_schema(
1109    store: &Store,
1110    rel: &Path,
1111    fm: &BTreeMap<String, Value>,
1112    fm_yaml: &str,
1113    schema: &Schema,
1114    issues: &mut Vec<Issue>,
1115) {
1116    for spec in &schema.fields {
1117        let present = fm.get(&spec.name);
1118        let line = fm_key_line(fm_yaml, &spec.name);
1119
1120        // Required. "Empty" means: the key is absent, or its value carries no
1121        // content — a YAML `null` (`name:`), an empty list (`name: []`), an
1122        // empty mapping (`name: {}`), or a blank/whitespace-only scalar
1123        // (`name: ""`). `scalar_string` returns `None` for null/list/mapping, so
1124        // a bare `.unwrap_or(false)` wrongly treated those as non-empty and let
1125        // a required field with a null or empty-collection value pass silently;
1126        // route them through `is_empty_value` instead.
1127        let is_empty = match present {
1128            None => true,
1129            Some(v) => is_empty_value(v),
1130        };
1131        if spec.required && is_empty {
1132            push(
1133                issues,
1134                Severity::Error,
1135                codes::SCHEMA_MISSING_REQUIRED,
1136                rel,
1137                // Absent key → anchor to the frontmatter top (line 1); a
1138                // present-but-empty value keeps its own line.
1139                fm_key_line_or_top(fm_yaml, &spec.name),
1140                Some(spec.name.clone()),
1141                format!("required field `{}` is absent or empty", spec.name),
1142                Some(format!("set `{}` to a non-empty value", spec.name)),
1143                vec![],
1144            );
1145            continue;
1146        }
1147        let Some(value) = present else { continue };
1148
1149        // An OPTIONAL field that is `null` or empty is simply unset — there is
1150        // no value to shape/enum/link-check. (The required+empty case already
1151        // returned above as `SCHEMA_MISSING_REQUIRED`.) Without this, an
1152        // `paid_at: null` on an `invoice` whose schema marks `paid_at (date)`
1153        // would wrongly fire `SCHEMA_SHAPE_MISMATCH` against the empty string.
1154        let value_empty = value.is_null()
1155            || scalar_string(value)
1156                .map(|s| s.trim().is_empty())
1157                .unwrap_or(false);
1158        if !spec.required && value_empty {
1159            continue;
1160        }
1161
1162        // link to <prefix>/ — extract the link target(s) from the raw frontmatter
1163        // text (unquoted `[[...]]` is a YAML nested-sequence, not a string).
1164        if let Some(prefix) = &spec.link_prefix {
1165            check_schema_link(store, rel, &spec.name, fm_yaml, prefix, line, issues);
1166            continue; // a link field is never also shape/enum-checked
1167        }
1168
1169        // A shape- or enum-constrained field expects a SCALAR. A YAML sequence
1170        // or mapping satisfies neither, and would otherwise slip through both
1171        // checks (`scalar_string` returns `None` for non-scalars, so the enum
1172        // and shape bodies silently no-op). Flag it as a shape mismatch rather
1173        // than let a structurally-wrong value validate clean. (Link fields,
1174        // which legitimately take block-form sequences, already `continue`d.)
1175        if (spec.shape.is_some() || spec.enum_values.is_some()) && scalar_string(value).is_none() {
1176            push(
1177                issues,
1178                Severity::Error,
1179                codes::SCHEMA_SHAPE_MISMATCH,
1180                rel,
1181                line,
1182                Some(spec.name.clone()),
1183                format!(
1184                    "`{}` must be a scalar value, found a list or mapping",
1185                    spec.name
1186                ),
1187                Some(format!("set `{}` to a single scalar value", spec.name)),
1188                vec![],
1189            );
1190            continue;
1191        }
1192
1193        // enum
1194        if let Some(allowed) = &spec.enum_values {
1195            if let Some(s) = scalar_string(value) {
1196                if !allowed.iter().any(|a| a == &s) {
1197                    push(
1198                        issues,
1199                        Severity::Error,
1200                        codes::SCHEMA_ENUM_VIOLATION,
1201                        rel,
1202                        line,
1203                        Some(spec.name.clone()),
1204                        format!("`{}` value {s:?} not in enum {allowed:?}", spec.name),
1205                        Some(format!("use one of: {}", allowed.join(", "))),
1206                        vec![],
1207                    );
1208                }
1209            }
1210            continue;
1211        }
1212
1213        // shape
1214        if let Some(shape) = spec.shape {
1215            check_schema_shape(rel, &spec.name, value, shape, line, issues);
1216        }
1217    }
1218}
1219
1220/// `link to <prefix>/` enforcement: the value must be a wiki-link whose target
1221/// starts with `<prefix>`. Reads the link target(s) from the raw frontmatter
1222/// text so unquoted `field: [[...]]` (a YAML nested-sequence, not a string) is
1223/// recognized exactly like the quoted form.
1224fn check_schema_link(
1225    store: &Store,
1226    rel: &Path,
1227    field: &str,
1228    fm_yaml: &str,
1229    prefix: &Path,
1230    line: Option<u32>,
1231    issues: &mut Vec<Issue>,
1232) {
1233    let prefix_str = prefix.to_string_lossy();
1234    let prefix_str = prefix_str.trim_end_matches('/');
1235    let suggestion = |target_leaf: &str| {
1236        Some(format!(
1237            "expected `link to {prefix_str}/`; replace with [[{prefix_str}/{target_leaf}]]"
1238        ))
1239    };
1240
1241    let links = frontmatter_links_for_key(fm_yaml, field, 2);
1242    if links.is_empty() {
1243        // No wiki-link in the field's value → it's a plain string.
1244        let raw = frontmatter_raw_value_for_key(fm_yaml, field, 2).unwrap_or_default();
1245        let raw = raw.trim().trim_matches('"').trim_matches('\'').trim();
1246        let leaf = slugish(raw);
1247        push(
1248            issues,
1249            Severity::Error,
1250            codes::SCHEMA_LINK_PREFIX_MISMATCH,
1251            rel,
1252            line,
1253            Some(field.to_string()),
1254            format!(
1255                "`{field}` is a plain string {raw:?}, expected a wiki-link under `{prefix_str}/`"
1256            ),
1257            suggestion(&leaf),
1258            vec![],
1259        );
1260        return;
1261    }
1262
1263    for link in links {
1264        if link.target.ends_with(".md") {
1265            let bare = link.target.trim_end_matches(".md");
1266            push(
1267                issues,
1268                Severity::Warning,
1269                codes::WIKI_LINK_HAS_EXTENSION,
1270                rel,
1271                Some(link.line),
1272                Some(field.to_string()),
1273                format!("wiki-link `[[{}]]` carries a `.md` extension", link.target),
1274                Some(format!("drop the extension: [[{bare}]]")),
1275                vec![],
1276            );
1277        }
1278        let bare = link.target.trim_end_matches(".md");
1279        if !path_under_prefix(bare, prefix_str) {
1280            let leaf = bare.rsplit('/').next().unwrap_or(bare);
1281            push(
1282                issues,
1283                Severity::Error,
1284                codes::SCHEMA_LINK_PREFIX_MISMATCH,
1285                rel,
1286                line,
1287                Some(field.to_string()),
1288                format!("`{field}` target `{bare}` is not under `{prefix_str}/`"),
1289                suggestion(leaf),
1290                vec![],
1291            );
1292        } else {
1293            // Correct prefix — still surface a broken target so the agent sees
1294            // one consistent vocabulary. Resolve like the graph engine (literal
1295            // path first, then `.md`) so a `link to sources/` field pointing at a
1296            // raw `.eml`/`.pdf` source isn't wrongly flagged broken.
1297            match resolve_wiki_target(store, bare) {
1298                TargetResolution::Exists => {}
1299                TargetResolution::Missing => push(
1300                    issues,
1301                    Severity::Error,
1302                    codes::WIKI_LINK_BROKEN,
1303                    rel,
1304                    line,
1305                    Some(field.to_string()),
1306                    format!("wiki-link target `{bare}` doesn't exist"),
1307                    Some(format!(
1308                        "create `{bare}.md`, or point the link at an existing file"
1309                    )),
1310                    vec![],
1311                ),
1312                TargetResolution::Unsafe => push(
1313                    issues,
1314                    Severity::Error,
1315                    codes::WIKI_LINK_BROKEN,
1316                    rel,
1317                    line,
1318                    Some(field.to_string()),
1319                    format!("wiki-link target `{bare}` is not a safe store-relative path"),
1320                    Some("use a full store-relative path under sources/ or records/".into()),
1321                    vec![],
1322                ),
1323            }
1324        }
1325    }
1326}
1327
1328/// Shape enforcement for a non-link, non-enum schema field.
1329fn check_schema_shape(
1330    rel: &Path,
1331    field: &str,
1332    value: &Value,
1333    shape: Shape,
1334    line: Option<u32>,
1335    issues: &mut Vec<Issue>,
1336) {
1337    let s = scalar_string(value).unwrap_or_default();
1338    let ok = match shape {
1339        Shape::String => true, // any scalar string
1340        Shape::Int => value.is_i64() || value.is_u64() || s.trim().parse::<i64>().is_ok(),
1341        Shape::Bool => value.is_bool() || matches!(s.trim(), "true" | "false"),
1342        Shape::Date => is_iso8601_date_or_datetime(&s),
1343        Shape::Email => is_email(&s),
1344        Shape::Currency => is_currency(&s),
1345        Shape::Url => is_url(&s),
1346    };
1347    if !ok {
1348        push(
1349            issues,
1350            Severity::Error,
1351            codes::SCHEMA_SHAPE_MISMATCH,
1352            rel,
1353            line,
1354            Some(field.to_string()),
1355            format!("`{field}` value {s:?} doesn't match shape {shape:?}"),
1356            Some(shape_suggestion(shape)),
1357            vec![],
1358        );
1359    }
1360}
1361
1362// ─────────────────────────────────────────────────────────────────────────────
1363//  Cross-file: entity-dedup collisions (validate_all only)
1364// ─────────────────────────────────────────────────────────────────────────────
1365
1366/// Hard `DUP_ID` + the soft, schema-declared `DUP_UNIQUE_KEY` collisions.
1367///
1368/// `DUP_ID` is universal (two files with the same explicit `id`).
1369/// `DUP_UNIQUE_KEY` is driven entirely by the store's `DB.md ## Schemas`: each
1370/// `- unique: <field>[, <field> …]` directive on a `### <type>` declares a
1371/// uniqueness constraint, and two records of that type whose declared values
1372/// collide warn. No type carries a built-in dedup key — the store opts in.
1373///
1374/// **Reporting precedence (rule #1 in `corpus-b-edges/EXPECTED/README.md`):** a
1375/// collision group of N files yields exactly ONE issue, not N. Its `file` is the
1376/// lexicographically smallest store-relative path in the group (a total order →
1377/// deterministic); `related` is the rest, sorted. A single-field key anchors to
1378/// that field's line on the reported file and carries it as `key`; a multi-field
1379/// key anchors to line 1 with a null key.
1380fn check_duplicates(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
1381    // Path → frontmatter YAML, for resolving the anchor field's line on the
1382    // reported (smallest-path) member.
1383    let fm_yaml_of: HashMap<&PathBuf, &str> = parsed
1384        .iter()
1385        .map(|(rel, p)| (rel, p.fm_yaml.as_str()))
1386        .collect();
1387
1388    // ── DUP_ID (hard error): two files with the same explicit `id`. ──────────
1389    let mut by_id: HashMap<String, Vec<PathBuf>> = HashMap::new();
1390    for (rel, p) in parsed {
1391        if let Some(map) = &p.fm {
1392            if let Some(id) = map.get("id").and_then(scalar_string) {
1393                if !id.trim().is_empty() {
1394                    by_id.entry(id).or_default().push(rel.clone());
1395                }
1396            }
1397        }
1398    }
1399    for (id, files) in &by_id {
1400        if files.len() > 1 {
1401            let (reported, related) = canonical_and_related(files);
1402            let line = fm_yaml_of.get(&reported).and_then(|y| fm_key_line(y, "id"));
1403            push(
1404                issues,
1405                Severity::Error,
1406                codes::DUP_ID,
1407                &reported,
1408                line,
1409                Some("id".into()),
1410                format!("id {id:?} is declared by more than one file"),
1411                Some("give each file a unique `id` (or drop it to derive from the path)".into()),
1412                related,
1413            );
1414        }
1415    }
1416
1417    // ── DUP_UNIQUE_KEY (warning): schema-declared `unique:` collisions. ───────
1418    // Every constraint comes from the store's `## Schemas`; a type with no
1419    // `unique:` directive is never dedup-checked. Iteration over the BTreeMap is
1420    // key-ordered, so emitted issues are deterministic across runs.
1421    for (type_name, schema) in &store.config.schemas {
1422        for key_fields in &schema.unique_keys {
1423            soft_dup(parsed, issues, type_name, key_fields, &fm_yaml_of);
1424        }
1425    }
1426}
1427
1428/// Emit ONE `DUP_UNIQUE_KEY` warning per group of ≥2 files of `type_` whose
1429/// declared `key_fields` render to the same token tuple. Files missing any key
1430/// field are skipped — an incomplete key is never a collision.
1431///
1432/// Per reporting rule #1 the issue is keyed on the lexicographically smallest
1433/// store-relative path; `related` is the rest. A single-field key anchors to
1434/// that field's line on the reported file and carries it as `key`; a multi-field
1435/// key anchors to line 1 with a null key. `fm_yaml_of` resolves the field line.
1436fn soft_dup(
1437    parsed: &[(PathBuf, Parsed)],
1438    issues: &mut Vec<Issue>,
1439    type_: &str,
1440    key_fields: &[String],
1441    fm_yaml_of: &HashMap<&PathBuf, &str>,
1442) {
1443    if key_fields.is_empty() {
1444        return;
1445    }
1446    let mut groups: HashMap<Vec<String>, Vec<PathBuf>> = HashMap::new();
1447    for (rel, p) in parsed {
1448        let is_type =
1449            p.fm.as_ref()
1450                .and_then(|m| m.get("type"))
1451                .and_then(scalar_string)
1452                .map(|t| t == type_)
1453                .unwrap_or(false);
1454        if !is_type {
1455            continue;
1456        }
1457        if let Some(key) = dedup_key(p, key_fields) {
1458            groups.entry(key).or_default().push(rel.clone());
1459        }
1460    }
1461    // HashMap iteration is nondeterministic; sort by reported member so the
1462    // emitted issue order is stable across runs.
1463    let mut collisions: Vec<(PathBuf, Vec<PathBuf>)> = groups
1464        .values()
1465        .filter(|files| files.len() > 1)
1466        .map(|files| canonical_and_related(files))
1467        .collect();
1468    collisions.sort_by(|a, b| a.0.cmp(&b.0));
1469
1470    let fields_disp = key_fields.join(", ");
1471    for (reported, related) in collisions {
1472        // Single-field keys anchor to the field's line + carry the key; multi-
1473        // field keys anchor to line 1 with a null key.
1474        let (line, key) = if key_fields.len() == 1 {
1475            (
1476                fm_yaml_of
1477                    .get(&reported)
1478                    .and_then(|y| fm_key_line(y, &key_fields[0])),
1479                Some(key_fields[0].clone()),
1480            )
1481        } else {
1482            (Some(1), None)
1483        };
1484        let n = related.len();
1485        push(
1486            issues,
1487            Severity::Warning,
1488            codes::DUP_UNIQUE_KEY,
1489            &reported,
1490            line,
1491            key,
1492            format!("`{type_}` unique key ({fields_disp}) collides with {n} other record(s)"),
1493            Some("merge with `dbmd rename`, or cross-link with `dbmd link`".into()),
1494            related,
1495        );
1496    }
1497}
1498
1499/// Render a type's `unique:` key for one file: each field's dedup token in
1500/// order, or `None` if any field is absent/empty (an incomplete key never
1501/// collides).
1502fn dedup_key(p: &Parsed, key_fields: &[String]) -> Option<Vec<String>> {
1503    let mut out = Vec::with_capacity(key_fields.len());
1504    for f in key_fields {
1505        out.push(dedup_token(p, f)?);
1506    }
1507    Some(out)
1508}
1509
1510/// One field's normalized dedup token, or `None` when absent/empty. Wiki-link
1511/// values (single or block-sequence list) reduce to their lower-cased target
1512/// path(s); a list collapses to a sorted, de-duplicated set so item order never
1513/// matters. Plain scalars (and YAML scalar lists) lower-case and trim.
1514fn dedup_token(p: &Parsed, field: &str) -> Option<String> {
1515    // Wiki-links first — read from the raw frontmatter text so the unquoted
1516    // `field: [[...]]` (a YAML nested-sequence, not a string) is handled.
1517    let links = frontmatter_links_for_key(&p.fm_yaml, field, 2);
1518    if !links.is_empty() {
1519        let set: BTreeSet<String> = links
1520            .into_iter()
1521            .map(|l| l.target.trim_end_matches(".md").to_lowercase())
1522            .filter(|t| !t.is_empty())
1523            .collect();
1524        return if set.is_empty() {
1525            None
1526        } else {
1527            Some(set.into_iter().collect::<Vec<_>>().join(","))
1528        };
1529    }
1530    match p.fm.as_ref()?.get(field) {
1531        Some(Value::Sequence(items)) => {
1532            let set: BTreeSet<String> = items
1533                .iter()
1534                .filter_map(scalar_string)
1535                .map(|s| s.trim().to_lowercase())
1536                .filter(|t| !t.is_empty())
1537                .collect();
1538            if set.is_empty() {
1539                None
1540            } else {
1541                Some(set.into_iter().collect::<Vec<_>>().join(","))
1542            }
1543        }
1544        Some(v) => {
1545            let s = scalar_string(v)?.trim().to_lowercase();
1546            if s.is_empty() {
1547                None
1548            } else {
1549                Some(s)
1550            }
1551        }
1552        None => None,
1553    }
1554}
1555
/// Partition a collision group into its `(reported, related)` members.
///
/// The group is sorted by store-relative path; the smallest path becomes the
/// reported member and everything after it, still in ascending order, becomes
/// `related`. Because path ordering is total the split is deterministic, which
/// is what reporting rule #1 depends on.
///
/// # Panics
///
/// Panics when `files` is empty — callers only pass real collision groups.
fn canonical_and_related(files: &[PathBuf]) -> (PathBuf, Vec<PathBuf>) {
    let mut ordered: Vec<PathBuf> = Vec::from(files);
    ordered.sort();
    let (reported, related) = ordered
        .split_first()
        .expect("a collision group is never empty");
    (reported.clone(), related.to_vec())
}
1567
1568// ─────────────────────────────────────────────────────────────────────────────
1569//  Cross-file: hierarchical index.md + index.jsonl sync (validate_all only)
1570// ─────────────────────────────────────────────────────────────────────────────
1571
1572/// All `INDEX_*` and `INDEX_JSONL_*` checks across the three canonical levels.
1573fn check_indexes(store: &Store, files: &[PathBuf], issues: &mut Vec<Issue>) {
1574    // Group content files by their immediate parent folder (the type-folder,
1575    // *across date shards* — a sharded file's "type folder" is the folder right
1576    // under the layer). We key on the type-folder so shards roll up correctly.
1577    let mut type_folders: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1578    for rel in files {
1579        if let Some(tf) = type_folder_of(rel) {
1580            type_folders.entry(tf).or_default().push(rel.clone());
1581        }
1582    }
1583
1584    // Layers that actually contain a type-folder. The index WRITER creates a
1585    // layer/root `index.md` ONLY when a type-folder exists to roll up:
1586    // `Index::build_root`/`build_layer` populate `child_counts` from type-folders
1587    // alone, and `rebuild_all`/`write_level` remove the `index.md` when that map
1588    // is empty. A layer with ONLY loose files therefore has NO `index.md` — its
1589    // loose records live in the layer's own `index.jsonl` (checked in the loose
1590    // block below). Gating the `index.md` requirement on type-folder presence
1591    // (not on "any content file") keeps `validate --all` in parity with
1592    // `dbmd index rebuild`: requiring an `index.md` for a loose-only layer would
1593    // demand an artifact the canonical rebuild never creates, permanently
1594    // wedging the sweep on a correct store.
1595    let mut layers_with_type_folders: BTreeSet<&'static str> = BTreeSet::new();
1596    for tf in type_folders.keys() {
1597        match tf.iter().next().and_then(|s| s.to_str()) {
1598            Some("sources") => {
1599                layers_with_type_folders.insert("sources");
1600            }
1601            Some("records") => {
1602                layers_with_type_folders.insert("records");
1603            }
1604            _ => {}
1605        }
1606    }
1607
1608    // ── Root index.md ──── (only when a type-folder exists to roll up) ──────────
1609    if !type_folders.is_empty() {
1610        let root_index = store.root.join("index.md");
1611        if !root_index.is_file() {
1612            push(
1613                issues,
1614                Severity::Error,
1615                codes::INDEX_MISSING,
1616                Path::new("index.md"),
1617                None,
1618                None,
1619                "store has files but no root `index.md`".into(),
1620                Some("run `dbmd index rebuild`".into()),
1621                vec![],
1622            );
1623        } else {
1624            check_index_scope(store, Path::new("index.md"), "root", None, issues);
1625        }
1626    }
1627
1628    // ── Layer index.md ──── (only layers that contain a type-folder) ───────────
1629    for layer in &layers_with_type_folders {
1630        let layer_index_rel = PathBuf::from(layer).join("index.md");
1631        let abs = store.root.join(&layer_index_rel);
1632        if !abs.is_file() {
1633            push(
1634                issues,
1635                Severity::Error,
1636                codes::INDEX_MISSING,
1637                &layer_index_rel,
1638                None,
1639                None,
1640                format!("layer `{layer}/` has files but no `index.md`"),
1641                Some("run `dbmd index rebuild`".into()),
1642                vec![],
1643            );
1644        } else {
1645            check_index_scope(store, &layer_index_rel, "layer", Some(layer), issues);
1646        }
1647    }
1648
1649    // ── Type-folder index.md + index.jsonl ───────────────────────────────────
1650    for (tf, members) in &type_folders {
1651        let index_md_rel = tf.join("index.md");
1652        let index_md_abs = store.root.join(&index_md_rel);
1653        let index_md_present = index_md_abs.is_file();
1654        if !index_md_present {
1655            // The whole folder index is absent → a single `INDEX_MISSING` keyed
1656            // on the FOLDER (not the would-be `index.md` path). When the index is
1657            // entirely missing we do NOT additionally evaluate per-entry
1658            // completeness or the `index.jsonl` twin: one `INDEX_MISSING` covers
1659            // the folder (precedence rule #4 in `corpus-b-edges/EXPECTED`).
1660            push(
1661                issues,
1662                Severity::Error,
1663                codes::INDEX_MISSING,
1664                tf,
1665                None,
1666                None,
1667                format!("non-empty folder `{}` has no index.md", tf.display()),
1668                Some(format!(
1669                    "run `dbmd index rebuild --folder {}`",
1670                    tf.display()
1671                )),
1672                vec![],
1673            );
1674            continue;
1675        }
1676
1677        check_index_scope(store, &index_md_rel, "type-folder", tf.to_str(), issues);
1678        check_type_folder_index_md(store, tf, &index_md_rel, members, issues);
1679
1680        // index.jsonl twin — must exist and be complete (uncapped). Only checked
1681        // when the `index.md` is present (above): a folder whose entire index is
1682        // missing is one `INDEX_MISSING`, not also an `INDEX_JSONL_MISSING`.
1683        let jsonl_rel = tf.join("index.jsonl");
1684        let jsonl_abs = store.root.join(&jsonl_rel);
1685        if !jsonl_abs.is_file() {
1686            push(
1687                issues,
1688                Severity::Error,
1689                codes::INDEX_JSONL_MISSING,
1690                &jsonl_rel,
1691                None,
1692                None,
1693                format!("type-folder `{}/` has no `index.jsonl` twin", tf.display()),
1694                Some("run `dbmd index rebuild`".into()),
1695                vec![],
1696            );
1697        } else {
1698            check_type_folder_index_jsonl(store, tf, &jsonl_rel, members, issues);
1699        }
1700    }
1701
1702    // ── Loose files: content directly at a layer root (no type-folder). ──────
1703    // They are catalogued in the layer's own `index.jsonl` (the layer `index.md`
1704    // stays a type-folder rollup), so structured reads — `query`, dedup, `graph`
1705    // — see them the same way they see canonical files. Require that sidecar and
1706    // sync-check it, so a loose file is never silently absent from the catalog.
1707    // Only genuinely-loose files land here: `type_folder_of` already grouped
1708    // every file two-or-more levels under a layer into its type-folder above.
1709    let mut loose_by_layer: BTreeMap<PathBuf, Vec<PathBuf>> = BTreeMap::new();
1710    for rel in files {
1711        if !is_content_file(rel) || type_folder_of(rel).is_some() {
1712            continue;
1713        }
1714        if let Some(layer_dir) = loose_layer_dir(rel) {
1715            loose_by_layer
1716                .entry(layer_dir)
1717                .or_default()
1718                .push(rel.clone());
1719        }
1720    }
1721    for (layer_dir, members) in &loose_by_layer {
1722        let jsonl_rel = layer_dir.join("index.jsonl");
1723        if !store.root.join(&jsonl_rel).is_file() {
1724            push(
1725                issues,
1726                Severity::Error,
1727                codes::INDEX_JSONL_MISSING,
1728                &jsonl_rel,
1729                None,
1730                None,
1731                format!(
1732                    "loose files at `{}/` are not catalogued — the layer has no `index.jsonl`",
1733                    layer_dir.display()
1734                ),
1735                Some("run `dbmd index rebuild`".into()),
1736                members.clone(),
1737            );
1738        } else {
1739            // `check_type_folder_index_jsonl` ignores its `tf` arg (`let _ = tf`)
1740            // and only checks jsonl-vs-files-vs-frontmatter — exactly the layer
1741            // sidecar's contract, so it is reused verbatim.
1742            check_type_folder_index_jsonl(store, layer_dir, &jsonl_rel, members, issues);
1743        }
1744    }
1745
1746    // ── Orphan index.md: an index file in a folder with no content. ──────────
1747    for rel in walk_index_files(&store.root) {
1748        let parent = rel.parent().unwrap_or(Path::new("")).to_path_buf();
1749        let parent_str = parent.to_string_lossy().to_string();
1750        let is_canonical = parent_str.is_empty() // root
1751            || matches!(parent_str.as_str(), "sources" | "records")
1752            || type_folders.contains_key(&parent);
1753        if !is_canonical {
1754            push(
1755                issues,
1756                Severity::Warning,
1757                codes::INDEX_ORPHAN,
1758                &rel,
1759                None,
1760                None,
1761                format!(
1762                    "`{}` sits in an empty or non-canonical folder",
1763                    rel.display()
1764                ),
1765                Some("remove it, or run `dbmd index rebuild`".into()),
1766                vec![],
1767            );
1768        }
1769    }
1770}
1771
1772/// Check a type-folder `index.md`'s entries against the folder's actual files:
1773/// stale entries (target gone), missing entries (file not listed), and
1774/// summary mismatches.
1775fn check_type_folder_index_md(
1776    store: &Store,
1777    tf: &Path,
1778    index_rel: &Path,
1779    members: &[PathBuf],
1780    issues: &mut Vec<Issue>,
1781) {
1782    let abs = store.root.join(index_rel);
1783    let Ok(text) = std::fs::read_to_string(&abs) else {
1784        return;
1785    };
1786    let entries = parse_index_entries(&text);
1787
1788    let listed: BTreeSet<PathBuf> = entries
1789        .iter()
1790        .map(|e| PathBuf::from(e.target.trim_end_matches(".md")))
1791        .collect();
1792
1793    // Stale entries + summary mismatch.
1794    for entry in &entries {
1795        let bare = entry.target.trim_end_matches(".md");
1796        // Resolve like the graph engine (literal path first, then `.md`) so an
1797        // index entry naming a raw `.eml`/`.pdf` source isn't reported stale.
1798        let target_abs = match resolved_target_abs(store, bare) {
1799            Some(abs) => abs,
1800            None => {
1801                if matches!(resolve_wiki_target(store, bare), TargetResolution::Unsafe) {
1802                    push(
1803                        issues,
1804                        Severity::Error,
1805                        codes::INDEX_STALE_ENTRY,
1806                        index_rel,
1807                        Some(entry.line),
1808                        None,
1809                        format!("index entry `[[{bare}]]` is not a safe store-relative path"),
1810                        Some("run `dbmd index rebuild`".into()),
1811                        vec![],
1812                    );
1813                } else {
1814                    push(
1815                        issues,
1816                        Severity::Error,
1817                        codes::INDEX_STALE_ENTRY,
1818                        index_rel,
1819                        Some(entry.line),
1820                        None,
1821                        format!("index entry `[[{bare}]]` points at a missing file"),
1822                        Some("run `dbmd index rebuild`".into()),
1823                        // The stale target the entry names (the file that no
1824                        // longer exists) — so the agent can locate the dangling
1825                        // reference.
1826                        vec![PathBuf::from(format!("{bare}.md"))],
1827                    );
1828                }
1829                continue;
1830            }
1831        };
1832        // Summary mismatch: the entry text must equal the file's `summary`. A
1833        // bare `- [[path]]` entry (no `— <text>`) when the file HAS a non-empty
1834        // summary is also a mismatch — the SPEC requires every type-folder index
1835        // entry to quote the file's `summary` (`- [[path]] — <summary>`), so a
1836        // missing quote can't validate clean just because there's nothing to
1837        // compare.
1838        if let Some(expected) = read_summary(&target_abs) {
1839            match &entry.summary_text {
1840                // Compare with the SAME whitespace normalization the renderer
1841                // applies when it writes the `index.md` browse line
1842                // (`format_md_entry` -> `collapse_whitespace`). `text_part` is the
1843                // already-collapsed text parsed back out of `index.md`; `expected`
1844                // is the RAW file summary. Comparing a collapsed value against a
1845                // raw one falsely flagged any valid one-line summary that carries
1846                // internal whitespace (a double space, a tab) — a permanent,
1847                // rebuild-immune INDEX_SUMMARY_MISMATCH that wedged the store, since
1848                // `index rebuild` regenerates the byte-identical collapsed line.
1849                // Normalizing both sides makes the check compare like with like.
1850                Some(text_part)
1851                    if crate::summary::collapse_whitespace(text_part)
1852                        != crate::summary::collapse_whitespace(&expected) =>
1853                {
1854                    push(
1855                        issues,
1856                        Severity::Error,
1857                        codes::INDEX_SUMMARY_MISMATCH,
1858                        index_rel,
1859                        Some(entry.line),
1860                        None,
1861                        format!("index entry for `{bare}` text doesn't match the file's `summary`"),
1862                        Some("run `dbmd index rebuild`".into()),
1863                        vec![PathBuf::from(format!("{bare}.md"))],
1864                    );
1865                }
1866                None if !expected.trim().is_empty() => {
1867                    push(
1868                        issues,
1869                        Severity::Error,
1870                        codes::INDEX_SUMMARY_MISMATCH,
1871                        index_rel,
1872                        Some(entry.line),
1873                        None,
1874                        format!("index entry for `{bare}` is missing its summary text (the file has a `summary`)"),
1875                        Some("run `dbmd index rebuild`".into()),
1876                        vec![PathBuf::from(format!("{bare}.md"))],
1877                    );
1878                }
1879                _ => {}
1880            }
1881        }
1882    }
1883
1884    // Missing entries: a member file not listed. Skip the index/log meta files.
1885    // The browse view caps at 500; only flag a missing entry when the folder is
1886    // under the cap (a capped folder legitimately omits older files).
1887    let content_members: Vec<&PathBuf> = members.iter().filter(|m| is_content_file(m)).collect();
1888    if content_members.len() <= 500 {
1889        for m in content_members {
1890            let bare = PathBuf::from(m.to_string_lossy().trim_end_matches(".md").to_string());
1891            if !listed.contains(&bare) {
1892                push(
1893                    issues,
1894                    Severity::Error,
1895                    codes::INDEX_MISSING_ENTRY,
1896                    index_rel,
1897                    None,
1898                    None,
1899                    format!(
1900                        "file `{}` is not listed in its folder's `index.md`",
1901                        m.display()
1902                    ),
1903                    Some("run `dbmd index rebuild`".into()),
1904                    vec![(*m).clone()],
1905                );
1906            }
1907        }
1908    }
1909    let _ = tf;
1910}
1911
1912/// Check a type-folder `index.jsonl` twin: it must list **every** file in the
1913/// folder (uncapped), every record must point at a real file, and each record's
1914/// fields must match the file's frontmatter.
1915fn check_type_folder_index_jsonl(
1916    store: &Store,
1917    tf: &Path,
1918    jsonl_rel: &Path,
1919    members: &[PathBuf],
1920    issues: &mut Vec<Issue>,
1921) {
1922    let abs = store.root.join(jsonl_rel);
1923    let Ok(text) = std::fs::read_to_string(&abs) else {
1924        return;
1925    };
1926
1927    // Parse records (last-write-wins by path), tolerating tombstones/blank lines.
1928    let mut records: BTreeMap<PathBuf, serde_json::Value> = BTreeMap::new();
1929    for (i, line) in text.lines().enumerate() {
1930        let line = line.trim();
1931        if line.is_empty() {
1932            continue;
1933        }
1934        let rec: serde_json::Value = match serde_json::from_str(line) {
1935            Ok(v) => v,
1936            Err(e) => {
1937                push(
1938                    issues,
1939                    Severity::Error,
1940                    codes::INDEX_JSONL_DESYNC,
1941                    jsonl_rel,
1942                    Some((i + 1) as u32),
1943                    None,
1944                    format!("`index.jsonl` line {} is not valid JSON: {e}", i + 1),
1945                    Some("run `dbmd index rebuild`".into()),
1946                    vec![],
1947                );
1948                continue;
1949            }
1950        };
1951        if let Some(path) = rec.get("path").and_then(|v| v.as_str()) {
1952            if !is_safe_store_relative_path(Path::new(path)) {
1953                push(
1954                    issues,
1955                    Severity::Error,
1956                    codes::INDEX_JSONL_DESYNC,
1957                    jsonl_rel,
1958                    Some((i + 1) as u32),
1959                    None,
1960                    format!("`index.jsonl` record path `{path}` is not a safe store-relative path"),
1961                    Some("run `dbmd index rebuild`".into()),
1962                    vec![],
1963                );
1964                continue;
1965            }
1966            records.insert(PathBuf::from(path), rec);
1967        }
1968    }
1969
1970    let member_set: BTreeSet<PathBuf> = members
1971        .iter()
1972        .filter(|m| is_content_file(m))
1973        .cloned()
1974        .collect();
1975
1976    // jsonl record → missing file = desync.
1977    for path in records.keys() {
1978        let target_abs = store.root.join(path);
1979        if !target_abs.is_file() {
1980            push(
1981                issues,
1982                Severity::Error,
1983                codes::INDEX_JSONL_DESYNC,
1984                jsonl_rel,
1985                None,
1986                None,
1987                format!(
1988                    "`index.jsonl` record points at missing file `{}`",
1989                    path.display()
1990                ),
1991                Some("run `dbmd index rebuild`".into()),
1992                vec![],
1993            );
1994        }
1995    }
1996
1997    // file not in jsonl = desync (the jsonl is the complete twin — no cap).
1998    for m in &member_set {
1999        if !records.contains_key(m) {
2000            push(
2001                issues,
2002                Severity::Error,
2003                codes::INDEX_JSONL_DESYNC,
2004                jsonl_rel,
2005                None,
2006                None,
2007                format!(
2008                    "file `{}` is missing from the complete `index.jsonl`",
2009                    m.display()
2010                ),
2011                Some("run `dbmd index rebuild`".into()),
2012                vec![m.clone()],
2013            );
2014        }
2015    }
2016
2017    // Record fields stale vs. frontmatter. SPEC § Validation defines
2018    // `INDEX_JSONL_STALE` as "an `index.jsonl` record's fields don't match the
2019    // file's frontmatter" — ANY field, not just `summary`/`type`. The query and
2020    // search paths read every field straight from these sidecars (`tags`,
2021    // `links`, `created`, `updated`, plus type-specific `email` / `domain` /
2022    // `company` / `amount` / `vendor` …), so a single field left unchecked lets
2023    // a stale value answer queries with data that exists in no `.md` file.
2024    //
2025    // Rather than re-list (and drift from) every projected key, rebuild the
2026    // record the canonical projection would write for this file
2027    // ([`IndexRecord::expected_from_file`], the same path `index rebuild` uses)
2028    // and diff the two as flat JSON maps. Every key the projection emits is
2029    // covered automatically; `path` is the join key and is skipped.
2030    for (path, rec) in &records {
2031        let target_abs = store.root.join(path);
2032        if !target_abs.is_file() {
2033            continue;
2034        }
2035        let Ok(expected) = crate::index::IndexRecord::expected_from_file(&target_abs, path.clone())
2036        else {
2037            continue; // unreadable / unparseable frontmatter is reported elsewhere
2038        };
2039        let Ok(expected_json) = serde_json::to_value(&expected) else {
2040            continue;
2041        };
2042        let (Some(have), Some(want)) = (rec.as_object(), expected_json.as_object()) else {
2043            continue;
2044        };
2045
2046        // Compare the union of keys present on either side; a key the file
2047        // projects but the sidecar omits is just as stale as a wrong value.
2048        let mut mismatched_keys: BTreeSet<&str> = BTreeSet::new();
2049        for key in have.keys().chain(want.keys()) {
2050            if key == "path" {
2051                continue;
2052            }
2053            if have.get(key) != want.get(key) {
2054                mismatched_keys.insert(key);
2055            }
2056        }
2057
2058        if !mismatched_keys.is_empty() {
2059            let keys: Vec<&str> = mismatched_keys.into_iter().collect();
2060            push(
2061                issues,
2062                Severity::Error,
2063                codes::INDEX_JSONL_STALE,
2064                jsonl_rel,
2065                None,
2066                Some(keys.join(",")),
2067                format!(
2068                    "`index.jsonl` record for `{}` is stale ({})",
2069                    path.display(),
2070                    keys.join(", ")
2071                ),
2072                Some("run `dbmd index rebuild`".into()),
2073                vec![path.clone()],
2074            );
2075        }
2076    }
2077    let _ = tf;
2078}
2079
2080/// Check an index's `scope:` frontmatter against its filesystem location.
2081fn check_index_scope(
2082    store: &Store,
2083    index_rel: &Path,
2084    expected_scope: &str,
2085    expected_folder: Option<&str>,
2086    issues: &mut Vec<Issue>,
2087) {
2088    let abs = store.root.join(index_rel);
2089    let Ok(text) = std::fs::read_to_string(&abs) else {
2090        return;
2091    };
2092    let Some((yaml, _, _)) = split_frontmatter(&text) else {
2093        return;
2094    };
2095    let Ok(Value::Mapping(map)) = serde_norway::from_str::<Value>(&yaml) else {
2096        return;
2097    };
2098    let fm = yaml_map_to_btree(&map);
2099
2100    if let Some(scope) = fm.get("scope").and_then(scalar_string) {
2101        // Accept "type-folder" and the SPEC example's looser "folder" alias.
2102        let scope_ok =
2103            scope == expected_scope || (expected_scope == "type-folder" && scope == "folder");
2104        if !scope_ok {
2105            push(
2106                issues,
2107                Severity::Warning,
2108                codes::INDEX_WRONG_SCOPE,
2109                index_rel,
2110                fm_key_line(&yaml, "scope"),
2111                Some("scope".into()),
2112                format!(
2113                    "index `scope: {scope}` doesn't match location (expected `{expected_scope}`)"
2114                ),
2115                Some(format!("set `scope: {expected_scope}`")),
2116                vec![],
2117            );
2118        }
2119    }
2120    // folder: must match for layer/type-folder indexes.
2121    if let Some(expected) = expected_folder {
2122        if let Some(folder) = fm.get("folder").and_then(scalar_string) {
2123            if folder.trim_end_matches('/') != expected.trim_end_matches('/') {
2124                push(
2125                    issues,
2126                    Severity::Warning,
2127                    codes::INDEX_WRONG_SCOPE,
2128                    index_rel,
2129                    fm_key_line(&yaml, "folder"),
2130                    Some("folder".into()),
2131                    format!("index `folder: {folder}` doesn't match location `{expected}`"),
2132                    Some(format!("set `folder: {expected}`")),
2133                    vec![],
2134                );
2135            }
2136        }
2137    }
2138}
2139
2140// ─────────────────────────────────────────────────────────────────────────────
2141//  Cross-file: log.md well-formedness + ordering (validate_all only)
2142// ─────────────────────────────────────────────────────────────────────────────
2143
2144/// `LOG_*` checks: bad timestamps, unknown kinds, out-of-order entries — across
2145/// the active `log.md` AND the rotated `log/<YYYY-MM>.md` archives.
2146///
2147/// [`Log::append`] rolls strictly-prior-month entries into `log/<YYYY-MM>.md`,
2148/// and `Log::tail`/`Log::since` deliberately read those archives back. If the
2149/// LOG_* checks read only the active file, an entry `validate --all` flagged
2150/// while it lived in `log.md` would stop being flagged the moment a newer-month
2151/// append rotated it into an archive — even though the log readers still surface
2152/// that exact entry to the curator. Scanning the archives too keeps validate and
2153/// the readers in agreement after a rotation.
2154///
2155/// Order: archives oldest-month first, then the active `log.md` last — the true
2156/// chronological timeline — so the out-of-order check threads `prev` across the
2157/// rotation boundary the same way it does within a single file.
2158fn check_log(store: &Store, issues: &mut Vec<Issue>) {
2159    let mut prev: Option<DateTime<FixedOffset>> = None;
2160    for rel in log_files_chronological(store) {
2161        check_log_file(store, &rel, &mut prev, issues);
2162    }
2163}
2164
2165/// The log files to scan, in chronological order: every `log/<YYYY-MM>.md`
2166/// archive oldest-month first, then the active `log.md` last. Missing files are
2167/// simply absent from the list.
2168fn log_files_chronological(store: &Store) -> Vec<PathBuf> {
2169    let mut files: Vec<PathBuf> = Vec::new();
2170    let archive_dir = store.root.join("log");
2171    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
2172        let mut archives: Vec<PathBuf> = entries
2173            .flatten()
2174            .map(|e| e.path())
2175            .filter(|p| {
2176                p.is_file()
2177                    && p.file_name()
2178                        .and_then(|s| s.to_str())
2179                        .and_then(|n| n.strip_suffix(".md"))
2180                        .is_some_and(is_year_month_archive)
2181            })
2182            .filter_map(|p| p.strip_prefix(&store.root).ok().map(Path::to_path_buf))
2183            .collect();
2184        // `YYYY-MM` stems sort lexically == chronologically; oldest first.
2185        archives.sort();
2186        files.extend(archives);
2187    }
2188    // The active file holds the current month — newest, so it comes last.
2189    if store.root.join("log.md").is_file() {
2190        files.push(PathBuf::from("log.md"));
2191    }
2192    files
2193}
2194
2195/// Scan one log file's entry headers, threading the running `prev` timestamp so
2196/// the out-of-order check spans file (rotation) boundaries. Issues anchor to the
2197/// given store-relative path so an archived entry points at its archive file.
2198fn check_log_file(
2199    store: &Store,
2200    log_rel: &Path,
2201    prev: &mut Option<DateTime<FixedOffset>>,
2202    issues: &mut Vec<Issue>,
2203) {
2204    let abs = store.root.join(log_rel);
2205    let Ok(text) = std::fs::read_to_string(&abs) else {
2206        return;
2207    };
2208
2209    for (i, line) in text.lines().enumerate() {
2210        if !line.starts_with("## [") {
2211            continue;
2212        }
2213        let line_no = (i + 1) as u32;
2214        match parse_log_header(line) {
2215            None => push(
2216                issues,
2217                Severity::Error,
2218                codes::LOG_BAD_TIMESTAMP,
2219                log_rel,
2220                Some(line_no),
2221                None,
2222                format!("log entry header has an unparseable timestamp: {line:?}"),
2223                Some("use `## [YYYY-MM-DD HH:MM] <kind> | <object>`".into()),
2224                vec![],
2225            ),
2226            Some((ts, kind, _object)) => {
2227                if !RECOGNIZED_LOG_KINDS.contains(&kind.as_str()) {
2228                    push(
2229                        issues,
2230                        Severity::Warning,
2231                        codes::LOG_UNKNOWN_KIND,
2232                        log_rel,
2233                        Some(line_no),
2234                        None,
2235                        format!("log entry kind `{kind}` is not recognized"),
2236                        Some(format!("use one of: {}", RECOGNIZED_LOG_KINDS.join(", "))),
2237                        vec![],
2238                    );
2239                }
2240                if let Some(p) = *prev {
2241                    if ts < p {
2242                        push(
2243                            issues,
2244                            Severity::Warning,
2245                            codes::LOG_OUT_OF_ORDER,
2246                            log_rel,
2247                            Some(line_no),
2248                            None,
2249                            "log entry is older than the entry above it (possible rewrite)".into(),
2250                            Some("append corrective entries; never reorder past ones".into()),
2251                            vec![],
2252                        );
2253                    }
2254                }
2255                *prev = Some(ts);
2256            }
2257        }
2258    }
2259}
2260
2261// ─────────────────────────────────────────────────────────────────────────────
2262//  Self-contained primitives (collapse onto sibling modules once they land)
2263// ─────────────────────────────────────────────────────────────────────────────
2264
/// A minimal wiki-link: the link target and the 1-based line it was found on.
/// No display text is stored — the struct carries only these two fields.
#[derive(Debug)]
struct Link {
    // The link target as written between the brackets.
    target: String,
    // 1-based line number of the link.
    line: u32,
}
2271
2272/// True if the store marker (`DB.md`, uppercase) is present at the root. On a
2273/// case-insensitive filesystem `db.md` would also match `DB.md`; we require the
2274/// exact-cased directory entry to be present.
2275fn store_marker_present(store: &Store) -> bool {
2276    let want = store.root.join("DB.md");
2277    if !want.is_file() {
2278        return false;
2279    }
2280    // Reject a case-folded match (`db.md`) on case-insensitive filesystems.
2281    match std::fs::read_dir(&store.root) {
2282        Ok(entries) => entries
2283            .flatten()
2284            .any(|e| e.file_name().to_str() == Some("DB.md")),
2285        Err(_) => true, // can't enumerate; trust the is_file() above
2286    }
2287}
2288
2289/// Validate the store's identity file, `DB.md`: its frontmatter `type:` must be
2290/// `db-md`, it must carry both `scope` and `owner`, and its body may contain
2291/// only the three recognized `##` sections (`Agent instructions`, `Policies`,
2292/// `Schemas`).
2293///
2294/// `DB.md` is not a content file (no `summary`), so it is checked here rather
2295/// than through `check_content_file`. The marker presence is established by the
2296/// caller (`store_marker_present`); a malformed-frontmatter `DB.md` still counts
2297/// as a store (the marker is the filename), so we report its shape rather than
2298/// `NOT_A_STORE`. Issues anchor to `DB.md` as the store-relative path.
2299fn check_db_md(store: &Store, issues: &mut Vec<Issue>) {
2300    let rel = Path::new("DB.md");
2301    let abs = store.root.join("DB.md");
2302    let Ok(text) = std::fs::read_to_string(&abs) else {
2303        return; // marker present but unreadable: nothing more to say.
2304    };
2305
2306    let Some((fm_yaml, body, fm_end_line)) = split_frontmatter(&text) else {
2307        // No frontmatter block at all → it cannot declare `type: db-md` and has
2308        // neither required field. Report the type and both missing fields,
2309        // anchored to line 1 (the would-be opening fence).
2310        push(
2311            issues,
2312            Severity::Error,
2313            codes::DB_MD_BAD_TYPE,
2314            rel,
2315            Some(1),
2316            Some("type".into()),
2317            "DB.md has no frontmatter; it must declare `type: db-md`".into(),
2318            Some("add a `---` frontmatter block with `type: db-md`".into()),
2319            vec![],
2320        );
2321        for field in ["scope", "owner"] {
2322            push(
2323                issues,
2324                Severity::Error,
2325                codes::DB_MD_MISSING_FIELD,
2326                rel,
2327                Some(1),
2328                Some(field.into()),
2329                format!("DB.md frontmatter is missing required field `{field}`"),
2330                Some(format!("add `{field}:` to the DB.md frontmatter")),
2331                vec![],
2332            );
2333        }
2334        return;
2335    };
2336
2337    // Parse the frontmatter mapping. If it doesn't parse, we can still say the
2338    // identity contract is unmet (no provable `type: db-md`, no provable fields).
2339    let fm: Option<BTreeMap<String, Value>> = match serde_norway::from_str::<Value>(&fm_yaml) {
2340        Ok(Value::Mapping(map)) => Some(yaml_map_to_btree(&map)),
2341        Ok(Value::Null) => Some(BTreeMap::new()),
2342        _ => None,
2343    };
2344
2345    match &fm {
2346        Some(map) => {
2347            // ── type: db-md ──────────────────────────────────────────────────
2348            let type_ = map.get("type").and_then(scalar_string);
2349            if type_.as_deref() != Some("db-md") {
2350                let (line, msg) = match &type_ {
2351                    Some(t) => (
2352                        fm_key_line(&fm_yaml, "type"),
2353                        format!("DB.md has `type: {t}`; a store's DB.md must be `type: db-md`"),
2354                    ),
2355                    None => (
2356                        Some(1),
2357                        "DB.md frontmatter has no `type:`; it must be `type: db-md`".to_string(),
2358                    ),
2359                };
2360                push(
2361                    issues,
2362                    Severity::Error,
2363                    codes::DB_MD_BAD_TYPE,
2364                    rel,
2365                    line,
2366                    Some("type".into()),
2367                    msg,
2368                    Some("set `type: db-md` in the DB.md frontmatter".into()),
2369                    vec![],
2370                );
2371            }
2372
2373            // ── required fields: scope + owner ───────────────────────────────
2374            for field in ["scope", "owner"] {
2375                let present = map
2376                    .get(field)
2377                    .and_then(scalar_string)
2378                    .map(|s| !s.trim().is_empty())
2379                    .unwrap_or(false);
2380                if !present {
2381                    push(
2382                        issues,
2383                        Severity::Error,
2384                        codes::DB_MD_MISSING_FIELD,
2385                        rel,
2386                        // A present-but-empty field anchors to its line; a fully
2387                        // absent one to the block top.
2388                        fm_key_line_or_top(&fm_yaml, field),
2389                        Some(field.into()),
2390                        format!("DB.md frontmatter is missing required field `{field}`"),
2391                        Some(format!("add `{field}:` to the DB.md frontmatter")),
2392                        vec![],
2393                    );
2394                }
2395            }
2396        }
2397        None => {
2398            // Unparseable frontmatter: the identity contract is unprovable. Emit
2399            // the type error and both field errors, anchored to the block top.
2400            push(
2401                issues,
2402                Severity::Error,
2403                codes::DB_MD_BAD_TYPE,
2404                rel,
2405                Some(1),
2406                Some("type".into()),
2407                "DB.md frontmatter isn't valid YAML; it must declare `type: db-md`".into(),
2408                Some("fix the DB.md frontmatter and set `type: db-md`".into()),
2409                vec![],
2410            );
2411            for field in ["scope", "owner"] {
2412                push(
2413                    issues,
2414                    Severity::Error,
2415                    codes::DB_MD_MISSING_FIELD,
2416                    rel,
2417                    Some(1),
2418                    Some(field.into()),
2419                    format!("DB.md frontmatter is missing required field `{field}`"),
2420                    Some(format!("add `{field}:` to the DB.md frontmatter")),
2421                    vec![],
2422                );
2423            }
2424        }
2425    }
2426
2427    // ── recognized `##` section headers only ─────────────────────────────────
2428    // The body's H2 headings must be one of the four the toolkit reads; any
2429    // other is a likely typo / misplacement (warning — the parser ignores it,
2430    // so the config is not corrupted, but the operator wrote a section that will
2431    // never be read). H3 sub-headings (Frozen pages, Ignored types, `### <type>`
2432    // schema blocks) live under their H2 and are not flagged here.
2433    //
2434    // `## Folders` is recognized: `parse_db_md` reads it into `Config.folders`
2435    // (parser.rs) and the index renders folder display names + descriptions from
2436    // it (index.rs `render_*_md_from_stats`). Flagging it `DB_MD_UNKNOWN_SECTION`
2437    // with "remove this heading" told the operator to delete a working,
2438    // round-tripped config block — destroying curator-authored rollup names. It
2439    // is a real, shipped section; SPEC.md documents it alongside the other three.
2440    for section in crate::parser::extract_sections(&body) {
2441        if section.level != 2 {
2442            continue;
2443        }
2444        let name = section.heading.trim().to_ascii_lowercase();
2445        if matches!(
2446            name.as_str(),
2447            "agent instructions" | "policies" | "schemas" | "folders"
2448        ) {
2449            continue;
2450        }
2451        // `Section::line` is 1-based within the body; the body begins at file
2452        // line `fm_end_line + 1`.
2453        let file_line = fm_end_line + section.line;
2454        push(
2455            issues,
2456            Severity::Warning,
2457            codes::DB_MD_UNKNOWN_SECTION,
2458            rel,
2459            Some(file_line),
2460            None,
2461            format!(
2462                "DB.md has an unrecognized `## {}` section",
2463                section.heading.trim()
2464            ),
2465            Some(
2466                "DB.md sections are `## Agent instructions`, `## Policies`, `## Schemas`, \
2467                 `## Folders` — remove or rename this heading"
2468                    .into(),
2469            ),
2470            vec![],
2471        );
2472    }
2473
2474    // ── `## Schemas` field-declaration lint ──────────────────────────────────
2475    // Without this, every schema misparse is silent: the operator/agent gets no
2476    // signal that DB.md is interpreting their schema differently from what they
2477    // wrote, and downstream records are validated against the degraded schema.
2478    check_db_md_schemas(store, rel, &body, fm_end_line, issues);
2479}
2480
2481/// Lint the parsed `## Schemas` field declarations: an empty field name, a
2482/// duplicate field name within a type, or an unrecognized modifier all parse
2483/// "successfully" into a degraded [`Schema`] today, so a bad declaration never
2484/// surfaces. The parsed schemas live in `store.config.schemas` (directives
2485/// already separated out); this pass reports the suspicious *field* shapes,
2486/// anchored to the `### <type>` heading line so the agent can find the block.
2487fn check_db_md_schemas(
2488    store: &Store,
2489    rel: &Path,
2490    body: &str,
2491    fm_end_line: u32,
2492    issues: &mut Vec<Issue>,
2493) {
2494    if store.config.schemas.is_empty() {
2495        return;
2496    }
2497
2498    // Map each `### <type>` heading (under `## Schemas`) to its file line, so a
2499    // per-type issue can anchor to the declaration block. `extract_sections`
2500    // returns a flat list with 1-based body lines; the body starts at file line
2501    // `fm_end_line + 1`.
2502    let mut type_line: BTreeMap<String, u32> = BTreeMap::new();
2503    let mut current_h2: Option<String> = None;
2504    for section in crate::parser::extract_sections(body) {
2505        match section.level {
2506            2 => current_h2 = Some(section.heading.trim().to_ascii_lowercase()),
2507            3 if current_h2.as_deref() == Some("schemas") => {
2508                // The H3 heading text (as written) is the type name — the same
2509                // key `parse_db_md` inserts into `config.schemas`.
2510                type_line
2511                    .entry(section.heading.trim().to_string())
2512                    .or_insert(fm_end_line + section.line);
2513            }
2514            _ => {}
2515        }
2516    }
2517
2518    for (type_name, schema) in &store.config.schemas {
2519        let line = type_line.get(type_name).copied();
2520        let mut seen: BTreeSet<String> = BTreeSet::new();
2521        for field in &schema.fields {
2522            let name = field.name.trim();
2523
2524            // Empty field name: a `- (string)` / bare `- ` bullet parses to a
2525            // nameless field that can never match a frontmatter key, so its
2526            // required/shape/enum constraints silently never apply.
2527            if name.is_empty() {
2528                push(
2529                    issues,
2530                    Severity::Warning,
2531                    codes::DB_MD_SCHEMA_FIELD,
2532                    rel,
2533                    line,
2534                    None,
2535                    format!("`### {type_name}` has a schema field bullet with no field name"),
2536                    Some(
2537                        "write each field as `- <name> (<modifiers>)`, e.g. `- email (required, email)`"
2538                            .into(),
2539                    ),
2540                    vec![],
2541                );
2542                continue;
2543            }
2544
2545            // Duplicate field name within a type: the second declaration's
2546            // constraints are interpreted independently of the first, so the
2547            // author's intent is ambiguous and likely wrong.
2548            if !seen.insert(name.to_string()) {
2549                push(
2550                    issues,
2551                    Severity::Warning,
2552                    codes::DB_MD_SCHEMA_FIELD,
2553                    rel,
2554                    line,
2555                    Some(name.to_string()),
2556                    format!("`### {type_name}` declares field `{name}` more than once"),
2557                    Some(
2558                        "remove the duplicate field bullet, or merge the modifiers onto one".into(),
2559                    ),
2560                    vec![],
2561                );
2562            }
2563
2564            // Unrecognized modifiers: the parser stashes anything outside the
2565            // known vocabulary (`required` / a shape / `link to …` / `default …`
2566            // / `enum: …`) in `unknown_modifiers`. Surface them as Info so a
2567            // typo'd modifier (`requierd`, `unqiue`) doesn't silently do nothing.
2568            for modifier in &field.unknown_modifiers {
2569                let modifier = modifier.trim();
2570                if modifier.is_empty() {
2571                    continue;
2572                }
2573                push(
2574                    issues,
2575                    Severity::Info,
2576                    codes::DB_MD_SCHEMA_FIELD,
2577                    rel,
2578                    line,
2579                    Some(name.to_string()),
2580                    format!(
2581                        "`### {type_name}` field `{name}` has an unrecognized modifier `{modifier}`"
2582                    ),
2583                    Some(
2584                        "recognized modifiers are `required`, a shape (`string`/`int`/`bool`/`date`/`email`/`currency`/`url`), `link to <prefix>/`, `default <value>`, `enum: <v1>, <v2>, …`"
2585                            .into(),
2586                    ),
2587                    vec![],
2588                );
2589            }
2590        }
2591
2592        // A `unique:` key silently skips any record missing (or leaving empty)
2593        // one of its fields — an incomplete key never collides (`dedup_key`).
2594        // So a key that names a field the schema doesn't mark `required` stops
2595        // checking exactly the records most likely to be re-entered partially
2596        // filled. Surface the gap at the declaration: every key field should
2597        // be a `required` field. (A field declared more than once counts as
2598        // required if any declaration marks it — the duplicate itself is
2599        // already flagged above.)
2600        let mut declared: BTreeMap<&str, bool> = BTreeMap::new();
2601        for f in &schema.fields {
2602            let e = declared.entry(f.name.trim()).or_insert(false);
2603            *e = *e || f.required;
2604        }
2605        let mut flagged: BTreeSet<&str> = BTreeSet::new();
2606        for key_fields in &schema.unique_keys {
2607            for field in key_fields {
2608                let name = field.trim();
2609                if name.is_empty()
2610                    || declared.get(name).copied() == Some(true)
2611                    || !flagged.insert(name)
2612                {
2613                    continue;
2614                }
2615                let message = if declared.contains_key(name) {
2616                    format!(
2617                        "`### {type_name}` `unique:` key field `{name}` is not `required` — a record missing or leaving it empty is silently skipped by the unique check"
2618                    )
2619                } else {
2620                    format!(
2621                        "`### {type_name}` `unique:` key field `{name}` is not declared in the schema, so it can never be `required` — a record missing it is silently skipped by the unique check"
2622                    )
2623                };
2624                push(
2625                    issues,
2626                    Severity::Warning,
2627                    codes::DB_MD_SCHEMA_FIELD,
2628                    rel,
2629                    line,
2630                    Some(name.to_string()),
2631                    message,
2632                    Some(format!(
2633                        "mark `{name}` `required` in `### {type_name}`, or build the `unique:` key from required fields only"
2634                    )),
2635                    vec![],
2636                );
2637            }
2638        }
2639    }
2640}
2641
2642/// The `NOT_A_STORE` issue for a root with no `DB.md`.
2643fn not_a_store_issue(store: &Store) -> Issue {
2644    Issue {
2645        severity: Severity::Error,
2646        code: codes::NOT_A_STORE,
2647        file: store.root.clone(),
2648        line: None,
2649        key: None,
2650        message: format!("{} has no DB.md; not a db.md store", store.root.display()),
2651        suggestion: Some("create a `DB.md` at the store root".into()),
2652        related: vec![],
2653    }
2654}
2655
2656/// True if a store-relative path is a content file: under `sources/` or
2657/// `records/` and not an `index.md`/`index.jsonl`/`log.md`.
2658fn is_content_file(rel: &Path) -> bool {
2659    // Defense in depth: a real content file is always a forward (Normal-only)
2660    // store-relative path. Reject any `..`/absolute/prefix component so a
2661    // malformed object slot judged only by its FIRST component (`records/../..`)
2662    // can never turn a per-file read into a store escape, even if a future caller
2663    // forgets the path-safety gate `changed_objects_since` now applies.
2664    if !is_safe_store_relative_path(rel) {
2665        return false;
2666    }
2667    let Some(first) = rel.iter().next().and_then(|s| s.to_str()) else {
2668        return false;
2669    };
2670    if !matches!(first, "sources" | "records") {
2671        return false;
2672    }
2673    let name = rel.file_name().and_then(|s| s.to_str()).unwrap_or("");
2674    // Only the derived catalog twins are meta INSIDE a layer. `DB.md` / `log.md`
2675    // are reserved meta only at the store ROOT, which the `first` layer check
2676    // above already excludes — so a content file named `log.md` / `DB.md` inside
2677    // a layer (e.g. `records/docs/log.md`) is real content, consistent with
2678    // `Store::walk`.
2679    if matches!(name, "index.md" | "index.jsonl") {
2680        return false;
2681    }
2682    name.ends_with(".md")
2683}
2684
/// True when `rel` is one of the store's root meta files: a path made of
/// exactly one normal component named `DB.md` or `log.md`.
///
/// A path with any parent directory (`records/docs/log.md`) is not a root
/// file and yields `false`, as does a path whose first component is not a
/// plain name (for example `./DB.md`).
fn is_root_meta_file(rel: &Path) -> bool {
    let mut parts = rel.components();
    match (parts.next(), parts.next()) {
        // Exactly one component, and it is a plain file name.
        (Some(Component::Normal(name)), None) => {
            matches!(name.to_str(), Some("DB.md" | "log.md"))
        }
        _ => false,
    }
}
2701
/// True when the final component of `rel` is `index.md` or `index.jsonl`,
/// regardless of how deep the path is.
///
/// These are the derived index-catalog files. Their wiki-links are generated
/// entries rather than authored body links, and their integrity is the job of
/// the index checks, so callers use this predicate to keep them out of the
/// body-link check.
fn is_index_catalog_file(rel: &Path) -> bool {
    let Some(name) = rel.file_name().and_then(|n| n.to_str()) else {
        return false;
    };
    name == "index.md" || name == "index.jsonl"
}
2715
/// Split file text into `(frontmatter_yaml, body, closing_fence_line)`.
///
/// The frontmatter block must open on the very first line with `---` and
/// closes at the next line that is `---`; trailing whitespace on either fence
/// is ignored. Returns `None` when the text is empty, does not open with a
/// fence, or never closes it.
///
/// * `frontmatter_yaml` — the lines between the fences, each terminated with
///   `\n`.
/// * `body` — the lines after the closing fence joined with `\n` (so without
///   a trailing newline).
/// * `closing_fence_line` — 1-based line number of the closing fence.
fn split_frontmatter(text: &str) -> Option<(String, String, u32)> {
    // Tolerate a single leading UTF-8 BOM so a BOM-prefixed file is not read
    // as having no frontmatter.
    let text = text.strip_prefix('\u{feff}').unwrap_or(text);

    let all_lines: Vec<&str> = text.lines().collect();
    let (opening, rest) = all_lines.split_first()?;
    if opening.trim_end() != "---" {
        return None;
    }

    // Index (within `rest`) of the closing fence; none means an unclosed block.
    let fence_idx = rest.iter().position(|l| l.trim_end() == "---")?;

    let mut yaml = String::new();
    for yaml_line in &rest[..fence_idx] {
        yaml.push_str(yaml_line);
        yaml.push('\n');
    }

    // `rest[0]` is file line 2, so `rest[fence_idx]` is file line fence_idx + 2.
    let close_line = (fence_idx + 2) as u32;
    let body = rest[fence_idx + 1..].join("\n");
    Some((yaml, body, close_line))
}
2752
2753/// True when `body` opens with a second frontmatter block: a leading `---`
2754/// fence pair whose contents parse as a non-empty YAML mapping. Requiring a
2755/// MAPPING is what keeps a `---` thematic-break rule or a fenced ```yaml
2756/// example from false-firing — only genuinely misplaced frontmatter parses as
2757/// a mapping. Leading blank lines are skipped so `\n---\n…` is still caught.
2758/// Reuses the same fence-splitting the format itself uses, so this fires
2759/// exactly when the body would independently parse as having frontmatter.
2760fn body_opens_with_frontmatter(body: &str) -> bool {
2761    let start: String = body
2762        .lines()
2763        .skip_while(|l| l.trim().is_empty())
2764        .collect::<Vec<_>>()
2765        .join("\n");
2766    match split_frontmatter(&start) {
2767        Some((yaml, _, _)) => matches!(
2768            serde_norway::from_str::<Value>(&yaml),
2769            Ok(Value::Mapping(m)) if !m.is_empty()
2770        ),
2771        None => false,
2772    }
2773}
2774
2775/// Read just the `summary` field of a file, or `None` if absent/unparseable.
2776fn read_summary(abs: &Path) -> Option<String> {
2777    let text = std::fs::read_to_string(abs).ok()?;
2778    let (yaml, _, _) = split_frontmatter(&text)?;
2779    let value: Value = serde_norway::from_str(&yaml).ok()?;
2780    if let Value::Mapping(m) = value {
2781        m.get(Value::String("summary".into()))
2782            .and_then(scalar_string)
2783    } else {
2784        None
2785    }
2786}
2787
2788/// Convert a `serde_norway` mapping into a string-keyed [`BTreeMap`], dropping
2789/// non-string keys (frontmatter keys are always strings).
2790fn yaml_map_to_btree(map: &serde_norway::Mapping) -> BTreeMap<String, Value> {
2791    let mut out = BTreeMap::new();
2792    for (k, v) in map {
2793        if let Value::String(s) = k {
2794            out.insert(s.clone(), v.clone());
2795        }
2796    }
2797    out
2798}
2799
2800/// A scalar YAML value as a string (`String`/`Number`/`Bool`); `None` for
2801/// sequences/mappings/null.
2802fn scalar_string(v: &Value) -> Option<String> {
2803    match v {
2804        Value::String(s) => Some(s.clone()),
2805        Value::Number(n) => Some(n.to_string()),
2806        Value::Bool(b) => Some(b.to_string()),
2807        _ => None,
2808    }
2809}
2810
2811/// True if a frontmatter value carries no content for a *required*-field check:
2812/// a YAML `null` (`name:`), an empty sequence (`name: []`), an empty mapping
2813/// (`name: {}`), or a blank/whitespace-only scalar (`name: ""`). A non-empty
2814/// list or mapping is NOT treated as empty here — a structurally-wrong value on
2815/// a shape/enum field is caught by the later non-scalar shape check, not by the
2816/// required-presence check.
2817fn is_empty_value(v: &Value) -> bool {
2818    match v {
2819        Value::Null => true,
2820        Value::Sequence(items) => items.is_empty(),
2821        Value::Mapping(map) => map.is_empty(),
2822        other => scalar_string(other)
2823            .map(|s| s.trim().is_empty())
2824            .unwrap_or(true),
2825    }
2826}
2827
2828/// True if `tags` is a flat YAML sequence of scalars. A mapping, a scalar, or a
2829/// sequence containing a nested sequence/mapping → false (`TAGS_MALFORMED`).
2830fn is_flat_scalar_list(v: &Value) -> bool {
2831    match v {
2832        Value::Sequence(items) => items.iter().all(|it| scalar_string(it).is_some()),
2833        _ => false,
2834    }
2835}
2836
2837/// Extract every frontmatter wiki-link, returning `(key, Link)` pairs with the
2838/// link's 1-based file line. **Text-based, by necessity:** an unquoted
2839/// `company: [[records/companies/x]]` parses in YAML as a nested *sequence*, not
2840/// a string (because `[[x]]` is YAML flow-list-in-a-list); a quoted
2841/// `"[[...]]"` parses as a string. Scanning the raw frontmatter text catches
2842/// both forms uniformly, the way the link textually appears — the doctrine view.
2843///
2844/// `fm_start_line` is the file line of the first YAML line (file line 2, since
2845/// line 1 is the opening `---`), so the returned `Link::line` is absolute.
2846fn frontmatter_link_fields_text(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, Link)> {
2847    let mut out = Vec::new();
2848    for (key, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2849        for link in links {
2850            out.push((key.clone(), link));
2851        }
2852    }
2853    out
2854}
2855
2856/// The wiki-link targets declared under a single top-level frontmatter key
2857/// (text-based; handles quoted + unquoted forms). Empty if the key is absent or
2858/// carries no `[[...]]`.
2859fn frontmatter_links_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Vec<Link> {
2860    for (k, _value_text, links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2861        if k == key {
2862            return links;
2863        }
2864    }
2865    Vec::new()
2866}
2867
2868/// The raw value text under a single top-level frontmatter key (the remainder of
2869/// the key line plus any indented continuation/sequence lines), trimmed. Used to
2870/// decide whether a `link to` field holds a plain string vs. a wiki-link.
2871fn frontmatter_raw_value_for_key(fm_yaml: &str, key: &str, fm_start_line: u32) -> Option<String> {
2872    for (k, value_text, _links) in frontmatter_key_blocks(fm_yaml, fm_start_line) {
2873        if k == key {
2874            return Some(value_text);
2875        }
2876    }
2877    None
2878}
2879
2880/// Split a frontmatter YAML block into `(key, raw_value_text, wiki_links)` for
2881/// each top-level key. A top-level key is a line with no leading indentation in
2882/// `name:` form; its value spans the rest of that line plus any deeper-indented
2883/// continuation lines (block scalars, block sequences) until the next top-level
2884/// key. Wiki-links are every `[[...]]` found anywhere in that span, with their
2885/// absolute file line.
2886fn frontmatter_key_blocks(fm_yaml: &str, fm_start_line: u32) -> Vec<(String, String, Vec<Link>)> {
2887    let mut blocks: Vec<(String, String, Vec<Link>)> = Vec::new();
2888    let mut current: Option<(String, String, Vec<Link>)> = None;
2889
2890    for (idx, raw_line) in fm_yaml.lines().enumerate() {
2891        let file_line = fm_start_line + idx as u32;
2892        let indented = raw_line.starts_with(' ') || raw_line.starts_with('\t');
2893        let trimmed = raw_line.trim();
2894
2895        // A new top-level key: no indentation, `name:` prefix, not a list dash or
2896        // comment. (Indented or dash lines belong to the current key's value.)
2897        let new_key = if !indented && !trimmed.starts_with('#') && !trimmed.starts_with('-') {
2898            top_level_key(raw_line)
2899        } else {
2900            None
2901        };
2902
2903        if let Some((key, after)) = new_key {
2904            if let Some(done) = current.take() {
2905                blocks.push(done);
2906            }
2907            let mut links = Vec::new();
2908            collect_line_links(after, file_line, &mut links);
2909            current = Some((key, after.trim().to_string(), links));
2910        } else if let Some((_k, value_text, links)) = current.as_mut() {
2911            // Continuation of the current key's value (indented or dash line).
2912            if !value_text.is_empty() {
2913                value_text.push('\n');
2914            }
2915            value_text.push_str(trimmed);
2916            collect_line_links(raw_line, file_line, links);
2917        }
2918    }
2919    if let Some(done) = current.take() {
2920        blocks.push(done);
2921    }
2922    blocks
2923}
2924
/// Split a frontmatter line of the form `name: value` at its first colon.
///
/// Returns the trimmed key and the untrimmed text after the colon. Returns
/// `None` when the line has no colon, or when the key is empty or contains
/// anything other than alphanumerics, `_` and `-`.
fn top_level_key(line: &str) -> Option<(String, &str)> {
    let colon = line.find(':')?;
    let name = line[..colon].trim();
    let value = &line[colon + 1..];

    let well_formed = !name.is_empty()
        && name
            .chars()
            .all(|c| c == '_' || c == '-' || c.is_alphanumeric());
    well_formed.then(|| (name.to_string(), value))
}
2939
2940/// Append every `[[target]]` / `[[target|display]]` found in `s` to `links`,
2941/// each tagged with `file_line`.
2942fn collect_line_links(s: &str, file_line: u32, links: &mut Vec<Link>) {
2943    let bytes = s.as_bytes();
2944    let mut i = 0;
2945    while i + 1 < bytes.len() {
2946        if bytes[i] == b'[' && bytes[i + 1] == b'[' {
2947            if let Some(close) = s[i + 2..].find("]]") {
2948                let inner = &s[i + 2..i + 2 + close];
2949                // Guard against `[[[` (nested) double-counting: the inner must
2950                // not itself open another `[[`.
2951                let target = inner
2952                    .trim_start_matches('[')
2953                    .split('|')
2954                    .next()
2955                    .unwrap_or(inner)
2956                    .trim()
2957                    .to_string();
2958                if !target.is_empty() {
2959                    links.push(Link {
2960                        target,
2961                        line: file_line,
2962                    });
2963                }
2964                i = i + 2 + close + 2;
2965                continue;
2966            }
2967        }
2968        i += 1;
2969    }
2970}
2971
2972/// Extract every `[[...]]` wiki-link from a body, with 1-based line numbers.
2973/// Skips fenced code blocks, so example links in docs don't trip the validator.
2974///
2975/// Fence tracking matches the toolkit's parser ([`crate::parser`]'s
2976/// `extract_sections`): an open fence is `(fence char, run length)` and closes
2977/// only on a line that is the **same** fence character with a run **at least as
2978/// long**. A naive "toggle a bool on any ``` or ~~~ line" inverts the state when
2979/// a `~~~` block legally contains a ```` ``` ```` line (the standard way to
2980/// document a backtick fence) — the inner backtick line would flip `in_fence`
2981/// off and the demo `[[…]]` inside the code block would be checked as a live
2982/// link, falsely flagging a legal store.
2983fn extract_wiki_links(body: &str) -> Vec<Link> {
2984    let mut out = Vec::new();
2985    let mut fence: Option<(u8, usize)> = None;
2986    for (idx, line) in body.lines().enumerate() {
2987        let content = line.trim_end_matches('\r');
2988        if let Some(f) = fence {
2989            // Inside a fence: the only thing that matters is whether THIS line
2990            // closes it (matching char, run ≥ the opening run). Everything else
2991            // is opaque code — no link extraction.
2992            if fence_closes(content, f) {
2993                fence = None;
2994            }
2995            continue;
2996        }
2997        if let Some(opened) = fence_opens(content) {
2998            fence = Some(opened);
2999            continue;
3000        }
3001        let line_no = (idx + 1) as u32;
3002        let bytes = line.as_bytes();
3003        let mut i = 0;
3004        while i + 1 < bytes.len() {
3005            if bytes[i] == b'[' && bytes[i + 1] == b'[' {
3006                if let Some(close) = line[i + 2..].find("]]") {
3007                    let inner = &line[i + 2..i + 2 + close];
3008                    let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3009                    // Skip a triple-bracket `[[[…` opening: the inner content
3010                    // starts with `[`, so this is the rejected flow-form list
3011                    // mis-encoding (`[[[a]], [[b]]]`), not a real wiki-link. A
3012                    // legitimate target never starts with `[`. The frontmatter
3013                    // `WIKI_LINK_FLOW_FORM_LIST` check already owns that error;
3014                    // extracting a bogus body link here would double-report it as
3015                    // a spurious `WIKI_LINK_SHORT_FORM`.
3016                    if !target.is_empty() && !target.starts_with('[') {
3017                        out.push(Link {
3018                            target,
3019                            line: line_no,
3020                        });
3021                    }
3022                    i = i + 2 + close + 2;
3023                    continue;
3024                }
3025            }
3026            i += 1;
3027        }
3028    }
3029    out
3030}
3031
/// Recognise the opening line of a fenced code block and return
/// `(fence byte, run length)`.
///
/// Local mirror of the parser's `opening_fence`, so the validator tracks fences
/// the same way as the rest of the toolkit. A fence is a run of at least three
/// backticks or three tildes, preceded by at most three spaces of indent. For a
/// backtick fence the text after the run (the info string) must not contain a
/// backtick; a tilde fence has no such restriction.
fn fence_opens(line: &str) -> Option<(u8, usize)> {
    let rest = line.trim_start_matches(' ');
    if line.len() - rest.len() > 3 {
        return None;
    }

    let marker = match rest.as_bytes().first() {
        Some(&b) if b == b'`' || b == b'~' => b,
        _ => return None,
    };

    // Length of the leading run of the fence character.
    let run = rest.bytes().take_while(|&b| b == marker).count();
    if run < 3 {
        return None;
    }

    let info = &rest[run..];
    if marker == b'`' && info.contains('`') {
        return None;
    }
    Some((marker, run))
}
3057
/// Whether `line` closes the open `fence`, given as `(fence byte, opening run
/// length)`.
///
/// The line must be indented by at most three spaces, start with a run of the
/// **same** fence character at least as long as the opening run, and carry
/// nothing but whitespace afterwards. Local mirror of the parser's
/// `is_closing_fence`: a run of the *other* fence character (a ```` ``` ````
/// line inside a `~~~` block) does not close the outer fence.
fn fence_closes(line: &str, fence: (u8, usize)) -> bool {
    let (marker, open_len) = fence;
    let rest = line.trim_start_matches(' ');
    let indent = line.len() - rest.len();
    let run = rest.len() - rest.trim_start_matches(marker as char).len();

    indent <= 3 && run >= open_len && rest[run..].trim().is_empty()
}
3075
3076/// Detect the frontmatter INLINE flow-form wiki-link-list mis-encoding —
3077/// `attendees: [[[a]], [[b]]]` — and return the offending keys.
3078///
3079/// **Scoped to the inline value on the key line.** The SPEC's canonical
3080/// list-of-links form is the *unquoted YAML block sequence* (`- [[a]]` per
3081/// indented line), which is explicitly correct (SPEC § Linking) and MUST NOT be
3082/// flagged — even though, parsed whole, it nests the same way the rejected
3083/// inline flow form does. So this check looks only at the value written *inline*
3084/// after the colon: if it opens a flow sequence (`[…]`) whose parsed shape is a
3085/// nested sequence (a list whose items are themselves lists — the wiki-link-list
3086/// mis-encoding), it is flagged. A key with no inline value (the block form,
3087/// whose items live on continuation lines) is never inspected here.
3088///
3089/// Parsing the inline value (rather than a literal `starts_with("[[[")` text
3090/// test) is what catches the whitespace variant `attendees: [ [[a]] ]`, which
3091/// encodes the identical nested sequence but evaded the old prefix match.
3092fn detect_flow_form_link_lists(fm_yaml: &str) -> Vec<String> {
3093    let mut out = Vec::new();
3094    for line in fm_yaml.lines() {
3095        // Top-level key lines only (no indentation, not a comment or list dash).
3096        if line.starts_with(' ') || line.starts_with('\t') {
3097            continue;
3098        }
3099        let Some((key, rest)) = line.split_once(':') else {
3100            continue;
3101        };
3102        let key = key.trim();
3103        if key.is_empty()
3104            || key.starts_with('#')
3105            || key.starts_with('-')
3106            || !key
3107                .chars()
3108                .all(|c| c.is_alphanumeric() || c == '_' || c == '-')
3109        {
3110            continue;
3111        }
3112        let rest = rest.trim();
3113        // Only an inline flow sequence (`[…]`) on the key line is a candidate;
3114        // the unquoted block form has an empty inline value and is never flagged.
3115        if !rest.starts_with('[') {
3116            continue;
3117        }
3118        // Parse just the inline value and test its shape: a list whose items are
3119        // themselves lists is the wiki-link-list mis-encoding (`[[[a]]]` parses
3120        // to `Seq[Seq[Seq[String]]]`; the scalar inline link `[[a]]` is only
3121        // `Seq[Seq[String]]` and is NOT flagged).
3122        if let Ok(Value::Sequence(items)) = serde_norway::from_str::<Value>(rest) {
3123            let nested = items.iter().any(|item| match item {
3124                Value::Sequence(inner) => inner.iter().any(|x| matches!(x, Value::Sequence(_))),
3125                _ => false,
3126            });
3127            if nested {
3128                out.push(key.to_string());
3129            }
3130        }
3131    }
3132    out
3133}
3134
/// Whether a bare target (no `.md`) is a full store-relative path: its first
/// `/`-separated segment is a known layer (`sources` or `records`) and
/// something non-empty follows the first `/`.
fn is_full_store_path(bare: &str) -> bool {
    match bare.split_once('/') {
        Some((layer, remainder)) => {
            matches!(layer, "sources" | "records") && !remainder.is_empty()
        }
        None => false,
    }
}
3143
/// Whether `path` stays inside the store when joined onto the store root.
///
/// Validator inputs come from user-authored markdown/JSON sidecars, so a
/// validation probe must never become a filesystem escape. A path is safe when
/// it has no `..`, root, or platform-prefix component and names at least one
/// normal component; `.` components are tolerated. The empty path and a bare
/// `.` are not safe because they name nothing.
fn is_safe_store_relative_path(path: &Path) -> bool {
    let escapes = path.components().any(|part| {
        matches!(
            part,
            Component::ParentDir | Component::RootDir | Component::Prefix(_)
        )
    });
    let names_something = path
        .components()
        .any(|part| matches!(part, Component::Normal(_)));

    !escapes && names_something
}
3158
3159fn safe_md_target_rel(bare: &str) -> Option<PathBuf> {
3160    let path = Path::new(bare);
3161    if !is_safe_store_relative_path(path) {
3162        return None;
3163    }
3164    Some(PathBuf::from(format!("{bare}.md")))
3165}
3166
/// Outcome of resolving a wiki-link / index-entry target against the store on
/// disk.
enum TargetResolution {
    /// A file was found for the target, either at the path as written or at
    /// the path with `.md` appended.
    Exists,
    /// The target is a safe store-relative path, but no file was found for it.
    Missing,
    /// The target would escape the store (absolute path, `..`, or a platform
    /// prefix), so it is never probed on disk.
    Unsafe,
}
3176
3177/// Resolve a bare wiki-link / index-entry target the way the graph engine does
3178/// ([`crate::graph`]'s `resolve_existing`): try the path **as written** first
3179/// (so a link to a raw non-`.md` source file kept verbatim under `sources/` —
3180/// `[[sources/emails/x.eml]]`, `[[sources/contracts/y.pdf]]` — resolves to the
3181/// real file), then the `.md`-appended path (the common case for content
3182/// pages). Without trying the literal path first, a legal link to a raw source
3183/// file is wrongly flagged `WIKI_LINK_BROKEN` even though `graph backlinks`
3184/// resolves it.
3185fn resolve_wiki_target(store: &Store, bare: &str) -> TargetResolution {
3186    // The literal path and the `.md`-appended path share the same safety check
3187    // (`safe_md_target_rel` only differs by appending `.md`), so an unsafe bare
3188    // target is unsafe in both forms.
3189    if !is_safe_store_relative_path(Path::new(bare)) {
3190        return TargetResolution::Unsafe;
3191    }
3192    match resolved_target_abs(store, bare) {
3193        Some(_) => TargetResolution::Exists,
3194        None => TargetResolution::Missing,
3195    }
3196}
3197
3198/// The absolute on-disk path a bare wiki-link / index-entry target resolves to,
3199/// trying the literal path first, then `.md`-appended — mirroring the graph
3200/// engine. `None` when neither exists, or when the bare target escapes the store
3201/// (callers that need to distinguish unsafe from merely-missing use
3202/// [`resolve_wiki_target`]).
3203///
3204/// **Existence is EXACT-CASE, deliberately platform-independent.** A db.md store
3205/// is Git-synced across machines, so a `validate --all` that passes on the
3206/// author's box must guarantee link integrity on the box that serves the store.
3207/// Bare `Path::is_file()` honors the *host* filesystem's case sensitivity: on
3208/// case-insensitive APFS/macOS (or NTFS) a wrong-case link `[[records/x/BOB]]`
3209/// resolves to the on-disk `records/x/bob.md` and passes — but on case-sensitive
3210/// Linux that file genuinely does not exist (`WIKI_LINK_BROKEN`, per SPEC.md
3211/// § Validation: "target file doesn't exist"). To stay platform-independent we
3212/// confirm not just that *a* file exists for the candidate but that its real
3213/// on-disk casing matches the requested store-relative path character-for-
3214/// character (via [`disk_case_matches`]); a case mismatch is treated as NOT
3215/// found, so macOS reports the same broken links Linux would.
3216///
3217/// NOTE on the residual validate-vs-graph divergence on macOS: the graph engine
3218/// ([`crate::graph`]) intentionally mirrors host `is_file()` + ASCII-lowercased
3219/// keys for its internal backlink/rename bookkeeping on a *single* host, so on
3220/// case-insensitive macOS `graph backlinks` will still resolve a wrong-case link
3221/// that `validate` now flags. That divergence is by design: the graph's job is
3222/// single-host consistency; `validate`'s job is cross-platform link integrity.
3223fn resolved_target_abs(store: &Store, bare: &str) -> Option<PathBuf> {
3224    if !is_safe_store_relative_path(Path::new(bare)) {
3225        return None;
3226    }
3227    // The literal path, as written (e.g. an `.eml`/`.pdf` source file kept
3228    // verbatim under `sources/`).
3229    let literal = store.root.join(bare);
3230    if literal.is_file() && disk_case_matches(store, &literal, bare) {
3231        return Some(literal);
3232    }
3233    // The `.md`-appended path (a content page referenced without its extension).
3234    let with_md_rel = format!("{bare}.md");
3235    let with_md = store.root.join(&with_md_rel);
3236    if with_md.is_file() && disk_case_matches(store, &with_md, &with_md_rel) {
3237        return Some(with_md);
3238    }
3239    None
3240}
3241
3242/// True if `abs` (already confirmed to be an existing file under `store.root`)
3243/// has the exact on-disk casing of the requested store-relative path `requested`.
3244///
3245/// Makes wiki-link existence resolution platform-independent: on case-insensitive
3246/// filesystems (APFS/macOS, NTFS) `Path::is_file()` says yes to a wrong-case
3247/// path, so we canonicalize the candidate — which returns the *real* on-disk
3248/// casing — and compare its store-relative portion to `requested`
3249/// case-sensitively. A mismatch means the file the link actually names does not
3250/// exist on a case-sensitive host, so the caller treats it as not found.
3251///
3252/// Conservative on `canonicalize` failure: if we cannot read the real path (a
3253/// transient FS error, a symlink we cannot resolve, a root that is itself a
3254/// symlink we cannot strip), we fall back to accepting the `is_file()` result
3255/// rather than producing a spurious `WIKI_LINK_BROKEN`. This keeps the check
3256/// additive — it only ever *adds* the case-mismatch detection; it never makes a
3257/// genuinely-resolvable correct-case link fail.
3258fn disk_case_matches(store: &Store, abs: &Path, requested: &str) -> bool {
3259    let Ok(canon_abs) = abs.canonicalize() else {
3260        return true; // cannot read real casing — don't invent a broken link
3261    };
3262    // Strip the store root (also canonicalized so a symlinked root still cancels)
3263    // to get the real on-disk store-relative path, then compare to what the link
3264    // asked for. `canonicalize` on the root may itself fail (e.g. the root no
3265    // longer exists by the time we probe) — be conservative there too.
3266    let Ok(canon_root) = store.root.canonicalize() else {
3267        return true;
3268    };
3269    let Ok(disk_rel) = canon_abs.strip_prefix(&canon_root) else {
3270        // The real file lives outside the (canonical) root — e.g. reached via a
3271        // symlink in the store. Containment is already enforced by
3272        // `is_safe_store_relative_path`; here we simply cannot make a
3273        // case-comparison, so don't manufacture a broken link.
3274        return true;
3275    };
3276    // Compare store-relative paths component-by-component, case-sensitively,
3277    // independent of the host's path separator and case folding.
3278    disk_rel == Path::new(requested)
3279}
3280
/// Whether a bare target path equals `prefix` or lies beneath it (both given
/// without `.md`). Trailing `/` on `prefix` is ignored, and the match is on
/// whole path segments: `records/peoplex` is not under `records/people`.
fn path_under_prefix(bare: &str, prefix: &str) -> bool {
    match bare.strip_prefix(prefix.trim_end_matches('/')) {
        // Either the same path, or the next character starts a new segment.
        Some(remainder) => remainder.is_empty() || remainder.starts_with('/'),
        None => false,
    }
}
3286
/// The type-folder of a store-relative content path: `<layer>/<type-folder>`,
/// i.e. the folder directly under the layer (deeper date-shards roll up to it).
///
/// Returns `None` when the path has fewer than three components (a file sitting
/// directly in a layer folder) or its first component is not `sources` or
/// `records`. Components that are not valid UTF-8 are skipped before counting.
fn type_folder_of(rel: &Path) -> Option<PathBuf> {
    let comps: Vec<&str> = rel.iter().filter_map(|part| part.to_str()).collect();
    match comps.as_slice() {
        // layer / type-folder / at least one more component
        [layer, folder, _, ..] if matches!(*layer, "sources" | "records") => {
            Some(PathBuf::from(*layer).join(*folder))
        }
        _ => None,
    }
}
3300
/// The layer directory (`records` / `sources`) that a *loose* content file
/// sits directly in.
///
/// The path must have exactly two components with a known layer first;
/// anything else — a file inside a type-folder, a path outside the layers —
/// gives `None`. Components that are not valid UTF-8 are skipped before
/// counting. Counterpart to the index crate's `loose_layer_of`, kept local so
/// `validate` needs no index internals.
fn loose_layer_dir(rel: &Path) -> Option<PathBuf> {
    let comps: Vec<&str> = rel.iter().filter_map(|part| part.to_str()).collect();
    match comps.as_slice() {
        [layer, _file] if matches!(*layer, "sources" | "records") => Some(PathBuf::from(*layer)),
        _ => None,
    }
}
3312
3313/// **SWEEP.** Walk every `.md` content file under `sources/`/`records/`,
3314/// returning store-relative paths to be parsed in full. Skips hidden dirs and
3315/// the index twin (`index.jsonl`). Used only by `validate_all`; the working-set
3316/// incoming-linker scan rides the embedded-ripgrep `Store::find_links_to_any`
3317/// (a single presence-only pass), so the loop default never walks-and-*parses*
3318/// the whole content tree.
3319///
3320/// **`log/` is NOT pruned here.** Only the *root-level* `log/` rotation archive
3321/// is reserved (`Store::is_in_log_dir` checks only the first path component);
3322/// the walk roots are the two layers, so the root archive is already out of
3323/// scope. A `log`-named folder *inside* a layer (e.g. `records/log/` — a
3324/// decision log) is real content (see `is_content_file`), so pruning every
3325/// `name == "log"` made `--all` silently skip those files — reporting fewer
3326/// errors than the default working-set scope on the same store.
3327fn walk_content_files(root: &Path) -> Vec<PathBuf> {
3328    let mut out = Vec::new();
3329    for layer in ["sources", "records"] {
3330        let base = root.join(layer);
3331        if !base.is_dir() {
3332            continue;
3333        }
3334        for entry in walkdir::WalkDir::new(&base)
3335            // Follow symlinks, matching the loop-default `md_walker`
3336            // (store.rs `follow_links(true)`): a content file that is a symlink
3337            // into the store, or that lives in a symlinked-in type-folder, is
3338            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3339            // `walk_all_md`, both following symlinks). Without this the `--all`
3340            // sweep silently SKIPPED such files, so the authoritative superset
3341            // reported FEWER issues than the loop scope on the same store —
3342            // inverting the `--all`-is-the-superset contract. walkdir's loop
3343            // detection drops a symlink cycle (yields an Err that `.flatten()`
3344            // discards), so this cannot hang.
3345            .follow_links(true)
3346            .into_iter()
3347            .filter_entry(|e| {
3348                let name = e.file_name().to_str().unwrap_or("");
3349                !name.starts_with('.')
3350            })
3351            .flatten()
3352        {
3353            if !entry.file_type().is_file() {
3354                continue;
3355            }
3356            let name = entry.file_name().to_str().unwrap_or("");
3357            if name.ends_with(".md") && name != "index.md" {
3358                if let Ok(rel) = entry.path().strip_prefix(root) {
3359                    out.push(rel.to_path_buf());
3360                }
3361            }
3362        }
3363    }
3364    out.sort();
3365    out
3366}
3367
3368/// Every `index.md` under the store (root + layers + type-folders), as
3369/// store-relative paths. Used to detect orphan indexes. Like
3370/// [`walk_content_files`], a `log`-named folder *inside* a layer is real content
3371/// and its `index.md` is not pruned (only the root-level `log/` archive is
3372/// reserved, and the walk roots are the two layers, so it is already
3373/// out of scope).
3374fn walk_index_files(root: &Path) -> Vec<PathBuf> {
3375    let mut out = Vec::new();
3376    if root.join("index.md").is_file() {
3377        out.push(PathBuf::from("index.md"));
3378    }
3379    for layer in ["sources", "records"] {
3380        let base = root.join(layer);
3381        if !base.is_dir() {
3382            continue;
3383        }
3384        for entry in walkdir::WalkDir::new(&base)
3385            // Follow symlinks, matching the loop-default `md_walker`
3386            // (store.rs `follow_links(true)`): a content file that is a symlink
3387            // into the store, or that lives in a symlinked-in type-folder, is
3388            // checked by `dbmd validate` (the loop default rides `Store::walk` /
3389            // `walk_all_md`, both following symlinks). Without this the `--all`
3390            // sweep silently SKIPPED such files, so the authoritative superset
3391            // reported FEWER issues than the loop scope on the same store —
3392            // inverting the `--all`-is-the-superset contract. walkdir's loop
3393            // detection drops a symlink cycle (yields an Err that `.flatten()`
3394            // discards), so this cannot hang.
3395            .follow_links(true)
3396            .into_iter()
3397            .filter_entry(|e| {
3398                let name = e.file_name().to_str().unwrap_or("");
3399                !name.starts_with('.')
3400            })
3401            .flatten()
3402        {
3403            if entry.file_type().is_file() && entry.file_name().to_str() == Some("index.md") {
3404                if let Ok(rel) = entry.path().strip_prefix(root) {
3405                    out.push(rel.to_path_buf());
3406                }
3407            }
3408        }
3409    }
3410    out.sort();
3411    out
3412}
3413
/// One entry line parsed out of an `index.md`.
struct IndexEntry {
    /// The wiki-link target: the text inside `[[...]]` before any `|display`
    /// segment, trimmed.
    target: String,
    /// The summary text following the separator after the link, if the line
    /// has one.
    summary_text: Option<String>,
    /// 1-based line number of the entry within the index file.
    line: u32,
}
3421
3422/// Parse the `- [[<path>]] — <summary>` entry lines of an `index.md`. Stops at a
3423/// `## More` footer (those lines aren't file entries). Root/layer entries with a
3424/// `|display` segment and a `(N)` count are parsed too — the target is the bare
3425/// path, the summary text is whatever follows the em dash.
3426fn parse_index_entries(text: &str) -> Vec<IndexEntry> {
3427    let mut out = Vec::new();
3428    let mut in_more = false;
3429    for (idx, line) in text.lines().enumerate() {
3430        let trimmed = line.trim_start();
3431        if trimmed.starts_with("## More") {
3432            in_more = true;
3433            continue;
3434        }
3435        if in_more {
3436            continue;
3437        }
3438        if !trimmed.starts_with("- ") {
3439            continue;
3440        }
3441        // Find the first `[[...]]`.
3442        let Some(open) = trimmed.find("[[") else {
3443            continue;
3444        };
3445        let Some(close_rel) = trimmed[open + 2..].find("]]") else {
3446            continue;
3447        };
3448        let inner = &trimmed[open + 2..open + 2 + close_rel];
3449        let target = inner.split('|').next().unwrap_or(inner).trim().to_string();
3450
3451        // Summary text: whatever follows the first em dash (`—`) or ` - `.
3452        let after = &trimmed[open + 2 + close_rel + 2..];
3453        let summary_text = extract_index_entry_summary(after);
3454
3455        out.push(IndexEntry {
3456            target,
3457            summary_text,
3458            line: (idx + 1) as u32,
3459        });
3460    }
3461    out
3462}
3463
3464/// Pull the summary portion out of the text trailing an index entry's
3465/// wiki-link: drop a leading `(N files)` count, then the `—`/`-` separator, then
3466/// strip a trailing `  ·  #tag` suffix **only when it is a genuine tag block**
3467/// (so a literal `·` inside the summary text is preserved, not mistaken for the
3468/// renderer's tag separator).
3469fn extract_index_entry_summary(after: &str) -> Option<String> {
3470    let mut s = after.trim();
3471    // Drop a leading "(N ...)" count segment, if present.
3472    if s.starts_with('(') {
3473        if let Some(close) = s.find(')') {
3474            s = s[close + 1..].trim_start();
3475        }
3476    }
3477    // Require an em dash or hyphen separator before the summary.
3478    let s = if let Some(rest) = s.strip_prefix('—') {
3479        rest.trim()
3480    } else if let Some(rest) = s.strip_prefix('-') {
3481        rest.trim()
3482    } else {
3483        return None;
3484    };
3485    if s.is_empty() {
3486        return None;
3487    }
3488    // Strip a trailing tag block — but ONLY when it matches the EXACT delimiter
3489    // the renderer emits: `  ·  #tag #tag` (a *double*-spaced middot, per
3490    // `crate::index::format_md_entry`'s `format!("  ·  {tags}")`), dropped when
3491    // the file has no tags. The previous code also accepted a *single*-spaced
3492    // ` · ` separator, which collided with a legal summary whose own text ends
3493    // in a single-spaced middot-plus-hashtag tail — e.g. a tagless file with
3494    // `summary: "Standup notes · #standup"`. The renderer round-trips that
3495    // summary verbatim (no tag block, since there are no tags), but the loose
3496    // strip mistook the ` · #standup` for the renderer's tag suffix, compared
3497    // `"Standup notes"` against the file's full summary, and emitted a spurious
3498    // `INDEX_SUMMARY_MISMATCH` that `dbmd index rebuild` could never fix
3499    // (rebuild regenerates the identical line). Matching the renderer's exact
3500    // double-spaced delimiter makes the comparison round-trip. `rsplit_once`
3501    // matches from the right so only the real trailing tag block is considered.
3502    let s = match s.rsplit_once("  ·  ") {
3503        Some((summary, tags)) if is_tag_suffix(tags) => summary.trim(),
3504        _ => s,
3505    };
3506    Some(s.to_string())
3507}
3508
/// Decide whether `s` is a tag block: at least one whitespace-separated token,
/// and every token is a `#` followed by at least one more byte.
///
/// This is the shape the index renderer appends after its `·` separator
/// (`crate::index::format_md_entry`); the check lets callers tell that suffix
/// apart from a literal `·` that belongs to the summary text.
fn is_tag_suffix(s: &str) -> bool {
    let mut tokens = s.split_whitespace().peekable();
    // An empty / all-whitespace string has no tokens and is not a tag block.
    tokens.peek().is_some() && tokens.all(|tok| tok.starts_with('#') && tok.len() >= 2)
}
3523
3524/// Parse a `log.md` entry header `## [YYYY-MM-DD HH:MM] <kind> | <object>`.
3525/// Returns `(timestamp, kind, object)`; `None` if the timestamp is unparseable
3526/// or the header isn't well-formed.
3527fn parse_log_header(line: &str) -> Option<(DateTime<FixedOffset>, String, Option<String>)> {
3528    let rest = line.strip_prefix("## [")?;
3529    let close = rest.find(']')?;
3530    let ts_str = &rest[..close];
3531    let tail = rest[close + 1..].trim();
3532
3533    // Parse `YYYY-MM-DD HH:MM` (the SPEC header form) as a naive local time and
3534    // attach a zero offset — the log header carries minute precision, no zone.
3535    let naive = NaiveDateTime::parse_from_str(ts_str.trim(), "%Y-%m-%d %H:%M").ok()?;
3536    let offset = FixedOffset::east_opt(0)?;
3537    let ts = naive.and_local_timezone(offset).single()?;
3538
3539    // kind | object
3540    let (kind, object) = match tail.split_once('|') {
3541        Some((k, o)) => {
3542            let o = o.trim();
3543            (
3544                k.trim().to_string(),
3545                if o.is_empty() {
3546                    None
3547                } else {
3548                    Some(o.to_string())
3549                },
3550            )
3551        }
3552        None => (tail.to_string(), None),
3553    };
3554    if kind.is_empty() {
3555        return None;
3556    }
3557    Some((ts, kind, object))
3558}
3559
3560/// Every log file that holds entries for the working-set scan: the active
3561/// `log.md` plus every `log/<YYYY-MM>.md` archive. [`Log::append`] rotates
3562/// strictly-prior-month entries into the archives, so the active file alone is
3563/// NOT the full timeline — both the last `validate` cutoff and a changed-but-
3564/// unvalidated object can live in an archive after a month rollover. Reading the
3565/// archives here keeps the working-set readers in sync with the rest of the log
3566/// layer (`Log::since`/`Log::tail`), which deliberately cross archives, and
3567/// prevents `dbmd validate` from silently skipping archived changed files. Reads
3568/// only log headers, never the content store, so the loop budget is preserved.
3569fn log_files_for_working_set(store: &Store) -> Vec<PathBuf> {
3570    let mut files = vec![store.root.join("log.md")];
3571    let archive_dir = store.root.join("log");
3572    if let Ok(entries) = std::fs::read_dir(&archive_dir) {
3573        let mut archives: Vec<PathBuf> = entries
3574            .flatten()
3575            .map(|e| e.path())
3576            .filter(|p| {
3577                p.is_file()
3578                    && p.file_name()
3579                        .and_then(|s| s.to_str())
3580                        .and_then(|n| n.strip_suffix(".md"))
3581                        .is_some_and(is_year_month_archive)
3582            })
3583            .collect();
3584        // Deterministic order (oldest month first); the callers fold across all
3585        // files so order doesn't affect the result, but a stable order keeps the
3586        // scan reproducible.
3587        archives.sort();
3588        files.extend(archives);
3589    }
3590    files
3591}
3592
/// Shape check for a `YYYY-MM` archive stem: exactly four ASCII digits, a `-`,
/// then two ASCII digits — the `log/<YYYY-MM>.md` naming that the rotation in
/// [`crate::log`] emits. Only the shape is checked, not the month's range.
fn is_year_month_archive(s: &str) -> bool {
    match s.as_bytes() {
        [y0, y1, y2, y3, b'-', m0, m1] => [y0, y1, y2, y3, m0, m1]
            .iter()
            .all(|byte| byte.is_ascii_digit()),
        _ => false,
    }
}
3602
3603/// The timestamp of the most recent `validate` entry across the active `log.md`
3604/// **and** the `log/<YYYY-MM>.md` archives — the default working-set cutoff.
3605/// Reads only headers; never the whole store. Archive-aware so a `validate`
3606/// entry that rotated into an archive after a month rollover still anchors the
3607/// cutoff (without this, the cutoff silently resets to `None`).
3608fn last_validate_at(store: &Store) -> Option<DateTime<FixedOffset>> {
3609    let mut latest: Option<DateTime<FixedOffset>> = None;
3610    for file in log_files_for_working_set(store) {
3611        let Ok(text) = std::fs::read_to_string(&file) else {
3612            continue;
3613        };
3614        for line in text.lines() {
3615            if !line.starts_with("## [") {
3616                continue;
3617            }
3618            if let Some((ts, kind, _)) = parse_log_header(line) {
3619                if kind == "validate" {
3620                    latest = Some(match latest {
3621                        Some(p) if p >= ts => p,
3622                        _ => ts,
3623                    });
3624                }
3625            }
3626        }
3627    }
3628    latest
3629}
3630
3631/// The set of content objects changed since `cutoff`, read from log entries
3632/// whose kind mutates a file. When `cutoff` is `None`, every mutating entry
3633/// counts (no prior validate window). Returns store-relative `.md` paths.
3634///
3635/// Scans the active `log.md` **and** every `log/<YYYY-MM>.md` archive: after a
3636/// month rollover [`Log::append`] rotates prior-month entries out of the active
3637/// file, so an object changed-but-never-validated in a prior month lives only in
3638/// an archive. Reading the archives here is what keeps `dbmd validate` from
3639/// silently skipping those files. Reads only log headers, never the content
3640/// store.
3641fn changed_objects_since(
3642    store: &Store,
3643    cutoff: Option<DateTime<FixedOffset>>,
3644) -> BTreeSet<PathBuf> {
3645    let mut out = BTreeSet::new();
3646    for file in log_files_for_working_set(store) {
3647        let Ok(text) = std::fs::read_to_string(&file) else {
3648            continue;
3649        };
3650        for line in text.lines() {
3651            if !line.starts_with("## [") {
3652                continue;
3653            }
3654            let Some((ts, kind, object)) = parse_log_header(line) else {
3655                continue;
3656            };
3657            if let Some(c) = cutoff {
3658                if ts < c {
3659                    continue;
3660                }
3661            }
3662            if !matches!(
3663                kind.as_str(),
3664                "create" | "update" | "ingest" | "rename" | "delete" | "link"
3665            ) {
3666                continue;
3667            }
3668            if let Some(obj) = object {
3669                // The object slot is a store-relative path (or a wiki-link target).
3670                let bare = obj
3671                    .trim()
3672                    .trim_start_matches("[[")
3673                    .trim_end_matches("]]")
3674                    .split('|')
3675                    .next()
3676                    .unwrap_or("")
3677                    .trim()
3678                    .trim_end_matches(".md")
3679                    .to_string();
3680                if bare.is_empty() {
3681                    continue;
3682                }
3683                // Containment: the object slot is a log-header field that can
3684                // carry a `..`/absolute/prefix path (a hand-edited or
3685                // merge-malformed log line). Route it through the same safety gate
3686                // every other disk-touching validator path uses
3687                // (`safe_md_target_rel`, which `link_target_type` already applies)
3688                // so a `records/../../leaky` object cannot make
3689                // `validate_working_set` read + frontmatter-report on a file
3690                // OUTSIDE the store root. An unsafe object is dropped from the
3691                // changed set rather than probed.
3692                if let Some(rel) = safe_md_target_rel(&bare) {
3693                    out.insert(rel);
3694                }
3695            }
3696        }
3697    }
3698    out
3699}
3700
/// Outcome of the [`derived_from_ignored_type`] policy check when it fires:
/// which `derived_from` target resolved to an ignored-type record, and what
/// that record's type is. These two values are what both the validate finding
/// and the write-time warning use to render their message.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DerivedFromIgnored {
    /// The offending `derived_from` wiki-link target, exactly as written: a
    /// bare store-relative path without the `.md` extension.
    pub target: String,
    /// The `type` the target resolved to; it is a member of
    /// `store.config.ignored_types`.
    pub target_type: String,
}
3714
3715/// **The single authoritative `### Ignored types` derivation check.** Decides
3716/// whether a conclusion record derives from an ignored-type record: the
3717/// `meta-type` must be `conclusion`, `### Ignored types` must be non-empty, and
3718/// some `derived_from` target must resolve to a record whose `type` is in
3719/// `ignored_types`. Returns the first such target (and its type), or `None`.
3720///
3721/// Both surfaces call this so the policy lives in exactly one place:
3722/// [`check_content_file`] (read side — `dbmd validate`) feeds it the
3723/// `derived_from` targets it scanned from the raw frontmatter, and the write
3724/// surface (`dbmd write`) feeds it the targets from the composed frontmatter.
3725/// The link *extraction* differs per surface (text-scan with line numbers vs.
3726/// the parsed `Frontmatter`); the *decision* — type gate, target-type
3727/// resolution, and `ignored_types` membership — does not.
3728pub fn derived_from_ignored_type<I, S>(
3729    store: &Store,
3730    meta_type: &str,
3731    derived_from_targets: I,
3732) -> Option<DerivedFromIgnored>
3733where
3734    I: IntoIterator<Item = S>,
3735    S: AsRef<str>,
3736{
3737    if meta_type != "conclusion" || store.config.ignored_types.is_empty() {
3738        return None;
3739    }
3740    for target in derived_from_targets {
3741        let target = target.as_ref();
3742        if let Some(target_type) = link_target_type(store, target) {
3743            if store.config.ignored_types.contains(&target_type) {
3744                return Some(DerivedFromIgnored {
3745                    target: target.to_string(),
3746                    target_type,
3747                });
3748            }
3749        }
3750    }
3751    None
3752}
3753
3754/// Resolve the `type` of a wiki-link target file (bare, no `.md`), or `None`.
3755fn link_target_type(store: &Store, target: &str) -> Option<String> {
3756    let bare = target.trim_end_matches(".md");
3757    let abs = store.root.join(safe_md_target_rel(bare)?);
3758    let text = std::fs::read_to_string(&abs).ok()?;
3759    let (yaml, _, _) = split_frontmatter(&text)?;
3760    let value: Value = serde_norway::from_str(&yaml).ok()?;
3761    if let Value::Mapping(m) = value {
3762        m.get(Value::String("type".into())).and_then(scalar_string)
3763    } else {
3764        None
3765    }
3766}
3767
3768// ── Shape validators ─────────────────────────────────────────────────────────
3769
3770/// True if a string is RFC3339 / ISO-8601 with a time + zone (the
3771/// `created`/`updated` contract: `2026-05-27T08:00:00-07:00`).
3772fn is_iso8601(s: &str) -> bool {
3773    DateTime::parse_from_rfc3339(s.trim()).is_ok()
3774}
3775
3776/// True if a string is an ISO-8601 *date* (`2026-05-27`) or a full RFC3339
3777/// datetime. Type-specific date fields (`expense.date`, `contact.last_touch`)
3778/// accept the date-only form per the SPEC's worked example.
3779fn is_iso8601_date_or_datetime(s: &str) -> bool {
3780    let s = s.trim();
3781    if DateTime::parse_from_rfc3339(s).is_ok() {
3782        return true;
3783    }
3784    chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_ok()
3785}
3786
/// True for `<local>@<domain>` with a non-empty local part and a dotted domain.
///
/// Rules, applied to the trimmed input:
/// - there must be exactly one `@` — a domain that still contains an `@` after
///   the split (the double-`@` typo `sarah@@acme.com`, or `a@b@c.com`) is
///   rejected;
/// - neither part may contain whitespace of any kind (space, tab, newline, …);
/// - the domain must contain a `.` and every dot-separated label must be
///   non-empty, which rejects a leading dot, a trailing dot, and the
///   double-dot typo `sarah@acme..com`.
fn is_email(s: &str) -> bool {
    let s = s.trim();
    let Some((local, domain)) = s.split_once('@') else {
        return false;
    };
    if local.is_empty() || domain.contains('@') {
        return false;
    }
    // Any whitespace, not only the ASCII space, makes the address malformed.
    if local.contains(char::is_whitespace) || domain.contains(char::is_whitespace) {
        return false;
    }
    // Requiring non-empty labels covers `.acme.com`, `acme.com.` and `acme..com`.
    domain.contains('.') && domain.split('.').all(|label| !label.is_empty())
}
3804
/// True for a currency amount: an optional symbol or 3-letter ISO code, then a
/// plain decimal number with optional comma separators and ≤ 2 decimals.
///
/// The numeric part is validated by hand (not `f64::parse`) so the non-numeric
/// floats `f64` accepts — `inf`, `-inf`, `NaN`, and `1e3`-style exponents — are
/// rejected, and the ≤ 2-decimal rule is actually enforced.
///
/// Commas are accepted only as separators *between digit groups of the integer
/// part*: a leading, trailing, or doubled comma (`,100`, `100,`, `1,,000`) and
/// any comma in the fractional part (`1.5,0`) are rejected. Group sizes are not
/// enforced, so `1,00,000`-style grouping still validates.
fn is_currency(s: &str) -> bool {
    let mut t = s.trim();
    // Strip a leading currency symbol …
    for sym in ["$", "€", "£", "¥"] {
        if let Some(rest) = t.strip_prefix(sym) {
            t = rest.trim_start();
            break;
        }
    }
    // … or a leading 3-letter ISO-4217-ish code (`USD 100`, `EUR 9.50`). The
    // code must be exactly three ASCII letters and separated from the number by
    // whitespace, so a bare `USD` with no amount still fails.
    if let Some((head, rest)) = t.split_once(char::is_whitespace) {
        if head.len() == 3 && head.chars().all(|c| c.is_ascii_alphabetic()) {
            t = rest.trim_start();
        }
    }

    // Check comma placement BEFORE the commas are removed; stripping them
    // blindly would let stray commas turn garbage into a valid-looking amount.
    let unsigned = t.strip_prefix(['+', '-']).unwrap_or(t);
    let (int_part, frac_part) = match unsigned.split_once('.') {
        Some((int_part, frac_part)) => (int_part, frac_part),
        None => (unsigned, ""),
    };
    let well_grouped = int_part
        .split(',')
        .all(|group| !group.is_empty() && group.bytes().all(|b| b.is_ascii_digit()));
    if !well_grouped || frac_part.contains(',') {
        return false;
    }

    let cleaned: String = t.chars().filter(|c| *c != ',').collect();
    is_plain_amount(cleaned.trim())
}

/// True for a bare decimal amount: optional sign, ≥ 1 digit, an optional
/// fractional part of 1–2 digits. No exponents, no `inf`/`NaN`, no commas, no
/// empty string.
fn is_plain_amount(s: &str) -> bool {
    let digits = s.strip_prefix(['+', '-']).unwrap_or(s);
    let (int_part, frac_part) = match digits.split_once('.') {
        Some((i, f)) => (i, Some(f)),
        None => (digits, None),
    };
    if int_part.is_empty() || !int_part.bytes().all(|b| b.is_ascii_digit()) {
        return false;
    }
    match frac_part {
        None => true,
        // A present fraction must be one or two digits, so `1.` is rejected too.
        Some(f) => (1..=2).contains(&f.len()) && f.bytes().all(|b| b.is_ascii_digit()),
    }
}
3849
/// True for an http(s) URL: the trimmed input must start with `http://` or
/// `https://` and have at least one character after the scheme.
///
/// Emptiness is judged on what remains after the *matched* scheme, so a
/// single-character host on the shorter scheme (`http://x` — e.g. an
/// intranet/container hostname) is accepted, while a bare `http://` or
/// `https://` is rejected.
fn is_url(s: &str) -> bool {
    let s = s.trim();
    // The two prefixes are mutually exclusive, so trying them in turn is safe.
    let after_scheme = s
        .strip_prefix("http://")
        .or_else(|| s.strip_prefix("https://"));
    matches!(after_scheme, Some(rest) if !rest.is_empty())
}
3864
3865/// A short, deterministic suggestion for a `SCHEMA_SHAPE_MISMATCH`.
3866fn shape_suggestion(shape: Shape) -> String {
3867    match shape {
3868        Shape::String => "use a scalar string".into(),
3869        Shape::Int => "use an integer".into(),
3870        Shape::Bool => "use `true` or `false`".into(),
3871        Shape::Date => "use an ISO-8601 date, e.g. 2026-05-27".into(),
3872        Shape::Email => "use a `<local>@<domain>` address".into(),
3873        Shape::Currency => "use a numeric amount, e.g. 1234.56".into(),
3874        Shape::Url => "use an http(s) URL".into(),
3875    }
3876}
3877
3878/// Suggest a full-path rewrite for a short-form wiki-link. Without the layer we
3879/// can't know the folder, so the suggestion is generic but actionable.
3880fn short_form_suggestion(bare: &str) -> Option<String> {
3881    Some(format!(
3882        "use a full store-relative path, e.g. [[records/contacts/{}]]",
3883        slugish(bare)
3884    ))
3885}
3886
/// Turn a plain string into a filesystem-ish leaf: trim, lowercase, replace
/// each whitespace character with `-`, and drop everything that is not
/// alphanumeric, `-`, `/`, or `_`.
fn slugish(s: &str) -> String {
    let mut slug = String::new();
    for ch in s.trim().to_lowercase().chars() {
        // Whitespace becomes a hyphen first, so it survives the filter below.
        let ch = if ch.is_whitespace() { '-' } else { ch };
        if ch.is_alphanumeric() || matches!(ch, '-' | '/' | '_') {
            slug.push(ch);
        }
    }
    slug
}
3896
3897/// Cross-file asset-manifest integrity (the `--all` sweep). Text-only: it never
3898/// hashes a byte or reads an asset file's contents — byte presence and hash
3899/// correctness are `dbmd assets verify`, not `validate`, so a fresh clone with
3900/// no restored bytes still passes. Cross-checks `assets.jsonl` against every
3901/// content file's `asset`/`assets` declarations.
3902fn check_assets(store: &Store, parsed: &[(PathBuf, Parsed)], issues: &mut Vec<Issue>) {
3903    use crate::assets;
3904
3905    let manifest_rel = Path::new(assets::MANIFEST_FILE);
3906    let manifest_abs = store.root.join(assets::MANIFEST_FILE);
3907
3908    // Lenient manifest read: a malformed line is reported, not fatal.
3909    let mut manifest: BTreeMap<String, assets::AssetRecord> = BTreeMap::new();
3910    if let Ok(text) = std::fs::read_to_string(&manifest_abs) {
3911        for (i, line) in text.lines().enumerate() {
3912            if line.trim().is_empty() {
3913                continue;
3914            }
3915            match serde_json::from_str::<assets::AssetRecord>(line) {
3916                Ok(rec) => {
3917                    manifest.insert(rec.path.clone(), rec);
3918                }
3919                Err(e) => push(
3920                    issues,
3921                    Severity::Error,
3922                    codes::ASSET_MANIFEST_MALFORMED,
3923                    manifest_rel,
3924                    Some((i as u32) + 1),
3925                    None,
3926                    format!("invalid {} record: {e}", assets::MANIFEST_FILE),
3927                    Some("run `dbmd assets scan` to rebuild the manifest".to_string()),
3928                    vec![],
3929                ),
3930            }
3931        }
3932    }
3933
3934    // Per-wrapper declarations: every declared asset must be in the manifest and
3935    // must not point at a markdown content file.
3936    let mut declared: std::collections::BTreeSet<String> = std::collections::BTreeSet::new();
3937    for (rel, p) in parsed {
3938        let Some(map) = &p.fm else {
3939            continue;
3940        };
3941        for decl in assets::declarations_from_yaml_map(map) {
3942            let norm = match assets::normalize_asset_path(&decl.path) {
3943                Ok(n) => n,
3944                Err(_) => continue, // a bad declared path is surfaced by `scan`, not here
3945            };
3946            declared.insert(norm.clone());
3947            let is_md = Path::new(&norm)
3948                .extension()
3949                .and_then(|e| e.to_str())
3950                .map(|e| e.eq_ignore_ascii_case("md"))
3951                .unwrap_or(false);
3952            if is_md {
3953                push(
3954                    issues,
3955                    Severity::Warning,
3956                    codes::ASSET_PATH_IS_CONTENT,
3957                    rel,
3958                    None,
3959                    Some("asset".to_string()),
3960                    format!("asset path `{norm}` points at a markdown content file"),
3961                    Some("assets are raw binaries; reference a non-markdown path".to_string()),
3962                    vec![PathBuf::from(&norm)],
3963                );
3964            }
3965            if !manifest.contains_key(&norm) {
3966                push(
3967                    issues,
3968                    Severity::Error,
3969                    codes::ASSET_UNDECLARED,
3970                    rel,
3971                    None,
3972                    Some("asset".to_string()),
3973                    format!(
3974                        "references asset `{norm}` with no record in {}",
3975                        assets::MANIFEST_FILE
3976                    ),
3977                    Some("run `dbmd assets scan` to catalog it".to_string()),
3978                    vec![PathBuf::from(&norm)],
3979                );
3980            }
3981        }
3982    }
3983
3984    // Per-record: wrapper existence + orphan detection.
3985    for (path, rec) in &manifest {
3986        for w in &rec.wrappers {
3987            if !store.root.join(w).is_file() {
3988                push(
3989                    issues,
3990                    Severity::Error,
3991                    codes::ASSET_WRAPPER_BROKEN,
3992                    Path::new(path),
3993                    None,
3994                    None,
3995                    format!("manifest record for `{path}` names a missing wrapper `{w}`"),
3996                    Some("run `dbmd assets scan` to reconcile the manifest".to_string()),
3997                    vec![PathBuf::from(w)],
3998                );
3999            }
4000        }
4001        if !declared.contains(path) {
4002            push(
4003                issues,
4004                Severity::Warning,
4005                codes::ASSET_MANIFEST_ORPHAN,
4006                Path::new(path),
4007                None,
4008                None,
4009                format!(
4010                    "`{path}` is in {} but no wrapper references it",
4011                    assets::MANIFEST_FILE
4012                ),
4013                Some("run `dbmd assets scan` to drop the orphan, or add a wrapper".to_string()),
4014                vec![],
4015            );
4016        }
4017    }
4018}
4019
4020/// Push a fully-formed [`Issue`].
4021#[allow(clippy::too_many_arguments)]
4022fn push(
4023    issues: &mut Vec<Issue>,
4024    severity: Severity,
4025    code: &'static str,
4026    file: &Path,
4027    line: Option<u32>,
4028    key: Option<String>,
4029    message: String,
4030    suggestion: Option<String>,
4031    related: Vec<PathBuf>,
4032) {
4033    issues.push(Issue {
4034        severity,
4035        code,
4036        file: file.to_path_buf(),
4037        line,
4038        key,
4039        message,
4040        suggestion,
4041        related,
4042    });
4043}
4044
/// Locate a top-level frontmatter key and return its 1-based line *in the
/// file*, or `None` if no line matches.
///
/// A matching line starts with `key` at column 0 (so indented or list-item
/// keys do not count) and has a `:` immediately after the key. The YAML block
/// begins on file line 2 (line 1 is the opening `---`), hence the `+ 2`.
fn fm_key_line(fm_yaml: &str, key: &str) -> Option<u32> {
    fm_yaml
        .lines()
        .position(|line| {
            line.starts_with(key)
                && line
                    .trim_start()
                    .strip_prefix(key)
                    .map_or(false, |rest| rest.starts_with(':'))
        })
        // YAML line index 0 is file line 2.
        .map(|idx| (idx as u32) + 2)
}
4060
4061/// The line a *field-absence* issue (a required key that is missing entirely)
4062/// anchors to: the key's line when present, else line `1` — the frontmatter
4063/// block's opening `---`. A missing key has no line of its own; anchoring it to
4064/// the block top gives the agent (and the `EXPECTED` golden) a stable, non-null
4065/// line to point at instead of an unhelpful `null`.
4066fn fm_key_line_or_top(fm_yaml: &str, key: &str) -> Option<u32> {
4067    fm_key_line(fm_yaml, key).or(Some(1))
4068}
4069
4070/// A stable sort order for issues: by file, then line, then code. Keeps `--json`
4071/// output deterministic across runs.
4072fn issue_order(a: &Issue, b: &Issue) -> std::cmp::Ordering {
4073    a.file
4074        .cmp(&b.file)
4075        .then(a.line.cmp(&b.line))
4076        .then(a.code.cmp(b.code))
4077        .then(a.key.cmp(&b.key))
4078}
4079
4080// ═════════════════════════════════════════════════════════════════════════════
4081//  Tests
4082// ═════════════════════════════════════════════════════════════════════════════
4083
4084#[cfg(test)]
4085mod tests {
4086    use super::*;
4087    use crate::parser::{Config, FieldSpec};
4088    use std::fs;
4089    use tempfile::TempDir;
4090
4091    #[test]
4092    fn split_frontmatter_tolerates_leading_bom() {
4093        // Regression (finding #19 cross-module): a UTF-8 BOM before the opening
4094        // fence must not make validate treat the file as frontmatter-less while
4095        // the catalog indexes it. Pre-fix `first.trim_end() != "---"` was true
4096        // for `\u{feff}---` and the function returned None.
4097        let text = "\u{feff}---\ntype: contact\nsummary: hi\n---\nbody\n";
4098        let parsed = split_frontmatter(text);
4099        assert!(
4100            parsed.is_some(),
4101            "a leading BOM must not hide frontmatter from validate"
4102        );
4103        let (yaml, body, close_line) = parsed.unwrap();
4104        assert_eq!(yaml, "type: contact\nsummary: hi\n");
4105        assert_eq!(body, "body");
4106        assert_eq!(close_line, 4, "BOM is inline on line 1, not a new line");
4107    }
4108
4109    /// A test store builder over a real tempdir. Every helper writes real files
4110    /// so the assertions exercise real behavior, not mocks.
4111    struct Fixture {
4112        dir: TempDir,
4113        config: Config,
4114    }
4115
4116    impl Fixture {
4117        /// A fresh store with a **valid** `DB.md` (the identity contract:
4118        /// `type: db-md` + `scope` + `owner`) and the two layer dirs. A valid
4119        /// DB.md keeps `check_db_md` silent so a "clean store" fixture is truly
4120        /// clean; tests that want a broken DB.md write their own via `write`.
4121        fn new() -> Self {
4122            let dir = TempDir::new().unwrap();
4123            fs::write(
4124                dir.path().join("DB.md"),
4125                "---\ntype: db-md\nscope: company\nowner: Test\n---\n",
4126            )
4127            .unwrap();
4128            for layer in ["sources", "records"] {
4129                fs::create_dir_all(dir.path().join(layer)).unwrap();
4130            }
4131            Fixture {
4132                dir,
4133                config: Config::default(),
4134            }
4135        }
4136
4137        /// A store with no `DB.md` marker.
4138        fn bare() -> Self {
4139            let dir = TempDir::new().unwrap();
4140            Fixture {
4141                dir,
4142                config: Config::default(),
4143            }
4144        }
4145
4146        /// Write a file at a store-relative path, creating parent dirs.
4147        fn write(&self, rel: &str, contents: &str) {
4148            let abs = self.dir.path().join(rel);
4149            fs::create_dir_all(abs.parent().unwrap()).unwrap();
4150            fs::write(abs, contents).unwrap();
4151        }
4152
4153        fn store(&self) -> Store {
4154            Store {
4155                root: self.dir.path().to_path_buf(),
4156                config: self.config.clone(),
4157            }
4158        }
4159
4160        fn store_all(&self) -> Vec<Issue> {
4161            validate_all(&self.store()).unwrap()
4162        }
4163
4164        /// Write the canonical `index.md` + `index.jsonl` at every level via the
4165        /// real builder ([`crate::index::Index::rebuild_all`]) — the same
4166        /// projection a `dbmd index rebuild` produces. Use this (rather than a
4167        /// hand-typed sidecar line) whenever a test asserts a *clean* store, so
4168        /// the sidecar carries the COMPLETE per-field projection and the fixture
4169        /// can't silently drift from what the index writer emits.
4170        fn rebuild_indexes(&self) {
4171            crate::index::Index::rebuild_all(&self.store()).unwrap();
4172        }
4173    }
4174
4175    /// True if any issue has this code.
4176    fn has(issues: &[Issue], code: &str) -> bool {
4177        issues.iter().any(|i| i.code == code)
4178    }
4179
4180    /// Count issues with a code.
4181    fn count(issues: &[Issue], code: &str) -> usize {
4182        issues.iter().filter(|i| i.code == code).count()
4183    }
4184
4185    /// The first issue with a code, or panic.
4186    fn find<'a>(issues: &'a [Issue], code: &str) -> &'a Issue {
4187        issues
4188            .iter()
4189            .find(|i| i.code == code)
4190            .unwrap_or_else(|| panic!("expected an issue with code {code}; got {issues:#?}"))
4191    }
4192
4193    /// A minimal valid `contact` body for reuse.
4194    fn valid_contact(summary: &str) -> String {
4195        format!(
4196            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{summary}\"\nname: A\n---\n\n# A\n"
4197        )
4198    }
4199
4200    // ── store marker ──────────────────────────────────────────────────────────
4201
4202    #[test]
4203    fn not_a_store_when_db_md_absent() {
4204        let fx = Fixture::bare();
4205        let issues = fx.store_all();
4206        assert_eq!(issues.len(), 1, "only NOT_A_STORE expected: {issues:#?}");
4207        assert_eq!(issues[0].code, codes::NOT_A_STORE);
4208        assert!(issues[0].is_error());
4209    }
4210
4211    #[test]
4212    fn working_set_also_reports_not_a_store() {
4213        let fx = Fixture::bare();
4214        let issues = validate_working_set(&fx.store(), None).unwrap();
4215        assert!(has(&issues, codes::NOT_A_STORE));
4216    }
4217
4218    #[test]
4219    fn clean_store_has_no_issues() {
4220        let fx = Fixture::new();
4221        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4222        // Build the canonical indexes (complete per-field jsonl included) the
4223        // same way `dbmd index rebuild` does, so a freshly-rebuilt store is
4224        // proven clean across every projected field, not just summary/type.
4225        fx.rebuild_indexes();
4226        let issues = fx.store_all();
4227        assert!(
4228            issues.is_empty(),
4229            "expected a clean store, got: {issues:#?}"
4230        );
4231    }
4232
4233    // ── meta-type closed enum ─────────────────────────────────────────────────
4234
4235    /// Regression (adversarial review): a NON-SCALAR `meta-type` (a YAML list or
4236    /// mapping) must be rejected with `FM_BAD_META_TYPE`, not silently slip past
4237    /// the enum check (and then get reclassified as the default `fact`). Pre-fix
4238    /// the check was gated on `and_then(scalar_string)`, which returned `None`
4239    /// for a sequence/mapping and short-circuited the whole branch.
4240    #[test]
4241    fn meta_type_enum_is_closed_for_scalars_and_non_scalars() {
4242        let fx = Fixture::new();
4243        let body = |mt: &str| {
4244            format!(
4245                "---\ntype: profile\nmeta-type: {mt}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4246            )
4247        };
4248
4249        // Valid enum members + absent (default fact) → no FM_BAD_META_TYPE.
4250        for ok in ["fact", "operational", "conclusion"] {
4251            fx.write("records/profiles/ok.md", &body(ok));
4252            let issues = validate_working_set(&fx.store(), None).unwrap();
4253            assert!(
4254                !has(&issues, codes::FM_BAD_META_TYPE),
4255                "`meta-type: {ok}` must be accepted; got {issues:#?}"
4256            );
4257        }
4258        fx.write(
4259            "records/profiles/absent.md",
4260            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n",
4261        );
4262        assert!(
4263            !has(
4264                &validate_working_set(&fx.store(), None).unwrap(),
4265                codes::FM_BAD_META_TYPE
4266            ),
4267            "an absent meta-type is the default `fact` and must be accepted"
4268        );
4269
4270        // Scalar-but-wrong, AND non-scalar (list / mapping) → FM_BAD_META_TYPE.
4271        for bad in ["xyz", "Fact", "[fact, conclusion]", "{kind: conclusion}"] {
4272            let fx2 = Fixture::new();
4273            fx2.write("records/profiles/bad.md", &body(bad));
4274            let issues = validate_working_set(&fx2.store(), None).unwrap();
4275            assert!(
4276                has(&issues, codes::FM_BAD_META_TYPE),
4277                "`meta-type: {bad}` must be rejected with FM_BAD_META_TYPE; got {issues:#?}"
4278            );
4279        }
4280    }
4281
4282    // ── id: recommended + opaque; FM_BAD_ID is structural only (v0.4) ────────
4283
4284    /// The additive-v0.4 guarantee, pinned: an ABSENT id, a hand-authored
4285    /// opaque slug id (legal since v0.3 and present in the shipped examples),
4286    /// a minted lowercase ULID, and a numeric scalar are ALL silent. The
4287    /// recommended ULID form is never a validation gate — a check that flags
4288    /// `id: sarah-chen` would retroactively dirty every v0.3 store and break
4289    /// the "v0.3 validates unchanged under v0.4" contract.
4290    #[test]
4291    fn id_absent_slug_ulid_and_numeric_are_all_silent() {
4292        let body = |id_line: &str| {
4293            format!(
4294                "---\ntype: contact\n{id_line}created: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4295            )
4296        };
4297        for (case, id_line) in [
4298            ("absent", ""),
4299            ("slug", "id: sarah-chen\n"),
4300            ("ulid", "id: 01j5qc3v9k4ym8rwbn2tqe6f7d\n"),
4301            ("numeric-scalar", "id: 100\n"),
4302        ] {
4303            let fx = Fixture::new();
4304            fx.write("records/contacts/a.md", &body(id_line));
4305            let issues = validate_working_set(&fx.store(), None).unwrap();
4306            assert!(
4307                !has(&issues, codes::FM_BAD_ID),
4308                "id case `{case}` must be silent; got {issues:#?}"
4309            );
4310        }
4311    }
4312
4313    /// FM_BAD_ID (warning) fires exactly on ids that cannot work as an
4314    /// identifier: empty / whitespace-only, internal whitespace, and
4315    /// non-scalar (list / mapping) — the last also being the shape that
4316    /// silently escapes `DUP_ID`'s scalar read.
4317    #[test]
4318    fn id_unusable_as_identifier_warns_fm_bad_id() {
4319        let body = |id_line: &str| {
4320            format!(
4321                "---\ntype: contact\n{id_line}\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n"
4322            )
4323        };
4324        for bad in [
4325            "id: \"\"",
4326            "id: \"   \"",
4327            "id: two words",
4328            "id: [a, b]",
4329            "id: {k: v}",
4330        ] {
4331            let fx = Fixture::new();
4332            fx.write("records/contacts/a.md", &body(bad));
4333            let issues = validate_working_set(&fx.store(), None).unwrap();
4334            let issue = issues
4335                .iter()
4336                .find(|i| i.code == codes::FM_BAD_ID)
4337                .unwrap_or_else(|| panic!("`{bad}` must fire FM_BAD_ID; got {issues:#?}"));
4338            assert!(
4339                matches!(issue.severity, Severity::Warning),
4340                "FM_BAD_ID is a warning (additive v0.4 — it must never block a store): {issue:#?}"
4341            );
4342            assert_eq!(issue.key.as_deref(), Some("id"));
4343            assert!(
4344                !issue.is_error(),
4345                "FM_BAD_ID must not fail validation: {issue:#?}"
4346            );
4347        }
4348    }
4349
4350    /// Two records sharing a minted-form (ULID) id collide exactly like any
4351    /// other id — `DUP_ID`, hard error, store-scoped (the v0.4 uniqueness
4352    /// scope is the store).
4353    #[test]
4354    fn dup_id_fires_on_shared_ulid_ids() {
4355        let fx = Fixture::new();
4356        let rec = |name: &str| {
4357            format!(
4358                "---\ntype: contact\nid: 01j5qc3v9k4ym8rwbn2tqe6f7d\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: {name}\nname: {name}\n---\n\n# {name}\n"
4359            )
4360        };
4361        fx.write("records/contacts/a.md", &rec("A"));
4362        fx.write("records/contacts/b.md", &rec("B"));
4363        let issues = fx.store_all();
4364        assert_eq!(count(&issues, codes::DUP_ID), 1, "{issues:#?}");
4365        let issue = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
4366        assert!(issue.is_error());
4367        // The well-formed ULID itself stays silent — only the collision fires.
4368        assert!(!has(&issues, codes::FM_BAD_ID), "{issues:#?}");
4369    }
4370
4371    // ── DB.md structure ───────────────────────────────────────────────────────
4372
4373    /// The `Fixture::new` DB.md is valid → no `DB_MD_*` issue. This pins the
4374    /// "valid identity file is silent" half (a bug that flagged a valid DB.md
4375    /// would fail here).
4376    #[test]
4377    fn valid_db_md_emits_no_structure_issue() {
4378        let fx = Fixture::new();
4379        let issues = fx.store_all();
4380        assert!(
4381            !has(&issues, codes::DB_MD_BAD_TYPE)
4382                && !has(&issues, codes::DB_MD_MISSING_FIELD)
4383                && !has(&issues, codes::DB_MD_UNKNOWN_SECTION),
4384            "a valid DB.md (type: db-md + scope + owner, recognized sections) is silent: {issues:#?}"
4385        );
4386    }
4387
4388    /// A DB.md whose `type:` isn't `db-md` → `DB_MD_BAD_TYPE`, keyed on `type`,
4389    /// anchored to the `type:` line (file line 2). Failing to read the type, or
4390    /// accepting a non-`db-md` type, breaks this.
4391    #[test]
4392    fn db_md_wrong_type_is_error() {
4393        let fx = Fixture::new();
4394        fx.write("DB.md", "---\ntype: notes\nscope: company\nowner: T\n---\n");
4395        let issues = fx.store_all();
4396        let i = find(&issues, codes::DB_MD_BAD_TYPE);
4397        assert!(i.is_error());
4398        assert_eq!(i.file, PathBuf::from("DB.md"));
4399        assert_eq!(i.key.as_deref(), Some("type"));
4400        assert_eq!(i.line, Some(2), "anchors to the `type:` line");
4401    }
4402
4403    /// A DB.md missing `scope` and `owner` → one `DB_MD_MISSING_FIELD` per
4404    /// absent field, each keyed on its field name, anchored to the block top.
4405    #[test]
4406    fn db_md_missing_scope_and_owner_each_report() {
4407        let fx = Fixture::new();
4408        fx.write("DB.md", "---\ntype: db-md\n---\n");
4409        let issues = fx.store_all();
4410        assert_eq!(
4411            count(&issues, codes::DB_MD_MISSING_FIELD),
4412            2,
4413            "both scope and owner absent → two issues: {issues:#?}"
4414        );
4415        let keys: BTreeSet<Option<String>> = issues
4416            .iter()
4417            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4418            .map(|i| i.key.clone())
4419            .collect();
4420        assert_eq!(
4421            keys,
4422            BTreeSet::from([Some("scope".to_string()), Some("owner".to_string())]),
4423            "one issue keyed on each missing field"
4424        );
4425        for i in issues
4426            .iter()
4427            .filter(|i| i.code == codes::DB_MD_MISSING_FIELD)
4428        {
4429            assert!(i.is_error());
4430            assert_eq!(i.line, Some(1), "absent field anchors to the block top");
4431        }
4432    }
4433
4434    /// A present-but-blank required field is still missing (`DB_MD_MISSING_FIELD`),
4435    /// anchored to its own line — guarding against an "is the key textually
4436    /// present?" shortcut that would miss `owner:` with an empty value.
4437    #[test]
4438    fn db_md_blank_required_field_is_missing() {
4439        let fx = Fixture::new();
4440        fx.write(
4441            "DB.md",
4442            "---\ntype: db-md\nscope: company\nowner: \"\"\n---\n",
4443        );
4444        let issues = fx.store_all();
4445        let i = find(&issues, codes::DB_MD_MISSING_FIELD);
4446        assert_eq!(i.key.as_deref(), Some("owner"));
4447        assert_eq!(
4448            i.line,
4449            Some(4),
4450            "a present-but-empty field anchors to its line"
4451        );
4452        assert!(
4453            count(&issues, codes::DB_MD_MISSING_FIELD) == 1,
4454            "scope is present and non-empty → only owner reported"
4455        );
4456    }
4457
4458    /// An unrecognized `##` section → `DB_MD_UNKNOWN_SECTION` (warning), anchored
4459    /// to the heading's file line; the three recognized sections stay silent.
4460    #[test]
4461    fn db_md_unknown_section_is_warning() {
4462        let fx = Fixture::new();
4463        fx.write(
4464            "DB.md",
4465            // line 1 `---`, 2 type, 3 scope, 4 owner, 5 `---`, 6 blank,
4466            // 7 `## Agent instructions`, 8 blank, 9 prose, 10 blank,
4467            // 11 `## Glossary`.
4468            "---\ntype: db-md\nscope: company\nowner: T\n---\n\n## Agent instructions\n\nbe good\n\n## Glossary\n\nterms\n",
4469        );
4470        let issues = fx.store_all();
4471        let i = find(&issues, codes::DB_MD_UNKNOWN_SECTION);
4472        assert!(!i.is_error(), "unknown section is a warning, not an error");
4473        assert_eq!(i.severity, Severity::Warning);
4474        assert_eq!(
4475            i.line,
4476            Some(11),
4477            "anchors to the `## Glossary` heading line"
4478        );
4479        assert!(
4480            i.message.contains("Glossary"),
4481            "the message names the offending section: {}",
4482            i.message
4483        );
4484        // The recognized `## Agent instructions` section did NOT fire.
4485        assert_eq!(
4486            count(&issues, codes::DB_MD_UNKNOWN_SECTION),
4487            1,
4488            "only the unrecognized section is flagged: {issues:#?}"
4489        );
4490    }
4491
4492    /// A DB.md with no frontmatter at all → `DB_MD_BAD_TYPE` plus both
4493    /// `DB_MD_MISSING_FIELD`s (no provable type, no provable fields).
4494    #[test]
4495    fn db_md_no_frontmatter_reports_type_and_both_fields() {
4496        let fx = Fixture::new();
4497        fx.write("DB.md", "# just a heading, no frontmatter\n");
4498        let issues = fx.store_all();
4499        assert!(has(&issues, codes::DB_MD_BAD_TYPE));
4500        assert_eq!(count(&issues, codes::DB_MD_MISSING_FIELD), 2);
4501    }
4502
4503    // ── frontmatter ─────────────────────────────────────────────────────────
4504
4505    #[test]
4506    fn missing_type_is_error() {
4507        let fx = Fixture::new();
4508        fx.write(
4509            "records/contacts/a.md",
4510            "---\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\n# A\n",
4511        );
4512        let issues = fx.store_all();
4513        assert!(has(&issues, codes::FM_MISSING_TYPE));
4514        assert!(find(&issues, codes::FM_MISSING_TYPE).is_error());
4515    }
4516
4517    #[test]
4518    fn missing_universal_timestamps_are_errors_on_content_files() {
4519        let fx = Fixture::new();
4520        fx.write(
4521            "records/contacts/a.md",
4522            "---\ntype: contact\nsummary: x\nname: A\n---\n\n# A\n",
4523        );
4524        let issues = fx.store_all();
4525
4526        let missing_created = find(&issues, codes::FM_MISSING_CREATED);
4527        assert_eq!(missing_created.key.as_deref(), Some("created"));
4528        assert!(missing_created.is_error());
4529
4530        let missing_updated = find(&issues, codes::FM_MISSING_UPDATED);
4531        assert_eq!(missing_updated.key.as_deref(), Some("updated"));
4532        assert!(missing_updated.is_error());
4533    }
4534
4535    #[test]
4536    fn meta_files_do_not_require_universal_timestamps() {
4537        let fx = Fixture::new();
4538        let issues = fx.store_all();
4539
4540        assert!(
4541            !has(&issues, codes::FM_MISSING_CREATED),
4542            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4543        );
4544        assert!(
4545            !has(&issues, codes::FM_MISSING_UPDATED),
4546            "DB.md/log/index meta files must not require content timestamps: {issues:#?}"
4547        );
4548    }
4549
4550    #[test]
4551    fn content_file_with_no_frontmatter_block_reports_type_and_summary() {
4552        let fx = Fixture::new();
4553        fx.write(
4554            "records/profiles/a.md",
4555            "# Just a heading\n\nNo frontmatter here.\n",
4556        );
4557        let issues = fx.store_all();
4558        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4559        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4560    }
4561
4562    #[test]
4563    fn content_file_with_empty_frontmatter_reports_type_and_summary() {
4564        let fx = Fixture::new();
4565        fx.write("records/profiles/a.md", "---\n---\n\nbody\n");
4566        let issues = fx.store_all();
4567        assert!(has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
4568        assert!(has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4569    }
4570
4571    #[test]
4572    fn malformed_yaml_is_error_and_suppresses_field_checks() {
4573        let fx = Fixture::new();
4574        // A tab inside a mapping value is invalid YAML.
4575        fx.write(
4576            "records/contacts/a.md",
4577            "---\ntype: contact\n  bad: : : :\n: : nope\n---\n\nbody\n",
4578        );
4579        let issues = fx.store_all();
4580        let issue = find(&issues, codes::FM_MALFORMED_YAML);
4581        assert!(issue.is_error());
4582        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4583        // When YAML doesn't parse we don't *also* claim the summary is missing;
4584        // the agent fixes the YAML first.
4585        assert!(
4586            !has(&issues, codes::SUMMARY_MISSING),
4587            "malformed YAML should suppress SUMMARY_MISSING: {issues:#?}"
4588        );
4589    }
4590
4591    #[test]
4592    fn bad_created_timestamp_is_error() {
4593        let fx = Fixture::new();
4594        fx.write(
4595            "records/contacts/a.md",
4596            "---\ntype: contact\ncreated: not-a-date\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
4597        );
4598        let issues = fx.store_all();
4599        let issue = find(&issues, codes::FM_BAD_TIMESTAMP);
4600        assert_eq!(issue.key.as_deref(), Some("created"));
4601        assert!(issue.is_error());
4602    }
4603
4604    #[test]
4605    fn date_only_created_is_rejected_but_type_date_field_accepted() {
4606        let fx = Fixture::new();
4607        // `created` must be a full RFC3339 datetime → a date-only value is bad.
4608        // `last_touch` is a type-specific date field → date-only is fine.
4609        fx.write(
4610            "records/contacts/a.md",
4611            "---\ntype: contact\ncreated: 2026-05-22\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\nlast_touch: 2026-05-22\n---\n\n# A\n",
4612        );
4613        let issues = fx.store_all();
4614        let created_issues: Vec<_> = issues
4615            .iter()
4616            .filter(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created"))
4617            .collect();
4618        assert_eq!(
4619            created_issues.len(),
4620            1,
4621            "date-only `created` must fail: {issues:#?}"
4622        );
4623        assert!(
4624            !issues.iter().any(
4625                |i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("last_touch")
4626            ),
4627            "date-only `last_touch` is valid: {issues:#?}"
4628        );
4629    }
4630
4631    // ── summary ─────────────────────────────────────────────────────────────
4632
4633    #[test]
4634    fn summary_missing_empty_multiline_toolong() {
4635        let fx = Fixture::new();
4636        fx.write(
4637            "records/profiles/missing.md",
4638            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\n---\n\nbody\n",
4639        );
4640        fx.write(
4641            "records/profiles/empty.md",
4642            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"   \"\n---\n\nbody\n",
4643        );
4644        let long = "x".repeat(201);
4645        fx.write(
4646            "records/profiles/long.md",
4647            &format!("---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{long}\"\n---\n\nbody\n"),
4648        );
4649        let issues = fx.store_all();
4650        assert!(has(&issues, codes::SUMMARY_MISSING));
4651        assert_eq!(
4652            find(&issues, codes::SUMMARY_MISSING).file,
4653            PathBuf::from("records/profiles/missing.md")
4654        );
4655        assert!(has(&issues, codes::SUMMARY_EMPTY));
4656        assert!(has(&issues, codes::SUMMARY_TOO_LONG));
4657        assert_eq!(
4658            find(&issues, codes::SUMMARY_TOO_LONG).severity,
4659            Severity::Warning
4660        );
4661    }
4662
4663    #[test]
4664    fn summary_multiline_via_yaml_block_scalar() {
4665        let fx = Fixture::new();
4666        // A literal block scalar produces a value with a newline.
4667        fx.write(
4668            "records/profiles/a.md",
4669            "---\ntype: profile\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: |\n  line one\n  line two\n---\n\nbody\n",
4670        );
4671        let issues = fx.store_all();
4672        assert!(has(&issues, codes::SUMMARY_MULTILINE), "{issues:#?}");
4673    }
4674
4675    #[test]
4676    fn summary_exactly_200_chars_is_ok() {
4677        let fx = Fixture::new();
4678        let s = "y".repeat(200);
4679        fx.write(
4680            "records/profiles/a.md",
4681            &format!("---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"{s}\"\n---\n\nbody\n"),
4682        );
4683        let issues = fx.store_all();
4684        assert!(
4685            !has(&issues, codes::SUMMARY_TOO_LONG),
4686            "200 is the bound, inclusive: {issues:#?}"
4687        );
4688    }
4689
4690    #[test]
4691    fn meta_files_need_no_summary() {
4692        let fx = Fixture::new();
4693        // The root/layer/type indexes + log carry no summary and must not be
4694        // flagged. (A lone DB.md store with one contact and full indexes.)
4695        fx.write("records/contacts/a.md", &valid_contact("A contact"));
4696        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n# I\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
4697        fx.write(
4698            "records/index.md",
4699            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
4700        );
4701        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — A contact\n");
4702        fx.write(
4703            "records/contacts/index.jsonl",
4704            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"A contact\"}\n",
4705        );
4706        fx.write("log.md", "---\ntype: log\n---\n\n# Log\n");
4707        let issues = fx.store_all();
4708        assert!(!has(&issues, codes::SUMMARY_MISSING), "{issues:#?}");
4709    }
4710
4711    // ── tags ────────────────────────────────────────────────────────────────
4712
4713    #[test]
4714    fn nested_tags_warns_flat_tags_ok() {
4715        let fx = Fixture::new();
4716        fx.write(
4717            "records/contacts/nested.md",
4718            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags:\n  - good\n  - [nested, list]\n---\n\n# A\n",
4719        );
4720        fx.write(
4721            "records/contacts/flat.md",
4722            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ntags: [customer, vip]\n---\n\n# A\n",
4723        );
4724        let issues = fx.store_all();
4725        let tag_issues: Vec<_> = issues
4726            .iter()
4727            .filter(|i| i.code == codes::TAGS_MALFORMED)
4728            .collect();
4729        assert_eq!(
4730            tag_issues.len(),
4731            1,
4732            "only the nested-tags file should warn: {issues:#?}"
4733        );
4734        assert_eq!(
4735            tag_issues[0].file,
4736            PathBuf::from("records/contacts/nested.md")
4737        );
4738        assert_eq!(tag_issues[0].severity, Severity::Warning);
4739    }
4740
4741    // ── wiki-links ────────────────────────────────────────────────────────────
4742
4743    #[test]
4744    fn short_form_wiki_link_is_error() {
4745        let fx = Fixture::new();
4746        let mut body = valid_contact("links to a short form");
4747        body.push_str("\nSee [[sarah-chen]] for details.\n");
4748        fx.write("records/contacts/a.md", &body);
4749        let issues = fx.store_all();
4750        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4751        assert!(issue.is_error());
4752        assert!(issue.message.contains("sarah-chen"));
4753        // A short-form link must NOT also be reported broken — fix the form first.
4754        assert!(
4755            !issues
4756                .iter()
4757                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("sarah-chen")),
4758            "short-form should suppress broken: {issues:#?}"
4759        );
4760    }
4761
4762    #[test]
4763    fn broken_full_path_wiki_link_is_error() {
4764        let fx = Fixture::new();
4765        let mut body = valid_contact("links to a missing file");
4766        body.push_str("\nSee [[records/contacts/ghost]].\n");
4767        fx.write("records/contacts/a.md", &body);
4768        let issues = fx.store_all();
4769        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4770        assert!(issue.is_error());
4771        assert!(issue.message.contains("records/contacts/ghost"));
4772        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4773    }
4774
4775    #[test]
4776    fn traversal_full_path_wiki_link_is_rejected_before_probe() {
4777        let fx = Fixture::new();
4778        let mut body = valid_contact("links with traversal");
4779        body.push_str("\nSee [[records/contacts/../../ghost]].\n");
4780        fx.write("records/contacts/a.md", &body);
4781        let issues = fx.store_all();
4782        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
4783        assert!(issue.message.contains("not a safe store-relative path"));
4784        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
4785    }
4786
4787    #[test]
4788    fn valid_full_path_wiki_link_passes() {
4789        let fx = Fixture::new();
4790        fx.write("records/contacts/target.md", &valid_contact("target"));
4791        let mut body = valid_contact("links to target");
4792        body.push_str("\nSee [[records/contacts/target]].\n");
4793        fx.write("records/contacts/a.md", &body);
4794        let issues = fx.store_all();
4795        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4796        assert!(!has(&issues, codes::WIKI_LINK_SHORT_FORM), "{issues:#?}");
4797    }
4798
4799    #[test]
4800    fn md_extension_wiki_link_warns_and_resolves() {
4801        let fx = Fixture::new();
4802        fx.write("records/contacts/target.md", &valid_contact("target"));
4803        let mut body = valid_contact("links with extension");
4804        body.push_str("\nSee [[records/contacts/target.md]].\n");
4805        fx.write("records/contacts/a.md", &body);
4806        let issues = fx.store_all();
4807        let issue = find(&issues, codes::WIKI_LINK_HAS_EXTENSION);
4808        assert_eq!(issue.severity, Severity::Warning);
4809        assert_eq!(
4810            issue.suggestion.as_deref(),
4811            Some("drop the extension: [[records/contacts/target]]")
4812        );
4813        // The target exists once `.md` is stripped → not broken.
4814        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4815    }
4816
4817    #[test]
4818    fn wiki_links_in_code_fences_are_ignored() {
4819        let fx = Fixture::new();
4820        let mut body = valid_contact("has a fenced example");
4821        body.push_str("\n```\n[[sarah-chen]]\n```\n");
4822        fx.write("records/contacts/a.md", &body);
4823        let issues = fx.store_all();
4824        assert!(
4825            !has(&issues, codes::WIKI_LINK_SHORT_FORM),
4826            "fenced wiki-links must be ignored: {issues:#?}"
4827        );
4828    }
4829
4830    #[test]
4831    fn flow_form_link_list_in_frontmatter_is_error() {
4832        let fx = Fixture::new();
4833        fx.write(
4834            "records/meetings/m.md",
4835            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees: [[[records/contacts/a]], [[records/contacts/b]]]\n---\n\n# M\n",
4836        );
4837        let issues = fx.store_all();
4838        let issue = find(&issues, codes::WIKI_LINK_FLOW_FORM_LIST);
4839        assert!(issue.is_error());
4840        assert_eq!(issue.key.as_deref(), Some("attendees"));
4841    }
4842
4843    #[test]
4844    fn block_form_link_list_in_frontmatter_is_not_flow_form() {
4845        let fx = Fixture::new();
4846        fx.write("records/contacts/a.md", &valid_contact("a"));
4847        fx.write("records/contacts/b.md", &valid_contact("b"));
4848        fx.write(
4849            "records/meetings/m.md",
4850            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a meeting\ndate: 2026-05-22\nattendees:\n  - [[records/contacts/a]]\n  - [[records/contacts/b]]\n---\n\n# M\n",
4851        );
4852        let issues = fx.store_all();
4853        assert!(
4854            !has(&issues, codes::WIKI_LINK_FLOW_FORM_LIST),
4855            "{issues:#?}"
4856        );
4857        // Block-form link targets are still integrity-checked (both exist here).
4858        assert!(!has(&issues, codes::WIKI_LINK_BROKEN), "{issues:#?}");
4859    }
4860
4861    #[test]
4862    fn frontmatter_short_form_link_field_is_error() {
4863        let fx = Fixture::new();
4864        // `related` is a *custom* (non-schema) wiki-link field, so it goes
4865        // through the generic doctrine path → a short form is WIKI_LINK_SHORT_FORM.
4866        fx.write(
4867            "records/synthesis/a.md",
4868            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: \"[[sarah-chen]]\"\n---\n\n# A\n",
4869        );
4870        let issues = fx.store_all();
4871        let issue = find(&issues, codes::WIKI_LINK_SHORT_FORM);
4872        assert!(issue.is_error());
4873        assert_eq!(issue.key.as_deref(), Some("related"));
4874    }
4875
4876    #[test]
4877    fn unquoted_frontmatter_link_is_recognized() {
4878        // An UNQUOTED `[[...]]` parses in YAML as a nested sequence, not a
4879        // string. The validator must still see it as a wiki-link (text-based
4880        // extraction). A short-form custom field must report SHORT_FORM, and a
4881        // full-path one with a missing target must report BROKEN.
4882        let fx = Fixture::new();
4883        fx.write(
4884            "records/synthesis/short.md",
4885            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[sarah-chen]]\n---\n\n# A\n",
4886        );
4887        fx.write(
4888            "records/synthesis/broken.md",
4889            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nrelated: [[records/contacts/ghost]]\n---\n\n# A\n",
4890        );
4891        let issues = fx.store_all();
4892        assert!(
4893            issues.iter().any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4894                && i.file == Path::new("records/synthesis/short.md")
4895                && i.key.as_deref() == Some("related")),
4896            "unquoted short-form frontmatter link must be caught: {issues:#?}"
4897        );
4898        assert!(
4899            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
4900                && i.file == Path::new("records/synthesis/broken.md")),
4901            "unquoted full-path frontmatter link to a missing file must be caught: {issues:#?}"
4902        );
4903    }
4904
4905    #[test]
4906    fn short_form_in_declared_link_field_is_prefix_mismatch_not_double_reported() {
4907        // A short-form value in a *declared* link field (a `### contact` schema
4908        // with `company link to records/companies/`) is SCHEMA_LINK_PREFIX_MISMATCH
4909        // (the target isn't under the prefix), and must NOT also be reported as a
4910        // bare WIKI_LINK_SHORT_FORM — the schema path owns that field once.
4911        let mut fx = Fixture::new();
4912        fx.config.schemas.insert(
4913            "contact".into(),
4914            Schema {
4915                fields: vec![FieldSpec {
4916                    name: "company".into(),
4917                    link_prefix: Some(PathBuf::from("records/companies")),
4918                    ..Default::default()
4919                }],
4920                ..Default::default()
4921            },
4922        );
4923        fx.write(
4924            "records/contacts/a.md",
4925            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[northstar]]\"\n---\n\n# A\n",
4926        );
4927        let issues = fx.store_all();
4928        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
4929        assert_eq!(issue.key.as_deref(), Some("company"));
4930        // The same link must NOT also be double-reported via the generic path.
4931        assert!(
4932            !issues
4933                .iter()
4934                .any(|i| i.code == codes::WIKI_LINK_SHORT_FORM
4935                    && i.key.as_deref() == Some("company")),
4936            "schema link fields are checked once, by the schema path: {issues:#?}"
4937        );
4938    }
4939
4940    #[test]
4941    fn schema_link_field_with_md_extension_still_warns() {
4942        let mut fx = Fixture::new();
4943        fx.config.schemas.insert(
4944            "contact".into(),
4945            Schema {
4946                fields: vec![FieldSpec {
4947                    name: "company".into(),
4948                    link_prefix: Some(PathBuf::from("records/companies")),
4949                    ..Default::default()
4950                }],
4951                ..Default::default()
4952            },
4953        );
4954        fx.write(
4955            "records/companies/acme.md",
4956            "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: Acme\nname: Acme\n---\n\n# Acme\n",
4957        );
4958        fx.write(
4959            "records/contacts/a.md",
4960            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\ncompany: \"[[records/companies/acme.md]]\"\n---\n\n# A\n",
4961        );
4962        let issues = fx.store_all();
4963        let issue = issues
4964            .iter()
4965            .find(|i| {
4966                i.code == codes::WIKI_LINK_HAS_EXTENSION && i.key.as_deref() == Some("company")
4967            })
4968            .unwrap_or_else(|| panic!("schema link extension warning missing: {issues:#?}"));
4969        assert_eq!(issue.severity, Severity::Warning);
4970        assert!(
4971            !issues
4972                .iter()
4973                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.key.as_deref() == Some("company")),
4974            "extensionless existence check should still find acme.md: {issues:#?}"
4975        );
4976    }
4977
4978    // ── schema: explicit DB.md schema (required / shape / enum) ───────────────
4979
4980    #[test]
4981    fn explicit_schema_required_shape_enum() {
4982        let fx = {
4983            let mut fx = Fixture::new();
4984            // contact schema: name required, email required+email shape,
4985            // status enum: active|inactive
4986            let schema = Schema {
4987                fields: vec![
4988                    FieldSpec {
4989                        name: "name".into(),
4990                        required: true,
4991                        ..Default::default()
4992                    },
4993                    FieldSpec {
4994                        name: "email".into(),
4995                        required: true,
4996                        shape: Some(Shape::Email),
4997                        ..Default::default()
4998                    },
4999                    FieldSpec {
5000                        name: "status".into(),
5001                        enum_values: Some(vec!["active".into(), "inactive".into()]),
5002                        ..Default::default()
5003                    },
5004                ],
5005                ..Default::default()
5006            };
5007            fx.config.schemas.insert("contact".into(), schema);
5008            fx
5009        };
5010        fx.write(
5011            "records/contacts/a.md",
5012            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nemail: not-an-email\nstatus: archived\n---\n\n# A\n",
5013        );
5014        let issues = fx.store_all();
5015        // name absent → MISSING_REQUIRED
5016        assert!(
5017            issues
5018                .iter()
5019                .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
5020                    && i.key.as_deref() == Some("name")),
5021            "{issues:#?}"
5022        );
5023        // email malformed → SHAPE_MISMATCH
5024        assert!(
5025            issues.iter().any(
5026                |i| i.code == codes::SCHEMA_SHAPE_MISMATCH && i.key.as_deref() == Some("email")
5027            ),
5028            "{issues:#?}"
5029        );
5030        // status archived not in enum → ENUM_VIOLATION
5031        assert!(
5032            issues
5033                .iter()
5034                .any(|i| i.code == codes::SCHEMA_ENUM_VIOLATION
5035                    && i.key.as_deref() == Some("status")),
5036            "{issues:#?}"
5037        );
5038    }
5039
5040    #[test]
5041    fn schema_without_link_field_allows_plain_value() {
5042        // A `contact` schema with no `company` link field means a plain `company`
5043        // string is fine — schema enforcement is exactly what the store declares,
5044        // nothing implicit.
5045        let mut fx = Fixture::new();
5046        fx.config.schemas.insert(
5047            "contact".into(),
5048            Schema {
5049                fields: vec![FieldSpec {
5050                    name: "name".into(),
5051                    required: true,
5052                    ..Default::default()
5053                }],
5054                ..Default::default()
5055            },
5056        );
5057        fx.write(
5058            "records/contacts/a.md",
5059            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
5060        );
5061        let issues = fx.store_all();
5062        assert!(
5063            !has(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH),
5064            "no declared link field for `company` → a plain value is fine: {issues:#?}"
5065        );
5066    }
5067
5068    #[test]
5069    fn schema_link_field_plain_value_is_prefix_mismatch() {
5070        // The surviving link-enforcement path: a declared `link to <prefix>/`
5071        // field with a plain-string value is SCHEMA_LINK_PREFIX_MISMATCH.
5072        let mut fx = Fixture::new();
5073        fx.config.schemas.insert(
5074            "contact".into(),
5075            Schema {
5076                fields: vec![FieldSpec {
5077                    name: "company".into(),
5078                    link_prefix: Some(PathBuf::from("records/companies")),
5079                    ..Default::default()
5080                }],
5081                ..Default::default()
5082            },
5083        );
5084        fx.write(
5085            "records/contacts/a.md",
5086            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: Sarah\ncompany: \"Acme Co\"\n---\n\n# Sarah\n",
5087        );
5088        let issues = fx.store_all();
5089        let issue = find(&issues, codes::SCHEMA_LINK_PREFIX_MISMATCH);
5090        assert_eq!(issue.key.as_deref(), Some("company"));
5091        assert!(issue
5092            .suggestion
5093            .as_deref()
5094            .unwrap()
5095            .contains("records/companies/"));
5096    }
5097
5098    #[test]
5099    fn schema_shape_int_and_url_and_currency() {
5100        let mut fx = Fixture::new();
5101        fx.config.schemas.insert(
5102            "widget".into(),
5103            Schema {
5104                fields: vec![
5105                    FieldSpec {
5106                        name: "qty".into(),
5107                        shape: Some(Shape::Int),
5108                        ..Default::default()
5109                    },
5110                    FieldSpec {
5111                        name: "site".into(),
5112                        shape: Some(Shape::Url),
5113                        ..Default::default()
5114                    },
5115                    FieldSpec {
5116                        name: "price".into(),
5117                        shape: Some(Shape::Currency),
5118                        ..Default::default()
5119                    },
5120                ],
5121                ..Default::default()
5122            },
5123        );
5124        // `USD 100` is the corpus-realistic shape (an `expense.currency`-style
5125        // ISO code + amount). It must pass — it used to spuriously fail.
5126        fx.write(
5127            "records/widgets/ok.md",
5128            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nqty: 5\nsite: https://example.com\nprice: \"USD 1,234.50\"\n---\n\n# ok\n",
5129        );
5130        // `free` is non-numeric; `inf`/`NaN`/3-decimal used to slip through
5131        // because the old impl leaned on `f64::parse`. `price: inf` here guards
5132        // the under-rejection half of the finding.
5133        fx.write(
5134            "records/widgets/bad.md",
5135            "---\ntype: widget\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nqty: five\nsite: ftp://nope\nprice: inf\n---\n\n# bad\n",
5136        );
5137        let issues = fx.store_all();
5138        let bad_shape: Vec<_> = issues
5139            .iter()
5140            .filter(|i| {
5141                i.code == codes::SCHEMA_SHAPE_MISMATCH
5142                    && i.file == Path::new("records/widgets/bad.md")
5143            })
5144            .map(|i| i.key.clone().unwrap_or_default())
5145            .collect();
5146        assert!(bad_shape.contains(&"qty".to_string()), "{issues:#?}");
5147        assert!(bad_shape.contains(&"site".to_string()), "{issues:#?}");
5148        assert!(
5149            bad_shape.contains(&"price".to_string()),
5150            "inf must be rejected as currency: {issues:#?}"
5151        );
5152        assert!(
5153            !issues.iter().any(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH
5154                && i.file == Path::new("records/widgets/ok.md")),
5155            "valid shapes (incl. `USD 1,234.50`) must not fire: {issues:#?}"
5156        );
5157    }
5158
5159    #[test]
5160    fn schema_shape_or_enum_field_with_non_scalar_value_is_shape_mismatch() {
5161        let mut fx = Fixture::new();
5162        fx.config.schemas.insert(
5163            "contact".into(),
5164            Schema {
5165                fields: vec![
5166                    FieldSpec {
5167                        name: "email".into(),
5168                        required: true,
5169                        shape: Some(Shape::Email),
5170                        ..Default::default()
5171                    },
5172                    FieldSpec {
5173                        name: "status".into(),
5174                        enum_values: Some(vec!["active".into(), "inactive".into()]),
5175                        ..Default::default()
5176                    },
5177                ],
5178                ..Default::default()
5179            },
5180        );
5181        // A required EMAIL field and an ENUM field, each holding a LIST. Both
5182        // used to slip through entirely (`scalar_string` → None → the shape and
5183        // enum bodies silently no-op); now they flag SCHEMA_SHAPE_MISMATCH.
5184        fx.write(
5185            "records/contacts/bad.md",
5186            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: bad\nemail:\n  - a@b.com\n  - c@d.com\nstatus:\n  - active\n---\n\n# bad\n",
5187        );
5188        let issues = fx.store_all();
5189        let mismatched: Vec<_> = issues
5190            .iter()
5191            .filter(|i| i.code == codes::SCHEMA_SHAPE_MISMATCH)
5192            .map(|i| i.key.clone().unwrap_or_default())
5193            .collect();
5194        assert!(
5195            mismatched.contains(&"email".to_string()),
5196            "list-valued required email must flag: {issues:#?}"
5197        );
5198        assert!(
5199            mismatched.contains(&"status".to_string()),
5200            "list-valued enum must flag: {issues:#?}"
5201        );
5202    }
5203
5204    #[test]
5205    fn is_currency_accepts_codes_and_rejects_non_numeric() {
5206        // Symbols and 3-letter ISO codes both strip; plain numbers pass.
5207        for ok in [
5208            "100",
5209            "1234.56",
5210            "$1,234.50",
5211            "USD 100", // the finding's headline probe — used to be false
5212            "usd 100", // case-insensitive code
5213            "EUR 9.50",
5214            "£12",
5215            "¥1000",
5216            "-5.00", // signed amounts are real (refunds)
5217            "+5",
5218            "1,000,000",
5219        ] {
5220            assert!(is_currency(ok), "expected currency: {ok:?}");
5221        }
5222        // Non-numeric floats `f64::parse` would accept, and the > 2-decimal /
5223        // bare-code / exponent cases the docstring forbids.
5224        for bad in [
5225            "inf", "-inf", "infinity", "NaN", "nan",    // f64 accepts these; we must not
5226            "12.999", // 3 decimals
5227            "1.2345", // 4 decimals
5228            "USD",    // bare code, no amount
5229            "$",      // bare symbol
5230            "free", "", " ", "1e3",      // exponent form
5231            "1.",       // trailing dot, no fractional digits
5232            ".5",       // leading dot, no integer digits
5233            "1 000",    // space as separator is not a thousands separator
5234            "USDD 100", // 4-letter "code" must not strip
5235        ] {
5236            assert!(!is_currency(bad), "expected NOT currency: {bad:?}");
5237        }
5238    }
5239
5240    // ── policies ───────────────────────────────────────────────────────────
5241
5242    #[test]
5243    fn ignored_type_present_is_info() {
5244        let mut fx = Fixture::new();
5245        fx.config.ignored_types.push("temp".into());
5246        fx.write(
5247            "records/temps/x.md",
5248            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
5249        );
5250        let issues = fx.store_all();
5251        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_PRESENT);
5252        assert_eq!(issue.severity, Severity::Info);
5253        assert!(!issue.is_error());
5254        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
5255    }
5256
5257    #[test]
5258    fn conclusion_record_derived_from_ignored_type_warns() {
5259        let mut fx = Fixture::new();
5260        fx.config.ignored_types.push("temp".into());
5261        fx.write(
5262            "records/temps/x.md",
5263            "---\ntype: temp\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a temp\n---\n\n# x\n",
5264        );
5265        // The policy now gates on `meta-type: conclusion` (not the retired
5266        // `type: wiki-page`): a conclusion record that derives from an
5267        // ignored-type record warns.
5268        fx.write(
5269            "records/synthesis/t.md",
5270            "---\ntype: synthesis\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: derived\nderived_from: \"[[records/temps/x]]\"\n---\n\n# t\n",
5271        );
5272        let issues = fx.store_all();
5273        let issue = find(&issues, codes::POLICY_IGNORED_TYPE_DERIVED);
5274        assert_eq!(issue.severity, Severity::Warning);
5275        assert_eq!(issue.key.as_deref(), Some("derived_from"));
5276        assert!(issue.suggestion.as_deref().is_some_and(|s| !s.is_empty()));
5277    }
5278
5279    /// The shared `derived_from_ignored_type` entry point — the single
5280    /// policy-decision both `dbmd validate` (read) and `dbmd write` (write-time
5281    /// warning) now route through, so they cannot diverge. This pins its
5282    /// contract directly: the meta-type gate (now `meta-type: conclusion`, not
5283    /// the retired `type: wiki-page`), the empty-ignored-types gate, a positive
5284    /// match carrying the resolved target type, and a non-ignored target
5285    /// rejected.
5286    #[test]
5287    fn derived_from_ignored_type_is_the_shared_policy_decision() {
5288        let mut fx = Fixture::new();
5289        fx.config.ignored_types.push("secret".into());
5290        // An ignored-type record …
5291        fx.write(
5292            "records/secrets/s.md",
5293            "---\ntype: secret\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: hush\n---\n\n# s\n",
5294        );
5295        // … and a non-ignored record.
5296        fx.write(
5297            "records/contacts/c.md",
5298            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: ok\nname: C\n---\n\n# c\n",
5299        );
5300        let store = fx.store();
5301
5302        // Positive: a conclusion record deriving from the ignored-type record
5303        // matches, and the hit carries both the target (as written) and its
5304        // resolved type.
5305        let hit =
5306            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s"))
5307                .expect("conclusion → ignored-type record must match");
5308        assert_eq!(hit.target, "records/secrets/s");
5309        assert_eq!(hit.target_type, "secret");
5310
5311        // Meta-type gate: a non-`conclusion` meta-type never triggers, even with
5312        // the same ignored-type target.
5313        assert_eq!(
5314            derived_from_ignored_type(&store, "fact", std::iter::once("records/secrets/s")),
5315            None,
5316            "only conclusion derivation is policed"
5317        );
5318
5319        // Target gate: a conclusion deriving from a non-ignored record is fine.
5320        assert_eq!(
5321            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/contacts/c")),
5322            None,
5323            "deriving from a non-ignored type is allowed"
5324        );
5325
5326        // First match wins across multiple targets (here the second is the hit).
5327        let hit = derived_from_ignored_type(
5328            &store,
5329            "conclusion",
5330            ["records/contacts/c", "records/secrets/s"],
5331        )
5332        .expect("a later ignored-type target must still be found");
5333        assert_eq!(hit.target, "records/secrets/s");
5334
5335        // Empty-policy gate: with no `### Ignored types`, nothing is policed.
5336        fx.config.ignored_types.clear();
5337        let store = fx.store();
5338        assert_eq!(
5339            derived_from_ignored_type(&store, "conclusion", std::iter::once("records/secrets/s")),
5340            None,
5341            "an empty ignored-types policy short-circuits"
5342        );
5343    }
5344
5345    // ── duplicates ───────────────────────────────────────────────────────────
5346
5347    #[test]
5348    fn dup_id_is_hard_error_with_related() {
5349        let fx = Fixture::new();
5350        fx.write(
5351            "records/contacts/a.md",
5352            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5353        );
5354        fx.write(
5355            "records/contacts/b.md",
5356            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5357        );
5358        let issues = fx.store_all();
5359        // Reporting rule #1: ONE issue per collision group, keyed on the
5360        // lexicographically smallest path (`a.md`), partner in `related`.
5361        assert_eq!(
5362            count(&issues, codes::DUP_ID),
5363            1,
5364            "one issue per group: {issues:#?}"
5365        );
5366        let a = issues.iter().find(|i| i.code == codes::DUP_ID).unwrap();
5367        assert_eq!(a.file, PathBuf::from("records/contacts/a.md"));
5368        assert!(a.is_error());
5369        assert_eq!(a.key.as_deref(), Some("id"));
5370        assert_eq!(
5371            a.line,
5372            Some(3),
5373            "anchors to the `id` line on the reported file"
5374        );
5375        assert_eq!(a.related, vec![PathBuf::from("records/contacts/b.md")]);
5376    }
5377
5378    #[test]
5379    fn dup_id_not_fired_in_working_set() {
5380        // DUP_* is an --all-only cross-file check; the working set must not run it.
5381        let fx = Fixture::new();
5382        fx.write(
5383            "records/contacts/a.md",
5384            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a\nname: A\n---\n\n# A\n",
5385        );
5386        fx.write(
5387            "records/contacts/b.md",
5388            "---\ntype: contact\nid: shared\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: b\nname: B\n---\n\n# B\n",
5389        );
5390        // Log says both changed since epoch, so they're in the working set.
5391        fx.write(
5392            "log.md",
5393            "---\ntype: log\n---\n\n## [2026-05-22 10:00] create | records/contacts/a\nx\n\n## [2026-05-22 10:01] create | records/contacts/b\nx\n",
5394        );
5395        let issues = validate_working_set(&fx.store(), None).unwrap();
5396        assert!(
5397            !has(&issues, codes::DUP_ID),
5398            "DUP_ID is --all only: {issues:#?}"
5399        );
5400    }
5401
5402    #[test]
5403    fn dup_unique_key_single_field_is_warning() {
5404        let mut fx = Fixture::new();
5405        // contact declares `- unique: email`.
5406        fx.config.schemas.insert(
5407            "contact".into(),
5408            Schema {
5409                unique_keys: vec![vec!["email".into()]],
5410                ..Default::default()
5411            },
5412        );
5413        for (f, name) in [("a", "A"), ("b", "B")] {
5414            fx.write(
5415                &format!("records/contacts/{f}.md"),
5416                &format!("---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: s\nname: {name}\nemail: dup@x.com\n---\n\n# {name}\n"),
5417            );
5418        }
5419        let issues = fx.store_all();
5420        // One issue per group (rule #1), keyed on the smallest path, anchored to
5421        // the single `email` field.
5422        assert_eq!(count(&issues, codes::DUP_UNIQUE_KEY), 1);
5423        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5424        assert_eq!(dup.severity, Severity::Warning);
5425        assert_eq!(dup.file, PathBuf::from("records/contacts/a.md"));
5426        assert_eq!(dup.key.as_deref(), Some("email"));
5427        assert_eq!(dup.related, vec![PathBuf::from("records/contacts/b.md")]);
5428    }
5429
5430    #[test]
5431    fn dup_unique_key_compound_and_clean_when_one_field_differs() {
5432        let mut fx = Fixture::new();
5433        // expense declares `- unique: date, amount, vendor` (a compound key).
5434        fx.config.schemas.insert(
5435            "expense".into(),
5436            Schema {
5437                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
5438                ..Default::default()
5439            },
5440        );
5441        fx.write("records/companies/acme.md", "---\ntype: company\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: c\nname: Acme\n---\n# A\n");
5442        let exp = |f: &str, amount: &str| {
5443            format!(
5444            "---\ntype: expense\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: e\ndate: 2026-05-01\namount: {amount}\nvendor: \"[[records/companies/acme]]\"\n---\n\n# {f}\n"
5445        )
5446        };
5447        fx.write("records/expenses/e1.md", &exp("e1", "100"));
5448        fx.write("records/expenses/e2.md", &exp("e2", "100"));
5449        fx.write("records/expenses/e3.md", &exp("e3", "200")); // different amount
5450        let issues = fx.store_all();
5451        // One issue for the e1+e2 group (rule #1), keyed on the smallest path
5452        // (e1) with e2 in `related`; e3 differs on amount and never appears.
5453        assert_eq!(
5454            count(&issues, codes::DUP_UNIQUE_KEY),
5455            1,
5456            "only e1+e2 collide, one issue: {issues:#?}"
5457        );
5458        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5459        assert_eq!(dup.file, PathBuf::from("records/expenses/e1.md"));
5460        assert_eq!(
5461            dup.line,
5462            Some(1),
5463            "compound-key collision anchors to line 1"
5464        );
5465        assert_eq!(dup.related, vec![PathBuf::from("records/expenses/e2.md")]);
5466        assert!(
5467            !issues.iter().any(|i| i.code == codes::DUP_UNIQUE_KEY
5468                && i.related.contains(&PathBuf::from("records/expenses/e3.md"))),
5469            "e3 differs on amount and must not collide: {issues:#?}"
5470        );
5471    }
5472
5473    #[test]
5474    fn dup_unique_key_list_field_is_order_independent() {
5475        let mut fx = Fixture::new();
5476        // meeting declares `- unique: date, attendees`; the list field is a set.
5477        fx.config.schemas.insert(
5478            "meeting".into(),
5479            Schema {
5480                unique_keys: vec![vec!["date".into(), "attendees".into()]],
5481                ..Default::default()
5482            },
5483        );
5484        fx.write("records/contacts/a.md", &valid_contact("a"));
5485        fx.write("records/contacts/b.md", &valid_contact("b"));
5486        let m = |f: &str, order: &str| {
5487            let attendees = if order == "ab" {
5488                "  - [[records/contacts/a]]\n  - [[records/contacts/b]]"
5489            } else {
5490                "  - [[records/contacts/b]]\n  - [[records/contacts/a]]"
5491            };
5492            format!(
5493                "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nattendees:\n{attendees}\n---\n\n# {f}\n"
5494            )
5495        };
5496        fx.write("records/meetings/m1.md", &m("m1", "ab"));
5497        fx.write("records/meetings/m2.md", &m("m2", "ba"));
5498        let issues = fx.store_all();
5499        // The attendee SET is order-independent, so m1 (ab) and m2 (ba) collide
5500        // → a single issue on the smaller path.
5501        assert_eq!(
5502            count(&issues, codes::DUP_UNIQUE_KEY),
5503            1,
5504            "same date + same attendee set (any order) collide as one issue: {issues:#?}"
5505        );
5506        let dup = find(&issues, codes::DUP_UNIQUE_KEY);
5507        assert_eq!(dup.file, PathBuf::from("records/meetings/m1.md"));
5508        assert_eq!(dup.related, vec![PathBuf::from("records/meetings/m2.md")]);
5509    }
5510
5511    // ── indexes ───────────────────────────────────────────────────────────────
5512
5513    #[test]
5514    fn missing_indexes_at_all_three_levels() {
5515        let fx = Fixture::new();
5516        fx.write("records/contacts/a.md", &valid_contact("a"));
5517        let issues = fx.store_all();
5518        // root, layer (records), and type-folder (records/contacts) all missing.
5519        // The type-folder INDEX_MISSING is keyed on the FOLDER path (not its
5520        // would-be index.md), per the field convention `EXPECTED` pins.
5521        let missing_files: BTreeSet<PathBuf> = issues
5522            .iter()
5523            .filter(|i| i.code == codes::INDEX_MISSING)
5524            .map(|i| i.file.clone())
5525            .collect();
5526        assert!(
5527            missing_files.contains(&PathBuf::from("index.md")),
5528            "{issues:#?}"
5529        );
5530        assert!(
5531            missing_files.contains(&PathBuf::from("records/index.md")),
5532            "{issues:#?}"
5533        );
5534        assert!(
5535            missing_files.contains(&PathBuf::from("records/contacts")),
5536            "{issues:#?}"
5537        );
5538        // When the index.md is entirely absent we do NOT additionally fire
5539        // INDEX_JSONL_MISSING — one INDEX_MISSING covers the folder (rule #4).
5540        assert!(!has(&issues, codes::INDEX_JSONL_MISSING), "{issues:#?}");
5541    }
5542
5543    #[test]
5544    fn index_stale_entry_and_missing_entry() {
5545        let fx = Fixture::new();
5546        fx.write(
5547            "records/contacts/present.md",
5548            &valid_contact("present contact"),
5549        );
5550        // Indexes for the parents (root/layer) present so we isolate type-folder.
5551        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5552        fx.write(
5553            "records/index.md",
5554            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5555        );
5556        // Type-folder index lists a GHOST (stale) and omits `present` (missing).
5557        fx.write(
5558            "records/contacts/index.md",
5559            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/ghost]] — gone\n",
5560        );
5561        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/present.md\",\"type\":\"contact\",\"summary\":\"present contact\"}\n");
5562        let issues = fx.store_all();
5563        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5564        assert!(stale.message.contains("ghost"));
5565        assert!(stale.is_error());
5566        let missing = find(&issues, codes::INDEX_MISSING_ENTRY);
5567        assert!(
5568            missing.message.contains("present.md"),
5569            "{}",
5570            missing.message
5571        );
5572    }
5573
5574    #[test]
5575    fn index_md_entry_with_traversal_path_is_stale_not_probe() {
5576        let fx = Fixture::new();
5577        fx.write("records/contacts/a.md", &valid_contact("a"));
5578        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5579        fx.write(
5580            "records/index.md",
5581            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5582        );
5583        fx.write(
5584            "records/contacts/index.md",
5585            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/../../ghost]] — unsafe\n",
5586        );
5587        fx.write(
5588            "records/contacts/index.jsonl",
5589            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5590        );
5591        let issues = fx.store_all();
5592        let stale = find(&issues, codes::INDEX_STALE_ENTRY);
5593        assert!(stale.message.contains("not a safe store-relative path"));
5594    }
5595
5596    #[test]
5597    fn index_summary_mismatch() {
5598        let fx = Fixture::new();
5599        fx.write("records/contacts/a.md", &valid_contact("the real summary"));
5600        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5601        fx.write(
5602            "records/index.md",
5603            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5604        );
5605        fx.write(
5606            "records/contacts/index.md",
5607            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a STALE summary\n",
5608        );
5609        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"the real summary\"}\n");
5610        let issues = fx.store_all();
5611        let issue = find(&issues, codes::INDEX_SUMMARY_MISMATCH);
5612        assert!(issue.is_error());
5613        assert_eq!(issue.related, vec![PathBuf::from("records/contacts/a.md")]);
5614    }
5615
5616    #[test]
5617    fn index_summary_match_passes() {
5618        let fx = Fixture::new();
5619        fx.write("records/contacts/a.md", &valid_contact("matching summary"));
5620        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5621        fx.write(
5622            "records/index.md",
5623            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5624        );
5625        fx.write(
5626            "records/contacts/index.md",
5627            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — matching summary\n",
5628        );
5629        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"matching summary\"}\n");
5630        let issues = fx.store_all();
5631        assert!(!has(&issues, codes::INDEX_SUMMARY_MISMATCH), "{issues:#?}");
5632    }
5633
5634    #[test]
5635    fn index_entry_with_tag_suffix_matches_summary() {
5636        let fx = Fixture::new();
5637        fx.write("records/contacts/a.md", &valid_contact("clean summary"));
5638        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5639        fx.write(
5640            "records/index.md",
5641            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5642        );
5643        // Entry carries the renderer's `  ·  #tag` suffix (the EXACT double-spaced
5644        // delimiter `crate::index::format_md_entry` emits for a tagged file),
5645        // which must be stripped before comparing against the file's summary.
5646        fx.write(
5647            "records/contacts/index.md",
5648            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — clean summary  ·  #customer\n",
5649        );
5650        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"clean summary\"}\n");
5651        let issues = fx.store_all();
5652        assert!(
5653            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5654            "tag suffix should be stripped: {issues:#?}"
5655        );
5656    }
5657
5658    #[test]
5659    fn index_entry_single_spaced_middot_tail_is_part_of_summary() {
5660        // Regression (the finding): a tagless file whose `summary` legitimately
5661        // ends in a single-spaced ` · #word` tail round-trips through `index
5662        // rebuild` verbatim (the renderer appends NO `  ·  #tag` block, since the
5663        // file has no tags). The validator must NOT mistake that single-spaced
5664        // tail for the renderer's tag suffix, or it reports a spurious — and
5665        // unfixable — INDEX_SUMMARY_MISMATCH on a freshly rebuilt store.
5666        let fx = Fixture::new();
5667        fx.write(
5668            "records/contacts/a.md",
5669            &valid_contact("Standup notes · #standup"),
5670        );
5671        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5672        fx.write(
5673            "records/index.md",
5674            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5675        );
5676        fx.write(
5677            "records/contacts/index.md",
5678            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — Standup notes · #standup\n",
5679        );
5680        fx.write("records/contacts/index.jsonl", "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"Standup notes · #standup\"}\n");
5681        let issues = fx.store_all();
5682        assert!(
5683            !has(&issues, codes::INDEX_SUMMARY_MISMATCH),
5684            "a single-spaced middot tail is part of the summary, not a tag block: {issues:#?}"
5685        );
5686    }
5687
5688    #[test]
5689    fn index_jsonl_desync_missing_file_in_jsonl() {
5690        let fx = Fixture::new();
5691        fx.write("records/contacts/a.md", &valid_contact("a"));
5692        fx.write("records/contacts/b.md", &valid_contact("b"));
5693        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (2 files)\n");
5694        fx.write(
5695            "records/index.md",
5696            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5697        );
5698        fx.write(
5699            "records/contacts/index.md",
5700            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n- [[records/contacts/b]] — b\n",
5701        );
5702        // jsonl only lists `a` → `b` is a desync (the twin must be complete).
5703        fx.write(
5704            "records/contacts/index.jsonl",
5705            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5706        );
5707        let issues = fx.store_all();
5708        let desync = find(&issues, codes::INDEX_JSONL_DESYNC);
5709        assert!(desync.message.contains("b.md"), "{}", desync.message);
5710    }
5711
5712    #[test]
5713    fn index_jsonl_desync_record_points_at_missing_file() {
5714        let fx = Fixture::new();
5715        fx.write("records/contacts/a.md", &valid_contact("a"));
5716        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5717        fx.write(
5718            "records/index.md",
5719            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5720        );
5721        fx.write(
5722            "records/contacts/index.md",
5723            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5724        );
5725        fx.write(
5726            "records/contacts/index.jsonl",
5727            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5728        );
5729        let issues = fx.store_all();
5730        assert!(
5731            issues
5732                .iter()
5733                .any(|i| i.code == codes::INDEX_JSONL_DESYNC && i.message.contains("ghost.md")),
5734            "{issues:#?}"
5735        );
5736    }
5737
5738    #[test]
5739    fn index_jsonl_record_with_traversal_path_is_desync_not_probe() {
5740        let fx = Fixture::new();
5741        fx.write("records/contacts/a.md", &valid_contact("a"));
5742        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5743        fx.write(
5744            "records/index.md",
5745            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5746        );
5747        fx.write(
5748            "records/contacts/index.md",
5749            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n",
5750        );
5751        fx.write(
5752            "records/contacts/index.jsonl",
5753            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n{\"path\":\"records/contacts/../../ghost.md\",\"type\":\"contact\",\"summary\":\"x\"}\n",
5754        );
5755        let issues = fx.store_all();
5756        assert!(
5757            issues.iter().any(|i| i.code == codes::INDEX_JSONL_DESYNC
5758                && i.message.contains("not a safe store-relative path")),
5759            "{issues:#?}"
5760        );
5761    }
5762
5763    #[test]
5764    fn index_jsonl_stale_summary() {
5765        let fx = Fixture::new();
5766        fx.write("records/contacts/a.md", &valid_contact("real summary"));
5767        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5768        fx.write(
5769            "records/index.md",
5770            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5771        );
5772        fx.write(
5773            "records/contacts/index.md",
5774            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — real summary\n",
5775        );
5776        // jsonl summary disagrees with the file frontmatter.
5777        fx.write(
5778            "records/contacts/index.jsonl",
5779            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"OUTDATED\"}\n",
5780        );
5781        let issues = fx.store_all();
5782        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5783        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5784        assert!(stale.key.as_deref().unwrap().contains("summary"));
5785    }
5786
5787    /// The whole point of `INDEX_JSONL_STALE`: a sidecar field the query/search
5788    /// path actually reads (`email`, `domain`, the `(date,amount,vendor)` dedup
5789    /// tuple, `tags`, `updated`, `links`, `company` …) that disagrees with the
5790    /// `.md` is STALE — even when `summary` and `type` are perfectly correct.
5791    /// Pre-fix the validator only diffed summary+type, so a sidecar with a wrong
5792    /// `email` validated clean and answered `--where email=…` with a phantom
5793    /// value present in no file. This is the direct regression guard.
5794    #[test]
5795    fn index_jsonl_stale_queryable_field_email() {
5796        let fx = Fixture::new();
5797        let contact = "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"a contact\"\nname: A\nemail: real@correct.com\n---\n\n# A\n";
5798        fx.write("records/contacts/a.md", contact);
5799        // Start from the canonical, fully-correct sidecar set …
5800        fx.rebuild_indexes();
5801        let jsonl_path = fx.dir.path().join("records/contacts/index.jsonl");
5802        let good = fs::read_to_string(&jsonl_path).unwrap();
5803        // sanity: the canonical store is clean (no STALE on a fresh rebuild).
5804        assert!(
5805            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5806            "freshly-rebuilt sidecar must not be stale"
5807        );
5808        // … then desync ONLY the email so it's the single differing field.
5809        assert!(
5810            good.contains("real@correct.com"),
5811            "sidecar projects email: {good}"
5812        );
5813        fx.write(
5814            "records/contacts/index.jsonl",
5815            &good.replace("real@correct.com", "STALE-WRONG@evil.com"),
5816        );
5817
5818        let issues = fx.store_all();
5819        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5820        assert_eq!(stale.related, vec![PathBuf::from("records/contacts/a.md")]);
5821        // The mismatch is reported precisely on `email`, and summary/type — which
5822        // still match — are NOT named.
5823        let key = stale.key.as_deref().unwrap();
5824        assert!(
5825            key.contains("email"),
5826            "expected `email` in stale key, got {key:?}"
5827        );
5828        assert!(!key.contains("summary"), "summary still matches: {key:?}");
5829        assert!(!key.contains("type"), "type still matches: {key:?}");
5830    }
5831
5832    /// Broaden the guard across the typed/list/timestamp projections at once:
5833    /// a wrong `tags`, `updated`, and a custom dedup field (`amount`) are each
5834    /// caught, with all three named in one issue.
5835    #[test]
5836    fn index_jsonl_stale_typed_and_list_fields() {
5837        let fx = Fixture::new();
5838        let expense = "---\ntype: expense\ncreated: 2026-05-20T08:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: \"office chairs\"\ntags: [furniture, q2]\namount: 1299\nvendor: Acme\ndate: 2026-05-20\n---\n\n# Expense\n";
5839        fx.write("records/expenses/e.md", expense);
5840        fx.rebuild_indexes();
5841        let jsonl_path = fx.dir.path().join("records/expenses/index.jsonl");
5842        let good = fs::read_to_string(&jsonl_path).unwrap();
5843        assert!(
5844            !has(&fx.store_all(), codes::INDEX_JSONL_STALE),
5845            "freshly-rebuilt sidecar must not be stale"
5846        );
5847        // Desync a list field (tags), a timestamp (updated), and a number (amount).
5848        let stale_line = good
5849            .replace("\"q2\"", "\"WRONG-TAG\"")
5850            .replace("2026-05-22T10:00:00-07:00", "2099-01-01T00:00:00-07:00")
5851            .replace("1299", "9999");
5852        fx.write("records/expenses/index.jsonl", &stale_line);
5853
5854        let issues = fx.store_all();
5855        let stale = find(&issues, codes::INDEX_JSONL_STALE);
5856        let key = stale.key.as_deref().unwrap();
5857        for expected in ["amount", "tags", "updated"] {
5858            assert!(
5859                key.contains(expected),
5860                "expected `{expected}` in stale key, got {key:?}"
5861            );
5862        }
5863    }
5864
5865    #[test]
5866    fn index_orphan_in_noncanonical_folder() {
5867        let fx = Fixture::new();
5868        fx.write("records/contacts/a.md", &valid_contact("a"));
5869        // Build the canonical indexes so they aren't reported as orphans.
5870        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5871        fx.write(
5872            "records/index.md",
5873            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5874        );
5875        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5876        fx.write(
5877            "records/contacts/index.jsonl",
5878            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5879        );
5880        // An index.md inside a sub-sub-folder (operator territory) is an orphan.
5881        fx.write(
5882            "records/contacts/subfolder/index.md",
5883            "---\ntype: index\nscope: type-folder\n---\n\n# stray\n",
5884        );
5885        let issues = fx.store_all();
5886        let orphan = find(&issues, codes::INDEX_ORPHAN);
5887        assert_eq!(orphan.severity, Severity::Warning);
5888        assert_eq!(
5889            orphan.file,
5890            PathBuf::from("records/contacts/subfolder/index.md")
5891        );
5892    }
5893
5894    #[test]
5895    fn index_wrong_scope() {
5896        let fx = Fixture::new();
5897        fx.write("records/contacts/a.md", &valid_contact("a"));
5898        // Root index declares the wrong scope.
5899        fx.write("index.md", "---\ntype: index\nscope: layer\n---\n\n## Records\n- [[records/contacts/index|C]] (1 files)\n");
5900        fx.write(
5901            "records/index.md",
5902            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5903        );
5904        fx.write("records/contacts/index.md", "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/a]] — a\n");
5905        fx.write(
5906            "records/contacts/index.jsonl",
5907            "{\"path\":\"records/contacts/a.md\",\"type\":\"contact\",\"summary\":\"a\"}\n",
5908        );
5909        let issues = fx.store_all();
5910        let issue = find(&issues, codes::INDEX_WRONG_SCOPE);
5911        assert_eq!(issue.severity, Severity::Warning);
5912        assert_eq!(issue.file, PathBuf::from("index.md"));
5913    }
5914
5915    #[test]
5916    fn capped_type_folder_index_does_not_flag_missing_entries() {
5917        // Over the 500-entry cap, omitted entries are expected, not an error.
5918        let fx = Fixture::new();
5919        for i in 0..501 {
5920            fx.write(
5921                &format!("records/contacts/c{i:04}.md"),
5922                &valid_contact(&format!("contact {i}")),
5923            );
5924        }
5925        fx.write("index.md", "---\ntype: index\nscope: root\n---\n\n## Records\n- [[records/contacts/index|C]] (501 files)\n");
5926        fx.write(
5927            "records/index.md",
5928            "---\ntype: index\nscope: layer\nfolder: records\n---\n# r\n",
5929        );
5930        // Type-folder index lists only ONE entry + a More footer.
5931        fx.write(
5932            "records/contacts/index.md",
5933            "---\ntype: index\nscope: type-folder\nfolder: records/contacts\n---\n\n- [[records/contacts/c0000]] — contact 0\n\n## More\n\nThis folder has 501 files.\n",
5934        );
5935        // jsonl must still be complete — write all 501 lines.
5936        let mut jsonl = String::new();
5937        for i in 0..501 {
5938            jsonl.push_str(&format!(
5939                "{{\"path\":\"records/contacts/c{i:04}.md\",\"type\":\"contact\",\"summary\":\"contact {i}\"}}\n"
5940            ));
5941        }
5942        fx.write("records/contacts/index.jsonl", &jsonl);
5943        let issues = fx.store_all();
5944        assert!(
5945            !has(&issues, codes::INDEX_MISSING_ENTRY),
5946            "over the cap, missing browse entries are expected: {issues:#?}"
5947        );
5948        // But the jsonl is complete → no desync.
5949        assert!(
5950            !has(&issues, codes::INDEX_JSONL_DESYNC),
5951            "{:#?}",
5952            issues
5953                .iter()
5954                .filter(|i| i.code == codes::INDEX_JSONL_DESYNC)
5955                .collect::<Vec<_>>()
5956        );
5957    }
5958
5959    // ── log ────────────────────────────────────────────────────────────────
5960
5961    #[test]
5962    fn log_bad_timestamp_unknown_kind_out_of_order() {
5963        let fx = Fixture::new();
5964        fx.write(
5965            "log.md",
5966            concat!(
5967                "---\ntype: log\n---\n\n# Log\n\n",
5968                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
5969                "## [2026-05-27 09:00] update | records/contacts/b\nx\n\n", // out of order
5970                "## [2026-05-27 11:00] frobnicate | records/contacts/c\nx\n\n", // unknown kind
5971                "## [not-a-date] create | records/contacts/d\nx\n",         // bad timestamp
5972            ),
5973        );
5974        let issues = fx.store_all();
5975        assert!(has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
5976        assert_eq!(
5977            find(&issues, codes::LOG_OUT_OF_ORDER).severity,
5978            Severity::Warning
5979        );
5980        let unknown = find(&issues, codes::LOG_UNKNOWN_KIND);
5981        assert_eq!(unknown.severity, Severity::Warning);
5982        assert!(unknown.message.contains("frobnicate"));
5983        assert!(unknown
5984            .suggestion
5985            .as_deref()
5986            .is_some_and(|s| s.contains("create")));
5987        let bad = find(&issues, codes::LOG_BAD_TIMESTAMP);
5988        assert!(bad.is_error());
5989    }
5990
5991    #[test]
5992    fn log_validate_entry_without_object_is_well_formed() {
5993        let fx = Fixture::new();
5994        fx.write(
5995            "log.md",
5996            "---\ntype: log\n---\n\n## [2026-05-27 10:00] validate\nPASS\n",
5997        );
5998        let issues = fx.store_all();
5999        assert!(!has(&issues, codes::LOG_BAD_TIMESTAMP), "{issues:#?}");
6000        assert!(!has(&issues, codes::LOG_UNKNOWN_KIND), "{issues:#?}");
6001    }
6002
6003    #[test]
6004    fn log_in_order_is_clean() {
6005        let fx = Fixture::new();
6006        fx.write(
6007            "log.md",
6008            concat!(
6009                "---\ntype: log\n---\n\n",
6010                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
6011                "## [2026-05-27 10:05] update | records/contacts/a\nx\n",
6012            ),
6013        );
6014        let issues = fx.store_all();
6015        assert!(!has(&issues, codes::LOG_OUT_OF_ORDER), "{issues:#?}");
6016    }
6017
6018    #[test]
6019    fn log_not_checked_in_working_set() {
6020        // log.md ordering is an --all-only check.
6021        let fx = Fixture::new();
6022        fx.write(
6023            "log.md",
6024            concat!(
6025                "---\ntype: log\n---\n\n",
6026                "## [2026-05-27 10:00] create | records/contacts/a\nx\n\n",
6027                "## [2026-05-27 09:00] update | records/contacts/a\nx\n",
6028            ),
6029        );
6030        let issues = validate_working_set(&fx.store(), None).unwrap();
6031        assert!(
6032            !has(&issues, codes::LOG_OUT_OF_ORDER),
6033            "log ordering is --all only: {issues:#?}"
6034        );
6035    }
6036
6037    // ── working-set scoping ───────────────────────────────────────────────────
6038
6039    #[test]
6040    fn working_set_validates_only_changed_files() {
6041        let fx = Fixture::new();
6042        // `dirty` has a bad timestamp; `clean_but_unlogged` also does but is NOT
6043        // in the log → working set must skip it.
6044        fx.write(
6045            "records/contacts/dirty.md",
6046            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6047        );
6048        fx.write(
6049            "records/contacts/unlogged.md",
6050            "---\ntype: contact\ncreated: ALSO-BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
6051        );
6052        fx.write(
6053            "log.md",
6054            "---\ntype: log\n---\n\n## [2026-05-22 10:00] update | records/contacts/dirty\nedited\n",
6055        );
6056        let issues = validate_working_set(&fx.store(), None).unwrap();
6057        assert!(
6058            issues.iter().any(|i| i.code == codes::FM_BAD_TIMESTAMP
6059                && i.file == Path::new("records/contacts/dirty.md")),
6060            "{issues:#?}"
6061        );
6062        assert!(
6063            !issues
6064                .iter()
6065                .any(|i| i.file == Path::new("records/contacts/unlogged.md")),
6066            "unlogged file must not be in the working set: {issues:#?}"
6067        );
6068    }
6069
6070    #[test]
6071    fn working_set_includes_incoming_linkers_to_changed_path() {
6072        let fx = Fixture::new();
6073        // `changed` was renamed/removed (logged). `linker` points at it with a
6074        // now-broken link and was NOT itself logged — but must be pulled in.
6075        fx.write(
6076            "records/profiles/linker.md",
6077            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: links to a removed page\n---\n\nSee [[records/contacts/changed]].\n",
6078        );
6079        // `changed.md` does NOT exist on disk (removed).
6080        fx.write(
6081            "log.md",
6082            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/changed\nremoved\n",
6083        );
6084        let issues = validate_working_set(&fx.store(), None).unwrap();
6085        assert!(
6086            issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN
6087                && i.file == Path::new("records/profiles/linker.md")),
6088            "incoming linker to a removed path must be validated: {issues:#?}"
6089        );
6090    }
6091
6092    #[test]
6093    fn working_set_respects_explicit_since_cutoff() {
6094        let fx = Fixture::new();
6095        fx.write(
6096            "records/contacts/old.md",
6097            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6098        );
6099        fx.write(
6100            "records/contacts/new.md",
6101            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
6102        );
6103        fx.write(
6104            "log.md",
6105            concat!(
6106                "---\ntype: log\n---\n\n",
6107                "## [2026-05-20 10:00] update | records/contacts/old\nx\n\n",
6108                "## [2026-05-25 10:00] update | records/contacts/new\nx\n",
6109            ),
6110        );
6111        // Cutoff after `old` but before `new`.
6112        let since = DateTime::parse_from_rfc3339("2026-05-22T00:00:00+00:00").unwrap();
6113        let issues = validate_working_set(&fx.store(), Some(since)).unwrap();
6114        assert!(
6115            issues
6116                .iter()
6117                .any(|i| i.file == Path::new("records/contacts/new.md")),
6118            "{issues:#?}"
6119        );
6120        assert!(
6121            !issues
6122                .iter()
6123                .any(|i| i.file == Path::new("records/contacts/old.md")),
6124            "old change is before the cutoff: {issues:#?}"
6125        );
6126    }
6127
6128    #[test]
6129    fn working_set_default_since_is_last_validate_entry() {
6130        let fx = Fixture::new();
6131        // `before` changed before the last validate; `after` changed after.
6132        fx.write(
6133            "records/contacts/before.md",
6134            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6135        );
6136        fx.write(
6137            "records/contacts/after.md",
6138            "---\ntype: contact\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: B\n---\n\n# B\n",
6139        );
6140        fx.write(
6141            "log.md",
6142            concat!(
6143                "---\ntype: log\n---\n\n",
6144                "## [2026-05-20 10:00] update | records/contacts/before\nx\n\n",
6145                "## [2026-05-21 10:00] validate\nPASS\n\n",
6146                "## [2026-05-22 10:00] update | records/contacts/after\nx\n",
6147            ),
6148        );
6149        let issues = validate_working_set(&fx.store(), None).unwrap();
6150        assert!(
6151            issues
6152                .iter()
6153                .any(|i| i.file == Path::new("records/contacts/after.md")),
6154            "{issues:#?}"
6155        );
6156        assert!(
6157            !issues
6158                .iter()
6159                .any(|i| i.file == Path::new("records/contacts/before.md")),
6160            "change before the last validate entry is outside the default window: {issues:#?}"
6161        );
6162    }
6163
6164    // ── ordering / determinism ────────────────────────────────────────────────
6165
6166    #[test]
6167    fn issues_are_sorted_by_file_then_line() {
6168        let fx = Fixture::new();
6169        fx.write("records/profiles/z.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
6170        fx.write("records/profiles/a.md", "---\ntype: profile\nmeta-type: conclusion\ncreated: BAD\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nbody\n");
6171        let issues = fx.store_all();
6172        let files: Vec<&PathBuf> = issues.iter().map(|i| &i.file).collect();
6173        let mut sorted = files.clone();
6174        sorted.sort();
6175        assert_eq!(
6176            files, sorted,
6177            "issues must be emitted in a stable file order"
6178        );
6179    }
6180
6181    // ── boundaries: codes validate must NOT emit ──────────────────────────────
6182
6183    #[test]
6184    fn frozen_page_is_not_a_validate_error() {
6185        // POLICY_FROZEN_PAGE is a *write-time* refusal, never a validate finding.
6186        // A clean file listed in `### Frozen pages` must validate clean.
6187        let mut fx = Fixture::new();
6188        fx.config
6189            .frozen_pages
6190            .push(PathBuf::from("records/decisions/d.md"));
6191        fx.write(
6192            "records/decisions/d.md",
6193            "---\ntype: decision\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a finalized decision\n---\n\n# D\n",
6194        );
6195        let issues = fx.store_all();
6196        assert!(
6197            !has(&issues, codes::POLICY_FROZEN_PAGE),
6198            "frozen pages are enforced at write-time, not by validate: {issues:#?}"
6199        );
6200    }
6201
6202    #[test]
6203    fn wiki_link_ambiguous_is_never_emitted_under_full_path_doctrine() {
6204        // The full-path doctrine makes ambiguity impossible; the defensive code
6205        // must never fire on a normal store.
6206        let fx = Fixture::new();
6207        fx.write("records/contacts/sarah-chen.md", &valid_contact("sarah"));
6208        let mut body = valid_contact("links to sarah");
6209        body.push_str("\nSee [[records/contacts/sarah-chen]].\n");
6210        fx.write("records/contacts/p.md", &body);
6211        let issues = fx.store_all();
6212        assert!(!has(&issues, codes::WIKI_LINK_AMBIGUOUS), "{issues:#?}");
6213    }
6214
6215    // ── unknown-type / unknown-field passthrough ──────────────────────────────
6216
6217    #[test]
6218    fn unknown_type_passes_through() {
6219        // A custom type is ambient context: it has a `type`, so no
6220        // FM_MISSING_TYPE, and with no matching schema there are no schema
6221        // errors. Only the universal contract (summary, timestamps) applies.
6222        let fx = Fixture::new();
6223        fx.write(
6224            "records/proposals/x.md",
6225            "---\ntype: proposal\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: a proposal\ncustom_field: anything\nbudget: 5000\n---\n\n# Proposal\n",
6226        );
6227        let issues = fx.store_all();
6228        assert!(!has(&issues, codes::FM_MISSING_TYPE), "{issues:#?}");
6229        assert!(!has(&issues, codes::SCHEMA_MISSING_REQUIRED), "{issues:#?}");
6230        assert!(!has(&issues, codes::SCHEMA_SHAPE_MISMATCH), "{issues:#?}");
6231        // The unknown fields don't trip anything.
6232        assert!(
6233            !issues
6234                .iter()
6235                .any(|i| i.key.as_deref() == Some("custom_field")
6236                    || i.key.as_deref() == Some("budget")),
6237            "unknown fields are ambient context: {issues:#?}"
6238        );
6239    }
6240
6241    // ── find_links_to prefix-collision safety (working set) ───────────────────
6242
6243    #[test]
6244    fn incoming_linker_scan_does_not_prefix_match() {
6245        // A changed `records/contacts/sarah` must NOT pull in a file that only
6246        // links to `records/contacts/sarah-chen` (a longer path sharing a prefix).
6247        let fx = Fixture::new();
6248        fx.write(
6249            "records/profiles/only-sarah-chen.md",
6250            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6251        );
6252        // The log says `records/contacts/sarah` (the shorter path) changed.
6253        fx.write(
6254            "log.md",
6255            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah\nremoved\n",
6256        );
6257        let issues = validate_working_set(&fx.store(), None).unwrap();
6258        assert!(
6259            !issues
6260                .iter()
6261                .any(|i| i.file == Path::new("records/profiles/only-sarah-chen.md")),
6262            "a prefix-sharing link must not pull a file into the working set: {issues:#?}"
6263        );
6264    }
6265
6266    #[test]
6267    fn working_set_does_not_flag_stale_catalog_index_as_wiki_link_broken() {
6268        // The working-set incoming-linker scan rides embedded-ripgrep
6269        // `Store::find_links_to`, which scans EVERY `.md` — so a type-folder
6270        // `index.md` listing a now-deleted target IS pulled into the working set.
6271        // But its entries are GENERATED catalog entries, not authored body links:
6272        // a dangling one is an `INDEX_STALE_ENTRY` ("run `dbmd index rebuild`"),
6273        // the job of `check_indexes` under `--all` — NOT a `WIKI_LINK_BROKEN`
6274        // ("create the target"), whose remedy would steer an agent to recreate
6275        // the very data it just deleted. The loop default must therefore NOT
6276        // body-link-check the derived catalog (index integrity is an O(store)
6277        // sweep concern, not an O(changed) loop concern). Adversarial review #11:
6278        // the prior behavior gave WIKI_LINK_BROKEN here while `--all` gave
6279        // INDEX_STALE_ENTRY for the identical condition — two codes, opposite
6280        // remedies, across the loop default vs the sweep.
6281        let fx = Fixture::new();
6282        // A catalog that still lists the deleted contact (a real, common stale
6283        // state after an out-of-band `delete`).
6284        fx.write(
6285            "records/contacts/index.md",
6286            "---\ntype: index\n---\n\n- [[records/contacts/sarah-chen]] — Sarah Chen\n",
6287        );
6288        // The log says `records/contacts/sarah-chen` was deleted.
6289        fx.write(
6290            "log.md",
6291            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n",
6292        );
6293        let issues = validate_working_set(&fx.store(), None).unwrap();
6294        assert!(
6295            !issues
6296                .iter()
6297                .any(|i| i.file == Path::new("records/contacts/index.md")
6298                    && i.code == codes::WIKI_LINK_BROKEN),
6299            "a stale catalog `index.md` entry must NOT be WIKI_LINK_BROKEN in the \
6300             working set (it is an INDEX_STALE_ENTRY under `--all`): {issues:#?}"
6301        );
6302    }
6303
6304    #[test]
6305    fn incoming_linker_scan_covers_the_whole_changed_set_in_one_pass() {
6306        // CONTRACT (the O(changed × store) fix): the working-set scan finds
6307        // incoming linkers for EVERY changed object, and does so via the single
6308        // batch pass `Store::find_links_to_any` — not one full store read per
6309        // changed object. This test pins the behavior that makes the single-pass
6310        // correct: with two DISTINCT deleted targets, the linker to EACH is pulled
6311        // into the working set and flagged. A regression that scanned for only the
6312        // first/last changed object, or that dropped the batch union, would leave
6313        // one of the two broken links unreported and fail here.
6314        let fx = Fixture::new();
6315        // Linker A → deleted target #1 (in the body).
6316        fx.write(
6317            "records/profiles/refers-sarah.md",
6318            "---\ntype: profile\nmeta-type: conclusion\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\n---\n\nSee [[records/contacts/sarah-chen]].\n",
6319        );
6320        // Linker B → deleted target #2 (in a typed frontmatter field — an edge the
6321        // sidecar `links` projection would miss, which is why this must be a
6322        // content scan, not a sidecar read).
6323        fx.write(
6324            "records/meetings/2026/05/kickoff.md",
6325            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\ncompany: \"[[records/companies/acme]]\"\n---\n\n# Kickoff\n",
6326        );
6327        // The log says BOTH targets were deleted in this window.
6328        fx.write(
6329            "log.md",
6330            "---\ntype: log\n---\n\n## [2026-05-22 10:00] delete | records/contacts/sarah-chen\nremoved\n\n## [2026-05-22 10:05] delete | records/companies/acme\nremoved\n",
6331        );
6332
6333        let issues = validate_working_set(&fx.store(), None).unwrap();
6334        assert!(
6335            issues
6336                .iter()
6337                .any(|i| i.file == Path::new("records/profiles/refers-sarah.md")
6338                    && i.code == codes::WIKI_LINK_BROKEN),
6339            "linker to the FIRST deleted target must be pulled in and flagged: {issues:#?}"
6340        );
6341        assert!(
6342            issues.iter().any(
6343                |i| i.file == Path::new("records/meetings/2026/05/kickoff.md")
6344                    && i.code == codes::WIKI_LINK_BROKEN
6345            ),
6346            "linker to the SECOND deleted target (typed-field edge) must also be \
6347             pulled in and flagged — proves the scan covers the whole changed set, \
6348             not just one object: {issues:#?}"
6349        );
6350    }
6351
6352    #[test]
6353    fn frontmatter_block_sequence_links_each_get_their_own_line() {
6354        // Each block-sequence wiki-link reports on its own source line.
6355        let fx = Fixture::new();
6356        // Neither target exists → two WIKI_LINK_BROKEN, on different lines.
6357        fx.write(
6358            "records/meetings/m.md",
6359            "---\ntype: meeting\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: m\ndate: 2026-05-01\nparticipants:\n  - [[records/contacts/ghost1]]\n  - [[records/contacts/ghost2]]\n---\n\n# M\n",
6360        );
6361        let issues = fx.store_all();
6362        let broken_lines: BTreeSet<Option<u32>> = issues
6363            .iter()
6364            .filter(|i| i.code == codes::WIKI_LINK_BROKEN)
6365            .map(|i| i.line)
6366            .collect();
6367        assert_eq!(
6368            broken_lines.len(),
6369            2,
6370            "two distinct broken-link lines: {issues:#?}"
6371        );
6372    }
6373
6374    // ── Regression: null / non-scalar created/updated ────────────────────────
6375
6376    #[test]
6377    fn null_created_is_missing_not_silently_passed() {
6378        // Regression: a present-but-`null` `created:` previously slipped past
6379        // both FM_MISSING_CREATED (only `!contains_key` was checked) and
6380        // FM_BAD_TIMESTAMP (`scalar_string(null)` is None → branch no-oped).
6381        let fx = Fixture::new();
6382        fx.write(
6383            "records/contacts/a.md",
6384            "---\ntype: contact\ncreated:\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6385        );
6386        let issues = fx.store_all();
6387        assert!(
6388            has(&issues, codes::FM_MISSING_CREATED),
6389            "null `created:` must read as missing: {issues:#?}"
6390        );
6391    }
6392
6393    #[test]
6394    fn sequence_created_is_bad_timestamp() {
6395        // A non-scalar `created: [2026]` is not a timestamp string → FM_BAD_TIMESTAMP.
6396        let fx = Fixture::new();
6397        fx.write(
6398            "records/contacts/a.md",
6399            "---\ntype: contact\ncreated: [2026]\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\n# A\n",
6400        );
6401        let issues = fx.store_all();
6402        assert!(
6403            issues
6404                .iter()
6405                .any(|i| i.code == codes::FM_BAD_TIMESTAMP && i.key.as_deref() == Some("created")),
6406            "a sequence `created:` must be FM_BAD_TIMESTAMP: {issues:#?}"
6407        );
6408    }
6409
6410    // ── Regression: schema required null / empty-collection ──────────────────
6411
6412    #[test]
6413    fn required_field_null_or_empty_collection_is_missing() {
6414        // Regression: a plain required field (no shape/enum) holding YAML null
6415        // (`name:`), an empty list (`name: []`), or an empty mapping (`name: {}`)
6416        // previously validated with 0 issues — `scalar_string` returned None and
6417        // `.unwrap_or(false)` treated the value as non-empty.
6418        for value in ["", " []", " {}"] {
6419            let mut fx = Fixture::new();
6420            fx.config.schemas.insert(
6421                "contact".into(),
6422                Schema {
6423                    fields: vec![FieldSpec {
6424                        name: "name".into(),
6425                        required: true,
6426                        ..Default::default()
6427                    }],
6428                    ..Default::default()
6429                },
6430            );
6431            fx.write(
6432                "records/contacts/a.md",
6433                &format!(
6434                    "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname:{value}\n---\n\n# A\n"
6435                ),
6436            );
6437            let issues = fx.store_all();
6438            assert!(
6439                issues
6440                    .iter()
6441                    .any(|i| i.code == codes::SCHEMA_MISSING_REQUIRED
6442                        && i.key.as_deref() == Some("name")),
6443                "required `name:{value}` must be SCHEMA_MISSING_REQUIRED: {issues:#?}"
6444            );
6445        }
6446    }
6447
6448    // ── Regression: WIKI_LINK_BROKEN on raw source files ─────────────────────
6449
6450    #[test]
6451    fn wiki_link_to_raw_source_file_resolves() {
6452        // Regression: a body link to a raw `.eml`/`.pdf` source kept verbatim
6453        // under `sources/` was flagged WIKI_LINK_BROKEN because the existence
6454        // probe only ever stat'd `{bare}.md`. It must resolve the literal path.
6455        let fx = Fixture::new();
6456        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6457        fx.write(
6458            "records/contacts/a.md",
6459            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-elena.eml]] for context.\n",
6460        );
6461        let issues = fx.store_all();
6462        assert!(
6463            !issues.iter().any(|i| i.code == codes::WIKI_LINK_BROKEN),
6464            "a link to an existing raw source file must not be broken: {issues:#?}"
6465        );
6466    }
6467
6468    // ── Regression: wrong-case wiki-link must be platform-independent ─────────
6469
6470    #[test]
6471    fn wrong_case_wiki_link_is_broken_exact_case() {
6472        // Regression (cross-platform false-negative): on case-insensitive
6473        // APFS/macOS, `Path::is_file()` resolves `[[records/contacts/BOB]]` to the
6474        // on-disk `bob.md`, so validate passed — but on case-sensitive Linux that
6475        // file does not exist (WIKI_LINK_BROKEN). Existence resolution is now
6476        // exact-case, so a wrong-case target is flagged on every platform.
6477        let fx = Fixture::new();
6478        fx.write("records/contacts/bob.md", &valid_contact("Bob"));
6479        let mut body = valid_contact("links with the wrong case");
6480        body.push_str("\nKnows [[records/contacts/BOB]].\n");
6481        fx.write("records/contacts/alice.md", &body);
6482        let issues = fx.store_all();
6483        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
6484        assert!(issue.is_error());
6485        assert!(
6486            issue.message.contains("records/contacts/BOB"),
6487            "the wrong-case target must be named in the issue: {issues:#?}"
6488        );
6489    }
6490
6491    #[test]
6492    fn correct_case_wiki_link_still_resolves() {
6493        // The companion to the exact-case fix: a *correct*-case lowercase link to
6494        // the same on-disk file must STILL resolve clean. Only a genuine case
6495        // mismatch is newly flagged; correct case is never a false positive.
6496        let fx = Fixture::new();
6497        fx.write("records/contacts/bob.md", &valid_contact("Bob"));
6498        let mut body = valid_contact("links with the right case");
6499        body.push_str("\nKnows [[records/contacts/bob]].\n");
6500        fx.write("records/contacts/alice.md", &body);
6501        let issues = fx.store_all();
6502        assert!(
6503            !issues
6504                .iter()
6505                .any(|i| i.code == codes::WIKI_LINK_BROKEN && i.message.contains("contacts/bob")),
6506            "a correct-case link must resolve clean: {issues:#?}"
6507        );
6508    }
6509
6510    #[test]
6511    fn wrong_case_raw_source_wiki_link_is_broken() {
6512        // The literal-path candidate (raw `.eml`/`.pdf` sources kept verbatim)
6513        // gets the same exact-case treatment as the `.md`-appended candidate: a
6514        // wrong-case link to a raw source is broken on a case-sensitive host, so
6515        // it must flag on macOS too.
6516        let fx = Fixture::new();
6517        fx.write("sources/emails/2026-05-22-elena.eml", "raw email bytes\n");
6518        fx.write(
6519            "records/contacts/a.md",
6520            "---\ntype: contact\ncreated: 2026-05-22T10:00:00-07:00\nupdated: 2026-05-22T10:00:00-07:00\nsummary: x\nname: A\n---\n\nSee [[sources/emails/2026-05-22-ELENA.eml]] for context.\n",
6521        );
6522        let issues = fx.store_all();
6523        let issue = find(&issues, codes::WIKI_LINK_BROKEN);
6524        assert!(issue.is_error());
6525        assert!(
6526            issue.message.contains("2026-05-22-ELENA.eml"),
6527            "the wrong-case raw-source target must be flagged: {issues:#?}"
6528        );
6529    }
6530
6531    // ── Regression: unreadable (non-UTF-8) content file ──────────────────────
6532
6533    #[test]
6534    fn non_utf8_content_file_is_reported() {
6535        // Regression: a content file with invalid UTF-8 bytes made
6536        // check_content_file return None silently, so the store passed with exit
6537        // 0. It must surface FM_UNREADABLE instead of passing vacuously.
6538        let fx = Fixture::new();
6539        let abs = fx.dir.path().join("records/notes/corrupt.md");
6540        fs::create_dir_all(abs.parent().unwrap()).unwrap();
6541        fs::write(&abs, [0xFF, 0xFE, 0x00, 0x01]).unwrap();
6542        let issues = validate_working_set(&fx.store(), None).unwrap();
6543        assert!(
6544            has(&issues, codes::FM_UNREADABLE),
6545            "an unreadable content file must be reported, not silently skipped: {issues:#?}"
6546        );
6547    }
6548
6549    // ── Regression: code-fence char/run tracking ─────────────────────────────
6550
6551    #[test]
6552    fn tilde_fence_containing_backtick_fence_does_not_invert() {
6553        // Regression: a `~~~` block legally contains ``` lines (documenting a
6554        // backtick fence); a naive toggle inverted `in_fence` and checked the
6555        // demo `[[fake]]` inside the code block as a live link. The link inside
6556        // BOTH fences must be skipped.
6557        let body = "~~~markdown\n```\n[[fake-link]]\n```\n~~~\n";
6558        let links = extract_wiki_links(body);
6559        assert!(
6560            links.is_empty(),
6561            "wiki-link inside a nested code fence must be skipped: {links:?}"
6562        );
6563    }
6564
6565    // ── Regression: --all skips in-layer `log/` folder ───────────────────────
6566
6567    #[test]
6568    fn all_sweep_visits_in_layer_log_folder() {
6569        // Regression: `validate --all` pruned every dir named `log`, so a real
6570        // content folder like `records/log/` was invisible to the full sweep —
6571        // reporting FEWER errors than the default scope. A frontmatter-less file
6572        // there must still surface FM_MISSING_TYPE under --all.
6573        let fx = Fixture::new();
6574        fx.write("records/log/2026-06-01-pricing.md", "no frontmatter here\n");
6575        let issues = fx.store_all();
6576        assert!(
6577            has(&issues, codes::FM_MISSING_TYPE),
6578            "--all must validate files under an in-layer `log/` folder: {issues:#?}"
6579        );
6580    }
6581
6582    // ── Regression: flow-form list with whitespace ───────────────────────────
6583
6584    #[test]
6585    fn flow_form_link_list_with_spaces_is_flagged() {
6586        // Regression: `attendees: [ [[a]] ]` parses to the same nested-sequence
6587        // mis-encoding as `[[[a]]]` but evaded the literal `starts_with("[[[")`
6588        // text test. The value-based detector must catch the whitespace variant.
6589        let keys = detect_flow_form_link_lists("attendees: [ [[records/contacts/elena]] ]\n");
6590        assert!(
6591            keys.iter().any(|k| k == "attendees"),
6592            "spaced flow-form list must be detected: {keys:?}"
6593        );
6594    }
6595
6596    // ── Regression: INDEX_SUMMARY_MISMATCH middot tail ───────────────────────
6597
6598    #[test]
6599    fn middot_hashtag_summary_tail_round_trips() {
6600        // Regression: a tagless summary that legitimately ends in a single-spaced
6601        // ` · #word` tail round-trips through the renderer verbatim, but the loose
6602        // ` · ` strip mistook it for the tag block and reported a spurious,
6603        // unfixable INDEX_SUMMARY_MISMATCH. The strip must use the renderer's
6604        // exact double-spaced `  ·  ` delimiter.
6605        assert_eq!(
6606            extract_index_entry_summary("— Standup notes · #standup").as_deref(),
6607            Some("Standup notes · #standup"),
6608            "a single-spaced middot tail is part of the summary, not a tag block"
6609        );
6610        // The renderer's real double-spaced tag suffix IS still stripped.
6611        assert_eq!(
6612            extract_index_entry_summary("— Renewal champion  ·  #renewal #acme").as_deref(),
6613            Some("Renewal champion"),
6614            "the renderer's double-spaced `  ·  #tag` suffix is stripped"
6615        );
6616    }
6617
6618    // ── Regression: shape Url / Email edge cases ─────────────────────────────
6619
6620    #[test]
6621    fn url_shape_accepts_short_http_and_rejects_bare_scheme() {
6622        assert!(is_url("http://x"), "an 8-char http URL is valid");
6623        assert!(is_url("https://x"), "a 9-char https URL is valid");
6624        assert!(!is_url("http://"), "a bare scheme with no host is rejected");
6625        assert!(!is_url("https://"), "a bare https scheme is rejected");
6626    }
6627
6628    #[test]
6629    fn email_shape_rejects_double_at() {
6630        assert!(!is_email("sarah@@acme.com"), "double-@ domain is rejected");
6631        assert!(!is_email("a@b@c.com"), "two @ signs are rejected");
6632        assert!(is_email("sarah@acme.com"), "a normal address still passes");
6633    }
6634
6635    // ── Regression: working-set vs --all agree on log.md links ───────────────
6636
6637    #[test]
6638    fn working_set_does_not_flag_log_md_body_links() {
6639        // Regression: the working-set incoming-linker scan runs root `log.md`
6640        // through the body wiki-link check, flagging a historical `[[deleted]]`
6641        // mention as WIKI_LINK_BROKEN — an error `--all` never reports and that
6642        // the append-only log can't have "fixed". The root meta files must be
6643        // excluded from the body link check, matching --all.
6644        let fx = Fixture::new();
6645        fx.write("records/contacts/a.md", &valid_contact("A"));
6646        fx.write(
6647            "log.md",
6648            "---\ntype: log\n---\n\n## [2026-06-01 10:00] delete | records/contacts/ghost\n\nRemoved [[records/contacts/ghost]] per cleanup.\n",
6649        );
6650        let issues = validate_working_set(&fx.store(), None).unwrap();
6651        assert!(
6652            !issues
6653                .iter()
6654                .any(|i| i.code == codes::WIKI_LINK_BROKEN
6655                    && i.file == std::path::Path::new("log.md")),
6656            "a broken wiki-link inside append-only log.md must not be flagged: {issues:#?}"
6657        );
6658    }
6659
6660    // ── Regression: DB.md schema field lint ──────────────────────────────────
6661
6662    #[test]
6663    fn schema_duplicate_field_name_is_flagged() {
6664        let mut fx = Fixture::new();
6665        fx.config.schemas.insert(
6666            "contact".into(),
6667            Schema {
6668                fields: vec![
6669                    FieldSpec {
6670                        name: "name".into(),
6671                        required: true,
6672                        ..Default::default()
6673                    },
6674                    FieldSpec {
6675                        name: "name".into(),
6676                        ..Default::default()
6677                    },
6678                ],
6679                ..Default::default()
6680            },
6681        );
6682        let issues = fx.store_all();
6683        assert!(
6684            issues
6685                .iter()
6686                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.key.as_deref() == Some("name")),
6687            "a duplicate schema field name must be flagged: {issues:#?}"
6688        );
6689    }
6690
6691    #[test]
6692    fn schema_unknown_modifier_is_info() {
6693        let mut fx = Fixture::new();
6694        fx.config.schemas.insert(
6695            "contact".into(),
6696            Schema {
6697                fields: vec![FieldSpec {
6698                    name: "name".into(),
6699                    unknown_modifiers: vec!["requierd".into()],
6700                    ..Default::default()
6701                }],
6702                ..Default::default()
6703            },
6704        );
6705        let issues = fx.store_all();
6706        assert!(
6707            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6708                && i.severity == Severity::Info
6709                && i.key.as_deref() == Some("name")),
6710            "an unrecognized schema modifier must surface as Info: {issues:#?}"
6711        );
6712    }
6713
6714    /// A `unique:` key naming a declared-but-optional field silently skips
6715    /// every record missing that field (an incomplete key never collides), so
6716    /// the declaration itself must warn. The dogfood case: `unique: date,
6717    /// amount, vendor` with `vendor` optional — a vendorless re-entered
6718    /// expense sails past the check.
6719    #[test]
6720    fn schema_unique_key_optional_field_is_warning() {
6721        let mut fx = Fixture::new();
6722        fx.config.schemas.insert(
6723            "expense".into(),
6724            Schema {
6725                fields: vec![
6726                    FieldSpec {
6727                        name: "date".into(),
6728                        required: true,
6729                        ..Default::default()
6730                    },
6731                    FieldSpec {
6732                        name: "amount".into(),
6733                        required: true,
6734                        ..Default::default()
6735                    },
6736                    FieldSpec {
6737                        name: "vendor".into(),
6738                        ..Default::default()
6739                    },
6740                ],
6741                unique_keys: vec![vec!["date".into(), "amount".into(), "vendor".into()]],
6742                ..Default::default()
6743            },
6744        );
6745        let issues = fx.store_all();
6746        assert!(
6747            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6748                && i.severity == Severity::Warning
6749                && i.key.as_deref() == Some("vendor")
6750                && i.message.contains("unique")),
6751            "a `unique:` key field not marked required must warn: {issues:#?}"
6752        );
6753        // The required key fields are fine — no warning for them.
6754        assert!(
6755            !issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6756                && matches!(i.key.as_deref(), Some("date") | Some("amount"))),
6757            "required key fields must not warn: {issues:#?}"
6758        );
6759    }
6760
6761    /// The double-frontmatter import artifact: a source file's own `---…---`
6762    /// embedded verbatim as the record body (the `dbmd write --body-file` on an
6763    /// un-stripped note). The record's real frontmatter at the top is valid, so
6764    /// only `FM_IN_BODY` should surface the leftover block.
6765    #[test]
6766    fn body_leading_frontmatter_block_is_warning() {
6767        let fx = Fixture::new();
6768        fx.write(
6769            "records/notes/imported.md",
6770            "---\ntype: note\nsummary: an imported daily note\ncreated: 2026-06-02T09:00:00-07:00\nupdated: 2026-06-02T09:00:00-07:00\n---\n---\ntags: [daily]\n---\n# 2026-06-02\n\nSigned the SOW.\n",
6771        );
6772        let issues = fx.store_all();
6773        assert!(
6774            issues
6775                .iter()
6776                .any(|i| i.code == codes::FM_IN_BODY && i.severity == Severity::Warning),
6777            "a body opening with a second frontmatter block must warn: {issues:#?}"
6778        );
6779    }
6780
6781    /// A `---` thematic-break rule around prose is NOT frontmatter (it parses
6782    /// as a scalar, not a mapping), so it must not false-fire `FM_IN_BODY`.
6783    #[test]
6784    fn body_thematic_break_rules_do_not_warn() {
6785        let fx = Fixture::new();
6786        fx.write(
6787            "records/notes/rules.md",
6788            "---\ntype: note\nsummary: a note using horizontal rules\ncreated: 2026-06-02T09:00:00-07:00\nupdated: 2026-06-02T09:00:00-07:00\n---\n---\nJust some prose between two rules.\n---\nMore text.\n",
6789        );
6790        let issues = fx.store_all();
6791        assert!(
6792            !has(&issues, codes::FM_IN_BODY),
6793            "a `---` thematic rule around prose (not a YAML mapping) must NOT warn: {issues:#?}"
6794        );
6795    }
6796
6797    /// A fenced ```yaml / ```markdown example that shows a frontmatter block is
6798    /// body content, not a second frontmatter block — the fence line, not
6799    /// `---`, opens the body, so `FM_IN_BODY` must stay silent.
6800    #[test]
6801    fn body_fenced_frontmatter_example_does_not_warn() {
6802        let fx = Fixture::new();
6803        fx.write(
6804            "records/notes/doc.md",
6805            "---\ntype: note\nsummary: a note showing an example record\ncreated: 2026-06-02T09:00:00-07:00\nupdated: 2026-06-02T09:00:00-07:00\n---\n```markdown\n---\ntype: contact\nname: Sam\n---\n```\n",
6806        );
6807        let issues = fx.store_all();
6808        assert!(
6809            !has(&issues, codes::FM_IN_BODY),
6810            "a fenced example block (body opens with a code fence, not `---`) must NOT warn: {issues:#?}"
6811        );
6812    }
6813
6814    /// A `unique:` key naming a field the schema never declares can also never
6815    /// be `required` — same silent skip, same warning.
6816    #[test]
6817    fn schema_unique_key_undeclared_field_is_warning() {
6818        let mut fx = Fixture::new();
6819        fx.config.schemas.insert(
6820            "expense".into(),
6821            Schema {
6822                fields: vec![FieldSpec {
6823                    name: "date".into(),
6824                    required: true,
6825                    ..Default::default()
6826                }],
6827                unique_keys: vec![vec!["date".into(), "vendor".into()]],
6828                ..Default::default()
6829            },
6830        );
6831        let issues = fx.store_all();
6832        assert!(
6833            issues.iter().any(|i| i.code == codes::DB_MD_SCHEMA_FIELD
6834                && i.severity == Severity::Warning
6835                && i.key.as_deref() == Some("vendor")
6836                && i.message.contains("not declared")),
6837            "a `unique:` key field absent from the schema must warn: {issues:#?}"
6838        );
6839    }
6840
6841    /// The clean shape — every key field `required` — stays silent.
6842    #[test]
6843    fn schema_unique_key_all_required_is_clean() {
6844        let mut fx = Fixture::new();
6845        fx.config.schemas.insert(
6846            "expense".into(),
6847            Schema {
6848                fields: vec![
6849                    FieldSpec {
6850                        name: "date".into(),
6851                        required: true,
6852                        ..Default::default()
6853                    },
6854                    FieldSpec {
6855                        name: "amount".into(),
6856                        required: true,
6857                        ..Default::default()
6858                    },
6859                ],
6860                unique_keys: vec![vec!["date".into(), "amount".into()]],
6861                ..Default::default()
6862            },
6863        );
6864        let issues = fx.store_all();
6865        assert!(
6866            !issues
6867                .iter()
6868                .any(|i| i.code == codes::DB_MD_SCHEMA_FIELD && i.message.contains("unique")),
6869            "an all-required unique key must not warn: {issues:#?}"
6870        );
6871    }
6872
6873    /// Every code in `mod codes` must appear as a row in SPEC.md § Validation —
6874    /// the SPEC table is the declared "complete vocabulary" an agent branches on,
6875    /// and the module doc-comment promises this code implements "exactly those
6876    /// codes — no more, no fewer." This guards against the code/SPEC drift where a
6877    /// new validation code is added to the engine but never documented.
6878    #[test]
6879    fn every_code_constant_is_documented_in_spec() {
6880        // Parse the canonical constant *values* straight out of this module's
6881        // source, so a future `pub const X: &str = "X";` is covered with no test
6882        // edit. Format is uniform: `    pub const NAME: &str = "VALUE";`.
6883        let this_src = include_str!("validate.rs");
6884        let mut codes_in_module: Vec<String> = Vec::new();
6885        let mut in_codes_mod = false;
6886        for line in this_src.lines() {
6887            let t = line.trim();
6888            if t.starts_with("pub mod codes") {
6889                in_codes_mod = true;
6890                continue;
6891            }
6892            // The `mod codes` block ends at its closing brace at column 0.
6893            if in_codes_mod && line == "}" {
6894                break;
6895            }
6896            if in_codes_mod {
6897                if let Some(rest) = t.strip_prefix("pub const ") {
6898                    // rest = `NAME: &str = "VALUE";`
6899                    let value = rest
6900                        .split_once('=')
6901                        .map(|(_, v)| v.trim())
6902                        .and_then(|v| v.strip_prefix('"'))
6903                        .and_then(|v| v.strip_suffix("\";"))
6904                        .unwrap_or_else(|| panic!("unparseable code constant line: {line:?}"));
6905                    codes_in_module.push(value.to_string());
6906                }
6907            }
6908        }
6909        assert!(
6910            codes_in_module.len() >= 36,
6911            "parsed only {} code constants from `mod codes`; the parser likely \
6912             broke against a source-format change",
6913            codes_in_module.len()
6914        );
6915
6916        // SPEC.md lives at the repo root, two levels up from this crate's manifest.
6917        let spec_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR")).join("../../SPEC.md");
6918        let spec = fs::read_to_string(&spec_path)
6919            .unwrap_or_else(|e| panic!("cannot read {}: {e}", spec_path.display()));
6920
6921        // Each code must appear as a SPEC § Validation table cell: `` | `CODE` | ``.
6922        let missing: Vec<&String> = codes_in_module
6923            .iter()
6924            .filter(|code| !spec.contains(&format!("| `{code}` |")))
6925            .collect();
6926        assert!(
6927            missing.is_empty(),
6928            "validation codes emitted by the engine but absent from SPEC.md \
6929             § Validation (the declared complete vocabulary): {missing:?}"
6930        );
6931    }
6932
6933    // ── loose files (directly at a layer root, no type-folder) ───────────────
6934
6935    const LOOSE_ALICE: &str = "---\ntype: contact\nid: alice\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Alice\n---\nbody\n";
6936    const LOOSE_BOB: &str = "---\ntype: contact\nid: bob\ncreated: 2026-06-01T08:00:00-07:00\nupdated: 2026-06-01T08:00:00-07:00\nsummary: Bob loose\n---\nbody\n";
6937
6938    #[test]
6939    fn loose_file_catalogued_in_layer_jsonl_validates_clean() {
6940        let fx = Fixture::new();
6941        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6942        fx.write("records/bob.md", LOOSE_BOB); // loose, directly under records/
6943        fx.rebuild_indexes();
6944        let issues = fx.store_all();
6945        assert!(
6946            issues.is_empty(),
6947            "a rebuilt store with a catalogued loose file must validate clean, got: {issues:?}"
6948        );
6949    }
6950
6951    #[test]
6952    fn loose_file_with_missing_layer_jsonl_is_index_jsonl_missing() {
6953        let fx = Fixture::new();
6954        fx.write("records/contacts/alice.md", LOOSE_ALICE);
6955        fx.write("records/bob.md", LOOSE_BOB);
6956        fx.rebuild_indexes();
6957        // Simulate the layer sidecar going missing (a hand-deletion / bad sync).
6958        fs::remove_file(fx.dir.path().join("records/index.jsonl")).unwrap();
6959        let issues = fx.store_all();
6960        assert!(
6961            has(&issues, codes::INDEX_JSONL_MISSING),
6962            "a loose file with no layer index.jsonl must raise INDEX_JSONL_MISSING, got: {issues:?}"
6963        );
6964    }
6965}